ManojAlexender's picture
Upload folder using huggingface_hub
93073c5 verified
{
"best_metric": 0.2031262218952179,
"best_model_checkpoint": "final_roberta_with_new_400k_plus_37k/checkpoint-5300",
"epoch": 0.37867962274935696,
"eval_steps": 100,
"global_step": 5300,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"grad_norm": 259361.75,
"learning_rate": 1.0000000000000002e-06,
"loss": 1.1264,
"step": 10
},
{
"epoch": 0.0,
"grad_norm": 272740.9375,
"learning_rate": 2.0000000000000003e-06,
"loss": 1.1058,
"step": 20
},
{
"epoch": 0.0,
"grad_norm": 244078.703125,
"learning_rate": 3e-06,
"loss": 1.0828,
"step": 30
},
{
"epoch": 0.0,
"grad_norm": 234958.875,
"learning_rate": 4.000000000000001e-06,
"loss": 1.0388,
"step": 40
},
{
"epoch": 0.0,
"grad_norm": 270513.0625,
"learning_rate": 5e-06,
"loss": 0.985,
"step": 50
},
{
"epoch": 0.0,
"grad_norm": 186214.65625,
"learning_rate": 6e-06,
"loss": 0.8671,
"step": 60
},
{
"epoch": 0.01,
"grad_norm": 174922.8125,
"learning_rate": 7.000000000000001e-06,
"loss": 0.7386,
"step": 70
},
{
"epoch": 0.01,
"grad_norm": 191114.640625,
"learning_rate": 8.000000000000001e-06,
"loss": 0.6231,
"step": 80
},
{
"epoch": 0.01,
"grad_norm": 195687.9375,
"learning_rate": 9e-06,
"loss": 0.4322,
"step": 90
},
{
"epoch": 0.01,
"grad_norm": 333151.59375,
"learning_rate": 1e-05,
"loss": 0.3174,
"step": 100
},
{
"epoch": 0.01,
"eval_accuracy": 0.8927859705061778,
"eval_f1": 0.8918917476204463,
"eval_loss": 0.32540708780288696,
"eval_precision": 0.8963585084239793,
"eval_recall": 0.8927859705061778,
"eval_runtime": 12.4623,
"eval_samples_per_second": 201.328,
"eval_steps_per_second": 3.21,
"step": 100
},
{
"epoch": 0.01,
"grad_norm": 581737.1875,
"learning_rate": 1.1000000000000001e-05,
"loss": 0.3052,
"step": 110
},
{
"epoch": 0.01,
"grad_norm": 789745.375,
"learning_rate": 1.2e-05,
"loss": 0.3366,
"step": 120
},
{
"epoch": 0.01,
"grad_norm": 282232.71875,
"learning_rate": 1.3000000000000001e-05,
"loss": 0.3257,
"step": 130
},
{
"epoch": 0.01,
"grad_norm": 337977.96875,
"learning_rate": 1.4000000000000001e-05,
"loss": 0.2982,
"step": 140
},
{
"epoch": 0.01,
"grad_norm": 622948.125,
"learning_rate": 1.5e-05,
"loss": 0.3382,
"step": 150
},
{
"epoch": 0.01,
"grad_norm": 438774.15625,
"learning_rate": 1.6000000000000003e-05,
"loss": 0.2975,
"step": 160
},
{
"epoch": 0.01,
"grad_norm": 715256.4375,
"learning_rate": 1.7000000000000003e-05,
"loss": 0.3695,
"step": 170
},
{
"epoch": 0.01,
"grad_norm": 294961.75,
"learning_rate": 1.8e-05,
"loss": 0.3001,
"step": 180
},
{
"epoch": 0.01,
"grad_norm": 526643.5,
"learning_rate": 1.9e-05,
"loss": 0.2853,
"step": 190
},
{
"epoch": 0.01,
"grad_norm": 432135.15625,
"learning_rate": 2e-05,
"loss": 0.3285,
"step": 200
},
{
"epoch": 0.01,
"eval_accuracy": 0.8955759266640095,
"eval_f1": 0.8950953630781538,
"eval_loss": 0.2577860653400421,
"eval_precision": 0.8967688785864537,
"eval_recall": 0.8955759266640095,
"eval_runtime": 12.4549,
"eval_samples_per_second": 201.447,
"eval_steps_per_second": 3.212,
"step": 200
},
{
"epoch": 0.02,
"grad_norm": 344853.5625,
"learning_rate": 2.1e-05,
"loss": 0.3057,
"step": 210
},
{
"epoch": 0.02,
"grad_norm": 325491.0,
"learning_rate": 2.2000000000000003e-05,
"loss": 0.2563,
"step": 220
},
{
"epoch": 0.02,
"grad_norm": 788922.3125,
"learning_rate": 2.3000000000000003e-05,
"loss": 0.3054,
"step": 230
},
{
"epoch": 0.02,
"grad_norm": 589439.25,
"learning_rate": 2.4e-05,
"loss": 0.3409,
"step": 240
},
{
"epoch": 0.02,
"grad_norm": 213858.8125,
"learning_rate": 2.5e-05,
"loss": 0.2863,
"step": 250
},
{
"epoch": 0.02,
"grad_norm": 457191.5,
"learning_rate": 2.6000000000000002e-05,
"loss": 0.266,
"step": 260
},
{
"epoch": 0.02,
"grad_norm": 456034.78125,
"learning_rate": 2.7000000000000002e-05,
"loss": 0.2825,
"step": 270
},
{
"epoch": 0.02,
"grad_norm": 460380.375,
"learning_rate": 2.8000000000000003e-05,
"loss": 0.2809,
"step": 280
},
{
"epoch": 0.02,
"grad_norm": 318752.53125,
"learning_rate": 2.9e-05,
"loss": 0.2558,
"step": 290
},
{
"epoch": 0.02,
"grad_norm": 487526.53125,
"learning_rate": 3e-05,
"loss": 0.247,
"step": 300
},
{
"epoch": 0.02,
"eval_accuracy": 0.8620964527700279,
"eval_f1": 0.8588044269388889,
"eval_loss": 0.39129751920700073,
"eval_precision": 0.8782950809046319,
"eval_recall": 0.8620964527700279,
"eval_runtime": 12.4793,
"eval_samples_per_second": 201.052,
"eval_steps_per_second": 3.205,
"step": 300
},
{
"epoch": 0.02,
"grad_norm": 810950.4375,
"learning_rate": 3.1e-05,
"loss": 0.3178,
"step": 310
},
{
"epoch": 0.02,
"grad_norm": 197358.0625,
"learning_rate": 3.2000000000000005e-05,
"loss": 0.2416,
"step": 320
},
{
"epoch": 0.02,
"grad_norm": 660009.25,
"learning_rate": 3.3e-05,
"loss": 0.1957,
"step": 330
},
{
"epoch": 0.02,
"grad_norm": 782952.625,
"learning_rate": 3.4000000000000007e-05,
"loss": 0.3032,
"step": 340
},
{
"epoch": 0.03,
"grad_norm": 910589.1875,
"learning_rate": 3.5e-05,
"loss": 0.2858,
"step": 350
},
{
"epoch": 0.03,
"grad_norm": 217997.765625,
"learning_rate": 3.6e-05,
"loss": 0.2892,
"step": 360
},
{
"epoch": 0.03,
"grad_norm": 353057.21875,
"learning_rate": 3.7e-05,
"loss": 0.2023,
"step": 370
},
{
"epoch": 0.03,
"grad_norm": 473318.84375,
"learning_rate": 3.8e-05,
"loss": 0.2521,
"step": 380
},
{
"epoch": 0.03,
"grad_norm": 176609.578125,
"learning_rate": 3.9000000000000006e-05,
"loss": 0.2648,
"step": 390
},
{
"epoch": 0.03,
"grad_norm": 272719.65625,
"learning_rate": 4e-05,
"loss": 0.2853,
"step": 400
},
{
"epoch": 0.03,
"eval_accuracy": 0.8736548425667596,
"eval_f1": 0.8710864907473246,
"eval_loss": 0.3394368290901184,
"eval_precision": 0.8871063648493269,
"eval_recall": 0.8736548425667596,
"eval_runtime": 12.4669,
"eval_samples_per_second": 201.253,
"eval_steps_per_second": 3.208,
"step": 400
},
{
"epoch": 0.03,
"grad_norm": 398616.40625,
"learning_rate": 4.1e-05,
"loss": 0.2679,
"step": 410
},
{
"epoch": 0.03,
"grad_norm": 185647.96875,
"learning_rate": 4.2e-05,
"loss": 0.2532,
"step": 420
},
{
"epoch": 0.03,
"grad_norm": 436418.59375,
"learning_rate": 4.3e-05,
"loss": 0.2724,
"step": 430
},
{
"epoch": 0.03,
"grad_norm": 299492.25,
"learning_rate": 4.4000000000000006e-05,
"loss": 0.2548,
"step": 440
},
{
"epoch": 0.03,
"grad_norm": 482227.65625,
"learning_rate": 4.5e-05,
"loss": 0.2769,
"step": 450
},
{
"epoch": 0.03,
"grad_norm": 246368.28125,
"learning_rate": 4.600000000000001e-05,
"loss": 0.2869,
"step": 460
},
{
"epoch": 0.03,
"grad_norm": 391130.0625,
"learning_rate": 4.7e-05,
"loss": 0.3358,
"step": 470
},
{
"epoch": 0.03,
"grad_norm": 284843.15625,
"learning_rate": 4.8e-05,
"loss": 0.2601,
"step": 480
},
{
"epoch": 0.04,
"grad_norm": 512920.8125,
"learning_rate": 4.9e-05,
"loss": 0.3797,
"step": 490
},
{
"epoch": 0.04,
"grad_norm": 320267.75,
"learning_rate": 5e-05,
"loss": 0.3031,
"step": 500
},
{
"epoch": 0.04,
"eval_accuracy": 0.8537265842965325,
"eval_f1": 0.8491421277003748,
"eval_loss": 0.3923502266407013,
"eval_precision": 0.8770289219330052,
"eval_recall": 0.8537265842965325,
"eval_runtime": 12.4502,
"eval_samples_per_second": 201.524,
"eval_steps_per_second": 3.213,
"step": 500
},
{
"epoch": 0.04,
"grad_norm": 944106.25,
"learning_rate": 4.9987948322406484e-05,
"loss": 0.3445,
"step": 510
},
{
"epoch": 0.04,
"grad_norm": 650689.8125,
"learning_rate": 4.997589664481296e-05,
"loss": 0.2683,
"step": 520
},
{
"epoch": 0.04,
"grad_norm": 404230.5,
"learning_rate": 4.996384496721944e-05,
"loss": 0.2732,
"step": 530
},
{
"epoch": 0.04,
"grad_norm": 253872.78125,
"learning_rate": 4.995179328962592e-05,
"loss": 0.2637,
"step": 540
},
{
"epoch": 0.04,
"grad_norm": 173572.625,
"learning_rate": 4.9939741612032395e-05,
"loss": 0.2878,
"step": 550
},
{
"epoch": 0.04,
"grad_norm": 234455.234375,
"learning_rate": 4.9927689934438876e-05,
"loss": 0.2105,
"step": 560
},
{
"epoch": 0.04,
"grad_norm": 238566.0,
"learning_rate": 4.991563825684535e-05,
"loss": 0.3066,
"step": 570
},
{
"epoch": 0.04,
"grad_norm": 476733.5,
"learning_rate": 4.990358657925183e-05,
"loss": 0.2801,
"step": 580
},
{
"epoch": 0.04,
"grad_norm": 279763.1875,
"learning_rate": 4.9891534901658313e-05,
"loss": 0.241,
"step": 590
},
{
"epoch": 0.04,
"grad_norm": 332317.40625,
"learning_rate": 4.9879483224064795e-05,
"loss": 0.2747,
"step": 600
},
{
"epoch": 0.04,
"eval_accuracy": 0.9079314467915505,
"eval_f1": 0.9079421645959964,
"eval_loss": 0.2531713545322418,
"eval_precision": 0.9079554973313584,
"eval_recall": 0.9079314467915505,
"eval_runtime": 12.4925,
"eval_samples_per_second": 200.84,
"eval_steps_per_second": 3.202,
"step": 600
},
{
"epoch": 0.04,
"grad_norm": 156262.09375,
"learning_rate": 4.986743154647127e-05,
"loss": 0.2498,
"step": 610
},
{
"epoch": 0.04,
"grad_norm": 731199.375,
"learning_rate": 4.985537986887775e-05,
"loss": 0.2715,
"step": 620
},
{
"epoch": 0.05,
"grad_norm": 360661.875,
"learning_rate": 4.9843328191284225e-05,
"loss": 0.2522,
"step": 630
},
{
"epoch": 0.05,
"grad_norm": 338785.125,
"learning_rate": 4.9831276513690706e-05,
"loss": 0.2912,
"step": 640
},
{
"epoch": 0.05,
"grad_norm": 376656.71875,
"learning_rate": 4.981922483609719e-05,
"loss": 0.2842,
"step": 650
},
{
"epoch": 0.05,
"grad_norm": 173638.25,
"learning_rate": 4.980717315850367e-05,
"loss": 0.2145,
"step": 660
},
{
"epoch": 0.05,
"grad_norm": 799034.5,
"learning_rate": 4.979512148091014e-05,
"loss": 0.2167,
"step": 670
},
{
"epoch": 0.05,
"grad_norm": 274675.84375,
"learning_rate": 4.9783069803316624e-05,
"loss": 0.2779,
"step": 680
},
{
"epoch": 0.05,
"grad_norm": 194338.96875,
"learning_rate": 4.97710181257231e-05,
"loss": 0.269,
"step": 690
},
{
"epoch": 0.05,
"grad_norm": 284438.125,
"learning_rate": 4.975896644812959e-05,
"loss": 0.2797,
"step": 700
},
{
"epoch": 0.05,
"eval_accuracy": 0.863690713431646,
"eval_f1": 0.8606919701702621,
"eval_loss": 0.36067071557044983,
"eval_precision": 0.8781306500206725,
"eval_recall": 0.863690713431646,
"eval_runtime": 12.4463,
"eval_samples_per_second": 201.586,
"eval_steps_per_second": 3.214,
"step": 700
},
{
"epoch": 0.05,
"grad_norm": 737474.625,
"learning_rate": 4.974691477053606e-05,
"loss": 0.3834,
"step": 710
},
{
"epoch": 0.05,
"grad_norm": 414523.40625,
"learning_rate": 4.973486309294254e-05,
"loss": 0.3192,
"step": 720
},
{
"epoch": 0.05,
"grad_norm": 240870.953125,
"learning_rate": 4.972281141534902e-05,
"loss": 0.2479,
"step": 730
},
{
"epoch": 0.05,
"grad_norm": 280922.09375,
"learning_rate": 4.97107597377555e-05,
"loss": 0.2549,
"step": 740
},
{
"epoch": 0.05,
"grad_norm": 258415.796875,
"learning_rate": 4.969870806016197e-05,
"loss": 0.294,
"step": 750
},
{
"epoch": 0.05,
"grad_norm": 409388.15625,
"learning_rate": 4.968665638256846e-05,
"loss": 0.2806,
"step": 760
},
{
"epoch": 0.06,
"grad_norm": 399257.46875,
"learning_rate": 4.9674604704974935e-05,
"loss": 0.2841,
"step": 770
},
{
"epoch": 0.06,
"grad_norm": 363038.375,
"learning_rate": 4.966255302738142e-05,
"loss": 0.3085,
"step": 780
},
{
"epoch": 0.06,
"grad_norm": 351745.78125,
"learning_rate": 4.965050134978789e-05,
"loss": 0.2652,
"step": 790
},
{
"epoch": 0.06,
"grad_norm": 297461.6875,
"learning_rate": 4.963844967219437e-05,
"loss": 0.2211,
"step": 800
},
{
"epoch": 0.06,
"eval_accuracy": 0.8880031885213232,
"eval_f1": 0.8871620549900273,
"eval_loss": 0.2910105884075165,
"eval_precision": 0.8908776073001764,
"eval_recall": 0.8880031885213232,
"eval_runtime": 12.4814,
"eval_samples_per_second": 201.018,
"eval_steps_per_second": 3.205,
"step": 800
},
{
"epoch": 0.06,
"grad_norm": 287442.3125,
"learning_rate": 4.962639799460085e-05,
"loss": 0.2431,
"step": 810
},
{
"epoch": 0.06,
"grad_norm": 277648.125,
"learning_rate": 4.9614346317007335e-05,
"loss": 0.3058,
"step": 820
},
{
"epoch": 0.06,
"grad_norm": 309109.34375,
"learning_rate": 4.960229463941381e-05,
"loss": 0.1934,
"step": 830
},
{
"epoch": 0.06,
"grad_norm": 487191.03125,
"learning_rate": 4.959024296182029e-05,
"loss": 0.276,
"step": 840
},
{
"epoch": 0.06,
"grad_norm": 326520.6875,
"learning_rate": 4.9578191284226765e-05,
"loss": 0.2385,
"step": 850
},
{
"epoch": 0.06,
"grad_norm": 396849.90625,
"learning_rate": 4.9566139606633246e-05,
"loss": 0.2689,
"step": 860
},
{
"epoch": 0.06,
"grad_norm": 405153.84375,
"learning_rate": 4.955408792903972e-05,
"loss": 0.2374,
"step": 870
},
{
"epoch": 0.06,
"grad_norm": 228344.0,
"learning_rate": 4.95420362514462e-05,
"loss": 0.2317,
"step": 880
},
{
"epoch": 0.06,
"grad_norm": 232430.0625,
"learning_rate": 4.9529984573852683e-05,
"loss": 0.2397,
"step": 890
},
{
"epoch": 0.06,
"grad_norm": 343631.5,
"learning_rate": 4.9517932896259165e-05,
"loss": 0.2769,
"step": 900
},
{
"epoch": 0.06,
"eval_accuracy": 0.8824232762056596,
"eval_f1": 0.8810340691374341,
"eval_loss": 0.2833768129348755,
"eval_precision": 0.8884468905314342,
"eval_recall": 0.8824232762056596,
"eval_runtime": 12.4726,
"eval_samples_per_second": 201.162,
"eval_steps_per_second": 3.207,
"step": 900
},
{
"epoch": 0.07,
"grad_norm": 313915.84375,
"learning_rate": 4.950588121866564e-05,
"loss": 0.2364,
"step": 910
},
{
"epoch": 0.07,
"grad_norm": 216686.984375,
"learning_rate": 4.949382954107212e-05,
"loss": 0.1952,
"step": 920
},
{
"epoch": 0.07,
"grad_norm": 254563.671875,
"learning_rate": 4.94817778634786e-05,
"loss": 0.3364,
"step": 930
},
{
"epoch": 0.07,
"grad_norm": 446411.09375,
"learning_rate": 4.9469726185885076e-05,
"loss": 0.208,
"step": 940
},
{
"epoch": 0.07,
"grad_norm": 236561.890625,
"learning_rate": 4.945767450829156e-05,
"loss": 0.2634,
"step": 950
},
{
"epoch": 0.07,
"grad_norm": 255751.90625,
"learning_rate": 4.944562283069803e-05,
"loss": 0.2675,
"step": 960
},
{
"epoch": 0.07,
"grad_norm": 314748.65625,
"learning_rate": 4.943357115310451e-05,
"loss": 0.2196,
"step": 970
},
{
"epoch": 0.07,
"grad_norm": 439258.875,
"learning_rate": 4.9421519475510994e-05,
"loss": 0.1836,
"step": 980
},
{
"epoch": 0.07,
"grad_norm": 437849.875,
"learning_rate": 4.9409467797917476e-05,
"loss": 0.2723,
"step": 990
},
{
"epoch": 0.07,
"grad_norm": 232659.609375,
"learning_rate": 4.939741612032395e-05,
"loss": 0.2412,
"step": 1000
},
{
"epoch": 0.07,
"eval_accuracy": 0.9063371861299322,
"eval_f1": 0.9060552663380613,
"eval_loss": 0.23936249315738678,
"eval_precision": 0.9068644806871264,
"eval_recall": 0.9063371861299322,
"eval_runtime": 12.4655,
"eval_samples_per_second": 201.276,
"eval_steps_per_second": 3.209,
"step": 1000
},
{
"epoch": 0.07,
"grad_norm": 230467.96875,
"learning_rate": 4.938536444273043e-05,
"loss": 0.2282,
"step": 1010
},
{
"epoch": 0.07,
"grad_norm": 471223.4375,
"learning_rate": 4.9373312765136906e-05,
"loss": 0.2779,
"step": 1020
},
{
"epoch": 0.07,
"grad_norm": 358035.625,
"learning_rate": 4.936126108754339e-05,
"loss": 0.3137,
"step": 1030
},
{
"epoch": 0.07,
"grad_norm": 254541.125,
"learning_rate": 4.934920940994986e-05,
"loss": 0.2323,
"step": 1040
},
{
"epoch": 0.08,
"grad_norm": 736008.625,
"learning_rate": 4.933715773235635e-05,
"loss": 0.281,
"step": 1050
},
{
"epoch": 0.08,
"grad_norm": 191029.328125,
"learning_rate": 4.9325106054762824e-05,
"loss": 0.2438,
"step": 1060
},
{
"epoch": 0.08,
"grad_norm": 392851.15625,
"learning_rate": 4.9313054377169305e-05,
"loss": 0.3204,
"step": 1070
},
{
"epoch": 0.08,
"grad_norm": 252810.3125,
"learning_rate": 4.930100269957578e-05,
"loss": 0.2651,
"step": 1080
},
{
"epoch": 0.08,
"grad_norm": 406698.71875,
"learning_rate": 4.928895102198226e-05,
"loss": 0.279,
"step": 1090
},
{
"epoch": 0.08,
"grad_norm": 383913.09375,
"learning_rate": 4.927689934438874e-05,
"loss": 0.3386,
"step": 1100
},
{
"epoch": 0.08,
"eval_accuracy": 0.9015544041450777,
"eval_f1": 0.9012812455239371,
"eval_loss": 0.2400408387184143,
"eval_precision": 0.9019548153454997,
"eval_recall": 0.9015544041450777,
"eval_runtime": 12.4676,
"eval_samples_per_second": 201.242,
"eval_steps_per_second": 3.208,
"step": 1100
},
{
"epoch": 0.08,
"grad_norm": 216896.21875,
"learning_rate": 4.9264847666795224e-05,
"loss": 0.2311,
"step": 1110
},
{
"epoch": 0.08,
"grad_norm": 290117.3125,
"learning_rate": 4.92527959892017e-05,
"loss": 0.277,
"step": 1120
},
{
"epoch": 0.08,
"grad_norm": 219654.265625,
"learning_rate": 4.924074431160818e-05,
"loss": 0.2339,
"step": 1130
},
{
"epoch": 0.08,
"grad_norm": 342770.09375,
"learning_rate": 4.9228692634014654e-05,
"loss": 0.3051,
"step": 1140
},
{
"epoch": 0.08,
"grad_norm": 246765.90625,
"learning_rate": 4.9216640956421135e-05,
"loss": 0.2695,
"step": 1150
},
{
"epoch": 0.08,
"grad_norm": 208931.578125,
"learning_rate": 4.9204589278827617e-05,
"loss": 0.2747,
"step": 1160
},
{
"epoch": 0.08,
"grad_norm": 342173.96875,
"learning_rate": 4.91925376012341e-05,
"loss": 0.2172,
"step": 1170
},
{
"epoch": 0.08,
"grad_norm": 255617.609375,
"learning_rate": 4.918048592364057e-05,
"loss": 0.2835,
"step": 1180
},
{
"epoch": 0.09,
"grad_norm": 149436.703125,
"learning_rate": 4.9168434246047054e-05,
"loss": 0.2432,
"step": 1190
},
{
"epoch": 0.09,
"grad_norm": 225822.0625,
"learning_rate": 4.915638256845353e-05,
"loss": 0.2743,
"step": 1200
},
{
"epoch": 0.09,
"eval_accuracy": 0.904742925468314,
"eval_f1": 0.9047753527069451,
"eval_loss": 0.24210092425346375,
"eval_precision": 0.9048312118166199,
"eval_recall": 0.904742925468314,
"eval_runtime": 12.4909,
"eval_samples_per_second": 200.866,
"eval_steps_per_second": 3.202,
"step": 1200
},
{
"epoch": 0.09,
"grad_norm": 156733.046875,
"learning_rate": 4.914433089086001e-05,
"loss": 0.2321,
"step": 1210
},
{
"epoch": 0.09,
"grad_norm": 139717.796875,
"learning_rate": 4.913227921326649e-05,
"loss": 0.1887,
"step": 1220
},
{
"epoch": 0.09,
"grad_norm": 534506.8125,
"learning_rate": 4.912022753567297e-05,
"loss": 0.2929,
"step": 1230
},
{
"epoch": 0.09,
"grad_norm": 190213.25,
"learning_rate": 4.9108175858079446e-05,
"loss": 0.2494,
"step": 1240
},
{
"epoch": 0.09,
"grad_norm": 462159.28125,
"learning_rate": 4.909612418048593e-05,
"loss": 0.3134,
"step": 1250
},
{
"epoch": 0.09,
"grad_norm": 290829.84375,
"learning_rate": 4.90840725028924e-05,
"loss": 0.2327,
"step": 1260
},
{
"epoch": 0.09,
"grad_norm": 115473.984375,
"learning_rate": 4.907202082529889e-05,
"loss": 0.2199,
"step": 1270
},
{
"epoch": 0.09,
"grad_norm": 172480.3125,
"learning_rate": 4.9059969147705365e-05,
"loss": 0.2344,
"step": 1280
},
{
"epoch": 0.09,
"grad_norm": 271795.9375,
"learning_rate": 4.9047917470111846e-05,
"loss": 0.2757,
"step": 1290
},
{
"epoch": 0.09,
"grad_norm": 134259.4375,
"learning_rate": 4.903586579251832e-05,
"loss": 0.2682,
"step": 1300
},
{
"epoch": 0.09,
"eval_accuracy": 0.8768433638899961,
"eval_f1": 0.875214624309524,
"eval_loss": 0.2833414375782013,
"eval_precision": 0.8838506474460517,
"eval_recall": 0.8768433638899961,
"eval_runtime": 12.4785,
"eval_samples_per_second": 201.066,
"eval_steps_per_second": 3.206,
"step": 1300
},
{
"epoch": 0.09,
"grad_norm": 162955.359375,
"learning_rate": 4.90238141149248e-05,
"loss": 0.2077,
"step": 1310
},
{
"epoch": 0.09,
"grad_norm": 345381.34375,
"learning_rate": 4.9011762437331276e-05,
"loss": 0.2563,
"step": 1320
},
{
"epoch": 0.1,
"grad_norm": 353178.6875,
"learning_rate": 4.899971075973776e-05,
"loss": 0.2536,
"step": 1330
},
{
"epoch": 0.1,
"grad_norm": 341959.53125,
"learning_rate": 4.898765908214424e-05,
"loss": 0.2174,
"step": 1340
},
{
"epoch": 0.1,
"grad_norm": 366022.53125,
"learning_rate": 4.897560740455071e-05,
"loss": 0.3057,
"step": 1350
},
{
"epoch": 0.1,
"grad_norm": 393534.71875,
"learning_rate": 4.8963555726957194e-05,
"loss": 0.2376,
"step": 1360
},
{
"epoch": 0.1,
"grad_norm": 274654.625,
"learning_rate": 4.8951504049363676e-05,
"loss": 0.2659,
"step": 1370
},
{
"epoch": 0.1,
"grad_norm": 138208.84375,
"learning_rate": 4.893945237177015e-05,
"loss": 0.1862,
"step": 1380
},
{
"epoch": 0.1,
"grad_norm": 467137.15625,
"learning_rate": 4.892740069417663e-05,
"loss": 0.2283,
"step": 1390
},
{
"epoch": 0.1,
"grad_norm": 317242.65625,
"learning_rate": 4.891534901658311e-05,
"loss": 0.3219,
"step": 1400
},
{
"epoch": 0.1,
"eval_accuracy": 0.9071343164607414,
"eval_f1": 0.9070423350315097,
"eval_loss": 0.23825575411319733,
"eval_precision": 0.9071045116108353,
"eval_recall": 0.9071343164607414,
"eval_runtime": 12.4977,
"eval_samples_per_second": 200.757,
"eval_steps_per_second": 3.201,
"step": 1400
},
{
"epoch": 0.1,
"grad_norm": 181615.84375,
"learning_rate": 4.890329733898959e-05,
"loss": 0.2165,
"step": 1410
},
{
"epoch": 0.1,
"grad_norm": 161155.140625,
"learning_rate": 4.889124566139607e-05,
"loss": 0.2607,
"step": 1420
},
{
"epoch": 0.1,
"grad_norm": 398813.90625,
"learning_rate": 4.887919398380254e-05,
"loss": 0.2696,
"step": 1430
},
{
"epoch": 0.1,
"grad_norm": 315529.625,
"learning_rate": 4.8867142306209024e-05,
"loss": 0.2688,
"step": 1440
},
{
"epoch": 0.1,
"grad_norm": 518022.09375,
"learning_rate": 4.8855090628615505e-05,
"loss": 0.3062,
"step": 1450
},
{
"epoch": 0.1,
"grad_norm": 271555.0,
"learning_rate": 4.8843038951021987e-05,
"loss": 0.2141,
"step": 1460
},
{
"epoch": 0.11,
"grad_norm": 287849.21875,
"learning_rate": 4.883098727342846e-05,
"loss": 0.2469,
"step": 1470
},
{
"epoch": 0.11,
"grad_norm": 269480.84375,
"learning_rate": 4.881893559583494e-05,
"loss": 0.2037,
"step": 1480
},
{
"epoch": 0.11,
"grad_norm": 225872.734375,
"learning_rate": 4.880688391824142e-05,
"loss": 0.2867,
"step": 1490
},
{
"epoch": 0.11,
"grad_norm": 291168.03125,
"learning_rate": 4.8794832240647905e-05,
"loss": 0.2211,
"step": 1500
},
{
"epoch": 0.11,
"eval_accuracy": 0.904742925468314,
"eval_f1": 0.9047391668676202,
"eval_loss": 0.24535924196243286,
"eval_precision": 0.9047356979299059,
"eval_recall": 0.904742925468314,
"eval_runtime": 12.5209,
"eval_samples_per_second": 200.385,
"eval_steps_per_second": 3.195,
"step": 1500
},
{
"epoch": 0.11,
"grad_norm": 305500.625,
"learning_rate": 4.878278056305438e-05,
"loss": 0.2604,
"step": 1510
},
{
"epoch": 0.11,
"grad_norm": 322610.6875,
"learning_rate": 4.877072888546086e-05,
"loss": 0.2416,
"step": 1520
},
{
"epoch": 0.11,
"grad_norm": 244146.640625,
"learning_rate": 4.8758677207867335e-05,
"loss": 0.2461,
"step": 1530
},
{
"epoch": 0.11,
"grad_norm": 319704.53125,
"learning_rate": 4.8746625530273816e-05,
"loss": 0.2368,
"step": 1540
},
{
"epoch": 0.11,
"grad_norm": 292252.0,
"learning_rate": 4.873457385268029e-05,
"loss": 0.2351,
"step": 1550
},
{
"epoch": 0.11,
"grad_norm": 134507.875,
"learning_rate": 4.872252217508678e-05,
"loss": 0.2423,
"step": 1560
},
{
"epoch": 0.11,
"grad_norm": 228724.5625,
"learning_rate": 4.871047049749325e-05,
"loss": 0.1909,
"step": 1570
},
{
"epoch": 0.11,
"grad_norm": 315720.09375,
"learning_rate": 4.8698418819899735e-05,
"loss": 0.2611,
"step": 1580
},
{
"epoch": 0.11,
"grad_norm": 232667.03125,
"learning_rate": 4.868636714230621e-05,
"loss": 0.1903,
"step": 1590
},
{
"epoch": 0.11,
"grad_norm": 263891.90625,
"learning_rate": 4.867431546471269e-05,
"loss": 0.2606,
"step": 1600
},
{
"epoch": 0.11,
"eval_accuracy": 0.9222797927461139,
"eval_f1": 0.9220731260773486,
"eval_loss": 0.20830760896205902,
"eval_precision": 0.9227685265016082,
"eval_recall": 0.9222797927461139,
"eval_runtime": 16.2543,
"eval_samples_per_second": 154.359,
"eval_steps_per_second": 2.461,
"step": 1600
},
{
"epoch": 0.12,
"grad_norm": 221386.890625,
"learning_rate": 4.8662263787119165e-05,
"loss": 0.2349,
"step": 1610
},
{
"epoch": 0.12,
"grad_norm": 363135.0,
"learning_rate": 4.865021210952565e-05,
"loss": 0.25,
"step": 1620
},
{
"epoch": 0.12,
"grad_norm": 264439.53125,
"learning_rate": 4.863816043193213e-05,
"loss": 0.1789,
"step": 1630
},
{
"epoch": 0.12,
"grad_norm": 325613.53125,
"learning_rate": 4.862610875433861e-05,
"loss": 0.2143,
"step": 1640
},
{
"epoch": 0.12,
"grad_norm": 220411.890625,
"learning_rate": 4.861405707674508e-05,
"loss": 0.2629,
"step": 1650
},
{
"epoch": 0.12,
"grad_norm": 259412.40625,
"learning_rate": 4.8602005399151564e-05,
"loss": 0.2525,
"step": 1660
},
{
"epoch": 0.12,
"grad_norm": 614391.375,
"learning_rate": 4.8589953721558046e-05,
"loss": 0.2439,
"step": 1670
},
{
"epoch": 0.12,
"grad_norm": 275747.875,
"learning_rate": 4.857790204396453e-05,
"loss": 0.2651,
"step": 1680
},
{
"epoch": 0.12,
"grad_norm": 212869.203125,
"learning_rate": 4.8565850366371e-05,
"loss": 0.1677,
"step": 1690
},
{
"epoch": 0.12,
"grad_norm": 259202.96875,
"learning_rate": 4.855379868877748e-05,
"loss": 0.1966,
"step": 1700
},
{
"epoch": 0.12,
"eval_accuracy": 0.9003587086488641,
"eval_f1": 0.9000880085084791,
"eval_loss": 0.2688085734844208,
"eval_precision": 0.9007296682986318,
"eval_recall": 0.9003587086488641,
"eval_runtime": 12.4695,
"eval_samples_per_second": 201.21,
"eval_steps_per_second": 3.208,
"step": 1700
},
{
"epoch": 0.12,
"grad_norm": 164794.625,
"learning_rate": 4.854174701118396e-05,
"loss": 0.3116,
"step": 1710
},
{
"epoch": 0.12,
"grad_norm": 1090182.375,
"learning_rate": 4.852969533359044e-05,
"loss": 0.2294,
"step": 1720
},
{
"epoch": 0.12,
"grad_norm": 393550.84375,
"learning_rate": 4.851764365599692e-05,
"loss": 0.2659,
"step": 1730
},
{
"epoch": 0.12,
"grad_norm": 227773.296875,
"learning_rate": 4.8505591978403394e-05,
"loss": 0.2151,
"step": 1740
},
{
"epoch": 0.13,
"grad_norm": 259306.171875,
"learning_rate": 4.8493540300809875e-05,
"loss": 0.2661,
"step": 1750
},
{
"epoch": 0.13,
"grad_norm": 237662.640625,
"learning_rate": 4.8481488623216357e-05,
"loss": 0.2268,
"step": 1760
},
{
"epoch": 0.13,
"grad_norm": 385510.71875,
"learning_rate": 4.846943694562283e-05,
"loss": 0.2745,
"step": 1770
},
{
"epoch": 0.13,
"grad_norm": 163655.578125,
"learning_rate": 4.845738526802931e-05,
"loss": 0.3146,
"step": 1780
},
{
"epoch": 0.13,
"grad_norm": 243399.0,
"learning_rate": 4.8445333590435794e-05,
"loss": 0.2467,
"step": 1790
},
{
"epoch": 0.13,
"grad_norm": 327880.625,
"learning_rate": 4.843328191284227e-05,
"loss": 0.2205,
"step": 1800
},
{
"epoch": 0.13,
"eval_accuracy": 0.8776404942208051,
"eval_f1": 0.875183931389359,
"eval_loss": 0.30761781334877014,
"eval_precision": 0.8910948763461308,
"eval_recall": 0.8776404942208051,
"eval_runtime": 12.4538,
"eval_samples_per_second": 201.465,
"eval_steps_per_second": 3.212,
"step": 1800
},
{
"epoch": 0.13,
"grad_norm": 256714.03125,
"learning_rate": 4.842123023524875e-05,
"loss": 0.2844,
"step": 1810
},
{
"epoch": 0.13,
"grad_norm": 262816.8125,
"learning_rate": 4.8409178557655224e-05,
"loss": 0.2276,
"step": 1820
},
{
"epoch": 0.13,
"grad_norm": 316480.125,
"learning_rate": 4.8397126880061705e-05,
"loss": 0.2421,
"step": 1830
},
{
"epoch": 0.13,
"grad_norm": 225589.65625,
"learning_rate": 4.8385075202468186e-05,
"loss": 0.3464,
"step": 1840
},
{
"epoch": 0.13,
"grad_norm": 185817.125,
"learning_rate": 4.837302352487467e-05,
"loss": 0.2356,
"step": 1850
},
{
"epoch": 0.13,
"grad_norm": 88735.1875,
"learning_rate": 4.836097184728114e-05,
"loss": 0.182,
"step": 1860
},
{
"epoch": 0.13,
"grad_norm": 794250.3125,
"learning_rate": 4.834892016968762e-05,
"loss": 0.2339,
"step": 1870
},
{
"epoch": 0.13,
"grad_norm": 560309.375,
"learning_rate": 4.83368684920941e-05,
"loss": 0.2894,
"step": 1880
},
{
"epoch": 0.14,
"grad_norm": 272938.0625,
"learning_rate": 4.832481681450058e-05,
"loss": 0.329,
"step": 1890
},
{
"epoch": 0.14,
"grad_norm": 211817.265625,
"learning_rate": 4.831276513690706e-05,
"loss": 0.2242,
"step": 1900
},
{
"epoch": 0.14,
"eval_accuracy": 0.9151056197688322,
"eval_f1": 0.9149543121007149,
"eval_loss": 0.2171379029750824,
"eval_precision": 0.9152515101201911,
"eval_recall": 0.9151056197688322,
"eval_runtime": 12.4488,
"eval_samples_per_second": 201.545,
"eval_steps_per_second": 3.213,
"step": 1900
},
{
"epoch": 0.14,
"grad_norm": 143415.46875,
"learning_rate": 4.830071345931354e-05,
"loss": 0.1574,
"step": 1910
},
{
"epoch": 0.14,
"grad_norm": 282922.125,
"learning_rate": 4.8288661781720016e-05,
"loss": 0.2423,
"step": 1920
},
{
"epoch": 0.14,
"grad_norm": 146414.75,
"learning_rate": 4.82766101041265e-05,
"loss": 0.2286,
"step": 1930
},
{
"epoch": 0.14,
"grad_norm": 376618.875,
"learning_rate": 4.826455842653297e-05,
"loss": 0.2082,
"step": 1940
},
{
"epoch": 0.14,
"grad_norm": 347305.625,
"learning_rate": 4.825250674893945e-05,
"loss": 0.2707,
"step": 1950
},
{
"epoch": 0.14,
"grad_norm": 120735.5,
"learning_rate": 4.8240455071345934e-05,
"loss": 0.27,
"step": 1960
},
{
"epoch": 0.14,
"grad_norm": 327705.75,
"learning_rate": 4.8228403393752416e-05,
"loss": 0.2446,
"step": 1970
},
{
"epoch": 0.14,
"grad_norm": 204558.703125,
"learning_rate": 4.821635171615889e-05,
"loss": 0.2253,
"step": 1980
},
{
"epoch": 0.14,
"grad_norm": 379880.46875,
"learning_rate": 4.820430003856537e-05,
"loss": 0.2475,
"step": 1990
},
{
"epoch": 0.14,
"grad_norm": 275538.9375,
"learning_rate": 4.8192248360971846e-05,
"loss": 0.257,
"step": 2000
},
{
"epoch": 0.14,
"eval_accuracy": 0.8911917098445595,
"eval_f1": 0.8905276298091929,
"eval_loss": 0.26427793502807617,
"eval_precision": 0.893198513619984,
"eval_recall": 0.8911917098445595,
"eval_runtime": 12.4635,
"eval_samples_per_second": 201.308,
"eval_steps_per_second": 3.209,
"step": 2000
},
{
"epoch": 0.14,
"grad_norm": 353166.90625,
"learning_rate": 4.818019668337833e-05,
"loss": 0.2724,
"step": 2010
},
{
"epoch": 0.14,
"grad_norm": 226420.90625,
"learning_rate": 4.816814500578481e-05,
"loss": 0.2908,
"step": 2020
},
{
"epoch": 0.15,
"grad_norm": 342758.125,
"learning_rate": 4.815609332819129e-05,
"loss": 0.2236,
"step": 2030
},
{
"epoch": 0.15,
"grad_norm": 255585.25,
"learning_rate": 4.8144041650597764e-05,
"loss": 0.2951,
"step": 2040
},
{
"epoch": 0.15,
"grad_norm": 179796.921875,
"learning_rate": 4.8131989973004245e-05,
"loss": 0.1814,
"step": 2050
},
{
"epoch": 0.15,
"grad_norm": 214087.140625,
"learning_rate": 4.811993829541072e-05,
"loss": 0.3827,
"step": 2060
},
{
"epoch": 0.15,
"grad_norm": 250333.71875,
"learning_rate": 4.810788661781721e-05,
"loss": 0.2592,
"step": 2070
},
{
"epoch": 0.15,
"grad_norm": 334693.625,
"learning_rate": 4.809583494022368e-05,
"loss": 0.3138,
"step": 2080
},
{
"epoch": 0.15,
"grad_norm": 204259.46875,
"learning_rate": 4.8083783262630164e-05,
"loss": 0.2725,
"step": 2090
},
{
"epoch": 0.15,
"grad_norm": 362242.4375,
"learning_rate": 4.807173158503664e-05,
"loss": 0.2238,
"step": 2100
},
{
"epoch": 0.15,
"eval_accuracy": 0.9131127939418094,
"eval_f1": 0.9127816210997458,
"eval_loss": 0.21650490164756775,
"eval_precision": 0.9140733290376809,
"eval_recall": 0.9131127939418094,
"eval_runtime": 12.4586,
"eval_samples_per_second": 201.386,
"eval_steps_per_second": 3.211,
"step": 2100
},
{
"epoch": 0.15,
"grad_norm": 427524.21875,
"learning_rate": 4.805967990744312e-05,
"loss": 0.1926,
"step": 2110
},
{
"epoch": 0.15,
"grad_norm": 376668.125,
"learning_rate": 4.8047628229849594e-05,
"loss": 0.1828,
"step": 2120
},
{
"epoch": 0.15,
"grad_norm": 181697.09375,
"learning_rate": 4.803557655225608e-05,
"loss": 0.2484,
"step": 2130
},
{
"epoch": 0.15,
"grad_norm": 405359.8125,
"learning_rate": 4.8023524874662556e-05,
"loss": 0.3022,
"step": 2140
},
{
"epoch": 0.15,
"grad_norm": 227001.171875,
"learning_rate": 4.801147319706904e-05,
"loss": 0.3152,
"step": 2150
},
{
"epoch": 0.15,
"grad_norm": 291323.65625,
"learning_rate": 4.799942151947551e-05,
"loss": 0.2439,
"step": 2160
},
{
"epoch": 0.16,
"grad_norm": 174109.375,
"learning_rate": 4.7987369841881993e-05,
"loss": 0.2465,
"step": 2170
},
{
"epoch": 0.16,
"grad_norm": 337487.75,
"learning_rate": 4.797531816428847e-05,
"loss": 0.2281,
"step": 2180
},
{
"epoch": 0.16,
"grad_norm": 225118.296875,
"learning_rate": 4.796326648669495e-05,
"loss": 0.2357,
"step": 2190
},
{
"epoch": 0.16,
"grad_norm": 159729.390625,
"learning_rate": 4.795121480910143e-05,
"loss": 0.2313,
"step": 2200
},
{
"epoch": 0.16,
"eval_accuracy": 0.899561578318055,
"eval_f1": 0.8995921014681665,
"eval_loss": 0.2312317192554474,
"eval_precision": 0.8996410329041024,
"eval_recall": 0.899561578318055,
"eval_runtime": 12.481,
"eval_samples_per_second": 201.026,
"eval_steps_per_second": 3.205,
"step": 2200
},
{
"epoch": 0.16,
"grad_norm": 215213.28125,
"learning_rate": 4.7939163131507905e-05,
"loss": 0.2576,
"step": 2210
},
{
"epoch": 0.16,
"grad_norm": 691060.25,
"learning_rate": 4.7927111453914386e-05,
"loss": 0.2706,
"step": 2220
},
{
"epoch": 0.16,
"grad_norm": 306584.34375,
"learning_rate": 4.791505977632087e-05,
"loss": 0.1879,
"step": 2230
},
{
"epoch": 0.16,
"grad_norm": 60201.84375,
"learning_rate": 4.790300809872735e-05,
"loss": 0.2517,
"step": 2240
},
{
"epoch": 0.16,
"grad_norm": 318122.8125,
"learning_rate": 4.789095642113382e-05,
"loss": 0.2004,
"step": 2250
},
{
"epoch": 0.16,
"grad_norm": 737994.875,
"learning_rate": 4.7878904743540304e-05,
"loss": 0.3025,
"step": 2260
},
{
"epoch": 0.16,
"grad_norm": 617771.5,
"learning_rate": 4.786685306594678e-05,
"loss": 0.2708,
"step": 2270
},
{
"epoch": 0.16,
"grad_norm": 271784.375,
"learning_rate": 4.785480138835326e-05,
"loss": 0.2333,
"step": 2280
},
{
"epoch": 0.16,
"grad_norm": 348172.15625,
"learning_rate": 4.7842749710759735e-05,
"loss": 0.2371,
"step": 2290
},
{
"epoch": 0.16,
"grad_norm": 501798.375,
"learning_rate": 4.783069803316622e-05,
"loss": 0.1856,
"step": 2300
},
{
"epoch": 0.16,
"eval_accuracy": 0.9107214029493822,
"eval_f1": 0.9107676605487075,
"eval_loss": 0.22687236964702606,
"eval_precision": 0.9108709640812914,
"eval_recall": 0.9107214029493822,
"eval_runtime": 12.4871,
"eval_samples_per_second": 200.927,
"eval_steps_per_second": 3.203,
"step": 2300
},
{
"epoch": 0.17,
"grad_norm": 294219.71875,
"learning_rate": 4.78186463555727e-05,
"loss": 0.2343,
"step": 2310
},
{
"epoch": 0.17,
"grad_norm": 546334.75,
"learning_rate": 4.780659467797918e-05,
"loss": 0.2301,
"step": 2320
},
{
"epoch": 0.17,
"grad_norm": 173881.875,
"learning_rate": 4.779454300038565e-05,
"loss": 0.2491,
"step": 2330
},
{
"epoch": 0.17,
"grad_norm": 237170.28125,
"learning_rate": 4.7782491322792134e-05,
"loss": 0.2194,
"step": 2340
},
{
"epoch": 0.17,
"grad_norm": 319085.8125,
"learning_rate": 4.777043964519861e-05,
"loss": 0.2308,
"step": 2350
},
{
"epoch": 0.17,
"grad_norm": 365797.4375,
"learning_rate": 4.77583879676051e-05,
"loss": 0.2445,
"step": 2360
},
{
"epoch": 0.17,
"grad_norm": 255985.921875,
"learning_rate": 4.774633629001157e-05,
"loss": 0.2578,
"step": 2370
},
{
"epoch": 0.17,
"grad_norm": 253771.796875,
"learning_rate": 4.773428461241805e-05,
"loss": 0.2471,
"step": 2380
},
{
"epoch": 0.17,
"grad_norm": 140455.671875,
"learning_rate": 4.772223293482453e-05,
"loss": 0.2496,
"step": 2390
},
{
"epoch": 0.17,
"grad_norm": 356603.71875,
"learning_rate": 4.771018125723101e-05,
"loss": 0.2201,
"step": 2400
},
{
"epoch": 0.17,
"eval_accuracy": 0.9059386209645277,
"eval_f1": 0.9056498912765502,
"eval_loss": 0.24249590933322906,
"eval_precision": 0.9064880886538065,
"eval_recall": 0.9059386209645277,
"eval_runtime": 12.497,
"eval_samples_per_second": 200.768,
"eval_steps_per_second": 3.201,
"step": 2400
},
{
"epoch": 0.17,
"grad_norm": 284860.53125,
"learning_rate": 4.769812957963749e-05,
"loss": 0.1525,
"step": 2410
},
{
"epoch": 0.17,
"grad_norm": 250776.8125,
"learning_rate": 4.768607790204397e-05,
"loss": 0.2262,
"step": 2420
},
{
"epoch": 0.17,
"grad_norm": 211438.5,
"learning_rate": 4.7674026224450445e-05,
"loss": 0.2277,
"step": 2430
},
{
"epoch": 0.17,
"grad_norm": 368441.25,
"learning_rate": 4.7661974546856926e-05,
"loss": 0.259,
"step": 2440
},
{
"epoch": 0.18,
"grad_norm": 241326.5,
"learning_rate": 4.76499228692634e-05,
"loss": 0.2286,
"step": 2450
},
{
"epoch": 0.18,
"grad_norm": 98535.6640625,
"learning_rate": 4.763787119166988e-05,
"loss": 0.2078,
"step": 2460
},
{
"epoch": 0.18,
"grad_norm": 254980.625,
"learning_rate": 4.7625819514076363e-05,
"loss": 0.2449,
"step": 2470
},
{
"epoch": 0.18,
"grad_norm": 167483.0625,
"learning_rate": 4.7613767836482845e-05,
"loss": 0.2702,
"step": 2480
},
{
"epoch": 0.18,
"grad_norm": 222062.484375,
"learning_rate": 4.760171615888932e-05,
"loss": 0.1956,
"step": 2490
},
{
"epoch": 0.18,
"grad_norm": 405875.75,
"learning_rate": 4.75896644812958e-05,
"loss": 0.3332,
"step": 2500
},
{
"epoch": 0.18,
"eval_accuracy": 0.9043443603029095,
"eval_f1": 0.9044486833245423,
"eval_loss": 0.22543533146381378,
"eval_precision": 0.9048483388492391,
"eval_recall": 0.9043443603029095,
"eval_runtime": 12.4439,
"eval_samples_per_second": 201.624,
"eval_steps_per_second": 3.214,
"step": 2500
},
{
"epoch": 0.18,
"grad_norm": 152190.84375,
"learning_rate": 4.7577612803702275e-05,
"loss": 0.2661,
"step": 2510
},
{
"epoch": 0.18,
"grad_norm": 174183.640625,
"learning_rate": 4.7565561126108756e-05,
"loss": 0.2293,
"step": 2520
},
{
"epoch": 0.18,
"grad_norm": 413301.1875,
"learning_rate": 4.755350944851524e-05,
"loss": 0.2136,
"step": 2530
},
{
"epoch": 0.18,
"grad_norm": 536887.125,
"learning_rate": 4.754145777092172e-05,
"loss": 0.2134,
"step": 2540
},
{
"epoch": 0.18,
"grad_norm": 276406.9375,
"learning_rate": 4.752940609332819e-05,
"loss": 0.2286,
"step": 2550
},
{
"epoch": 0.18,
"grad_norm": 186448.703125,
"learning_rate": 4.7517354415734674e-05,
"loss": 0.2546,
"step": 2560
},
{
"epoch": 0.18,
"grad_norm": 185627.4375,
"learning_rate": 4.750530273814115e-05,
"loss": 0.2528,
"step": 2570
},
{
"epoch": 0.18,
"grad_norm": 368845.34375,
"learning_rate": 4.749325106054763e-05,
"loss": 0.2174,
"step": 2580
},
{
"epoch": 0.19,
"grad_norm": 362864.34375,
"learning_rate": 4.748119938295411e-05,
"loss": 0.2209,
"step": 2590
},
{
"epoch": 0.19,
"grad_norm": 91713.015625,
"learning_rate": 4.7469147705360586e-05,
"loss": 0.1843,
"step": 2600
},
{
"epoch": 0.19,
"eval_accuracy": 0.8979673176564368,
"eval_f1": 0.8970726226158635,
"eval_loss": 0.2523791491985321,
"eval_precision": 0.9019878131456466,
"eval_recall": 0.8979673176564368,
"eval_runtime": 12.4843,
"eval_samples_per_second": 200.972,
"eval_steps_per_second": 3.204,
"step": 2600
},
{
"epoch": 0.19,
"grad_norm": 283047.71875,
"learning_rate": 4.745709602776707e-05,
"loss": 0.2767,
"step": 2610
},
{
"epoch": 0.19,
"grad_norm": 256224.40625,
"learning_rate": 4.744504435017355e-05,
"loss": 0.2588,
"step": 2620
},
{
"epoch": 0.19,
"grad_norm": 156059.90625,
"learning_rate": 4.743299267258002e-05,
"loss": 0.2688,
"step": 2630
},
{
"epoch": 0.19,
"grad_norm": 165222.90625,
"learning_rate": 4.7420940994986504e-05,
"loss": 0.2281,
"step": 2640
},
{
"epoch": 0.19,
"grad_norm": 84012.734375,
"learning_rate": 4.7408889317392985e-05,
"loss": 0.1764,
"step": 2650
},
{
"epoch": 0.19,
"grad_norm": 146292.03125,
"learning_rate": 4.739683763979946e-05,
"loss": 0.2546,
"step": 2660
},
{
"epoch": 0.19,
"grad_norm": 197499.578125,
"learning_rate": 4.738478596220594e-05,
"loss": 0.258,
"step": 2670
},
{
"epoch": 0.19,
"grad_norm": 139515.015625,
"learning_rate": 4.7372734284612416e-05,
"loss": 0.2412,
"step": 2680
},
{
"epoch": 0.19,
"grad_norm": 206191.359375,
"learning_rate": 4.73606826070189e-05,
"loss": 0.1909,
"step": 2690
},
{
"epoch": 0.19,
"grad_norm": 263304.375,
"learning_rate": 4.734863092942538e-05,
"loss": 0.2728,
"step": 2700
},
{
"epoch": 0.19,
"eval_accuracy": 0.8967716221602232,
"eval_f1": 0.8957472911028488,
"eval_loss": 0.23479728400707245,
"eval_precision": 0.9016807725080417,
"eval_recall": 0.8967716221602232,
"eval_runtime": 12.4638,
"eval_samples_per_second": 201.303,
"eval_steps_per_second": 3.209,
"step": 2700
},
{
"epoch": 0.19,
"grad_norm": 232704.671875,
"learning_rate": 4.733657925183186e-05,
"loss": 0.2076,
"step": 2710
},
{
"epoch": 0.19,
"grad_norm": 479017.40625,
"learning_rate": 4.7324527574238334e-05,
"loss": 0.2137,
"step": 2720
},
{
"epoch": 0.2,
"grad_norm": 279608.53125,
"learning_rate": 4.7312475896644815e-05,
"loss": 0.1979,
"step": 2730
},
{
"epoch": 0.2,
"grad_norm": 185551.0,
"learning_rate": 4.730042421905129e-05,
"loss": 0.1977,
"step": 2740
},
{
"epoch": 0.2,
"grad_norm": 222985.421875,
"learning_rate": 4.728837254145777e-05,
"loss": 0.2625,
"step": 2750
},
{
"epoch": 0.2,
"grad_norm": 205608.65625,
"learning_rate": 4.727632086386425e-05,
"loss": 0.2073,
"step": 2760
},
{
"epoch": 0.2,
"grad_norm": 251234.265625,
"learning_rate": 4.7264269186270734e-05,
"loss": 0.2447,
"step": 2770
},
{
"epoch": 0.2,
"grad_norm": 513352.03125,
"learning_rate": 4.725221750867721e-05,
"loss": 0.3065,
"step": 2780
},
{
"epoch": 0.2,
"grad_norm": 231737.0625,
"learning_rate": 4.724016583108369e-05,
"loss": 0.2311,
"step": 2790
},
{
"epoch": 0.2,
"grad_norm": 309214.3125,
"learning_rate": 4.7228114153490164e-05,
"loss": 0.2131,
"step": 2800
},
{
"epoch": 0.2,
"eval_accuracy": 0.913511359107214,
"eval_f1": 0.9135765070264794,
"eval_loss": 0.2209855616092682,
"eval_precision": 0.9137703030306349,
"eval_recall": 0.913511359107214,
"eval_runtime": 12.5042,
"eval_samples_per_second": 200.652,
"eval_steps_per_second": 3.199,
"step": 2800
},
{
"epoch": 0.2,
"grad_norm": 501373.90625,
"learning_rate": 4.721606247589665e-05,
"loss": 0.3289,
"step": 2810
},
{
"epoch": 0.2,
"grad_norm": 121567.8046875,
"learning_rate": 4.7204010798303126e-05,
"loss": 0.1986,
"step": 2820
},
{
"epoch": 0.2,
"grad_norm": 300041.3125,
"learning_rate": 4.719195912070961e-05,
"loss": 0.2294,
"step": 2830
},
{
"epoch": 0.2,
"grad_norm": 458868.6875,
"learning_rate": 4.717990744311608e-05,
"loss": 0.2133,
"step": 2840
},
{
"epoch": 0.2,
"grad_norm": 147848.984375,
"learning_rate": 4.716785576552256e-05,
"loss": 0.2769,
"step": 2850
},
{
"epoch": 0.2,
"grad_norm": 332015.1875,
"learning_rate": 4.715580408792904e-05,
"loss": 0.22,
"step": 2860
},
{
"epoch": 0.21,
"grad_norm": 472249.4375,
"learning_rate": 4.7143752410335526e-05,
"loss": 0.2495,
"step": 2870
},
{
"epoch": 0.21,
"grad_norm": 967262.6875,
"learning_rate": 4.7131700732742e-05,
"loss": 0.2312,
"step": 2880
},
{
"epoch": 0.21,
"grad_norm": 341954.1875,
"learning_rate": 4.711964905514848e-05,
"loss": 0.1765,
"step": 2890
},
{
"epoch": 0.21,
"grad_norm": 464634.5625,
"learning_rate": 4.7107597377554956e-05,
"loss": 0.19,
"step": 2900
},
{
"epoch": 0.21,
"eval_accuracy": 0.9123156636110004,
"eval_f1": 0.9120359608178479,
"eval_loss": 0.22591687738895416,
"eval_precision": 0.9129663507904072,
"eval_recall": 0.9123156636110004,
"eval_runtime": 12.4552,
"eval_samples_per_second": 201.442,
"eval_steps_per_second": 3.212,
"step": 2900
},
{
"epoch": 0.21,
"grad_norm": 241272.703125,
"learning_rate": 4.709554569996144e-05,
"loss": 0.1981,
"step": 2910
},
{
"epoch": 0.21,
"grad_norm": 204981.140625,
"learning_rate": 4.708349402236791e-05,
"loss": 0.1911,
"step": 2920
},
{
"epoch": 0.21,
"grad_norm": 327311.375,
"learning_rate": 4.70714423447744e-05,
"loss": 0.249,
"step": 2930
},
{
"epoch": 0.21,
"grad_norm": 375379.96875,
"learning_rate": 4.7059390667180874e-05,
"loss": 0.1629,
"step": 2940
},
{
"epoch": 0.21,
"grad_norm": 277301.8125,
"learning_rate": 4.7047338989587356e-05,
"loss": 0.2026,
"step": 2950
},
{
"epoch": 0.21,
"grad_norm": 327376.71875,
"learning_rate": 4.703528731199383e-05,
"loss": 0.271,
"step": 2960
},
{
"epoch": 0.21,
"grad_norm": 136987.484375,
"learning_rate": 4.702323563440031e-05,
"loss": 0.1453,
"step": 2970
},
{
"epoch": 0.21,
"grad_norm": 353965.59375,
"learning_rate": 4.701118395680679e-05,
"loss": 0.2407,
"step": 2980
},
{
"epoch": 0.21,
"grad_norm": 247193.09375,
"learning_rate": 4.6999132279213274e-05,
"loss": 0.1739,
"step": 2990
},
{
"epoch": 0.21,
"grad_norm": 219308.640625,
"learning_rate": 4.698708060161975e-05,
"loss": 0.2099,
"step": 3000
},
{
"epoch": 0.21,
"eval_accuracy": 0.9023515344758868,
"eval_f1": 0.9016339685231911,
"eval_loss": 0.2813716530799866,
"eval_precision": 0.9054013251007967,
"eval_recall": 0.9023515344758868,
"eval_runtime": 12.4697,
"eval_samples_per_second": 201.208,
"eval_steps_per_second": 3.208,
"step": 3000
},
{
"epoch": 0.22,
"grad_norm": 480314.5625,
"learning_rate": 4.697502892402623e-05,
"loss": 0.2492,
"step": 3010
},
{
"epoch": 0.22,
"grad_norm": 327667.5,
"learning_rate": 4.6962977246432704e-05,
"loss": 0.2386,
"step": 3020
},
{
"epoch": 0.22,
"grad_norm": 361772.9375,
"learning_rate": 4.6950925568839185e-05,
"loss": 0.2363,
"step": 3030
},
{
"epoch": 0.22,
"grad_norm": 281198.5625,
"learning_rate": 4.6938873891245667e-05,
"loss": 0.2199,
"step": 3040
},
{
"epoch": 0.22,
"grad_norm": 435042.5,
"learning_rate": 4.692682221365214e-05,
"loss": 0.2985,
"step": 3050
},
{
"epoch": 0.22,
"grad_norm": 175672.875,
"learning_rate": 4.691477053605862e-05,
"loss": 0.2279,
"step": 3060
},
{
"epoch": 0.22,
"grad_norm": 342795.84375,
"learning_rate": 4.69027188584651e-05,
"loss": 0.2441,
"step": 3070
},
{
"epoch": 0.22,
"grad_norm": 288277.53125,
"learning_rate": 4.689066718087158e-05,
"loss": 0.2576,
"step": 3080
},
{
"epoch": 0.22,
"grad_norm": 121460.078125,
"learning_rate": 4.687861550327806e-05,
"loss": 0.247,
"step": 3090
},
{
"epoch": 0.22,
"grad_norm": 401488.15625,
"learning_rate": 4.686656382568454e-05,
"loss": 0.2209,
"step": 3100
},
{
"epoch": 0.22,
"eval_accuracy": 0.9051414906337186,
"eval_f1": 0.9046116025347221,
"eval_loss": 0.2472696155309677,
"eval_precision": 0.9070370928171988,
"eval_recall": 0.9051414906337186,
"eval_runtime": 12.4813,
"eval_samples_per_second": 201.021,
"eval_steps_per_second": 3.205,
"step": 3100
},
{
"epoch": 0.22,
"grad_norm": 377680.78125,
"learning_rate": 4.6854512148091015e-05,
"loss": 0.288,
"step": 3110
},
{
"epoch": 0.22,
"grad_norm": 233974.625,
"learning_rate": 4.6842460470497496e-05,
"loss": 0.2491,
"step": 3120
},
{
"epoch": 0.22,
"grad_norm": 334937.875,
"learning_rate": 4.683040879290397e-05,
"loss": 0.2163,
"step": 3130
},
{
"epoch": 0.22,
"grad_norm": 440934.28125,
"learning_rate": 4.681835711531045e-05,
"loss": 0.2135,
"step": 3140
},
{
"epoch": 0.23,
"grad_norm": 103967.03125,
"learning_rate": 4.680630543771693e-05,
"loss": 0.1934,
"step": 3150
},
{
"epoch": 0.23,
"grad_norm": 213988.359375,
"learning_rate": 4.6794253760123415e-05,
"loss": 0.2356,
"step": 3160
},
{
"epoch": 0.23,
"grad_norm": 231522.921875,
"learning_rate": 4.678220208252989e-05,
"loss": 0.2235,
"step": 3170
},
{
"epoch": 0.23,
"grad_norm": 301749.78125,
"learning_rate": 4.677015040493637e-05,
"loss": 0.2005,
"step": 3180
},
{
"epoch": 0.23,
"grad_norm": 163900.3125,
"learning_rate": 4.6758098727342845e-05,
"loss": 0.1628,
"step": 3190
},
{
"epoch": 0.23,
"grad_norm": 220932.828125,
"learning_rate": 4.6746047049749326e-05,
"loss": 0.2366,
"step": 3200
},
{
"epoch": 0.23,
"eval_accuracy": 0.8991630131526505,
"eval_f1": 0.8983219782452526,
"eval_loss": 0.256120890378952,
"eval_precision": 0.9028826072820753,
"eval_recall": 0.8991630131526505,
"eval_runtime": 12.4914,
"eval_samples_per_second": 200.858,
"eval_steps_per_second": 3.202,
"step": 3200
},
{
"epoch": 0.23,
"grad_norm": 140245.703125,
"learning_rate": 4.673399537215581e-05,
"loss": 0.188,
"step": 3210
},
{
"epoch": 0.23,
"grad_norm": 56319.390625,
"learning_rate": 4.672194369456229e-05,
"loss": 0.2311,
"step": 3220
},
{
"epoch": 0.23,
"grad_norm": 417635.46875,
"learning_rate": 4.670989201696876e-05,
"loss": 0.2028,
"step": 3230
},
{
"epoch": 0.23,
"grad_norm": 257533.140625,
"learning_rate": 4.6697840339375244e-05,
"loss": 0.2448,
"step": 3240
},
{
"epoch": 0.23,
"grad_norm": 134527.328125,
"learning_rate": 4.668578866178172e-05,
"loss": 0.255,
"step": 3250
},
{
"epoch": 0.23,
"grad_norm": 241991.234375,
"learning_rate": 4.66737369841882e-05,
"loss": 0.2519,
"step": 3260
},
{
"epoch": 0.23,
"grad_norm": 292576.84375,
"learning_rate": 4.666168530659468e-05,
"loss": 0.2186,
"step": 3270
},
{
"epoch": 0.23,
"grad_norm": 172253.8125,
"learning_rate": 4.664963362900116e-05,
"loss": 0.2119,
"step": 3280
},
{
"epoch": 0.24,
"grad_norm": 162594.703125,
"learning_rate": 4.663758195140764e-05,
"loss": 0.2266,
"step": 3290
},
{
"epoch": 0.24,
"grad_norm": 291196.75,
"learning_rate": 4.662553027381412e-05,
"loss": 0.3156,
"step": 3300
},
{
"epoch": 0.24,
"eval_accuracy": 0.9095257074531686,
"eval_f1": 0.9094360946444322,
"eval_loss": 0.21921035647392273,
"eval_precision": 0.9095010570473924,
"eval_recall": 0.9095257074531686,
"eval_runtime": 12.4898,
"eval_samples_per_second": 200.883,
"eval_steps_per_second": 3.203,
"step": 3300
},
{
"epoch": 0.24,
"grad_norm": 359450.15625,
"learning_rate": 4.661347859622059e-05,
"loss": 0.1919,
"step": 3310
},
{
"epoch": 0.24,
"grad_norm": 356027.1875,
"learning_rate": 4.6601426918627074e-05,
"loss": 0.1946,
"step": 3320
},
{
"epoch": 0.24,
"grad_norm": 228787.578125,
"learning_rate": 4.6589375241033555e-05,
"loss": 0.2353,
"step": 3330
},
{
"epoch": 0.24,
"grad_norm": 213314.375,
"learning_rate": 4.6577323563440037e-05,
"loss": 0.21,
"step": 3340
},
{
"epoch": 0.24,
"grad_norm": 229541.46875,
"learning_rate": 4.656527188584651e-05,
"loss": 0.2157,
"step": 3350
},
{
"epoch": 0.24,
"grad_norm": 424003.65625,
"learning_rate": 4.655322020825299e-05,
"loss": 0.2202,
"step": 3360
},
{
"epoch": 0.24,
"grad_norm": 274433.0,
"learning_rate": 4.654116853065947e-05,
"loss": 0.2101,
"step": 3370
},
{
"epoch": 0.24,
"grad_norm": 585877.375,
"learning_rate": 4.6529116853065955e-05,
"loss": 0.227,
"step": 3380
},
{
"epoch": 0.24,
"grad_norm": 168976.125,
"learning_rate": 4.651706517547243e-05,
"loss": 0.2004,
"step": 3390
},
{
"epoch": 0.24,
"grad_norm": 247893.96875,
"learning_rate": 4.650501349787891e-05,
"loss": 0.197,
"step": 3400
},
{
"epoch": 0.24,
"eval_accuracy": 0.9063371861299322,
"eval_f1": 0.9056639403550047,
"eval_loss": 0.2382478266954422,
"eval_precision": 0.9093486961575057,
"eval_recall": 0.9063371861299322,
"eval_runtime": 12.4721,
"eval_samples_per_second": 201.168,
"eval_steps_per_second": 3.207,
"step": 3400
},
{
"epoch": 0.24,
"grad_norm": 418813.09375,
"learning_rate": 4.6492961820285385e-05,
"loss": 0.2256,
"step": 3410
},
{
"epoch": 0.24,
"grad_norm": 273254.46875,
"learning_rate": 4.6480910142691866e-05,
"loss": 0.3162,
"step": 3420
},
{
"epoch": 0.25,
"grad_norm": 222690.640625,
"learning_rate": 4.646885846509834e-05,
"loss": 0.2528,
"step": 3430
},
{
"epoch": 0.25,
"grad_norm": 332726.59375,
"learning_rate": 4.645680678750482e-05,
"loss": 0.222,
"step": 3440
},
{
"epoch": 0.25,
"grad_norm": 122768.3515625,
"learning_rate": 4.64447551099113e-05,
"loss": 0.2721,
"step": 3450
},
{
"epoch": 0.25,
"grad_norm": 177160.875,
"learning_rate": 4.643270343231778e-05,
"loss": 0.2322,
"step": 3460
},
{
"epoch": 0.25,
"grad_norm": 492673.46875,
"learning_rate": 4.642065175472426e-05,
"loss": 0.2355,
"step": 3470
},
{
"epoch": 0.25,
"grad_norm": 120090.96875,
"learning_rate": 4.640860007713074e-05,
"loss": 0.2207,
"step": 3480
},
{
"epoch": 0.25,
"grad_norm": 199731.515625,
"learning_rate": 4.6396548399537215e-05,
"loss": 0.1873,
"step": 3490
},
{
"epoch": 0.25,
"grad_norm": 337224.9375,
"learning_rate": 4.6384496721943696e-05,
"loss": 0.2371,
"step": 3500
},
{
"epoch": 0.25,
"eval_accuracy": 0.9139099242726185,
"eval_f1": 0.9141014321392996,
"eval_loss": 0.22432319819927216,
"eval_precision": 0.9166452044770947,
"eval_recall": 0.9139099242726185,
"eval_runtime": 12.4637,
"eval_samples_per_second": 201.305,
"eval_steps_per_second": 3.209,
"step": 3500
},
{
"epoch": 0.25,
"grad_norm": 144981.5625,
"learning_rate": 4.637244504435018e-05,
"loss": 0.2471,
"step": 3510
},
{
"epoch": 0.25,
"grad_norm": 535936.1875,
"learning_rate": 4.636039336675665e-05,
"loss": 0.2198,
"step": 3520
},
{
"epoch": 0.25,
"grad_norm": 262519.0,
"learning_rate": 4.634834168916313e-05,
"loss": 0.1173,
"step": 3530
},
{
"epoch": 0.25,
"grad_norm": 408279.0625,
"learning_rate": 4.633629001156961e-05,
"loss": 0.2529,
"step": 3540
},
{
"epoch": 0.25,
"grad_norm": 334028.15625,
"learning_rate": 4.6324238333976096e-05,
"loss": 0.2309,
"step": 3550
},
{
"epoch": 0.25,
"grad_norm": 180496.734375,
"learning_rate": 4.631218665638257e-05,
"loss": 0.1904,
"step": 3560
},
{
"epoch": 0.26,
"grad_norm": 247550.046875,
"learning_rate": 4.630013497878905e-05,
"loss": 0.2689,
"step": 3570
},
{
"epoch": 0.26,
"grad_norm": 203775.171875,
"learning_rate": 4.6288083301195526e-05,
"loss": 0.2308,
"step": 3580
},
{
"epoch": 0.26,
"grad_norm": 114762.5703125,
"learning_rate": 4.627603162360201e-05,
"loss": 0.1804,
"step": 3590
},
{
"epoch": 0.26,
"grad_norm": 333855.78125,
"learning_rate": 4.626397994600848e-05,
"loss": 0.2273,
"step": 3600
},
{
"epoch": 0.26,
"eval_accuracy": 0.913511359107214,
"eval_f1": 0.9130592850413116,
"eval_loss": 0.23622463643550873,
"eval_precision": 0.9152968112748071,
"eval_recall": 0.913511359107214,
"eval_runtime": 12.479,
"eval_samples_per_second": 201.057,
"eval_steps_per_second": 3.205,
"step": 3600
},
{
"epoch": 0.26,
"grad_norm": 156201.640625,
"learning_rate": 4.625192826841497e-05,
"loss": 0.1892,
"step": 3610
},
{
"epoch": 0.26,
"grad_norm": 208585.484375,
"learning_rate": 4.6239876590821444e-05,
"loss": 0.2036,
"step": 3620
},
{
"epoch": 0.26,
"grad_norm": 457924.40625,
"learning_rate": 4.6227824913227925e-05,
"loss": 0.2847,
"step": 3630
},
{
"epoch": 0.26,
"grad_norm": 255040.125,
"learning_rate": 4.62157732356344e-05,
"loss": 0.1938,
"step": 3640
},
{
"epoch": 0.26,
"grad_norm": 183455.359375,
"learning_rate": 4.620372155804088e-05,
"loss": 0.1977,
"step": 3650
},
{
"epoch": 0.26,
"grad_norm": 344806.9375,
"learning_rate": 4.6191669880447356e-05,
"loss": 0.255,
"step": 3660
},
{
"epoch": 0.26,
"grad_norm": 445422.90625,
"learning_rate": 4.6179618202853844e-05,
"loss": 0.167,
"step": 3670
},
{
"epoch": 0.26,
"grad_norm": 228799.640625,
"learning_rate": 4.616756652526032e-05,
"loss": 0.1926,
"step": 3680
},
{
"epoch": 0.26,
"grad_norm": 211847.53125,
"learning_rate": 4.61555148476668e-05,
"loss": 0.1977,
"step": 3690
},
{
"epoch": 0.26,
"grad_norm": 337014.65625,
"learning_rate": 4.6143463170073274e-05,
"loss": 0.2504,
"step": 3700
},
{
"epoch": 0.26,
"eval_accuracy": 0.8888003188521323,
"eval_f1": 0.8873066761801871,
"eval_loss": 0.2670985162258148,
"eval_precision": 0.8964616545609316,
"eval_recall": 0.8888003188521323,
"eval_runtime": 12.4722,
"eval_samples_per_second": 201.168,
"eval_steps_per_second": 3.207,
"step": 3700
},
{
"epoch": 0.27,
"grad_norm": 168669.875,
"learning_rate": 4.6131411492479755e-05,
"loss": 0.2568,
"step": 3710
},
{
"epoch": 0.27,
"grad_norm": 390192.53125,
"learning_rate": 4.6119359814886236e-05,
"loss": 0.292,
"step": 3720
},
{
"epoch": 0.27,
"grad_norm": 381233.5,
"learning_rate": 4.610730813729272e-05,
"loss": 0.2383,
"step": 3730
},
{
"epoch": 0.27,
"grad_norm": 201262.109375,
"learning_rate": 4.609525645969919e-05,
"loss": 0.2706,
"step": 3740
},
{
"epoch": 0.27,
"grad_norm": 159978.65625,
"learning_rate": 4.6083204782105673e-05,
"loss": 0.2194,
"step": 3750
},
{
"epoch": 0.27,
"grad_norm": 334120.59375,
"learning_rate": 4.607115310451215e-05,
"loss": 0.2306,
"step": 3760
},
{
"epoch": 0.27,
"grad_norm": 204698.265625,
"learning_rate": 4.605910142691863e-05,
"loss": 0.1931,
"step": 3770
},
{
"epoch": 0.27,
"grad_norm": 354762.46875,
"learning_rate": 4.604704974932511e-05,
"loss": 0.2498,
"step": 3780
},
{
"epoch": 0.27,
"grad_norm": 279956.8125,
"learning_rate": 4.603499807173159e-05,
"loss": 0.238,
"step": 3790
},
{
"epoch": 0.27,
"grad_norm": 204264.0625,
"learning_rate": 4.6022946394138066e-05,
"loss": 0.1978,
"step": 3800
},
{
"epoch": 0.27,
"eval_accuracy": 0.917098445595855,
"eval_f1": 0.9169802729066538,
"eval_loss": 0.20485247671604156,
"eval_precision": 0.9171643432118469,
"eval_recall": 0.917098445595855,
"eval_runtime": 12.4749,
"eval_samples_per_second": 201.125,
"eval_steps_per_second": 3.206,
"step": 3800
},
{
"epoch": 0.27,
"grad_norm": 224777.34375,
"learning_rate": 4.601089471654455e-05,
"loss": 0.1953,
"step": 3810
},
{
"epoch": 0.27,
"grad_norm": 233138.46875,
"learning_rate": 4.599884303895102e-05,
"loss": 0.2356,
"step": 3820
},
{
"epoch": 0.27,
"grad_norm": 130433.8515625,
"learning_rate": 4.59867913613575e-05,
"loss": 0.2269,
"step": 3830
},
{
"epoch": 0.27,
"grad_norm": 208775.78125,
"learning_rate": 4.5974739683763984e-05,
"loss": 0.2384,
"step": 3840
},
{
"epoch": 0.28,
"grad_norm": 195953.21875,
"learning_rate": 4.5962688006170466e-05,
"loss": 0.1926,
"step": 3850
},
{
"epoch": 0.28,
"grad_norm": 169765.890625,
"learning_rate": 4.595063632857694e-05,
"loss": 0.2259,
"step": 3860
},
{
"epoch": 0.28,
"grad_norm": 216638.4375,
"learning_rate": 4.593858465098342e-05,
"loss": 0.2306,
"step": 3870
},
{
"epoch": 0.28,
"grad_norm": 219986.3125,
"learning_rate": 4.5926532973389896e-05,
"loss": 0.2169,
"step": 3880
},
{
"epoch": 0.28,
"grad_norm": 525753.625,
"learning_rate": 4.591448129579638e-05,
"loss": 0.1942,
"step": 3890
},
{
"epoch": 0.28,
"grad_norm": 207980.359375,
"learning_rate": 4.590242961820286e-05,
"loss": 0.2189,
"step": 3900
},
{
"epoch": 0.28,
"eval_accuracy": 0.9099242726185731,
"eval_f1": 0.9098638649336764,
"eval_loss": 0.22675587236881256,
"eval_precision": 0.9098782103745078,
"eval_recall": 0.9099242726185731,
"eval_runtime": 12.4751,
"eval_samples_per_second": 201.121,
"eval_steps_per_second": 3.206,
"step": 3900
},
{
"epoch": 0.28,
"grad_norm": 167927.484375,
"learning_rate": 4.589037794060933e-05,
"loss": 0.2138,
"step": 3910
},
{
"epoch": 0.28,
"grad_norm": 549348.0,
"learning_rate": 4.5878326263015814e-05,
"loss": 0.2528,
"step": 3920
},
{
"epoch": 0.28,
"grad_norm": 86471.5546875,
"learning_rate": 4.586627458542229e-05,
"loss": 0.188,
"step": 3930
},
{
"epoch": 0.28,
"grad_norm": 349328.78125,
"learning_rate": 4.585422290782877e-05,
"loss": 0.215,
"step": 3940
},
{
"epoch": 0.28,
"grad_norm": 222607.4375,
"learning_rate": 4.584217123023525e-05,
"loss": 0.2179,
"step": 3950
},
{
"epoch": 0.28,
"grad_norm": 304011.5,
"learning_rate": 4.583011955264173e-05,
"loss": 0.2179,
"step": 3960
},
{
"epoch": 0.28,
"grad_norm": 150350.171875,
"learning_rate": 4.581806787504821e-05,
"loss": 0.2505,
"step": 3970
},
{
"epoch": 0.28,
"grad_norm": 212958.984375,
"learning_rate": 4.580601619745469e-05,
"loss": 0.2408,
"step": 3980
},
{
"epoch": 0.29,
"grad_norm": 118683.5703125,
"learning_rate": 4.579396451986116e-05,
"loss": 0.2103,
"step": 3990
},
{
"epoch": 0.29,
"grad_norm": 160386.796875,
"learning_rate": 4.5781912842267644e-05,
"loss": 0.2171,
"step": 4000
},
{
"epoch": 0.29,
"eval_accuracy": 0.9163013152650459,
"eval_f1": 0.9161651055562922,
"eval_loss": 0.213547021150589,
"eval_precision": 0.9164113566074957,
"eval_recall": 0.9163013152650459,
"eval_runtime": 12.4758,
"eval_samples_per_second": 201.11,
"eval_steps_per_second": 3.206,
"step": 4000
},
{
"epoch": 0.29,
"grad_norm": 284341.65625,
"learning_rate": 4.5769861164674125e-05,
"loss": 0.2502,
"step": 4010
},
{
"epoch": 0.29,
"grad_norm": 210621.484375,
"learning_rate": 4.5757809487080606e-05,
"loss": 0.2148,
"step": 4020
},
{
"epoch": 0.29,
"grad_norm": 437063.46875,
"learning_rate": 4.574575780948708e-05,
"loss": 0.2505,
"step": 4030
},
{
"epoch": 0.29,
"grad_norm": 212374.59375,
"learning_rate": 4.573370613189356e-05,
"loss": 0.213,
"step": 4040
},
{
"epoch": 0.29,
"grad_norm": 158491.328125,
"learning_rate": 4.572165445430004e-05,
"loss": 0.2041,
"step": 4050
},
{
"epoch": 0.29,
"grad_norm": 386332.40625,
"learning_rate": 4.570960277670652e-05,
"loss": 0.2476,
"step": 4060
},
{
"epoch": 0.29,
"grad_norm": 100670.15625,
"learning_rate": 4.5697551099113e-05,
"loss": 0.1967,
"step": 4070
},
{
"epoch": 0.29,
"grad_norm": 486919.75,
"learning_rate": 4.568549942151948e-05,
"loss": 0.1781,
"step": 4080
},
{
"epoch": 0.29,
"grad_norm": 261833.109375,
"learning_rate": 4.5673447743925955e-05,
"loss": 0.2773,
"step": 4090
},
{
"epoch": 0.29,
"grad_norm": 290765.3125,
"learning_rate": 4.5661396066332436e-05,
"loss": 0.2325,
"step": 4100
},
{
"epoch": 0.29,
"eval_accuracy": 0.8915902750099641,
"eval_f1": 0.8904847371544249,
"eval_loss": 0.26240846514701843,
"eval_precision": 0.8965748968593881,
"eval_recall": 0.8915902750099641,
"eval_runtime": 12.4947,
"eval_samples_per_second": 200.805,
"eval_steps_per_second": 3.201,
"step": 4100
},
{
"epoch": 0.29,
"grad_norm": 645535.5,
"learning_rate": 4.564934438873891e-05,
"loss": 0.1845,
"step": 4110
},
{
"epoch": 0.29,
"grad_norm": 709512.5,
"learning_rate": 4.56372927111454e-05,
"loss": 0.2641,
"step": 4120
},
{
"epoch": 0.3,
"grad_norm": 194677.859375,
"learning_rate": 4.562524103355187e-05,
"loss": 0.275,
"step": 4130
},
{
"epoch": 0.3,
"grad_norm": 153537.4375,
"learning_rate": 4.5613189355958354e-05,
"loss": 0.2616,
"step": 4140
},
{
"epoch": 0.3,
"grad_norm": 168272.890625,
"learning_rate": 4.560113767836483e-05,
"loss": 0.2226,
"step": 4150
},
{
"epoch": 0.3,
"grad_norm": 311794.625,
"learning_rate": 4.558908600077131e-05,
"loss": 0.2237,
"step": 4160
},
{
"epoch": 0.3,
"grad_norm": 312384.03125,
"learning_rate": 4.5577034323177785e-05,
"loss": 0.2552,
"step": 4170
},
{
"epoch": 0.3,
"grad_norm": 177459.375,
"learning_rate": 4.556498264558427e-05,
"loss": 0.1759,
"step": 4180
},
{
"epoch": 0.3,
"grad_norm": 281611.59375,
"learning_rate": 4.555293096799075e-05,
"loss": 0.2031,
"step": 4190
},
{
"epoch": 0.3,
"grad_norm": 201760.8125,
"learning_rate": 4.554087929039723e-05,
"loss": 0.1888,
"step": 4200
},
{
"epoch": 0.3,
"eval_accuracy": 0.8923874053407732,
"eval_f1": 0.8911264735256401,
"eval_loss": 0.2877594530582428,
"eval_precision": 0.8986690061249697,
"eval_recall": 0.8923874053407732,
"eval_runtime": 12.4645,
"eval_samples_per_second": 201.292,
"eval_steps_per_second": 3.209,
"step": 4200
},
{
"epoch": 0.3,
"grad_norm": 120513.65625,
"learning_rate": 4.55288276128037e-05,
"loss": 0.1995,
"step": 4210
},
{
"epoch": 0.3,
"grad_norm": 236276.1875,
"learning_rate": 4.5516775935210184e-05,
"loss": 0.1812,
"step": 4220
},
{
"epoch": 0.3,
"grad_norm": 315676.4375,
"learning_rate": 4.550472425761666e-05,
"loss": 0.2993,
"step": 4230
},
{
"epoch": 0.3,
"grad_norm": 184856.0,
"learning_rate": 4.549267258002315e-05,
"loss": 0.2441,
"step": 4240
},
{
"epoch": 0.3,
"grad_norm": 636716.1875,
"learning_rate": 4.548062090242962e-05,
"loss": 0.2447,
"step": 4250
},
{
"epoch": 0.3,
"grad_norm": 235927.671875,
"learning_rate": 4.54685692248361e-05,
"loss": 0.2051,
"step": 4260
},
{
"epoch": 0.31,
"grad_norm": 243305.515625,
"learning_rate": 4.545651754724258e-05,
"loss": 0.2596,
"step": 4270
},
{
"epoch": 0.31,
"grad_norm": 370063.59375,
"learning_rate": 4.544446586964906e-05,
"loss": 0.2223,
"step": 4280
},
{
"epoch": 0.31,
"grad_norm": 585004.625,
"learning_rate": 4.543241419205554e-05,
"loss": 0.2075,
"step": 4290
},
{
"epoch": 0.31,
"grad_norm": 262047.875,
"learning_rate": 4.5420362514462014e-05,
"loss": 0.2345,
"step": 4300
},
{
"epoch": 0.31,
"eval_accuracy": 0.8963730569948186,
"eval_f1": 0.8953353148714563,
"eval_loss": 0.24442929029464722,
"eval_precision": 0.9013455886413335,
"eval_recall": 0.8963730569948186,
"eval_runtime": 12.4824,
"eval_samples_per_second": 201.004,
"eval_steps_per_second": 3.205,
"step": 4300
},
{
"epoch": 0.31,
"grad_norm": 127394.015625,
"learning_rate": 4.5408310836868495e-05,
"loss": 0.1995,
"step": 4310
},
{
"epoch": 0.31,
"grad_norm": 405064.8125,
"learning_rate": 4.539625915927497e-05,
"loss": 0.2691,
"step": 4320
},
{
"epoch": 0.31,
"grad_norm": 632551.125,
"learning_rate": 4.538420748168145e-05,
"loss": 0.2758,
"step": 4330
},
{
"epoch": 0.31,
"grad_norm": 103708.234375,
"learning_rate": 4.537215580408793e-05,
"loss": 0.2373,
"step": 4340
},
{
"epoch": 0.31,
"grad_norm": 231929.875,
"learning_rate": 4.5360104126494414e-05,
"loss": 0.2152,
"step": 4350
},
{
"epoch": 0.31,
"grad_norm": 302502.03125,
"learning_rate": 4.534805244890089e-05,
"loss": 0.2068,
"step": 4360
},
{
"epoch": 0.31,
"grad_norm": 278650.0625,
"learning_rate": 4.533600077130737e-05,
"loss": 0.2494,
"step": 4370
},
{
"epoch": 0.31,
"grad_norm": 224905.484375,
"learning_rate": 4.5323949093713844e-05,
"loss": 0.2022,
"step": 4380
},
{
"epoch": 0.31,
"grad_norm": 279063.75,
"learning_rate": 4.5311897416120325e-05,
"loss": 0.2295,
"step": 4390
},
{
"epoch": 0.31,
"grad_norm": 163691.515625,
"learning_rate": 4.52998457385268e-05,
"loss": 0.1688,
"step": 4400
},
{
"epoch": 0.31,
"eval_accuracy": 0.908330011956955,
"eval_f1": 0.9077357266944889,
"eval_loss": 0.2479422241449356,
"eval_precision": 0.9108744090612344,
"eval_recall": 0.908330011956955,
"eval_runtime": 12.5049,
"eval_samples_per_second": 200.641,
"eval_steps_per_second": 3.199,
"step": 4400
},
{
"epoch": 0.32,
"grad_norm": 327312.96875,
"learning_rate": 4.528779406093329e-05,
"loss": 0.1556,
"step": 4410
},
{
"epoch": 0.32,
"grad_norm": 274013.09375,
"learning_rate": 4.527574238333976e-05,
"loss": 0.2903,
"step": 4420
},
{
"epoch": 0.32,
"grad_norm": 227320.96875,
"learning_rate": 4.526369070574624e-05,
"loss": 0.2432,
"step": 4430
},
{
"epoch": 0.32,
"grad_norm": 101987.640625,
"learning_rate": 4.525163902815272e-05,
"loss": 0.2176,
"step": 4440
},
{
"epoch": 0.32,
"grad_norm": 225351.109375,
"learning_rate": 4.52395873505592e-05,
"loss": 0.1864,
"step": 4450
},
{
"epoch": 0.32,
"grad_norm": 208051.765625,
"learning_rate": 4.5227535672965674e-05,
"loss": 0.2659,
"step": 4460
},
{
"epoch": 0.32,
"grad_norm": 342590.03125,
"learning_rate": 4.521548399537216e-05,
"loss": 0.2199,
"step": 4470
},
{
"epoch": 0.32,
"grad_norm": 216820.671875,
"learning_rate": 4.5203432317778636e-05,
"loss": 0.2149,
"step": 4480
},
{
"epoch": 0.32,
"grad_norm": 138170.34375,
"learning_rate": 4.519138064018512e-05,
"loss": 0.2442,
"step": 4490
},
{
"epoch": 0.32,
"grad_norm": 247797.890625,
"learning_rate": 4.517932896259159e-05,
"loss": 0.2083,
"step": 4500
},
{
"epoch": 0.32,
"eval_accuracy": 0.913511359107214,
"eval_f1": 0.9130955830993255,
"eval_loss": 0.21996097266674042,
"eval_precision": 0.9150368156806639,
"eval_recall": 0.913511359107214,
"eval_runtime": 12.4935,
"eval_samples_per_second": 200.824,
"eval_steps_per_second": 3.202,
"step": 4500
},
{
"epoch": 0.32,
"grad_norm": 120234.546875,
"learning_rate": 4.516727728499807e-05,
"loss": 0.1768,
"step": 4510
},
{
"epoch": 0.32,
"grad_norm": 195769.40625,
"learning_rate": 4.5155225607404554e-05,
"loss": 0.2422,
"step": 4520
},
{
"epoch": 0.32,
"grad_norm": 142246.734375,
"learning_rate": 4.5143173929811036e-05,
"loss": 0.1863,
"step": 4530
},
{
"epoch": 0.32,
"grad_norm": 208686.890625,
"learning_rate": 4.513112225221751e-05,
"loss": 0.1961,
"step": 4540
},
{
"epoch": 0.33,
"grad_norm": 344615.53125,
"learning_rate": 4.511907057462399e-05,
"loss": 0.2392,
"step": 4550
},
{
"epoch": 0.33,
"grad_norm": 542719.9375,
"learning_rate": 4.5107018897030466e-05,
"loss": 0.2497,
"step": 4560
},
{
"epoch": 0.33,
"grad_norm": 308383.09375,
"learning_rate": 4.509496721943695e-05,
"loss": 0.1903,
"step": 4570
},
{
"epoch": 0.33,
"grad_norm": 246131.765625,
"learning_rate": 4.508291554184343e-05,
"loss": 0.2518,
"step": 4580
},
{
"epoch": 0.33,
"grad_norm": 441606.1875,
"learning_rate": 4.507086386424991e-05,
"loss": 0.2086,
"step": 4590
},
{
"epoch": 0.33,
"grad_norm": 181527.40625,
"learning_rate": 4.5058812186656384e-05,
"loss": 0.2475,
"step": 4600
},
{
"epoch": 0.33,
"eval_accuracy": 0.9035472299721005,
"eval_f1": 0.9030499023997222,
"eval_loss": 0.23530976474285126,
"eval_precision": 0.9051551236736851,
"eval_recall": 0.9035472299721005,
"eval_runtime": 12.5089,
"eval_samples_per_second": 200.577,
"eval_steps_per_second": 3.198,
"step": 4600
},
{
"epoch": 0.33,
"grad_norm": 202878.53125,
"learning_rate": 4.5046760509062865e-05,
"loss": 0.2055,
"step": 4610
},
{
"epoch": 0.33,
"grad_norm": 233816.40625,
"learning_rate": 4.503470883146934e-05,
"loss": 0.1779,
"step": 4620
},
{
"epoch": 0.33,
"grad_norm": 177604.296875,
"learning_rate": 4.502265715387582e-05,
"loss": 0.2305,
"step": 4630
},
{
"epoch": 0.33,
"grad_norm": 364165.34375,
"learning_rate": 4.50106054762823e-05,
"loss": 0.2123,
"step": 4640
},
{
"epoch": 0.33,
"grad_norm": 155471.65625,
"learning_rate": 4.4998553798688784e-05,
"loss": 0.1939,
"step": 4650
},
{
"epoch": 0.33,
"grad_norm": 337500.03125,
"learning_rate": 4.498650212109526e-05,
"loss": 0.2682,
"step": 4660
},
{
"epoch": 0.33,
"grad_norm": 128476.1171875,
"learning_rate": 4.497445044350174e-05,
"loss": 0.1988,
"step": 4670
},
{
"epoch": 0.33,
"grad_norm": 275538.78125,
"learning_rate": 4.4962398765908214e-05,
"loss": 0.2333,
"step": 4680
},
{
"epoch": 0.34,
"grad_norm": 249258.46875,
"learning_rate": 4.4950347088314695e-05,
"loss": 0.1944,
"step": 4690
},
{
"epoch": 0.34,
"grad_norm": 266623.78125,
"learning_rate": 4.4938295410721176e-05,
"loss": 0.1928,
"step": 4700
},
{
"epoch": 0.34,
"eval_accuracy": 0.894380231167796,
"eval_f1": 0.8933321704334167,
"eval_loss": 0.2986622750759125,
"eval_precision": 0.8992220854769614,
"eval_recall": 0.894380231167796,
"eval_runtime": 12.4701,
"eval_samples_per_second": 201.202,
"eval_steps_per_second": 3.208,
"step": 4700
},
{
"epoch": 0.34,
"grad_norm": 536207.0,
"learning_rate": 4.492624373312766e-05,
"loss": 0.2723,
"step": 4710
},
{
"epoch": 0.34,
"grad_norm": 152929.5,
"learning_rate": 4.491419205553413e-05,
"loss": 0.204,
"step": 4720
},
{
"epoch": 0.34,
"grad_norm": 249052.25,
"learning_rate": 4.490214037794061e-05,
"loss": 0.2062,
"step": 4730
},
{
"epoch": 0.34,
"grad_norm": 420176.1875,
"learning_rate": 4.489008870034709e-05,
"loss": 0.243,
"step": 4740
},
{
"epoch": 0.34,
"grad_norm": 225057.75,
"learning_rate": 4.487803702275357e-05,
"loss": 0.2047,
"step": 4750
},
{
"epoch": 0.34,
"grad_norm": 322600.46875,
"learning_rate": 4.486598534516005e-05,
"loss": 0.2563,
"step": 4760
},
{
"epoch": 0.34,
"grad_norm": 215948.890625,
"learning_rate": 4.4853933667566525e-05,
"loss": 0.2187,
"step": 4770
},
{
"epoch": 0.34,
"grad_norm": 304164.59375,
"learning_rate": 4.4841881989973006e-05,
"loss": 0.1994,
"step": 4780
},
{
"epoch": 0.34,
"grad_norm": 435468.0625,
"learning_rate": 4.482983031237948e-05,
"loss": 0.2137,
"step": 4790
},
{
"epoch": 0.34,
"grad_norm": 185575.71875,
"learning_rate": 4.481777863478596e-05,
"loss": 0.2008,
"step": 4800
},
{
"epoch": 0.34,
"eval_accuracy": 0.876046233559187,
"eval_f1": 0.8735264940770283,
"eval_loss": 0.299306720495224,
"eval_precision": 0.8896518291860559,
"eval_recall": 0.876046233559187,
"eval_runtime": 12.4793,
"eval_samples_per_second": 201.052,
"eval_steps_per_second": 3.205,
"step": 4800
},
{
"epoch": 0.34,
"grad_norm": 115670.9296875,
"learning_rate": 4.480572695719244e-05,
"loss": 0.1804,
"step": 4810
},
{
"epoch": 0.34,
"grad_norm": 192260.875,
"learning_rate": 4.4793675279598924e-05,
"loss": 0.2982,
"step": 4820
},
{
"epoch": 0.35,
"grad_norm": 145098.75,
"learning_rate": 4.47816236020054e-05,
"loss": 0.2516,
"step": 4830
},
{
"epoch": 0.35,
"grad_norm": 107989.4609375,
"learning_rate": 4.476957192441188e-05,
"loss": 0.1921,
"step": 4840
},
{
"epoch": 0.35,
"grad_norm": 378746.71875,
"learning_rate": 4.4757520246818355e-05,
"loss": 0.2296,
"step": 4850
},
{
"epoch": 0.35,
"grad_norm": 236829.796875,
"learning_rate": 4.474546856922484e-05,
"loss": 0.2355,
"step": 4860
},
{
"epoch": 0.35,
"grad_norm": 226571.203125,
"learning_rate": 4.473341689163132e-05,
"loss": 0.2038,
"step": 4870
},
{
"epoch": 0.35,
"grad_norm": 206681.625,
"learning_rate": 4.47213652140378e-05,
"loss": 0.2086,
"step": 4880
},
{
"epoch": 0.35,
"grad_norm": 347856.59375,
"learning_rate": 4.470931353644427e-05,
"loss": 0.2322,
"step": 4890
},
{
"epoch": 0.35,
"grad_norm": 251549.421875,
"learning_rate": 4.4697261858850754e-05,
"loss": 0.22,
"step": 4900
},
{
"epoch": 0.35,
"eval_accuracy": 0.9035472299721005,
"eval_f1": 0.9032851922362077,
"eval_loss": 0.24311725795269012,
"eval_precision": 0.9039388604136059,
"eval_recall": 0.9035472299721005,
"eval_runtime": 12.509,
"eval_samples_per_second": 200.576,
"eval_steps_per_second": 3.198,
"step": 4900
},
{
"epoch": 0.35,
"grad_norm": 365455.96875,
"learning_rate": 4.468521018125723e-05,
"loss": 0.1983,
"step": 4910
},
{
"epoch": 0.35,
"grad_norm": 135693.59375,
"learning_rate": 4.4673158503663717e-05,
"loss": 0.1764,
"step": 4920
},
{
"epoch": 0.35,
"grad_norm": 332106.21875,
"learning_rate": 4.466110682607019e-05,
"loss": 0.2139,
"step": 4930
},
{
"epoch": 0.35,
"grad_norm": 162781.546875,
"learning_rate": 4.464905514847667e-05,
"loss": 0.2205,
"step": 4940
},
{
"epoch": 0.35,
"grad_norm": 167004.28125,
"learning_rate": 4.463700347088315e-05,
"loss": 0.2255,
"step": 4950
},
{
"epoch": 0.35,
"grad_norm": 217623.34375,
"learning_rate": 4.462495179328963e-05,
"loss": 0.2438,
"step": 4960
},
{
"epoch": 0.36,
"grad_norm": 191213.296875,
"learning_rate": 4.46129001156961e-05,
"loss": 0.2283,
"step": 4970
},
{
"epoch": 0.36,
"grad_norm": 244383.078125,
"learning_rate": 4.460084843810259e-05,
"loss": 0.2034,
"step": 4980
},
{
"epoch": 0.36,
"grad_norm": 260674.09375,
"learning_rate": 4.4588796760509065e-05,
"loss": 0.1769,
"step": 4990
},
{
"epoch": 0.36,
"grad_norm": 363888.0625,
"learning_rate": 4.4576745082915546e-05,
"loss": 0.1844,
"step": 5000
},
{
"epoch": 0.36,
"eval_accuracy": 0.917098445595855,
"eval_f1": 0.9170851985417201,
"eval_loss": 0.2590126693248749,
"eval_precision": 0.9170766527573011,
"eval_recall": 0.917098445595855,
"eval_runtime": 12.478,
"eval_samples_per_second": 201.074,
"eval_steps_per_second": 3.206,
"step": 5000
},
{
"epoch": 0.36,
"grad_norm": 219406.3125,
"learning_rate": 4.456469340532202e-05,
"loss": 0.2647,
"step": 5010
},
{
"epoch": 0.36,
"grad_norm": 289715.0,
"learning_rate": 4.45526417277285e-05,
"loss": 0.2315,
"step": 5020
},
{
"epoch": 0.36,
"grad_norm": 300951.78125,
"learning_rate": 4.4540590050134977e-05,
"loss": 0.2044,
"step": 5030
},
{
"epoch": 0.36,
"grad_norm": 228631.484375,
"learning_rate": 4.4528538372541465e-05,
"loss": 0.1395,
"step": 5040
},
{
"epoch": 0.36,
"grad_norm": 258748.0625,
"learning_rate": 4.451648669494794e-05,
"loss": 0.2106,
"step": 5050
},
{
"epoch": 0.36,
"grad_norm": 261900.28125,
"learning_rate": 4.450443501735442e-05,
"loss": 0.1891,
"step": 5060
},
{
"epoch": 0.36,
"grad_norm": 567360.5,
"learning_rate": 4.4492383339760895e-05,
"loss": 0.1796,
"step": 5070
},
{
"epoch": 0.36,
"grad_norm": 247367.140625,
"learning_rate": 4.4480331662167376e-05,
"loss": 0.2354,
"step": 5080
},
{
"epoch": 0.36,
"grad_norm": 180764.234375,
"learning_rate": 4.446827998457386e-05,
"loss": 0.2308,
"step": 5090
},
{
"epoch": 0.36,
"grad_norm": 143473.34375,
"learning_rate": 4.445622830698034e-05,
"loss": 0.2235,
"step": 5100
},
{
"epoch": 0.36,
"eval_accuracy": 0.904742925468314,
"eval_f1": 0.9041326842192928,
"eval_loss": 0.2420862317085266,
"eval_precision": 0.9071727983486801,
"eval_recall": 0.904742925468314,
"eval_runtime": 12.4566,
"eval_samples_per_second": 201.419,
"eval_steps_per_second": 3.211,
"step": 5100
},
{
"epoch": 0.37,
"grad_norm": 142405.03125,
"learning_rate": 4.444417662938681e-05,
"loss": 0.2276,
"step": 5110
},
{
"epoch": 0.37,
"grad_norm": 245076.640625,
"learning_rate": 4.4432124951793294e-05,
"loss": 0.2051,
"step": 5120
},
{
"epoch": 0.37,
"grad_norm": 313735.53125,
"learning_rate": 4.442007327419977e-05,
"loss": 0.2259,
"step": 5130
},
{
"epoch": 0.37,
"grad_norm": 423567.96875,
"learning_rate": 4.440802159660625e-05,
"loss": 0.2329,
"step": 5140
},
{
"epoch": 0.37,
"grad_norm": 220593.828125,
"learning_rate": 4.439596991901273e-05,
"loss": 0.1839,
"step": 5150
},
{
"epoch": 0.37,
"grad_norm": 300864.0,
"learning_rate": 4.4383918241419206e-05,
"loss": 0.2598,
"step": 5160
},
{
"epoch": 0.37,
"grad_norm": 338582.8125,
"learning_rate": 4.437186656382569e-05,
"loss": 0.2564,
"step": 5170
},
{
"epoch": 0.37,
"grad_norm": 225039.421875,
"learning_rate": 4.435981488623216e-05,
"loss": 0.2375,
"step": 5180
},
{
"epoch": 0.37,
"grad_norm": 306696.875,
"learning_rate": 4.434776320863864e-05,
"loss": 0.1867,
"step": 5190
},
{
"epoch": 0.37,
"grad_norm": 175457.875,
"learning_rate": 4.4335711531045124e-05,
"loss": 0.2222,
"step": 5200
},
{
"epoch": 0.37,
"eval_accuracy": 0.8947787963332005,
"eval_f1": 0.8940640494290631,
"eval_loss": 0.2958182394504547,
"eval_precision": 0.8972950995667646,
"eval_recall": 0.8947787963332005,
"eval_runtime": 12.4883,
"eval_samples_per_second": 200.907,
"eval_steps_per_second": 3.203,
"step": 5200
},
{
"epoch": 0.37,
"grad_norm": 274502.125,
"learning_rate": 4.4323659853451605e-05,
"loss": 0.2213,
"step": 5210
},
{
"epoch": 0.37,
"grad_norm": 143913.453125,
"learning_rate": 4.431160817585808e-05,
"loss": 0.2485,
"step": 5220
},
{
"epoch": 0.37,
"grad_norm": 134518.640625,
"learning_rate": 4.429955649826456e-05,
"loss": 0.215,
"step": 5230
},
{
"epoch": 0.37,
"grad_norm": 203820.625,
"learning_rate": 4.4287504820671036e-05,
"loss": 0.2146,
"step": 5240
},
{
"epoch": 0.38,
"grad_norm": 322863.5625,
"learning_rate": 4.427545314307752e-05,
"loss": 0.2328,
"step": 5250
},
{
"epoch": 0.38,
"grad_norm": 356002.96875,
"learning_rate": 4.4263401465484e-05,
"loss": 0.2083,
"step": 5260
},
{
"epoch": 0.38,
"grad_norm": 703977.0625,
"learning_rate": 4.425134978789048e-05,
"loss": 0.1654,
"step": 5270
},
{
"epoch": 0.38,
"grad_norm": 221094.734375,
"learning_rate": 4.4239298110296954e-05,
"loss": 0.2014,
"step": 5280
},
{
"epoch": 0.38,
"grad_norm": 273416.5625,
"learning_rate": 4.4227246432703435e-05,
"loss": 0.2901,
"step": 5290
},
{
"epoch": 0.38,
"grad_norm": 188345.46875,
"learning_rate": 4.421519475510991e-05,
"loss": 0.2241,
"step": 5300
},
{
"epoch": 0.38,
"eval_accuracy": 0.9210840972499004,
"eval_f1": 0.9208697027387154,
"eval_loss": 0.2031262218952179,
"eval_precision": 0.9215894170459646,
"eval_recall": 0.9210840972499004,
"eval_runtime": 16.2579,
"eval_samples_per_second": 154.325,
"eval_steps_per_second": 2.46,
"step": 5300
}
],
"logging_steps": 10,
"max_steps": 41988,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 100,
"total_flos": 4.46240356466688e+16,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}