{ "best_metric": 0.8990687138182734, "best_model_checkpoint": "vit-cxr4-384/checkpoint-2400", "epoch": 2.0, "eval_steps": 100, "global_step": 2552, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 4.982366771159875e-05, "loss": 0.7452, "step": 10 }, { "epoch": 0.02, "learning_rate": 4.9627742946708465e-05, "loss": 0.5821, "step": 20 }, { "epoch": 0.02, "learning_rate": 4.943181818181818e-05, "loss": 0.5277, "step": 30 }, { "epoch": 0.03, "learning_rate": 4.92358934169279e-05, "loss": 0.5076, "step": 40 }, { "epoch": 0.04, "learning_rate": 4.903996865203762e-05, "loss": 0.4483, "step": 50 }, { "epoch": 0.05, "learning_rate": 4.8844043887147336e-05, "loss": 0.5492, "step": 60 }, { "epoch": 0.05, "learning_rate": 4.864811912225706e-05, "loss": 0.3997, "step": 70 }, { "epoch": 0.06, "learning_rate": 4.845219435736678e-05, "loss": 0.4182, "step": 80 }, { "epoch": 0.07, "learning_rate": 4.827586206896552e-05, "loss": 0.4935, "step": 90 }, { "epoch": 0.08, "learning_rate": 4.807993730407524e-05, "loss": 0.3694, "step": 100 }, { "epoch": 0.08, "eval_accuracy": 0.8233603344656388, "eval_f1": 0.8251422659079152, "eval_loss": 0.3850725293159485, "eval_precision": 0.8075949367088607, "eval_recall": 0.8434690639873083, "eval_runtime": 172.1875, "eval_samples_per_second": 22.226, "eval_steps_per_second": 0.929, "step": 100 }, { "epoch": 0.09, "learning_rate": 4.788401253918495e-05, "loss": 0.3867, "step": 110 }, { "epoch": 0.09, "learning_rate": 4.768808777429467e-05, "loss": 0.4248, "step": 120 }, { "epoch": 0.1, "learning_rate": 4.749216300940439e-05, "loss": 0.331, "step": 130 }, { "epoch": 0.11, "learning_rate": 4.7296238244514106e-05, "loss": 0.3978, "step": 140 }, { "epoch": 0.12, "learning_rate": 4.7100313479623823e-05, "loss": 0.3176, "step": 150 }, { "epoch": 0.13, "learning_rate": 4.690438871473354e-05, "loss": 0.5361, "step": 160 }, { "epoch": 0.13, "learning_rate": 4.670846394984326e-05, "loss": 0.3897, "step": 170 }, { "epoch": 0.14, "learning_rate": 4.651253918495298e-05, "loss": 0.3066, "step": 180 }, { "epoch": 0.15, "learning_rate": 4.63166144200627e-05, "loss": 0.3042, "step": 190 }, { "epoch": 0.16, "learning_rate": 4.612068965517242e-05, "loss": 0.3084, "step": 200 }, { "epoch": 0.16, "eval_accuracy": 0.8364253984844525, "eval_f1": 0.8566193311956023, "eval_loss": 0.4478375017642975, "eval_precision": 0.7555555555555555, "eval_recall": 0.9888947646747752, "eval_runtime": 168.684, "eval_samples_per_second": 22.687, "eval_steps_per_second": 0.949, "step": 200 }, { "epoch": 0.16, "learning_rate": 4.592476489028214e-05, "loss": 0.4338, "step": 210 }, { "epoch": 0.17, "learning_rate": 4.5728840125391855e-05, "loss": 0.3653, "step": 220 }, { "epoch": 0.18, "learning_rate": 4.553291536050157e-05, "loss": 0.3125, "step": 230 }, { "epoch": 0.19, "learning_rate": 4.533699059561129e-05, "loss": 0.4054, "step": 240 }, { "epoch": 0.2, "learning_rate": 4.514106583072101e-05, "loss": 0.3616, "step": 250 }, { "epoch": 0.2, "learning_rate": 4.4945141065830726e-05, "loss": 0.3286, "step": 260 }, { "epoch": 0.21, "learning_rate": 4.474921630094044e-05, "loss": 0.3067, "step": 270 }, { "epoch": 0.22, "learning_rate": 4.4553291536050155e-05, "loss": 0.3698, "step": 280 }, { "epoch": 0.23, "learning_rate": 4.435736677115987e-05, "loss": 0.3418, "step": 290 }, { "epoch": 0.24, "learning_rate": 4.416144200626959e-05, "loss": 0.3177, "step": 300 }, { "epoch": 0.24, "eval_accuracy": 0.8651685393258427, "eval_f1": 0.8752417794970986, "eval_loss": 0.30726760625839233, "eval_precision": 0.8062360801781737, "eval_recall": 0.9571655208884188, "eval_runtime": 167.4202, "eval_samples_per_second": 22.859, "eval_steps_per_second": 0.956, "step": 300 }, { "epoch": 0.24, "learning_rate": 4.396551724137931e-05, "loss": 0.2805, "step": 310 }, { "epoch": 0.25, "learning_rate": 4.3769592476489026e-05, "loss": 0.3783, "step": 320 }, { "epoch": 0.26, "learning_rate": 4.357366771159875e-05, "loss": 0.2961, "step": 330 }, { "epoch": 0.27, "learning_rate": 4.337774294670847e-05, "loss": 0.2732, "step": 340 }, { "epoch": 0.27, "learning_rate": 4.318181818181819e-05, "loss": 0.2852, "step": 350 }, { "epoch": 0.28, "learning_rate": 4.2985893416927904e-05, "loss": 0.307, "step": 360 }, { "epoch": 0.29, "learning_rate": 4.278996865203762e-05, "loss": 0.3855, "step": 370 }, { "epoch": 0.3, "learning_rate": 4.259404388714734e-05, "loss": 0.3382, "step": 380 }, { "epoch": 0.31, "learning_rate": 4.239811912225706e-05, "loss": 0.2779, "step": 390 }, { "epoch": 0.31, "learning_rate": 4.2202194357366776e-05, "loss": 0.3413, "step": 400 }, { "epoch": 0.31, "eval_accuracy": 0.8677815521296054, "eval_f1": 0.8757977417771232, "eval_loss": 0.2936372458934784, "eval_precision": 0.8172240036646816, "eval_recall": 0.9434161819143311, "eval_runtime": 166.7592, "eval_samples_per_second": 22.949, "eval_steps_per_second": 0.959, "step": 400 }, { "epoch": 0.32, "learning_rate": 4.2006269592476494e-05, "loss": 0.266, "step": 410 }, { "epoch": 0.33, "learning_rate": 4.1810344827586205e-05, "loss": 0.3177, "step": 420 }, { "epoch": 0.34, "learning_rate": 4.161442006269592e-05, "loss": 0.2613, "step": 430 }, { "epoch": 0.34, "learning_rate": 4.141849529780564e-05, "loss": 0.3307, "step": 440 }, { "epoch": 0.35, "learning_rate": 4.122257053291536e-05, "loss": 0.2463, "step": 450 }, { "epoch": 0.36, "learning_rate": 4.1026645768025076e-05, "loss": 0.2588, "step": 460 }, { "epoch": 0.37, "learning_rate": 4.08307210031348e-05, "loss": 0.4268, "step": 470 }, { "epoch": 0.38, "learning_rate": 4.063479623824452e-05, "loss": 0.3498, "step": 480 }, { "epoch": 0.38, "learning_rate": 4.0438871473354236e-05, "loss": 0.3102, "step": 490 }, { "epoch": 0.39, "learning_rate": 4.0242946708463954e-05, "loss": 0.2612, "step": 500 }, { "epoch": 0.39, "eval_accuracy": 0.8709171674941207, "eval_f1": 0.8747464503042597, "eval_loss": 0.29362666606903076, "eval_precision": 0.8402338041889917, "eval_recall": 0.9122157588577472, "eval_runtime": 166.9694, "eval_samples_per_second": 22.92, "eval_steps_per_second": 0.958, "step": 500 }, { "epoch": 0.4, "learning_rate": 4.004702194357367e-05, "loss": 0.3364, "step": 510 }, { "epoch": 0.41, "learning_rate": 3.985109717868339e-05, "loss": 0.2959, "step": 520 }, { "epoch": 0.42, "learning_rate": 3.965517241379311e-05, "loss": 0.289, "step": 530 }, { "epoch": 0.42, "learning_rate": 3.9459247648902825e-05, "loss": 0.3726, "step": 540 }, { "epoch": 0.43, "learning_rate": 3.926332288401254e-05, "loss": 0.2789, "step": 550 }, { "epoch": 0.44, "learning_rate": 3.906739811912226e-05, "loss": 0.3125, "step": 560 }, { "epoch": 0.45, "learning_rate": 3.887147335423197e-05, "loss": 0.3008, "step": 570 }, { "epoch": 0.45, "learning_rate": 3.867554858934169e-05, "loss": 0.3505, "step": 580 }, { "epoch": 0.46, "learning_rate": 3.847962382445141e-05, "loss": 0.3043, "step": 590 }, { "epoch": 0.47, "learning_rate": 3.828369905956113e-05, "loss": 0.3607, "step": 600 }, { "epoch": 0.47, "eval_accuracy": 0.876927096942775, "eval_f1": 0.8852059468681452, "eval_loss": 0.2716532349586487, "eval_precision": 0.8209764918625678, "eval_recall": 0.9603384452670545, "eval_runtime": 165.7826, "eval_samples_per_second": 23.084, "eval_steps_per_second": 0.965, "step": 600 }, { "epoch": 0.48, "learning_rate": 3.808777429467085e-05, "loss": 0.3092, "step": 610 }, { "epoch": 0.49, "learning_rate": 3.789184952978057e-05, "loss": 0.2916, "step": 620 }, { "epoch": 0.49, "learning_rate": 3.7695924764890286e-05, "loss": 0.2446, "step": 630 }, { "epoch": 0.5, "learning_rate": 3.7500000000000003e-05, "loss": 0.3083, "step": 640 }, { "epoch": 0.51, "learning_rate": 3.730407523510972e-05, "loss": 0.2633, "step": 650 }, { "epoch": 0.52, "learning_rate": 3.710815047021944e-05, "loss": 0.2867, "step": 660 }, { "epoch": 0.53, "learning_rate": 3.691222570532916e-05, "loss": 0.2652, "step": 670 }, { "epoch": 0.53, "learning_rate": 3.6716300940438875e-05, "loss": 0.2317, "step": 680 }, { "epoch": 0.54, "learning_rate": 3.652037617554859e-05, "loss": 0.2298, "step": 690 }, { "epoch": 0.55, "learning_rate": 3.632445141065831e-05, "loss": 0.274, "step": 700 }, { "epoch": 0.55, "eval_accuracy": 0.8719623726156258, "eval_f1": 0.8765120967741936, "eval_loss": 0.2875473201274872, "eval_precision": 0.8372652864708714, "eval_recall": 0.9196192490745637, "eval_runtime": 165.0637, "eval_samples_per_second": 23.185, "eval_steps_per_second": 0.969, "step": 700 }, { "epoch": 0.56, "learning_rate": 3.612852664576803e-05, "loss": 0.2862, "step": 710 }, { "epoch": 0.56, "learning_rate": 3.593260188087774e-05, "loss": 0.2746, "step": 720 }, { "epoch": 0.57, "learning_rate": 3.573667711598746e-05, "loss": 0.2948, "step": 730 }, { "epoch": 0.58, "learning_rate": 3.554075235109718e-05, "loss": 0.2878, "step": 740 }, { "epoch": 0.59, "learning_rate": 3.53448275862069e-05, "loss": 0.2381, "step": 750 }, { "epoch": 0.6, "learning_rate": 3.514890282131662e-05, "loss": 0.2854, "step": 760 }, { "epoch": 0.6, "learning_rate": 3.4952978056426335e-05, "loss": 0.2571, "step": 770 }, { "epoch": 0.61, "learning_rate": 3.475705329153605e-05, "loss": 0.2587, "step": 780 }, { "epoch": 0.62, "learning_rate": 3.456112852664577e-05, "loss": 0.2867, "step": 790 }, { "epoch": 0.63, "learning_rate": 3.436520376175549e-05, "loss": 0.3127, "step": 800 }, { "epoch": 0.63, "eval_accuracy": 0.8761431931016462, "eval_f1": 0.8853965183752417, "eval_loss": 0.26641419529914856, "eval_precision": 0.8155902004454343, "eval_recall": 0.9682707562136436, "eval_runtime": 175.8421, "eval_samples_per_second": 21.764, "eval_steps_per_second": 0.91, "step": 800 }, { "epoch": 0.63, "learning_rate": 3.4169278996865206e-05, "loss": 0.3393, "step": 810 }, { "epoch": 0.64, "learning_rate": 3.3973354231974924e-05, "loss": 0.2543, "step": 820 }, { "epoch": 0.65, "learning_rate": 3.377742946708464e-05, "loss": 0.2367, "step": 830 }, { "epoch": 0.66, "learning_rate": 3.358150470219436e-05, "loss": 0.3035, "step": 840 }, { "epoch": 0.67, "learning_rate": 3.338557993730408e-05, "loss": 0.3645, "step": 850 }, { "epoch": 0.67, "learning_rate": 3.3189655172413796e-05, "loss": 0.2589, "step": 860 }, { "epoch": 0.68, "learning_rate": 3.299373040752351e-05, "loss": 0.244, "step": 870 }, { "epoch": 0.69, "learning_rate": 3.279780564263323e-05, "loss": 0.2688, "step": 880 }, { "epoch": 0.7, "learning_rate": 3.260188087774295e-05, "loss": 0.2616, "step": 890 }, { "epoch": 0.71, "learning_rate": 3.240595611285267e-05, "loss": 0.2875, "step": 900 }, { "epoch": 0.71, "eval_accuracy": 0.8771883982231513, "eval_f1": 0.8825, "eval_loss": 0.2643195688724518, "eval_precision": 0.8368895211000474, "eval_recall": 0.9333685880486515, "eval_runtime": 172.9293, "eval_samples_per_second": 22.13, "eval_steps_per_second": 0.925, "step": 900 }, { "epoch": 0.71, "learning_rate": 3.2210031347962385e-05, "loss": 0.2741, "step": 910 }, { "epoch": 0.72, "learning_rate": 3.20141065830721e-05, "loss": 0.2502, "step": 920 }, { "epoch": 0.73, "learning_rate": 3.181818181818182e-05, "loss": 0.2983, "step": 930 }, { "epoch": 0.74, "learning_rate": 3.162225705329154e-05, "loss": 0.26, "step": 940 }, { "epoch": 0.74, "learning_rate": 3.1426332288401256e-05, "loss": 0.2931, "step": 950 }, { "epoch": 0.75, "learning_rate": 3.1230407523510974e-05, "loss": 0.2665, "step": 960 }, { "epoch": 0.76, "learning_rate": 3.103448275862069e-05, "loss": 0.2586, "step": 970 }, { "epoch": 0.77, "learning_rate": 3.083855799373041e-05, "loss": 0.2245, "step": 980 }, { "epoch": 0.78, "learning_rate": 3.064263322884013e-05, "loss": 0.2973, "step": 990 }, { "epoch": 0.78, "learning_rate": 3.044670846394985e-05, "loss": 0.2652, "step": 1000 }, { "epoch": 0.78, "eval_accuracy": 0.8745753854193885, "eval_f1": 0.8841139546112989, "eval_loss": 0.2659239172935486, "eval_precision": 0.8134162594402488, "eval_recall": 0.9682707562136436, "eval_runtime": 172.1185, "eval_samples_per_second": 22.235, "eval_steps_per_second": 0.93, "step": 1000 }, { "epoch": 0.79, "learning_rate": 3.0250783699059566e-05, "loss": 0.2408, "step": 1010 }, { "epoch": 0.8, "learning_rate": 3.0054858934169277e-05, "loss": 0.284, "step": 1020 }, { "epoch": 0.81, "learning_rate": 2.9858934169278995e-05, "loss": 0.2906, "step": 1030 }, { "epoch": 0.82, "learning_rate": 2.9663009404388713e-05, "loss": 0.2415, "step": 1040 }, { "epoch": 0.82, "learning_rate": 2.946708463949843e-05, "loss": 0.2642, "step": 1050 }, { "epoch": 0.83, "learning_rate": 2.9271159874608152e-05, "loss": 0.2606, "step": 1060 }, { "epoch": 0.84, "learning_rate": 2.907523510971787e-05, "loss": 0.3017, "step": 1070 }, { "epoch": 0.85, "learning_rate": 2.8879310344827588e-05, "loss": 0.2525, "step": 1080 }, { "epoch": 0.85, "learning_rate": 2.8683385579937305e-05, "loss": 0.2373, "step": 1090 }, { "epoch": 0.86, "learning_rate": 2.8487460815047023e-05, "loss": 0.2661, "step": 1100 }, { "epoch": 0.86, "eval_accuracy": 0.8792788084661615, "eval_f1": 0.885473475458602, "eval_loss": 0.2590779662132263, "eval_precision": 0.8334111059262715, "eval_recall": 0.9444738233738763, "eval_runtime": 172.4587, "eval_samples_per_second": 22.191, "eval_steps_per_second": 0.928, "step": 1100 }, { "epoch": 0.87, "learning_rate": 2.829153605015674e-05, "loss": 0.2587, "step": 1110 }, { "epoch": 0.88, "learning_rate": 2.809561128526646e-05, "loss": 0.2987, "step": 1120 }, { "epoch": 0.89, "learning_rate": 2.7899686520376177e-05, "loss": 0.2422, "step": 1130 }, { "epoch": 0.89, "learning_rate": 2.7703761755485898e-05, "loss": 0.2469, "step": 1140 }, { "epoch": 0.9, "learning_rate": 2.7507836990595616e-05, "loss": 0.2884, "step": 1150 }, { "epoch": 0.91, "learning_rate": 2.7311912225705334e-05, "loss": 0.264, "step": 1160 }, { "epoch": 0.92, "learning_rate": 2.711598746081505e-05, "loss": 0.3332, "step": 1170 }, { "epoch": 0.92, "learning_rate": 2.6920062695924762e-05, "loss": 0.241, "step": 1180 }, { "epoch": 0.93, "learning_rate": 2.672413793103448e-05, "loss": 0.2405, "step": 1190 }, { "epoch": 0.94, "learning_rate": 2.65282131661442e-05, "loss": 0.3019, "step": 1200 }, { "epoch": 0.94, "eval_accuracy": 0.8756205905408937, "eval_f1": 0.8723175965665235, "eval_loss": 0.27287837862968445, "eval_precision": 0.8851388132825259, "eval_recall": 0.8598625066102591, "eval_runtime": 172.6345, "eval_samples_per_second": 22.168, "eval_steps_per_second": 0.927, "step": 1200 }, { "epoch": 0.95, "learning_rate": 2.633228840125392e-05, "loss": 0.241, "step": 1210 }, { "epoch": 0.96, "learning_rate": 2.6136363636363637e-05, "loss": 0.1991, "step": 1220 }, { "epoch": 0.96, "learning_rate": 2.5940438871473355e-05, "loss": 0.2499, "step": 1230 }, { "epoch": 0.97, "learning_rate": 2.5744514106583073e-05, "loss": 0.335, "step": 1240 }, { "epoch": 0.98, "learning_rate": 2.554858934169279e-05, "loss": 0.2865, "step": 1250 }, { "epoch": 0.99, "learning_rate": 2.535266457680251e-05, "loss": 0.2706, "step": 1260 }, { "epoch": 1.0, "learning_rate": 2.5156739811912226e-05, "loss": 0.3153, "step": 1270 }, { "epoch": 1.0, "learning_rate": 2.4960815047021947e-05, "loss": 0.2667, "step": 1280 }, { "epoch": 1.01, "learning_rate": 2.4764890282131662e-05, "loss": 0.2463, "step": 1290 }, { "epoch": 1.02, "learning_rate": 2.456896551724138e-05, "loss": 0.229, "step": 1300 }, { "epoch": 1.02, "eval_accuracy": 0.8871178468774497, "eval_f1": 0.8937007874015748, "eval_loss": 0.25479769706726074, "eval_precision": 0.8357109986194201, "eval_recall": 0.9603384452670545, "eval_runtime": 173.7953, "eval_samples_per_second": 22.02, "eval_steps_per_second": 0.921, "step": 1300 }, { "epoch": 1.03, "learning_rate": 2.4373040752351098e-05, "loss": 0.2328, "step": 1310 }, { "epoch": 1.03, "learning_rate": 2.4177115987460815e-05, "loss": 0.1678, "step": 1320 }, { "epoch": 1.04, "learning_rate": 2.3981191222570533e-05, "loss": 0.2616, "step": 1330 }, { "epoch": 1.05, "learning_rate": 2.378526645768025e-05, "loss": 0.2398, "step": 1340 }, { "epoch": 1.06, "learning_rate": 2.3589341692789972e-05, "loss": 0.275, "step": 1350 }, { "epoch": 1.07, "learning_rate": 2.339341692789969e-05, "loss": 0.1851, "step": 1360 }, { "epoch": 1.07, "learning_rate": 2.3197492163009404e-05, "loss": 0.2828, "step": 1370 }, { "epoch": 1.08, "learning_rate": 2.3001567398119122e-05, "loss": 0.2727, "step": 1380 }, { "epoch": 1.09, "learning_rate": 2.280564263322884e-05, "loss": 0.259, "step": 1390 }, { "epoch": 1.1, "learning_rate": 2.2609717868338558e-05, "loss": 0.1841, "step": 1400 }, { "epoch": 1.1, "eval_accuracy": 0.8863339430363208, "eval_f1": 0.8890589135424637, "eval_loss": 0.24377945065498352, "eval_precision": 0.8586206896551725, "eval_recall": 0.9217345319936542, "eval_runtime": 176.1028, "eval_samples_per_second": 21.732, "eval_steps_per_second": 0.909, "step": 1400 }, { "epoch": 1.11, "learning_rate": 2.2413793103448276e-05, "loss": 0.2321, "step": 1410 }, { "epoch": 1.11, "learning_rate": 2.2217868338557997e-05, "loss": 0.2112, "step": 1420 }, { "epoch": 1.12, "learning_rate": 2.2021943573667715e-05, "loss": 0.1736, "step": 1430 }, { "epoch": 1.13, "learning_rate": 2.182601880877743e-05, "loss": 0.2211, "step": 1440 }, { "epoch": 1.14, "learning_rate": 2.1630094043887147e-05, "loss": 0.1855, "step": 1450 }, { "epoch": 1.14, "learning_rate": 2.1434169278996865e-05, "loss": 0.2346, "step": 1460 }, { "epoch": 1.15, "learning_rate": 2.1238244514106583e-05, "loss": 0.1662, "step": 1470 }, { "epoch": 1.16, "learning_rate": 2.10423197492163e-05, "loss": 0.2776, "step": 1480 }, { "epoch": 1.17, "learning_rate": 2.0846394984326022e-05, "loss": 0.2281, "step": 1490 }, { "epoch": 1.18, "learning_rate": 2.065047021943574e-05, "loss": 0.2257, "step": 1500 }, { "epoch": 1.18, "eval_accuracy": 0.8905147635223413, "eval_f1": 0.8930849706557795, "eval_loss": 0.2364816665649414, "eval_precision": 0.8629191321499013, "eval_recall": 0.9254362771020624, "eval_runtime": 173.2602, "eval_samples_per_second": 22.088, "eval_steps_per_second": 0.923, "step": 1500 }, { "epoch": 1.18, "learning_rate": 2.0454545454545457e-05, "loss": 0.2546, "step": 1510 }, { "epoch": 1.19, "learning_rate": 2.0258620689655172e-05, "loss": 0.139, "step": 1520 }, { "epoch": 1.2, "learning_rate": 2.006269592476489e-05, "loss": 0.2382, "step": 1530 }, { "epoch": 1.21, "learning_rate": 1.9866771159874607e-05, "loss": 0.2475, "step": 1540 }, { "epoch": 1.21, "learning_rate": 1.9670846394984325e-05, "loss": 0.262, "step": 1550 }, { "epoch": 1.22, "learning_rate": 1.9474921630094046e-05, "loss": 0.211, "step": 1560 }, { "epoch": 1.23, "learning_rate": 1.9278996865203764e-05, "loss": 0.2436, "step": 1570 }, { "epoch": 1.24, "learning_rate": 1.9083072100313482e-05, "loss": 0.1954, "step": 1580 }, { "epoch": 1.25, "learning_rate": 1.8887147335423197e-05, "loss": 0.174, "step": 1590 }, { "epoch": 1.25, "learning_rate": 1.8691222570532914e-05, "loss": 0.2217, "step": 1600 }, { "epoch": 1.25, "eval_accuracy": 0.8803240135876665, "eval_f1": 0.8773433315479378, "eval_loss": 0.25091663002967834, "eval_precision": 0.8887683125339121, "eval_recall": 0.8662083553675304, "eval_runtime": 168.9717, "eval_samples_per_second": 22.649, "eval_steps_per_second": 0.947, "step": 1600 }, { "epoch": 1.26, "learning_rate": 1.8495297805642632e-05, "loss": 0.2478, "step": 1610 }, { "epoch": 1.27, "learning_rate": 1.829937304075235e-05, "loss": 0.2255, "step": 1620 }, { "epoch": 1.28, "learning_rate": 1.810344827586207e-05, "loss": 0.1823, "step": 1630 }, { "epoch": 1.29, "learning_rate": 1.790752351097179e-05, "loss": 0.1716, "step": 1640 }, { "epoch": 1.29, "learning_rate": 1.7711598746081507e-05, "loss": 0.2434, "step": 1650 }, { "epoch": 1.3, "learning_rate": 1.7515673981191225e-05, "loss": 0.1945, "step": 1660 }, { "epoch": 1.31, "learning_rate": 1.731974921630094e-05, "loss": 0.1695, "step": 1670 }, { "epoch": 1.32, "learning_rate": 1.7123824451410657e-05, "loss": 0.181, "step": 1680 }, { "epoch": 1.32, "learning_rate": 1.6927899686520378e-05, "loss": 0.2047, "step": 1690 }, { "epoch": 1.33, "learning_rate": 1.6731974921630096e-05, "loss": 0.2619, "step": 1700 }, { "epoch": 1.33, "eval_accuracy": 0.8873791481578259, "eval_f1": 0.893711467324291, "eval_loss": 0.25879478454589844, "eval_precision": 0.8373382624768947, "eval_recall": 0.958223162347964, "eval_runtime": 167.0043, "eval_samples_per_second": 22.916, "eval_steps_per_second": 0.958, "step": 1700 }, { "epoch": 1.34, "learning_rate": 1.6536050156739814e-05, "loss": 0.2562, "step": 1710 }, { "epoch": 1.35, "learning_rate": 1.634012539184953e-05, "loss": 0.2448, "step": 1720 }, { "epoch": 1.36, "learning_rate": 1.614420062695925e-05, "loss": 0.2452, "step": 1730 }, { "epoch": 1.36, "learning_rate": 1.5948275862068967e-05, "loss": 0.225, "step": 1740 }, { "epoch": 1.37, "learning_rate": 1.5752351097178682e-05, "loss": 0.1817, "step": 1750 }, { "epoch": 1.38, "learning_rate": 1.5556426332288403e-05, "loss": 0.1978, "step": 1760 }, { "epoch": 1.39, "learning_rate": 1.536050156739812e-05, "loss": 0.1928, "step": 1770 }, { "epoch": 1.39, "learning_rate": 1.5164576802507839e-05, "loss": 0.2085, "step": 1780 }, { "epoch": 1.4, "learning_rate": 1.4968652037617556e-05, "loss": 0.1174, "step": 1790 }, { "epoch": 1.41, "learning_rate": 1.4772727272727274e-05, "loss": 0.2222, "step": 1800 }, { "epoch": 1.41, "eval_accuracy": 0.8907760648027175, "eval_f1": 0.8931492842535788, "eval_loss": 0.2520654797554016, "eval_precision": 0.8644235526966848, "eval_recall": 0.9238498149127445, "eval_runtime": 168.4799, "eval_samples_per_second": 22.715, "eval_steps_per_second": 0.95, "step": 1800 }, { "epoch": 1.42, "learning_rate": 1.4576802507836992e-05, "loss": 0.2499, "step": 1810 }, { "epoch": 1.43, "learning_rate": 1.4380877742946708e-05, "loss": 0.231, "step": 1820 }, { "epoch": 1.43, "learning_rate": 1.4184952978056426e-05, "loss": 0.2446, "step": 1830 }, { "epoch": 1.44, "learning_rate": 1.3989028213166144e-05, "loss": 0.2212, "step": 1840 }, { "epoch": 1.45, "learning_rate": 1.3793103448275863e-05, "loss": 0.1873, "step": 1850 }, { "epoch": 1.46, "learning_rate": 1.3597178683385581e-05, "loss": 0.1655, "step": 1860 }, { "epoch": 1.47, "learning_rate": 1.3401253918495299e-05, "loss": 0.1798, "step": 1870 }, { "epoch": 1.47, "learning_rate": 1.3205329153605017e-05, "loss": 0.212, "step": 1880 }, { "epoch": 1.48, "learning_rate": 1.3009404388714735e-05, "loss": 0.2339, "step": 1890 }, { "epoch": 1.49, "learning_rate": 1.281347962382445e-05, "loss": 0.2044, "step": 1900 }, { "epoch": 1.49, "eval_accuracy": 0.8899921609615887, "eval_f1": 0.895972325179145, "eval_loss": 0.2598423957824707, "eval_precision": 0.8409090909090909, "eval_recall": 0.9587519830777367, "eval_runtime": 168.1275, "eval_samples_per_second": 22.762, "eval_steps_per_second": 0.952, "step": 1900 }, { "epoch": 1.5, "learning_rate": 1.2617554858934169e-05, "loss": 0.1993, "step": 1910 }, { "epoch": 1.5, "learning_rate": 1.2421630094043888e-05, "loss": 0.2176, "step": 1920 }, { "epoch": 1.51, "learning_rate": 1.2225705329153606e-05, "loss": 0.1554, "step": 1930 }, { "epoch": 1.52, "learning_rate": 1.2029780564263324e-05, "loss": 0.2332, "step": 1940 }, { "epoch": 1.53, "learning_rate": 1.1833855799373042e-05, "loss": 0.1662, "step": 1950 }, { "epoch": 1.54, "learning_rate": 1.163793103448276e-05, "loss": 0.1932, "step": 1960 }, { "epoch": 1.54, "learning_rate": 1.1442006269592477e-05, "loss": 0.1996, "step": 1970 }, { "epoch": 1.55, "learning_rate": 1.1246081504702195e-05, "loss": 0.1516, "step": 1980 }, { "epoch": 1.56, "learning_rate": 1.1050156739811913e-05, "loss": 0.2517, "step": 1990 }, { "epoch": 1.57, "learning_rate": 1.085423197492163e-05, "loss": 0.2238, "step": 2000 }, { "epoch": 1.57, "eval_accuracy": 0.8764044943820225, "eval_f1": 0.8690838638250762, "eval_loss": 0.264072448015213, "eval_precision": 0.9117305458768873, "eval_recall": 0.8302485457429931, "eval_runtime": 166.9078, "eval_samples_per_second": 22.929, "eval_steps_per_second": 0.959, "step": 2000 }, { "epoch": 1.58, "learning_rate": 1.0658307210031348e-05, "loss": 0.2006, "step": 2010 }, { "epoch": 1.58, "learning_rate": 1.0462382445141066e-05, "loss": 0.1282, "step": 2020 }, { "epoch": 1.59, "learning_rate": 1.0266457680250784e-05, "loss": 0.1714, "step": 2030 }, { "epoch": 1.6, "learning_rate": 1.0070532915360502e-05, "loss": 0.1795, "step": 2040 }, { "epoch": 1.61, "learning_rate": 9.87460815047022e-06, "loss": 0.1915, "step": 2050 }, { "epoch": 1.61, "learning_rate": 9.678683385579938e-06, "loss": 0.2153, "step": 2060 }, { "epoch": 1.62, "learning_rate": 9.482758620689655e-06, "loss": 0.2176, "step": 2070 }, { "epoch": 1.63, "learning_rate": 9.286833855799373e-06, "loss": 0.2194, "step": 2080 }, { "epoch": 1.64, "learning_rate": 9.090909090909091e-06, "loss": 0.1527, "step": 2090 }, { "epoch": 1.65, "learning_rate": 8.89498432601881e-06, "loss": 0.249, "step": 2100 }, { "epoch": 1.65, "eval_accuracy": 0.8926051737653514, "eval_f1": 0.8979389123416935, "eval_loss": 0.2367783486843109, "eval_precision": 0.846441947565543, "eval_recall": 0.9561078794288737, "eval_runtime": 167.7276, "eval_samples_per_second": 22.817, "eval_steps_per_second": 0.954, "step": 2100 }, { "epoch": 1.65, "learning_rate": 8.699059561128527e-06, "loss": 0.175, "step": 2110 }, { "epoch": 1.66, "learning_rate": 8.503134796238245e-06, "loss": 0.2238, "step": 2120 }, { "epoch": 1.67, "learning_rate": 8.307210031347962e-06, "loss": 0.2249, "step": 2130 }, { "epoch": 1.68, "learning_rate": 8.111285266457682e-06, "loss": 0.2395, "step": 2140 }, { "epoch": 1.68, "learning_rate": 7.915360501567398e-06, "loss": 0.1775, "step": 2150 }, { "epoch": 1.69, "learning_rate": 7.719435736677116e-06, "loss": 0.2111, "step": 2160 }, { "epoch": 1.7, "learning_rate": 7.5235109717868345e-06, "loss": 0.2092, "step": 2170 }, { "epoch": 1.71, "learning_rate": 7.3275862068965514e-06, "loss": 0.2313, "step": 2180 }, { "epoch": 1.72, "learning_rate": 7.131661442006269e-06, "loss": 0.1508, "step": 2190 }, { "epoch": 1.72, "learning_rate": 6.935736677115988e-06, "loss": 0.1773, "step": 2200 }, { "epoch": 1.72, "eval_accuracy": 0.894172981447609, "eval_f1": 0.8963929393706831, "eval_loss": 0.22330810129642487, "eval_precision": 0.8681863230921705, "eval_recall": 0.9264939185616076, "eval_runtime": 166.0964, "eval_samples_per_second": 23.041, "eval_steps_per_second": 0.963, "step": 2200 }, { "epoch": 1.73, "learning_rate": 6.739811912225706e-06, "loss": 0.165, "step": 2210 }, { "epoch": 1.74, "learning_rate": 6.543887147335423e-06, "loss": 0.2013, "step": 2220 }, { "epoch": 1.75, "learning_rate": 6.347962382445141e-06, "loss": 0.2334, "step": 2230 }, { "epoch": 1.76, "learning_rate": 6.152037617554859e-06, "loss": 0.1951, "step": 2240 }, { "epoch": 1.76, "learning_rate": 5.956112852664577e-06, "loss": 0.165, "step": 2250 }, { "epoch": 1.77, "learning_rate": 5.760188087774295e-06, "loss": 0.1469, "step": 2260 }, { "epoch": 1.78, "learning_rate": 5.564263322884013e-06, "loss": 0.2499, "step": 2270 }, { "epoch": 1.79, "learning_rate": 5.368338557993731e-06, "loss": 0.1992, "step": 2280 }, { "epoch": 1.79, "learning_rate": 5.172413793103448e-06, "loss": 0.2433, "step": 2290 }, { "epoch": 1.8, "learning_rate": 4.976489028213167e-06, "loss": 0.1447, "step": 2300 }, { "epoch": 1.8, "eval_accuracy": 0.8957407891298668, "eval_f1": 0.8970322580645161, "eval_loss": 0.22690534591674805, "eval_precision": 0.876008064516129, "eval_recall": 0.9190904283447912, "eval_runtime": 166.3913, "eval_samples_per_second": 23.0, "eval_steps_per_second": 0.962, "step": 2300 }, { "epoch": 1.81, "learning_rate": 4.780564263322884e-06, "loss": 0.2265, "step": 2310 }, { "epoch": 1.82, "learning_rate": 4.584639498432603e-06, "loss": 0.1754, "step": 2320 }, { "epoch": 1.83, "learning_rate": 4.38871473354232e-06, "loss": 0.1462, "step": 2330 }, { "epoch": 1.83, "learning_rate": 4.1927899686520374e-06, "loss": 0.1796, "step": 2340 }, { "epoch": 1.84, "learning_rate": 3.996865203761756e-06, "loss": 0.1956, "step": 2350 }, { "epoch": 1.85, "learning_rate": 3.800940438871473e-06, "loss": 0.2103, "step": 2360 }, { "epoch": 1.86, "learning_rate": 3.6050156739811913e-06, "loss": 0.1791, "step": 2370 }, { "epoch": 1.87, "learning_rate": 3.409090909090909e-06, "loss": 0.1634, "step": 2380 }, { "epoch": 1.87, "learning_rate": 3.2131661442006274e-06, "loss": 0.1923, "step": 2390 }, { "epoch": 1.88, "learning_rate": 3.017241379310345e-06, "loss": 0.245, "step": 2400 }, { "epoch": 1.88, "eval_accuracy": 0.8952181865691142, "eval_f1": 0.8990687138182734, "eval_loss": 0.23547519743442535, "eval_precision": 0.8578290105667628, "eval_recall": 0.9444738233738763, "eval_runtime": 166.9651, "eval_samples_per_second": 22.921, "eval_steps_per_second": 0.958, "step": 2400 }, { "epoch": 1.89, "learning_rate": 2.821316614420063e-06, "loss": 0.1853, "step": 2410 }, { "epoch": 1.9, "learning_rate": 2.625391849529781e-06, "loss": 0.1623, "step": 2420 }, { "epoch": 1.9, "learning_rate": 2.4294670846394982e-06, "loss": 0.2096, "step": 2430 }, { "epoch": 1.91, "learning_rate": 2.233542319749216e-06, "loss": 0.183, "step": 2440 }, { "epoch": 1.92, "learning_rate": 2.0376175548589343e-06, "loss": 0.1839, "step": 2450 }, { "epoch": 1.93, "learning_rate": 1.841692789968652e-06, "loss": 0.2006, "step": 2460 }, { "epoch": 1.94, "learning_rate": 1.64576802507837e-06, "loss": 0.179, "step": 2470 }, { "epoch": 1.94, "learning_rate": 1.4498432601880878e-06, "loss": 0.1804, "step": 2480 }, { "epoch": 1.95, "learning_rate": 1.2539184952978056e-06, "loss": 0.1566, "step": 2490 }, { "epoch": 1.96, "learning_rate": 1.0579937304075236e-06, "loss": 0.1685, "step": 2500 }, { "epoch": 1.96, "eval_accuracy": 0.8933890776064802, "eval_f1": 0.8964992389649924, "eval_loss": 0.23118489980697632, "eval_precision": 0.8615309605070697, "eval_recall": 0.9344262295081968, "eval_runtime": 171.1147, "eval_samples_per_second": 22.365, "eval_steps_per_second": 0.935, "step": 2500 }, { "epoch": 1.97, "learning_rate": 8.620689655172415e-07, "loss": 0.1461, "step": 2510 }, { "epoch": 1.97, "learning_rate": 6.661442006269593e-07, "loss": 0.2458, "step": 2520 }, { "epoch": 1.98, "learning_rate": 4.7021943573667715e-07, "loss": 0.1939, "step": 2530 }, { "epoch": 1.99, "learning_rate": 2.74294670846395e-07, "loss": 0.1551, "step": 2540 }, { "epoch": 2.0, "learning_rate": 7.836990595611285e-08, "loss": 0.1953, "step": 2550 }, { "epoch": 2.0, "step": 2552, "total_flos": 1.3995088604806644e+19, "train_loss": 0.259534664043345, "train_runtime": 9346.2499, "train_samples_per_second": 6.553, "train_steps_per_second": 0.273 }, { "epoch": 2.0, "eval_accuracy": 0.8926332288401254, "eval_f1": 0.8949654996166624, "eval_loss": 0.24127539992332458, "eval_precision": 0.8524829600778968, "eval_recall": 0.9419042495965573, "eval_runtime": 177.3711, "eval_samples_per_second": 21.582, "eval_steps_per_second": 0.902, "step": 2552 } ], "logging_steps": 10, "max_steps": 2552, "num_train_epochs": 2, "save_steps": 100, "total_flos": 1.3995088604806644e+19, "trial_name": null, "trial_params": null }