{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.0, "eval_steps": 500, "global_step": 199611, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 0.00011999999999999999, "loss": 18.2932, "step": 600 }, { "epoch": 0.05, "learning_rate": 0.00023999999999999998, "loss": 4.6022, "step": 1200 }, { "epoch": 0.08, "learning_rate": 0.00029959144763720545, "loss": 2.9785, "step": 1800 }, { "epoch": 0.11, "learning_rate": 0.0002987743429116165, "loss": 2.065, "step": 2400 }, { "epoch": 0.14, "learning_rate": 0.0002979572381860275, "loss": 1.7581, "step": 3000 }, { "epoch": 0.16, "learning_rate": 0.0002971401334604385, "loss": 1.6367, "step": 3600 }, { "epoch": 0.19, "learning_rate": 0.0002963230287348495, "loss": 1.5326, "step": 4200 }, { "epoch": 0.22, "learning_rate": 0.0002955059240092605, "loss": 1.4634, "step": 4800 }, { "epoch": 0.24, "learning_rate": 0.0002946888192836715, "loss": 1.4078, "step": 5400 }, { "epoch": 0.27, "learning_rate": 0.0002938717145580825, "loss": 1.385, "step": 6000 }, { "epoch": 0.3, "learning_rate": 0.0002930546098324935, "loss": 1.3197, "step": 6600 }, { "epoch": 0.32, "learning_rate": 0.00029223750510690453, "loss": 1.3254, "step": 7200 }, { "epoch": 0.35, "learning_rate": 0.00029142040038131553, "loss": 1.3057, "step": 7800 }, { "epoch": 0.38, "learning_rate": 0.0002906032956557265, "loss": 1.2723, "step": 8400 }, { "epoch": 0.41, "learning_rate": 0.0002897861909301375, "loss": 1.2535, "step": 9000 }, { "epoch": 0.43, "learning_rate": 0.0002889690862045485, "loss": 1.2308, "step": 9600 }, { "epoch": 0.46, "learning_rate": 0.0002881519814789595, "loss": 1.2168, "step": 10200 }, { "epoch": 0.49, "learning_rate": 0.0002873348767533705, "loss": 1.2023, "step": 10800 }, { "epoch": 0.51, "learning_rate": 0.00028651777202778157, "loss": 1.2041, "step": 11400 }, { "epoch": 0.54, "learning_rate": 0.00028570066730219257, "loss": 1.1883, "step": 12000 }, { "epoch": 0.57, "learning_rate": 0.0002848835625766035, "loss": 1.1623, "step": 12600 }, { "epoch": 0.6, "learning_rate": 0.00028406645785101456, "loss": 1.1844, "step": 13200 }, { "epoch": 0.62, "learning_rate": 0.00028324935312542556, "loss": 1.1347, "step": 13800 }, { "epoch": 0.65, "learning_rate": 0.00028243224839983656, "loss": 1.1252, "step": 14400 }, { "epoch": 0.68, "learning_rate": 0.00028161514367424755, "loss": 1.1061, "step": 15000 }, { "epoch": 0.7, "learning_rate": 0.00028079803894865855, "loss": 1.1354, "step": 15600 }, { "epoch": 0.73, "learning_rate": 0.00027998093422306955, "loss": 1.1211, "step": 16200 }, { "epoch": 0.76, "learning_rate": 0.00027916382949748055, "loss": 1.0821, "step": 16800 }, { "epoch": 0.78, "learning_rate": 0.00027834672477189154, "loss": 1.0761, "step": 17400 }, { "epoch": 0.81, "learning_rate": 0.0002775296200463026, "loss": 1.0546, "step": 18000 }, { "epoch": 0.84, "learning_rate": 0.0002767125153207136, "loss": 1.0796, "step": 18600 }, { "epoch": 0.87, "learning_rate": 0.0002758954105951246, "loss": 1.0725, "step": 19200 }, { "epoch": 0.89, "learning_rate": 0.0002750783058695356, "loss": 1.0716, "step": 19800 }, { "epoch": 0.92, "learning_rate": 0.0002742612011439466, "loss": 1.0463, "step": 20400 }, { "epoch": 0.95, "learning_rate": 0.0002734440964183576, "loss": 1.0412, "step": 21000 }, { "epoch": 0.97, "learning_rate": 0.0002726269916927686, "loss": 1.0318, "step": 21600 }, { "epoch": 1.0, "learning_rate": 0.00027180988696717963, "loss": 1.0419, "step": 22200 }, { "epoch": 1.03, "learning_rate": 0.00027099278224159063, "loss": 0.9673, "step": 22800 }, { "epoch": 1.06, "learning_rate": 0.0002701756775160016, "loss": 0.9693, "step": 23400 }, { "epoch": 1.08, "learning_rate": 0.0002693585727904126, "loss": 0.9555, "step": 24000 }, { "epoch": 1.11, "learning_rate": 0.0002685414680648236, "loss": 0.9832, "step": 24600 }, { "epoch": 1.14, "learning_rate": 0.0002677243633392346, "loss": 0.9578, "step": 25200 }, { "epoch": 1.16, "learning_rate": 0.0002669072586136456, "loss": 0.9569, "step": 25800 }, { "epoch": 1.19, "learning_rate": 0.00026609015388805667, "loss": 0.9521, "step": 26400 }, { "epoch": 1.22, "learning_rate": 0.0002652730491624676, "loss": 0.953, "step": 27000 }, { "epoch": 1.24, "learning_rate": 0.0002644559444368786, "loss": 0.9757, "step": 27600 }, { "epoch": 1.27, "learning_rate": 0.00026363883971128966, "loss": 0.9517, "step": 28200 }, { "epoch": 1.3, "learning_rate": 0.00026282173498570066, "loss": 0.9552, "step": 28800 }, { "epoch": 1.33, "learning_rate": 0.00026200463026011166, "loss": 0.944, "step": 29400 }, { "epoch": 1.35, "learning_rate": 0.00026118752553452266, "loss": 0.936, "step": 30000 }, { "epoch": 1.38, "learning_rate": 0.00026037042080893365, "loss": 0.9071, "step": 30600 }, { "epoch": 1.41, "learning_rate": 0.00025955331608334465, "loss": 0.9137, "step": 31200 }, { "epoch": 1.43, "learning_rate": 0.00025873621135775565, "loss": 0.9191, "step": 31800 }, { "epoch": 1.46, "learning_rate": 0.00025791910663216665, "loss": 0.9185, "step": 32400 }, { "epoch": 1.49, "learning_rate": 0.0002571020019065777, "loss": 0.9054, "step": 33000 }, { "epoch": 1.51, "learning_rate": 0.0002562848971809887, "loss": 0.9274, "step": 33600 }, { "epoch": 1.54, "learning_rate": 0.0002554677924553997, "loss": 0.8956, "step": 34200 }, { "epoch": 1.57, "learning_rate": 0.0002546506877298107, "loss": 0.893, "step": 34800 }, { "epoch": 1.6, "learning_rate": 0.0002538335830042217, "loss": 0.9151, "step": 35400 }, { "epoch": 1.62, "learning_rate": 0.0002530164782786327, "loss": 0.8903, "step": 36000 }, { "epoch": 1.65, "learning_rate": 0.0002521993735530437, "loss": 0.8929, "step": 36600 }, { "epoch": 1.68, "learning_rate": 0.0002513822688274547, "loss": 0.8886, "step": 37200 }, { "epoch": 1.7, "learning_rate": 0.0002505651641018657, "loss": 0.8827, "step": 37800 }, { "epoch": 1.73, "learning_rate": 0.0002497480593762767, "loss": 0.8877, "step": 38400 }, { "epoch": 1.76, "learning_rate": 0.00024893095465068773, "loss": 0.868, "step": 39000 }, { "epoch": 1.79, "learning_rate": 0.0002481138499250987, "loss": 0.8731, "step": 39600 }, { "epoch": 1.81, "learning_rate": 0.0002472967451995097, "loss": 0.8649, "step": 40200 }, { "epoch": 1.84, "learning_rate": 0.0002464796404739207, "loss": 0.862, "step": 40800 }, { "epoch": 1.87, "learning_rate": 0.0002456625357483317, "loss": 0.8933, "step": 41400 }, { "epoch": 1.89, "learning_rate": 0.0002448454310227427, "loss": 0.8711, "step": 42000 }, { "epoch": 1.92, "learning_rate": 0.00024402832629715374, "loss": 0.8805, "step": 42600 }, { "epoch": 1.95, "learning_rate": 0.00024321122157156474, "loss": 0.8518, "step": 43200 }, { "epoch": 1.97, "learning_rate": 0.00024239411684597576, "loss": 0.8591, "step": 43800 }, { "epoch": 2.0, "learning_rate": 0.00024157701212038673, "loss": 0.8552, "step": 44400 }, { "epoch": 2.03, "learning_rate": 0.00024075990739479773, "loss": 0.7824, "step": 45000 }, { "epoch": 2.06, "learning_rate": 0.00023994280266920876, "loss": 0.7737, "step": 45600 }, { "epoch": 2.08, "learning_rate": 0.00023912569794361975, "loss": 0.7888, "step": 46200 }, { "epoch": 2.11, "learning_rate": 0.00023830859321803075, "loss": 0.7917, "step": 46800 }, { "epoch": 2.14, "learning_rate": 0.00023749148849244178, "loss": 0.7782, "step": 47400 }, { "epoch": 2.16, "learning_rate": 0.00023667438376685275, "loss": 0.7769, "step": 48000 }, { "epoch": 2.19, "learning_rate": 0.00023585727904126377, "loss": 0.7699, "step": 48600 }, { "epoch": 2.22, "learning_rate": 0.00023504017431567477, "loss": 0.7767, "step": 49200 }, { "epoch": 2.25, "learning_rate": 0.00023422306959008577, "loss": 0.7957, "step": 49800 }, { "epoch": 2.27, "learning_rate": 0.0002334059648644968, "loss": 0.7831, "step": 50400 }, { "epoch": 2.3, "learning_rate": 0.0002325888601389078, "loss": 0.8037, "step": 51000 }, { "epoch": 2.33, "learning_rate": 0.00023177175541331876, "loss": 0.7941, "step": 51600 }, { "epoch": 2.35, "learning_rate": 0.00023095465068772978, "loss": 0.7829, "step": 52200 }, { "epoch": 2.38, "learning_rate": 0.00023013754596214078, "loss": 0.7806, "step": 52800 }, { "epoch": 2.41, "learning_rate": 0.0002293204412365518, "loss": 0.7946, "step": 53400 }, { "epoch": 2.43, "learning_rate": 0.0002285033365109628, "loss": 0.7717, "step": 54000 }, { "epoch": 2.46, "learning_rate": 0.00022768623178537383, "loss": 0.7628, "step": 54600 }, { "epoch": 2.49, "learning_rate": 0.0002268691270597848, "loss": 0.7918, "step": 55200 }, { "epoch": 2.52, "learning_rate": 0.0002260520223341958, "loss": 0.7773, "step": 55800 }, { "epoch": 2.54, "learning_rate": 0.00022523491760860682, "loss": 0.7632, "step": 56400 }, { "epoch": 2.57, "learning_rate": 0.00022441781288301782, "loss": 0.7636, "step": 57000 }, { "epoch": 2.6, "learning_rate": 0.00022360070815742884, "loss": 0.7784, "step": 57600 }, { "epoch": 2.62, "learning_rate": 0.00022278360343183984, "loss": 0.7695, "step": 58200 }, { "epoch": 2.65, "learning_rate": 0.0002219664987062508, "loss": 0.7613, "step": 58800 }, { "epoch": 2.68, "learning_rate": 0.00022114939398066184, "loss": 0.7485, "step": 59400 }, { "epoch": 2.71, "learning_rate": 0.00022033228925507283, "loss": 0.7708, "step": 60000 }, { "epoch": 2.73, "learning_rate": 0.00021951518452948386, "loss": 0.7696, "step": 60600 }, { "epoch": 2.76, "learning_rate": 0.00021869807980389486, "loss": 0.7666, "step": 61200 }, { "epoch": 2.79, "learning_rate": 0.00021788097507830585, "loss": 0.785, "step": 61800 }, { "epoch": 2.81, "learning_rate": 0.00021706387035271685, "loss": 0.7501, "step": 62400 }, { "epoch": 2.84, "learning_rate": 0.00021624676562712785, "loss": 0.7595, "step": 63000 }, { "epoch": 2.87, "learning_rate": 0.00021542966090153885, "loss": 0.7308, "step": 63600 }, { "epoch": 2.89, "learning_rate": 0.00021461255617594987, "loss": 0.7349, "step": 64200 }, { "epoch": 2.92, "learning_rate": 0.00021379545145036087, "loss": 0.7373, "step": 64800 }, { "epoch": 2.95, "learning_rate": 0.0002129783467247719, "loss": 0.7557, "step": 65400 }, { "epoch": 2.98, "learning_rate": 0.00021216124199918286, "loss": 0.7597, "step": 66000 }, { "epoch": 3.0, "learning_rate": 0.00021134413727359386, "loss": 0.7466, "step": 66600 }, { "epoch": 3.03, "learning_rate": 0.00021052703254800489, "loss": 0.6804, "step": 67200 }, { "epoch": 3.06, "learning_rate": 0.00020970992782241588, "loss": 0.6638, "step": 67800 }, { "epoch": 3.08, "learning_rate": 0.0002088928230968269, "loss": 0.6752, "step": 68400 }, { "epoch": 3.11, "learning_rate": 0.0002080757183712379, "loss": 0.6732, "step": 69000 }, { "epoch": 3.14, "learning_rate": 0.00020725861364564888, "loss": 0.6693, "step": 69600 }, { "epoch": 3.17, "learning_rate": 0.0002064415089200599, "loss": 0.6765, "step": 70200 }, { "epoch": 3.19, "learning_rate": 0.0002056244041944709, "loss": 0.6703, "step": 70800 }, { "epoch": 3.22, "learning_rate": 0.00020480729946888192, "loss": 0.6746, "step": 71400 }, { "epoch": 3.25, "learning_rate": 0.00020399019474329292, "loss": 0.6885, "step": 72000 }, { "epoch": 3.27, "learning_rate": 0.00020317309001770394, "loss": 0.7011, "step": 72600 }, { "epoch": 3.3, "learning_rate": 0.00020235598529211492, "loss": 0.6752, "step": 73200 }, { "epoch": 3.33, "learning_rate": 0.0002015388805665259, "loss": 0.6875, "step": 73800 }, { "epoch": 3.35, "learning_rate": 0.00020072177584093694, "loss": 0.6809, "step": 74400 }, { "epoch": 3.38, "learning_rate": 0.00019990467111534793, "loss": 0.6775, "step": 75000 }, { "epoch": 3.41, "learning_rate": 0.00019908756638975893, "loss": 0.692, "step": 75600 }, { "epoch": 3.44, "learning_rate": 0.00019827046166416996, "loss": 0.68, "step": 76200 }, { "epoch": 3.46, "learning_rate": 0.00019745335693858093, "loss": 0.675, "step": 76800 }, { "epoch": 3.49, "learning_rate": 0.00019663625221299195, "loss": 0.6812, "step": 77400 }, { "epoch": 3.52, "learning_rate": 0.00019581914748740295, "loss": 0.6699, "step": 78000 }, { "epoch": 3.54, "learning_rate": 0.00019500204276181395, "loss": 0.6684, "step": 78600 }, { "epoch": 3.57, "learning_rate": 0.00019418493803622497, "loss": 0.675, "step": 79200 }, { "epoch": 3.6, "learning_rate": 0.00019336783331063597, "loss": 0.6479, "step": 79800 }, { "epoch": 3.63, "learning_rate": 0.00019255072858504697, "loss": 0.6679, "step": 80400 }, { "epoch": 3.65, "learning_rate": 0.00019173362385945796, "loss": 0.6831, "step": 81000 }, { "epoch": 3.68, "learning_rate": 0.00019091651913386896, "loss": 0.6633, "step": 81600 }, { "epoch": 3.71, "learning_rate": 0.00019009941440828, "loss": 0.6809, "step": 82200 }, { "epoch": 3.73, "learning_rate": 0.00018928230968269098, "loss": 0.6579, "step": 82800 }, { "epoch": 3.76, "learning_rate": 0.000188465204957102, "loss": 0.6539, "step": 83400 }, { "epoch": 3.79, "learning_rate": 0.00018764810023151298, "loss": 0.6607, "step": 84000 }, { "epoch": 3.81, "learning_rate": 0.00018683099550592398, "loss": 0.6615, "step": 84600 }, { "epoch": 3.84, "learning_rate": 0.000186013890780335, "loss": 0.6614, "step": 85200 }, { "epoch": 3.87, "learning_rate": 0.000185196786054746, "loss": 0.6517, "step": 85800 }, { "epoch": 3.9, "learning_rate": 0.00018437968132915702, "loss": 0.6559, "step": 86400 }, { "epoch": 3.92, "learning_rate": 0.00018356257660356802, "loss": 0.6506, "step": 87000 }, { "epoch": 3.95, "learning_rate": 0.000182745471877979, "loss": 0.6541, "step": 87600 }, { "epoch": 3.98, "learning_rate": 0.00018192836715239002, "loss": 0.6593, "step": 88200 }, { "epoch": 4.0, "learning_rate": 0.00018111126242680101, "loss": 0.6335, "step": 88800 }, { "epoch": 4.03, "learning_rate": 0.00018029415770121204, "loss": 0.5884, "step": 89400 }, { "epoch": 4.06, "learning_rate": 0.00017947705297562304, "loss": 0.5834, "step": 90000 }, { "epoch": 4.08, "learning_rate": 0.00017865994825003403, "loss": 0.596, "step": 90600 }, { "epoch": 4.11, "learning_rate": 0.00017784284352444503, "loss": 0.5839, "step": 91200 }, { "epoch": 4.14, "learning_rate": 0.00017702573879885603, "loss": 0.5738, "step": 91800 }, { "epoch": 4.17, "learning_rate": 0.00017620863407326705, "loss": 0.5835, "step": 92400 }, { "epoch": 4.19, "learning_rate": 0.00017539152934767805, "loss": 0.5738, "step": 93000 }, { "epoch": 4.22, "learning_rate": 0.00017457442462208905, "loss": 0.5773, "step": 93600 }, { "epoch": 4.25, "learning_rate": 0.00017375731989650007, "loss": 0.5866, "step": 94200 }, { "epoch": 4.27, "learning_rate": 0.00017294021517091104, "loss": 0.5843, "step": 94800 }, { "epoch": 4.3, "learning_rate": 0.00017212311044532204, "loss": 0.603, "step": 95400 }, { "epoch": 4.33, "learning_rate": 0.00017130600571973307, "loss": 0.5819, "step": 96000 }, { "epoch": 4.36, "learning_rate": 0.00017048890099414406, "loss": 0.592, "step": 96600 }, { "epoch": 4.38, "learning_rate": 0.0001696717962685551, "loss": 0.58, "step": 97200 }, { "epoch": 4.41, "learning_rate": 0.0001688546915429661, "loss": 0.5882, "step": 97800 }, { "epoch": 4.44, "learning_rate": 0.00016803758681737706, "loss": 0.5987, "step": 98400 }, { "epoch": 4.46, "learning_rate": 0.00016722048209178808, "loss": 0.585, "step": 99000 }, { "epoch": 4.49, "learning_rate": 0.00016640337736619908, "loss": 0.5769, "step": 99600 }, { "epoch": 4.52, "learning_rate": 0.0001655862726406101, "loss": 0.5813, "step": 100200 }, { "epoch": 4.54, "learning_rate": 0.0001647691679150211, "loss": 0.6053, "step": 100800 }, { "epoch": 4.57, "learning_rate": 0.00016395206318943207, "loss": 0.5889, "step": 101400 }, { "epoch": 4.6, "learning_rate": 0.0001631349584638431, "loss": 0.5877, "step": 102000 }, { "epoch": 4.63, "learning_rate": 0.0001623178537382541, "loss": 0.581, "step": 102600 }, { "epoch": 4.65, "learning_rate": 0.00016150074901266512, "loss": 0.5699, "step": 103200 }, { "epoch": 4.68, "learning_rate": 0.00016068364428707612, "loss": 0.5781, "step": 103800 }, { "epoch": 4.71, "learning_rate": 0.00015986653956148714, "loss": 0.5812, "step": 104400 }, { "epoch": 4.73, "learning_rate": 0.0001590494348358981, "loss": 0.5686, "step": 105000 }, { "epoch": 4.76, "learning_rate": 0.0001582323301103091, "loss": 0.5724, "step": 105600 }, { "epoch": 4.79, "learning_rate": 0.00015741522538472013, "loss": 0.5722, "step": 106200 }, { "epoch": 4.82, "learning_rate": 0.00015659812065913113, "loss": 0.5834, "step": 106800 }, { "epoch": 4.84, "learning_rate": 0.00015578101593354213, "loss": 0.5825, "step": 107400 }, { "epoch": 4.87, "learning_rate": 0.00015496391120795315, "loss": 0.5783, "step": 108000 }, { "epoch": 4.9, "learning_rate": 0.00015414680648236412, "loss": 0.5819, "step": 108600 }, { "epoch": 4.92, "learning_rate": 0.00015332970175677515, "loss": 0.5823, "step": 109200 }, { "epoch": 4.95, "learning_rate": 0.00015251259703118615, "loss": 0.5755, "step": 109800 }, { "epoch": 4.98, "learning_rate": 0.00015169549230559714, "loss": 0.571, "step": 110400 }, { "epoch": 5.0, "learning_rate": 0.00015087838758000817, "loss": 0.5603, "step": 111000 }, { "epoch": 5.03, "learning_rate": 0.00015006128285441917, "loss": 0.5127, "step": 111600 }, { "epoch": 5.06, "learning_rate": 0.00014924417812883016, "loss": 0.52, "step": 112200 }, { "epoch": 5.09, "learning_rate": 0.0001484270734032412, "loss": 0.5327, "step": 112800 }, { "epoch": 5.11, "learning_rate": 0.00014760996867765216, "loss": 0.5081, "step": 113400 }, { "epoch": 5.14, "learning_rate": 0.00014679286395206318, "loss": 0.5189, "step": 114000 }, { "epoch": 5.17, "learning_rate": 0.00014597575922647418, "loss": 0.5178, "step": 114600 }, { "epoch": 5.19, "learning_rate": 0.00014515865450088518, "loss": 0.5254, "step": 115200 }, { "epoch": 5.22, "learning_rate": 0.00014434154977529618, "loss": 0.5171, "step": 115800 }, { "epoch": 5.25, "learning_rate": 0.0001435244450497072, "loss": 0.5167, "step": 116400 }, { "epoch": 5.28, "learning_rate": 0.0001427073403241182, "loss": 0.5286, "step": 117000 }, { "epoch": 5.3, "learning_rate": 0.0001418902355985292, "loss": 0.5298, "step": 117600 }, { "epoch": 5.33, "learning_rate": 0.00014107313087294022, "loss": 0.5295, "step": 118200 }, { "epoch": 5.36, "learning_rate": 0.0001402560261473512, "loss": 0.5324, "step": 118800 }, { "epoch": 5.38, "learning_rate": 0.00013943892142176222, "loss": 0.5155, "step": 119400 }, { "epoch": 5.41, "learning_rate": 0.0001386218166961732, "loss": 0.5138, "step": 120000 }, { "epoch": 5.44, "learning_rate": 0.0001378047119705842, "loss": 0.5215, "step": 120600 }, { "epoch": 5.46, "learning_rate": 0.00013698760724499524, "loss": 0.5236, "step": 121200 }, { "epoch": 5.49, "learning_rate": 0.00013617050251940623, "loss": 0.5249, "step": 121800 }, { "epoch": 5.52, "learning_rate": 0.00013535339779381723, "loss": 0.5086, "step": 122400 }, { "epoch": 5.55, "learning_rate": 0.00013453629306822823, "loss": 0.5271, "step": 123000 }, { "epoch": 5.57, "learning_rate": 0.00013371918834263925, "loss": 0.5214, "step": 123600 }, { "epoch": 5.6, "learning_rate": 0.00013290208361705022, "loss": 0.5257, "step": 124200 }, { "epoch": 5.63, "learning_rate": 0.00013208497889146125, "loss": 0.5182, "step": 124800 }, { "epoch": 5.65, "learning_rate": 0.00013126787416587225, "loss": 0.5128, "step": 125400 }, { "epoch": 5.68, "learning_rate": 0.00013045076944028324, "loss": 0.5164, "step": 126000 }, { "epoch": 5.71, "learning_rate": 0.00012963366471469427, "loss": 0.5142, "step": 126600 }, { "epoch": 5.74, "learning_rate": 0.00012881655998910527, "loss": 0.5336, "step": 127200 }, { "epoch": 5.76, "learning_rate": 0.00012799945526351626, "loss": 0.5216, "step": 127800 }, { "epoch": 5.79, "learning_rate": 0.00012718235053792726, "loss": 0.5185, "step": 128400 }, { "epoch": 5.82, "learning_rate": 0.00012636524581233829, "loss": 0.5134, "step": 129000 }, { "epoch": 5.84, "learning_rate": 0.00012554814108674928, "loss": 0.5206, "step": 129600 }, { "epoch": 5.87, "learning_rate": 0.00012473103636116028, "loss": 0.5056, "step": 130200 }, { "epoch": 5.9, "learning_rate": 0.00012391393163557128, "loss": 0.4996, "step": 130800 }, { "epoch": 5.92, "learning_rate": 0.00012309682690998228, "loss": 0.51, "step": 131400 }, { "epoch": 5.95, "learning_rate": 0.0001222797221843933, "loss": 0.499, "step": 132000 }, { "epoch": 5.98, "learning_rate": 0.0001214626174588043, "loss": 0.5181, "step": 132600 }, { "epoch": 6.01, "learning_rate": 0.0001206455127332153, "loss": 0.5164, "step": 133200 }, { "epoch": 6.03, "learning_rate": 0.0001198284080076263, "loss": 0.4706, "step": 133800 }, { "epoch": 6.06, "learning_rate": 0.00011901130328203729, "loss": 0.4552, "step": 134400 }, { "epoch": 6.09, "learning_rate": 0.0001181941985564483, "loss": 0.457, "step": 135000 }, { "epoch": 6.11, "learning_rate": 0.00011737709383085931, "loss": 0.4606, "step": 135600 }, { "epoch": 6.14, "learning_rate": 0.00011655998910527031, "loss": 0.4685, "step": 136200 }, { "epoch": 6.17, "learning_rate": 0.00011574288437968132, "loss": 0.4564, "step": 136800 }, { "epoch": 6.2, "learning_rate": 0.00011492577965409233, "loss": 0.4611, "step": 137400 }, { "epoch": 6.22, "learning_rate": 0.00011410867492850332, "loss": 0.4496, "step": 138000 }, { "epoch": 6.25, "learning_rate": 0.00011329157020291433, "loss": 0.4509, "step": 138600 }, { "epoch": 6.28, "learning_rate": 0.00011247446547732534, "loss": 0.4546, "step": 139200 }, { "epoch": 6.3, "learning_rate": 0.00011165736075173634, "loss": 0.4616, "step": 139800 }, { "epoch": 6.33, "learning_rate": 0.00011084025602614733, "loss": 0.465, "step": 140400 }, { "epoch": 6.36, "learning_rate": 0.00011002315130055835, "loss": 0.4639, "step": 141000 }, { "epoch": 6.38, "learning_rate": 0.00010920604657496934, "loss": 0.4605, "step": 141600 }, { "epoch": 6.41, "learning_rate": 0.00010838894184938035, "loss": 0.4592, "step": 142200 }, { "epoch": 6.44, "learning_rate": 0.00010757183712379136, "loss": 0.4612, "step": 142800 }, { "epoch": 6.47, "learning_rate": 0.00010675473239820235, "loss": 0.4538, "step": 143400 }, { "epoch": 6.49, "learning_rate": 0.00010593762767261336, "loss": 0.452, "step": 144000 }, { "epoch": 6.52, "learning_rate": 0.00010512052294702437, "loss": 0.4701, "step": 144600 }, { "epoch": 6.55, "learning_rate": 0.00010430341822143537, "loss": 0.4518, "step": 145200 }, { "epoch": 6.57, "learning_rate": 0.00010348631349584638, "loss": 0.4594, "step": 145800 }, { "epoch": 6.6, "learning_rate": 0.00010266920877025738, "loss": 0.4593, "step": 146400 }, { "epoch": 6.63, "learning_rate": 0.00010185210404466838, "loss": 0.4651, "step": 147000 }, { "epoch": 6.65, "learning_rate": 0.00010103499931907939, "loss": 0.4547, "step": 147600 }, { "epoch": 6.68, "learning_rate": 0.0001002178945934904, "loss": 0.4544, "step": 148200 }, { "epoch": 6.71, "learning_rate": 9.940078986790138e-05, "loss": 0.4605, "step": 148800 }, { "epoch": 6.74, "learning_rate": 9.858368514231239e-05, "loss": 0.4518, "step": 149400 }, { "epoch": 6.76, "learning_rate": 9.77665804167234e-05, "loss": 0.4625, "step": 150000 }, { "epoch": 6.79, "learning_rate": 9.69494756911344e-05, "loss": 0.4537, "step": 150600 }, { "epoch": 6.82, "learning_rate": 9.613237096554541e-05, "loss": 0.4515, "step": 151200 }, { "epoch": 6.84, "learning_rate": 9.531526623995642e-05, "loss": 0.4507, "step": 151800 }, { "epoch": 6.87, "learning_rate": 9.449816151436741e-05, "loss": 0.4617, "step": 152400 }, { "epoch": 6.9, "learning_rate": 9.368105678877842e-05, "loss": 0.4494, "step": 153000 }, { "epoch": 6.93, "learning_rate": 9.286395206318943e-05, "loss": 0.4502, "step": 153600 }, { "epoch": 6.95, "learning_rate": 9.204684733760043e-05, "loss": 0.4495, "step": 154200 }, { "epoch": 6.98, "learning_rate": 9.122974261201142e-05, "loss": 0.4569, "step": 154800 }, { "epoch": 7.01, "learning_rate": 9.041263788642244e-05, "loss": 0.4416, "step": 155400 }, { "epoch": 7.03, "learning_rate": 8.959553316083343e-05, "loss": 0.4081, "step": 156000 }, { "epoch": 7.06, "learning_rate": 8.877842843524444e-05, "loss": 0.4196, "step": 156600 }, { "epoch": 7.09, "learning_rate": 8.796132370965546e-05, "loss": 0.4135, "step": 157200 }, { "epoch": 7.11, "learning_rate": 8.714421898406644e-05, "loss": 0.4088, "step": 157800 }, { "epoch": 7.14, "learning_rate": 8.632711425847745e-05, "loss": 0.4005, "step": 158400 }, { "epoch": 7.17, "learning_rate": 8.551000953288846e-05, "loss": 0.3954, "step": 159000 }, { "epoch": 7.2, "learning_rate": 8.469290480729946e-05, "loss": 0.4093, "step": 159600 }, { "epoch": 7.22, "learning_rate": 8.387580008171047e-05, "loss": 0.3998, "step": 160200 }, { "epoch": 7.25, "learning_rate": 8.305869535612147e-05, "loss": 0.4068, "step": 160800 }, { "epoch": 7.28, "learning_rate": 8.224159063053247e-05, "loss": 0.3933, "step": 161400 }, { "epoch": 7.3, "learning_rate": 8.142448590494348e-05, "loss": 0.3957, "step": 162000 }, { "epoch": 7.33, "learning_rate": 8.060738117935449e-05, "loss": 0.3954, "step": 162600 }, { "epoch": 7.36, "learning_rate": 7.979027645376547e-05, "loss": 0.3949, "step": 163200 }, { "epoch": 7.39, "learning_rate": 7.897317172817648e-05, "loss": 0.4014, "step": 163800 }, { "epoch": 7.41, "learning_rate": 7.81560670025875e-05, "loss": 0.4007, "step": 164400 }, { "epoch": 7.44, "learning_rate": 7.733896227699849e-05, "loss": 0.4037, "step": 165000 }, { "epoch": 7.47, "learning_rate": 7.65218575514095e-05, "loss": 0.4055, "step": 165600 }, { "epoch": 7.49, "learning_rate": 7.570475282582051e-05, "loss": 0.3985, "step": 166200 }, { "epoch": 7.52, "learning_rate": 7.488764810023151e-05, "loss": 0.4022, "step": 166800 }, { "epoch": 7.55, "learning_rate": 7.407054337464251e-05, "loss": 0.3872, "step": 167400 }, { "epoch": 7.57, "learning_rate": 7.325343864905351e-05, "loss": 0.3895, "step": 168000 }, { "epoch": 7.6, "learning_rate": 7.243633392346452e-05, "loss": 0.3968, "step": 168600 }, { "epoch": 7.63, "learning_rate": 7.161922919787552e-05, "loss": 0.4051, "step": 169200 }, { "epoch": 7.66, "learning_rate": 7.080212447228653e-05, "loss": 0.3915, "step": 169800 }, { "epoch": 7.68, "learning_rate": 6.998501974669754e-05, "loss": 0.3934, "step": 170400 }, { "epoch": 7.71, "learning_rate": 6.916791502110854e-05, "loss": 0.3943, "step": 171000 }, { "epoch": 7.74, "learning_rate": 6.835081029551953e-05, "loss": 0.3932, "step": 171600 }, { "epoch": 7.76, "learning_rate": 6.753370556993054e-05, "loss": 0.4063, "step": 172200 }, { "epoch": 7.79, "learning_rate": 6.671660084434154e-05, "loss": 0.3975, "step": 172800 }, { "epoch": 7.82, "learning_rate": 6.589949611875254e-05, "loss": 0.3915, "step": 173400 }, { "epoch": 7.85, "learning_rate": 6.508239139316355e-05, "loss": 0.3892, "step": 174000 }, { "epoch": 7.87, "learning_rate": 6.426528666757456e-05, "loss": 0.3831, "step": 174600 }, { "epoch": 7.9, "learning_rate": 6.344818194198556e-05, "loss": 0.3896, "step": 175200 }, { "epoch": 7.93, "learning_rate": 6.263107721639657e-05, "loss": 0.3839, "step": 175800 }, { "epoch": 7.95, "learning_rate": 6.181397249080757e-05, "loss": 0.401, "step": 176400 }, { "epoch": 7.98, "learning_rate": 6.0996867765218565e-05, "loss": 0.3888, "step": 177000 }, { "epoch": 8.01, "learning_rate": 6.0179763039629576e-05, "loss": 0.371, "step": 177600 }, { "epoch": 8.03, "learning_rate": 5.936265831404058e-05, "loss": 0.3514, "step": 178200 }, { "epoch": 8.06, "learning_rate": 5.854555358845158e-05, "loss": 0.364, "step": 178800 }, { "epoch": 8.09, "learning_rate": 5.772844886286259e-05, "loss": 0.3486, "step": 179400 }, { "epoch": 8.12, "learning_rate": 5.691134413727359e-05, "loss": 0.3531, "step": 180000 }, { "epoch": 8.14, "learning_rate": 5.609423941168459e-05, "loss": 0.3584, "step": 180600 }, { "epoch": 8.17, "learning_rate": 5.52771346860956e-05, "loss": 0.345, "step": 181200 }, { "epoch": 8.2, "learning_rate": 5.44600299605066e-05, "loss": 0.3406, "step": 181800 }, { "epoch": 8.22, "learning_rate": 5.3642925234917604e-05, "loss": 0.3519, "step": 182400 }, { "epoch": 8.25, "learning_rate": 5.28258205093286e-05, "loss": 0.3607, "step": 183000 }, { "epoch": 8.28, "learning_rate": 5.200871578373961e-05, "loss": 0.3533, "step": 183600 }, { "epoch": 8.31, "learning_rate": 5.119161105815061e-05, "loss": 0.3586, "step": 184200 }, { "epoch": 8.33, "learning_rate": 5.0374506332561615e-05, "loss": 0.3453, "step": 184800 }, { "epoch": 8.36, "learning_rate": 4.9557401606972626e-05, "loss": 0.3431, "step": 185400 }, { "epoch": 8.39, "learning_rate": 4.8740296881383624e-05, "loss": 0.3546, "step": 186000 }, { "epoch": 8.41, "learning_rate": 4.792319215579463e-05, "loss": 0.3434, "step": 186600 }, { "epoch": 8.44, "learning_rate": 4.710608743020563e-05, "loss": 0.356, "step": 187200 }, { "epoch": 8.47, "learning_rate": 4.628898270461664e-05, "loss": 0.343, "step": 187800 }, { "epoch": 8.49, "learning_rate": 4.547187797902764e-05, "loss": 0.3501, "step": 188400 }, { "epoch": 8.52, "learning_rate": 4.4654773253438645e-05, "loss": 0.3384, "step": 189000 }, { "epoch": 8.55, "learning_rate": 4.383766852784965e-05, "loss": 0.3515, "step": 189600 }, { "epoch": 8.58, "learning_rate": 4.302056380226065e-05, "loss": 0.353, "step": 190200 }, { "epoch": 8.6, "learning_rate": 4.220345907667166e-05, "loss": 0.3448, "step": 190800 }, { "epoch": 8.63, "learning_rate": 4.138635435108266e-05, "loss": 0.3438, "step": 191400 }, { "epoch": 8.66, "learning_rate": 4.056924962549366e-05, "loss": 0.3539, "step": 192000 }, { "epoch": 8.68, "learning_rate": 3.975214489990467e-05, "loss": 0.3514, "step": 192600 }, { "epoch": 8.71, "learning_rate": 3.893504017431567e-05, "loss": 0.3514, "step": 193200 }, { "epoch": 8.74, "learning_rate": 3.811793544872667e-05, "loss": 0.3349, "step": 193800 }, { "epoch": 8.77, "learning_rate": 3.730083072313768e-05, "loss": 0.3429, "step": 194400 }, { "epoch": 8.79, "learning_rate": 3.648372599754868e-05, "loss": 0.3407, "step": 195000 }, { "epoch": 8.82, "learning_rate": 3.5666621271959686e-05, "loss": 0.3444, "step": 195600 }, { "epoch": 8.85, "learning_rate": 3.484951654637069e-05, "loss": 0.3434, "step": 196200 }, { "epoch": 8.87, "learning_rate": 3.4032411820781695e-05, "loss": 0.3423, "step": 196800 }, { "epoch": 8.9, "learning_rate": 3.32153070951927e-05, "loss": 0.3387, "step": 197400 }, { "epoch": 8.93, "learning_rate": 3.23982023696037e-05, "loss": 0.3415, "step": 198000 }, { "epoch": 8.95, "learning_rate": 3.158109764401471e-05, "loss": 0.3404, "step": 198600 }, { "epoch": 8.98, "learning_rate": 3.076399291842571e-05, "loss": 0.3303, "step": 199200 } ], "logging_steps": 600, "max_steps": 221790, "num_train_epochs": 10, "save_steps": 500, "total_flos": 2.002554029520894e+20, "trial_name": null, "trial_params": null }