{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 1090, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.8348623853211012e-07, "loss": 1.6972, "step": 1 }, { "epoch": 0.0, "learning_rate": 9.174311926605506e-07, "loss": 1.6464, "step": 5 }, { "epoch": 0.01, "learning_rate": 1.8348623853211011e-06, "loss": 1.3918, "step": 10 }, { "epoch": 0.01, "learning_rate": 2.7522935779816517e-06, "loss": 1.214, "step": 15 }, { "epoch": 0.02, "learning_rate": 3.6697247706422022e-06, "loss": 1.1699, "step": 20 }, { "epoch": 0.02, "learning_rate": 4.587155963302753e-06, "loss": 1.1172, "step": 25 }, { "epoch": 0.03, "learning_rate": 5.504587155963303e-06, "loss": 1.0704, "step": 30 }, { "epoch": 0.03, "learning_rate": 6.422018348623854e-06, "loss": 1.0575, "step": 35 }, { "epoch": 0.04, "learning_rate": 7.3394495412844045e-06, "loss": 1.052, "step": 40 }, { "epoch": 0.04, "learning_rate": 8.256880733944956e-06, "loss": 1.047, "step": 45 }, { "epoch": 0.05, "learning_rate": 9.174311926605506e-06, "loss": 1.0309, "step": 50 }, { "epoch": 0.05, "learning_rate": 1.0091743119266055e-05, "loss": 1.0092, "step": 55 }, { "epoch": 0.06, "learning_rate": 1.1009174311926607e-05, "loss": 1.0362, "step": 60 }, { "epoch": 0.06, "learning_rate": 1.1926605504587156e-05, "loss": 1.0082, "step": 65 }, { "epoch": 0.06, "learning_rate": 1.2844036697247708e-05, "loss": 1.0003, "step": 70 }, { "epoch": 0.07, "learning_rate": 1.3761467889908258e-05, "loss": 1.0003, "step": 75 }, { "epoch": 0.07, "learning_rate": 1.4678899082568809e-05, "loss": 0.9919, "step": 80 }, { "epoch": 0.08, "learning_rate": 1.559633027522936e-05, "loss": 1.0151, "step": 85 }, { "epoch": 0.08, "learning_rate": 1.6513761467889912e-05, "loss": 0.9974, "step": 90 }, { "epoch": 0.09, "learning_rate": 1.743119266055046e-05, "loss": 1.0154, "step": 95 }, { "epoch": 0.09, "learning_rate": 1.834862385321101e-05, "loss": 1.0163, "step": 100 }, { "epoch": 0.1, "learning_rate": 1.9266055045871563e-05, "loss": 0.9874, "step": 105 }, { "epoch": 0.1, "learning_rate": 1.999994872196626e-05, "loss": 1.0056, "step": 110 }, { "epoch": 0.11, "learning_rate": 1.9998154046002822e-05, "loss": 1.0142, "step": 115 }, { "epoch": 0.11, "learning_rate": 1.999379599421534e-05, "loss": 1.0058, "step": 120 }, { "epoch": 0.11, "learning_rate": 1.9986875683942535e-05, "loss": 1.0201, "step": 125 }, { "epoch": 0.12, "learning_rate": 1.9977394889447526e-05, "loss": 1.0239, "step": 130 }, { "epoch": 0.12, "learning_rate": 1.9965356041462954e-05, "loss": 1.0305, "step": 135 }, { "epoch": 0.13, "learning_rate": 1.9950762226567783e-05, "loss": 1.0284, "step": 140 }, { "epoch": 0.13, "learning_rate": 1.9933617186395917e-05, "loss": 1.0047, "step": 145 }, { "epoch": 0.14, "learning_rate": 1.9913925316676946e-05, "loss": 1.0028, "step": 150 }, { "epoch": 0.14, "learning_rate": 1.9891691666109112e-05, "loss": 1.023, "step": 155 }, { "epoch": 0.15, "learning_rate": 1.9866921935064907e-05, "loss": 1.015, "step": 160 }, { "epoch": 0.15, "learning_rate": 1.9839622474129595e-05, "loss": 1.0164, "step": 165 }, { "epoch": 0.16, "learning_rate": 1.9809800282473014e-05, "loss": 0.9977, "step": 170 }, { "epoch": 0.16, "learning_rate": 1.977746300605507e-05, "loss": 1.0214, "step": 175 }, { "epoch": 0.17, "learning_rate": 1.9742618935665478e-05, "loss": 1.0083, "step": 180 }, { "epoch": 0.17, "learning_rate": 1.9705277004798072e-05, "loss": 0.998, "step": 185 }, { "epoch": 0.17, "learning_rate": 1.9665446787360444e-05, "loss": 1.0102, "step": 190 }, { "epoch": 0.18, "learning_rate": 1.9623138495219292e-05, "loss": 1.0084, "step": 195 }, { "epoch": 0.18, "learning_rate": 1.957836297558229e-05, "loss": 1.0272, "step": 200 }, { "epoch": 0.19, "learning_rate": 1.9531131708217005e-05, "loss": 0.989, "step": 205 }, { "epoch": 0.19, "learning_rate": 1.948145680250766e-05, "loss": 1.0116, "step": 210 }, { "epoch": 0.2, "learning_rate": 1.9429350994350483e-05, "loss": 0.9931, "step": 215 }, { "epoch": 0.2, "learning_rate": 1.93748276428884e-05, "loss": 1.0231, "step": 220 }, { "epoch": 0.21, "learning_rate": 1.931790072708596e-05, "loss": 1.0191, "step": 225 }, { "epoch": 0.21, "learning_rate": 1.9258584842145342e-05, "loss": 1.0017, "step": 230 }, { "epoch": 0.22, "learning_rate": 1.9196895195764363e-05, "loss": 0.9957, "step": 235 }, { "epoch": 0.22, "learning_rate": 1.913284760423745e-05, "loss": 1.0214, "step": 240 }, { "epoch": 0.22, "learning_rate": 1.9066458488400586e-05, "loss": 1.0259, "step": 245 }, { "epoch": 0.23, "learning_rate": 1.8997744869421248e-05, "loss": 1.0034, "step": 250 }, { "epoch": 0.23, "learning_rate": 1.8926724364434447e-05, "loss": 0.9952, "step": 255 }, { "epoch": 0.24, "learning_rate": 1.8853415182025953e-05, "loss": 1.0086, "step": 260 }, { "epoch": 0.24, "learning_rate": 1.8777836117563894e-05, "loss": 1.0196, "step": 265 }, { "epoch": 0.25, "learning_rate": 1.8700006548379898e-05, "loss": 0.9918, "step": 270 }, { "epoch": 0.25, "learning_rate": 1.861994642880105e-05, "loss": 1.0037, "step": 275 }, { "epoch": 0.26, "learning_rate": 1.8537676285033886e-05, "loss": 1.0062, "step": 280 }, { "epoch": 0.26, "learning_rate": 1.845321720990181e-05, "loss": 1.0095, "step": 285 }, { "epoch": 0.27, "learning_rate": 1.8366590857437182e-05, "loss": 1.0152, "step": 290 }, { "epoch": 0.27, "learning_rate": 1.8277819437329577e-05, "loss": 0.993, "step": 295 }, { "epoch": 0.28, "learning_rate": 1.8186925709231534e-05, "loss": 0.9875, "step": 300 }, { "epoch": 0.28, "learning_rate": 1.809393297692334e-05, "loss": 0.989, "step": 305 }, { "epoch": 0.28, "learning_rate": 1.799886508233829e-05, "loss": 0.9866, "step": 310 }, { "epoch": 0.29, "learning_rate": 1.790174639944997e-05, "loss": 0.9894, "step": 315 }, { "epoch": 0.29, "learning_rate": 1.780260182802314e-05, "loss": 0.9981, "step": 320 }, { "epoch": 0.3, "learning_rate": 1.7701456787229805e-05, "loss": 0.9877, "step": 325 }, { "epoch": 0.3, "learning_rate": 1.7598337209132142e-05, "loss": 1.0103, "step": 330 }, { "epoch": 0.31, "learning_rate": 1.7493269532033882e-05, "loss": 0.9938, "step": 335 }, { "epoch": 0.31, "learning_rate": 1.738628069370195e-05, "loss": 0.9979, "step": 340 }, { "epoch": 0.32, "learning_rate": 1.7277398124460022e-05, "loss": 1.007, "step": 345 }, { "epoch": 0.32, "learning_rate": 1.71666497401558e-05, "loss": 0.9926, "step": 350 }, { "epoch": 0.33, "learning_rate": 1.7054063935003813e-05, "loss": 0.9971, "step": 355 }, { "epoch": 0.33, "learning_rate": 1.6939669574305565e-05, "loss": 1.0069, "step": 360 }, { "epoch": 0.33, "learning_rate": 1.6823495987048922e-05, "loss": 0.9705, "step": 365 }, { "epoch": 0.34, "learning_rate": 1.6705572958388576e-05, "loss": 0.999, "step": 370 }, { "epoch": 0.34, "learning_rate": 1.6585930722009602e-05, "loss": 0.9952, "step": 375 }, { "epoch": 0.35, "learning_rate": 1.6464599952375998e-05, "loss": 0.9972, "step": 380 }, { "epoch": 0.35, "learning_rate": 1.63416117568662e-05, "loss": 0.9813, "step": 385 }, { "epoch": 0.36, "learning_rate": 1.621699766779763e-05, "loss": 1.0089, "step": 390 }, { "epoch": 0.36, "learning_rate": 1.6090789634342278e-05, "loss": 1.0026, "step": 395 }, { "epoch": 0.37, "learning_rate": 1.5963020014335437e-05, "loss": 0.9933, "step": 400 }, { "epoch": 0.37, "learning_rate": 1.583372156597961e-05, "loss": 0.9754, "step": 405 }, { "epoch": 0.38, "learning_rate": 1.570292743944583e-05, "loss": 1.0099, "step": 410 }, { "epoch": 0.38, "learning_rate": 1.557067116837444e-05, "loss": 0.9682, "step": 415 }, { "epoch": 0.39, "learning_rate": 1.5436986661277578e-05, "loss": 0.9963, "step": 420 }, { "epoch": 0.39, "learning_rate": 1.530190819284555e-05, "loss": 1.0133, "step": 425 }, { "epoch": 0.39, "learning_rate": 1.5165470395159314e-05, "loss": 0.9886, "step": 430 }, { "epoch": 0.4, "learning_rate": 1.5027708248811331e-05, "loss": 0.9891, "step": 435 }, { "epoch": 0.4, "learning_rate": 1.4888657073937077e-05, "loss": 0.9792, "step": 440 }, { "epoch": 0.41, "learning_rate": 1.4748352521159492e-05, "loss": 0.9769, "step": 445 }, { "epoch": 0.41, "learning_rate": 1.4606830562448692e-05, "loss": 0.9878, "step": 450 }, { "epoch": 0.42, "learning_rate": 1.4464127481899312e-05, "loss": 1.0, "step": 455 }, { "epoch": 0.42, "learning_rate": 1.4320279866427798e-05, "loss": 0.9789, "step": 460 }, { "epoch": 0.43, "learning_rate": 1.4175324596392075e-05, "loss": 0.9832, "step": 465 }, { "epoch": 0.43, "learning_rate": 1.402929883613599e-05, "loss": 0.9766, "step": 470 }, { "epoch": 0.44, "learning_rate": 1.3882240024460928e-05, "loss": 0.9861, "step": 475 }, { "epoch": 0.44, "learning_rate": 1.3734185865027061e-05, "loss": 0.9832, "step": 480 }, { "epoch": 0.44, "learning_rate": 1.358517431668672e-05, "loss": 0.9832, "step": 485 }, { "epoch": 0.45, "learning_rate": 1.3435243583752294e-05, "loss": 0.9752, "step": 490 }, { "epoch": 0.45, "learning_rate": 1.3284432106201233e-05, "loss": 0.9799, "step": 495 }, { "epoch": 0.46, "learning_rate": 1.313277854982062e-05, "loss": 0.9579, "step": 500 }, { "epoch": 0.46, "learning_rate": 1.2980321796293838e-05, "loss": 0.9886, "step": 505 }, { "epoch": 0.47, "learning_rate": 1.2827100933231904e-05, "loss": 0.9696, "step": 510 }, { "epoch": 0.47, "learning_rate": 1.2673155244151985e-05, "loss": 0.9798, "step": 515 }, { "epoch": 0.48, "learning_rate": 1.2518524198405699e-05, "loss": 0.9663, "step": 520 }, { "epoch": 0.48, "learning_rate": 1.2363247441059775e-05, "loss": 0.9711, "step": 525 }, { "epoch": 0.49, "learning_rate": 1.2207364782731657e-05, "loss": 0.98, "step": 530 }, { "epoch": 0.49, "learning_rate": 1.2050916189382646e-05, "loss": 0.9636, "step": 535 }, { "epoch": 0.5, "learning_rate": 1.189394177207125e-05, "loss": 0.9826, "step": 540 }, { "epoch": 0.5, "learning_rate": 1.1736481776669307e-05, "loss": 0.9807, "step": 545 }, { "epoch": 0.5, "learning_rate": 1.1578576573543541e-05, "loss": 0.9614, "step": 550 }, { "epoch": 0.51, "learning_rate": 1.1420266647205232e-05, "loss": 0.9692, "step": 555 }, { "epoch": 0.51, "learning_rate": 1.1261592585930576e-05, "loss": 0.9877, "step": 560 }, { "epoch": 0.52, "learning_rate": 1.1102595071354471e-05, "loss": 0.9449, "step": 565 }, { "epoch": 0.52, "learning_rate": 1.0943314868040365e-05, "loss": 0.9583, "step": 570 }, { "epoch": 0.53, "learning_rate": 1.0783792813028828e-05, "loss": 0.9779, "step": 575 }, { "epoch": 0.53, "learning_rate": 1.0624069805367558e-05, "loss": 0.9404, "step": 580 }, { "epoch": 0.54, "learning_rate": 1.0464186795625481e-05, "loss": 0.9636, "step": 585 }, { "epoch": 0.54, "learning_rate": 1.0304184775393642e-05, "loss": 0.9574, "step": 590 }, { "epoch": 0.55, "learning_rate": 1.0144104766775574e-05, "loss": 0.9702, "step": 595 }, { "epoch": 0.55, "learning_rate": 9.983987811869863e-06, "loss": 0.9671, "step": 600 }, { "epoch": 0.56, "learning_rate": 9.823874962247565e-06, "loss": 0.9655, "step": 605 }, { "epoch": 0.56, "learning_rate": 9.663807268427197e-06, "loss": 0.9518, "step": 610 }, { "epoch": 0.56, "learning_rate": 9.503825769350016e-06, "loss": 0.9739, "step": 615 }, { "epoch": 0.57, "learning_rate": 9.343971481858246e-06, "loss": 0.949, "step": 620 }, { "epoch": 0.57, "learning_rate": 9.184285390178978e-06, "loss": 0.9641, "step": 625 }, { "epoch": 0.58, "learning_rate": 9.024808435416435e-06, "loss": 0.9431, "step": 630 }, { "epoch": 0.58, "learning_rate": 8.865581505055292e-06, "loss": 0.9504, "step": 635 }, { "epoch": 0.59, "learning_rate": 8.706645422477739e-06, "loss": 0.962, "step": 640 }, { "epoch": 0.59, "learning_rate": 8.548040936496989e-06, "loss": 0.9456, "step": 645 }, { "epoch": 0.6, "learning_rate": 8.389808710909881e-06, "loss": 0.9738, "step": 650 }, { "epoch": 0.6, "learning_rate": 8.231989314071318e-06, "loss": 0.9611, "step": 655 }, { "epoch": 0.61, "learning_rate": 8.07462320849313e-06, "loss": 0.9601, "step": 660 }, { "epoch": 0.61, "learning_rate": 7.917750740470116e-06, "loss": 0.9699, "step": 665 }, { "epoch": 0.61, "learning_rate": 7.761412129735853e-06, "loss": 0.9627, "step": 670 }, { "epoch": 0.62, "learning_rate": 7.605647459150961e-06, "loss": 0.9486, "step": 675 }, { "epoch": 0.62, "learning_rate": 7.4504966644264775e-06, "loss": 0.9484, "step": 680 }, { "epoch": 0.63, "learning_rate": 7.295999523884921e-06, "loss": 0.9425, "step": 685 }, { "epoch": 0.63, "learning_rate": 7.142195648261747e-06, "loss": 0.9648, "step": 690 }, { "epoch": 0.64, "learning_rate": 6.989124470549746e-06, "loss": 0.9604, "step": 695 }, { "epoch": 0.64, "learning_rate": 6.83682523588902e-06, "loss": 0.9428, "step": 700 }, { "epoch": 0.65, "learning_rate": 6.685336991505122e-06, "loss": 0.9575, "step": 705 }, { "epoch": 0.65, "learning_rate": 6.5346985766979384e-06, "loss": 0.9501, "step": 710 }, { "epoch": 0.66, "learning_rate": 6.384948612883872e-06, "loss": 0.9475, "step": 715 }, { "epoch": 0.66, "learning_rate": 6.2361254936939e-06, "loss": 0.9386, "step": 720 }, { "epoch": 0.67, "learning_rate": 6.0882673751300235e-06, "loss": 0.9501, "step": 725 }, { "epoch": 0.67, "learning_rate": 5.941412165782645e-06, "loss": 0.9488, "step": 730 }, { "epoch": 0.67, "learning_rate": 5.79559751711138e-06, "loss": 0.9351, "step": 735 }, { "epoch": 0.68, "learning_rate": 5.650860813791786e-06, "loss": 0.9444, "step": 740 }, { "epoch": 0.68, "learning_rate": 5.507239164130501e-06, "loss": 0.9524, "step": 745 }, { "epoch": 0.69, "learning_rate": 5.364769390551225e-06, "loss": 0.9511, "step": 750 }, { "epoch": 0.69, "learning_rate": 5.223488020154028e-06, "loss": 0.9462, "step": 755 }, { "epoch": 0.7, "learning_rate": 5.083431275350312e-06, "loss": 0.9397, "step": 760 }, { "epoch": 0.7, "learning_rate": 4.9446350645759885e-06, "loss": 0.9469, "step": 765 }, { "epoch": 0.71, "learning_rate": 4.807134973085036e-06, "loss": 0.9439, "step": 770 }, { "epoch": 0.71, "learning_rate": 4.670966253826027e-06, "loss": 0.9502, "step": 775 }, { "epoch": 0.72, "learning_rate": 4.53616381840377e-06, "loss": 0.9336, "step": 780 }, { "epoch": 0.72, "learning_rate": 4.402762228128531e-06, "loss": 0.9408, "step": 785 }, { "epoch": 0.72, "learning_rate": 4.270795685155001e-06, "loss": 0.9483, "step": 790 }, { "epoch": 0.73, "learning_rate": 4.140298023713416e-06, "loss": 0.9289, "step": 795 }, { "epoch": 0.73, "learning_rate": 4.0113027014349374e-06, "loss": 0.9439, "step": 800 }, { "epoch": 0.74, "learning_rate": 3.883842790773647e-06, "loss": 0.9325, "step": 805 }, { "epoch": 0.74, "learning_rate": 3.757950970527249e-06, "loss": 0.9364, "step": 810 }, { "epoch": 0.75, "learning_rate": 3.633659517458736e-06, "loss": 0.9311, "step": 815 }, { "epoch": 0.75, "learning_rate": 3.511000298021098e-06, "loss": 0.9496, "step": 820 }, { "epoch": 0.76, "learning_rate": 3.39000476018726e-06, "loss": 0.9441, "step": 825 }, { "epoch": 0.76, "learning_rate": 3.2707039253872796e-06, "loss": 0.9352, "step": 830 }, { "epoch": 0.77, "learning_rate": 3.153128380554941e-06, "loss": 0.9418, "step": 835 }, { "epoch": 0.77, "learning_rate": 3.037308270285709e-06, "loss": 0.9204, "step": 840 }, { "epoch": 0.78, "learning_rate": 2.923273289108115e-06, "loss": 0.9462, "step": 845 }, { "epoch": 0.78, "learning_rate": 2.8110526738705345e-06, "loss": 0.9368, "step": 850 }, { "epoch": 0.78, "learning_rate": 2.700675196245288e-06, "loss": 0.9467, "step": 855 }, { "epoch": 0.79, "learning_rate": 2.592169155352031e-06, "loss": 0.9157, "step": 860 }, { "epoch": 0.79, "learning_rate": 2.485562370502279e-06, "loss": 0.9283, "step": 865 }, { "epoch": 0.8, "learning_rate": 2.3808821740669608e-06, "loss": 0.9294, "step": 870 }, { "epoch": 0.8, "learning_rate": 2.2781554044688015e-06, "loss": 0.9404, "step": 875 }, { "epoch": 0.81, "learning_rate": 2.1774083993013715e-06, "loss": 0.9363, "step": 880 }, { "epoch": 0.81, "learning_rate": 2.0786669885765044e-06, "loss": 0.9344, "step": 885 }, { "epoch": 0.82, "learning_rate": 1.981956488101898e-06, "loss": 0.9245, "step": 890 }, { "epoch": 0.82, "learning_rate": 1.8873016929904942e-06, "loss": 0.9335, "step": 895 }, { "epoch": 0.83, "learning_rate": 1.7947268713034128e-06, "loss": 0.9481, "step": 900 }, { "epoch": 0.83, "learning_rate": 1.704255757827963e-06, "loss": 0.9352, "step": 905 }, { "epoch": 0.83, "learning_rate": 1.6159115479924259e-06, "loss": 0.9238, "step": 910 }, { "epoch": 0.84, "learning_rate": 1.529716891919074e-06, "loss": 0.9213, "step": 915 }, { "epoch": 0.84, "learning_rate": 1.4456938886170413e-06, "loss": 0.938, "step": 920 }, { "epoch": 0.85, "learning_rate": 1.3638640803164516e-06, "loss": 0.9248, "step": 925 }, { "epoch": 0.85, "learning_rate": 1.2842484469453365e-06, "loss": 0.9157, "step": 930 }, { "epoch": 0.86, "learning_rate": 1.2068674007506787e-06, "loss": 0.9289, "step": 935 }, { "epoch": 0.86, "learning_rate": 1.1317407810650372e-06, "loss": 0.9215, "step": 940 }, { "epoch": 0.87, "learning_rate": 1.0588878492200261e-06, "loss": 0.9343, "step": 945 }, { "epoch": 0.87, "learning_rate": 9.883272836080116e-07, "loss": 0.9294, "step": 950 }, { "epoch": 0.88, "learning_rate": 9.200771748932513e-07, "loss": 0.9419, "step": 955 }, { "epoch": 0.88, "learning_rate": 8.541550213737171e-07, "loss": 0.924, "step": 960 }, { "epoch": 0.89, "learning_rate": 7.905777244947954e-07, "loss": 0.9194, "step": 965 }, { "epoch": 0.89, "learning_rate": 7.293615845160196e-07, "loss": 0.9124, "step": 970 }, { "epoch": 0.89, "learning_rate": 6.705222963319191e-07, "loss": 0.9282, "step": 975 }, { "epoch": 0.9, "learning_rate": 6.140749454480932e-07, "loss": 0.9384, "step": 980 }, { "epoch": 0.9, "learning_rate": 5.600340041135133e-07, "loss": 0.9259, "step": 985 }, { "epoch": 0.91, "learning_rate": 5.0841332761005e-07, "loss": 0.9277, "step": 990 }, { "epoch": 0.91, "learning_rate": 4.592261507001994e-07, "loss": 0.925, "step": 995 }, { "epoch": 0.92, "learning_rate": 4.124850842338779e-07, "loss": 0.9253, "step": 1000 }, { "epoch": 0.92, "learning_rate": 3.6820211191520127e-07, "loss": 0.9235, "step": 1005 }, { "epoch": 0.93, "learning_rate": 3.263885872300343e-07, "loss": 0.9086, "step": 1010 }, { "epoch": 0.93, "learning_rate": 2.870552305351382e-07, "loss": 0.9292, "step": 1015 }, { "epoch": 0.94, "learning_rate": 2.5021212630962246e-07, "loss": 0.9329, "step": 1020 }, { "epoch": 0.94, "learning_rate": 2.158687205694443e-07, "loss": 0.9214, "step": 1025 }, { "epoch": 0.94, "learning_rate": 1.840338184455881e-07, "loss": 0.9144, "step": 1030 }, { "epoch": 0.95, "learning_rate": 1.5471558192656776e-07, "loss": 0.923, "step": 1035 }, { "epoch": 0.95, "learning_rate": 1.279215277658097e-07, "loss": 0.9123, "step": 1040 }, { "epoch": 0.96, "learning_rate": 1.0365852555447642e-07, "loss": 0.9406, "step": 1045 }, { "epoch": 0.96, "learning_rate": 8.19327959602012e-08, "loss": 0.9364, "step": 1050 }, { "epoch": 0.97, "learning_rate": 6.274990913221035e-08, "loss": 0.9204, "step": 1055 }, { "epoch": 0.97, "learning_rate": 4.6114783273213395e-08, "loss": 0.9214, "step": 1060 }, { "epoch": 0.98, "learning_rate": 3.203168337845508e-08, "loss": 0.9126, "step": 1065 }, { "epoch": 0.98, "learning_rate": 2.05042201422323e-08, "loss": 0.9267, "step": 1070 }, { "epoch": 0.99, "learning_rate": 1.1535349032167908e-08, "loss": 0.9244, "step": 1075 }, { "epoch": 0.99, "learning_rate": 5.127369531473525e-09, "loss": 0.9455, "step": 1080 }, { "epoch": 1.0, "learning_rate": 1.2819245493955746e-09, "loss": 0.9161, "step": 1085 }, { "epoch": 1.0, "learning_rate": 0.0, "loss": 0.9077, "step": 1090 }, { "epoch": 1.0, "eval_loss": 0.9354520440101624, "eval_runtime": 142.5652, "eval_samples_per_second": 108.238, "eval_steps_per_second": 1.697, "step": 1090 }, { "epoch": 1.0, "step": 1090, "total_flos": 456447649382400.0, "train_loss": 0.977718904259008, "train_runtime": 5540.1569, "train_samples_per_second": 25.166, "train_steps_per_second": 0.197 } ], "logging_steps": 5, "max_steps": 1090, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 456447649382400.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }