{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 761,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.001314060446780552,
      "grad_norm": 0.20481063425540924,
      "learning_rate": 2.5974025974025976e-06,
      "loss": 1.4972,
      "step": 1
    },
    {
      "epoch": 0.006570302233902759,
      "grad_norm": 0.2685558497905731,
      "learning_rate": 1.2987012987012986e-05,
      "loss": 1.4328,
      "step": 5
    },
    {
      "epoch": 0.013140604467805518,
      "grad_norm": 0.15764696896076202,
      "learning_rate": 2.5974025974025972e-05,
      "loss": 1.4448,
      "step": 10
    },
    {
      "epoch": 0.01971090670170828,
      "grad_norm": 0.10870245844125748,
      "learning_rate": 3.8961038961038966e-05,
      "loss": 1.3786,
      "step": 15
    },
    {
      "epoch": 0.026281208935611037,
      "grad_norm": 0.12210144102573395,
      "learning_rate": 5.1948051948051944e-05,
      "loss": 1.3637,
      "step": 20
    },
    {
      "epoch": 0.0328515111695138,
      "grad_norm": 0.14269188046455383,
      "learning_rate": 6.493506493506494e-05,
      "loss": 1.3934,
      "step": 25
    },
    {
      "epoch": 0.03942181340341656,
      "grad_norm": 0.13970889151096344,
      "learning_rate": 7.792207792207793e-05,
      "loss": 1.3698,
      "step": 30
    },
    {
      "epoch": 0.045992115637319315,
      "grad_norm": 0.13595248758792877,
      "learning_rate": 9.090909090909092e-05,
      "loss": 1.3411,
      "step": 35
    },
    {
      "epoch": 0.052562417871222074,
      "grad_norm": 0.12064177542924881,
      "learning_rate": 0.00010389610389610389,
      "loss": 1.3198,
      "step": 40
    },
    {
      "epoch": 0.05913272010512484,
      "grad_norm": 0.13153237104415894,
      "learning_rate": 0.00011688311688311689,
      "loss": 1.3558,
      "step": 45
    },
    {
      "epoch": 0.0657030223390276,
      "grad_norm": 0.1267818659543991,
      "learning_rate": 0.00012987012987012987,
      "loss": 1.3438,
      "step": 50
    },
    {
      "epoch": 0.07227332457293036,
      "grad_norm": 0.14356698095798492,
      "learning_rate": 0.00014285714285714287,
      "loss": 1.2924,
      "step": 55
    },
    {
      "epoch": 0.07884362680683311,
      "grad_norm": 0.12211916595697403,
      "learning_rate": 0.00015584415584415587,
      "loss": 1.2536,
      "step": 60
    },
    {
      "epoch": 0.08541392904073587,
      "grad_norm": 0.12076783180236816,
      "learning_rate": 0.00016883116883116884,
      "loss": 1.2783,
      "step": 65
    },
    {
      "epoch": 0.09198423127463863,
      "grad_norm": 0.13078632950782776,
      "learning_rate": 0.00018181818181818183,
      "loss": 1.3032,
      "step": 70
    },
    {
      "epoch": 0.09855453350854139,
      "grad_norm": 0.13422036170959473,
      "learning_rate": 0.0001948051948051948,
      "loss": 1.2695,
      "step": 75
    },
    {
      "epoch": 0.10512483574244415,
      "grad_norm": 0.12621906399726868,
      "learning_rate": 0.00019999050722505993,
      "loss": 1.304,
      "step": 80
    },
    {
      "epoch": 0.1116951379763469,
      "grad_norm": 0.12420738488435745,
      "learning_rate": 0.00019993250234920636,
      "loss": 1.2315,
      "step": 85
    },
    {
      "epoch": 0.11826544021024968,
      "grad_norm": 0.13390915095806122,
      "learning_rate": 0.000199821796913812,
      "loss": 1.2631,
      "step": 90
    },
    {
      "epoch": 0.12483574244415244,
      "grad_norm": 0.12965141236782074,
      "learning_rate": 0.000199658449300667,
      "loss": 1.2757,
      "step": 95
    },
    {
      "epoch": 0.1314060446780552,
      "grad_norm": 0.12207651138305664,
      "learning_rate": 0.00019944254565302217,
      "loss": 1.2275,
      "step": 100
    },
    {
      "epoch": 0.13797634691195795,
      "grad_norm": 0.12977205216884613,
      "learning_rate": 0.00019917419983016025,
      "loss": 1.2648,
      "step": 105
    },
    {
      "epoch": 0.1445466491458607,
      "grad_norm": 0.11946344375610352,
      "learning_rate": 0.00019885355334735082,
      "loss": 1.2639,
      "step": 110
    },
    {
      "epoch": 0.15111695137976347,
      "grad_norm": 0.12259474396705627,
      "learning_rate": 0.00019848077530122083,
      "loss": 1.2436,
      "step": 115
    },
    {
      "epoch": 0.15768725361366623,
      "grad_norm": 0.12387403845787048,
      "learning_rate": 0.00019805606228057916,
      "loss": 1.2109,
      "step": 120
    },
    {
      "epoch": 0.164257555847569,
      "grad_norm": 0.12060413509607315,
      "learning_rate": 0.00019757963826274357,
      "loss": 1.2373,
      "step": 125
    },
    {
      "epoch": 0.17082785808147175,
      "grad_norm": 0.11977066099643707,
      "learning_rate": 0.00019705175449542358,
      "loss": 1.2104,
      "step": 130
    },
    {
      "epoch": 0.1773981603153745,
      "grad_norm": 0.11596687883138657,
      "learning_rate": 0.00019647268936422206,
      "loss": 1.2233,
      "step": 135
    },
    {
      "epoch": 0.18396846254927726,
      "grad_norm": 0.1179521232843399,
      "learning_rate": 0.0001958427482458253,
      "loss": 1.2743,
      "step": 140
    },
    {
      "epoch": 0.19053876478318002,
      "grad_norm": 0.12631933391094208,
      "learning_rate": 0.0001951622633469592,
      "loss": 1.2922,
      "step": 145
    },
    {
      "epoch": 0.19710906701708278,
      "grad_norm": 0.12004056572914124,
      "learning_rate": 0.00019443159352919623,
      "loss": 1.2317,
      "step": 150
    },
    {
      "epoch": 0.20367936925098554,
      "grad_norm": 0.1243131160736084,
      "learning_rate": 0.0001936511241197055,
      "loss": 1.2963,
      "step": 155
    },
    {
      "epoch": 0.2102496714848883,
      "grad_norm": 0.11765783280134201,
      "learning_rate": 0.00019282126670804614,
      "loss": 1.275,
      "step": 160
    },
    {
      "epoch": 0.21681997371879105,
      "grad_norm": 0.11682084202766418,
      "learning_rate": 0.0001919424589291108,
      "loss": 1.2308,
      "step": 165
    },
    {
      "epoch": 0.2233902759526938,
      "grad_norm": 0.12230474501848221,
      "learning_rate": 0.00019101516423233368,
      "loss": 1.2608,
      "step": 170
    },
    {
      "epoch": 0.22996057818659657,
      "grad_norm": 0.11786483973264694,
      "learning_rate": 0.00019003987163728535,
      "loss": 1.2516,
      "step": 175
    },
    {
      "epoch": 0.23653088042049936,
      "grad_norm": 0.11655290424823761,
      "learning_rate": 0.00018901709547578245,
      "loss": 1.2032,
      "step": 180
    },
    {
      "epoch": 0.24310118265440211,
      "grad_norm": 0.11199444532394409,
      "learning_rate": 0.0001879473751206489,
      "loss": 1.2352,
      "step": 185
    },
    {
      "epoch": 0.24967148488830487,
      "grad_norm": 0.12412846833467484,
      "learning_rate": 0.0001868312747012715,
      "loss": 1.2113,
      "step": 190
    },
    {
      "epoch": 0.25624178712220763,
      "grad_norm": 0.11748431622982025,
      "learning_rate": 0.00018566938280609966,
      "loss": 1.2238,
      "step": 195
    },
    {
      "epoch": 0.2628120893561104,
      "grad_norm": 0.12820030748844147,
      "learning_rate": 0.0001844623121722465,
      "loss": 1.2296,
      "step": 200
    },
    {
      "epoch": 0.26938239159001315,
      "grad_norm": 0.11599507927894592,
      "learning_rate": 0.00018321069936235503,
      "loss": 1.2363,
      "step": 205
    },
    {
      "epoch": 0.2759526938239159,
      "grad_norm": 0.13053999841213226,
      "learning_rate": 0.0001819152044288992,
      "loss": 1.2052,
      "step": 210
    },
    {
      "epoch": 0.28252299605781866,
      "grad_norm": 0.11889121681451797,
      "learning_rate": 0.00018057651056609784,
      "loss": 1.2022,
      "step": 215
    },
    {
      "epoch": 0.2890932982917214,
      "grad_norm": 0.1182330921292305,
      "learning_rate": 0.00017919532374962416,
      "loss": 1.2397,
      "step": 220
    },
    {
      "epoch": 0.2956636005256242,
      "grad_norm": 0.11528757214546204,
      "learning_rate": 0.0001777723723643014,
      "loss": 1.2275,
      "step": 225
    },
    {
      "epoch": 0.30223390275952694,
      "grad_norm": 0.12166588008403778,
      "learning_rate": 0.00017630840681998066,
      "loss": 1.2883,
      "step": 230
    },
    {
      "epoch": 0.3088042049934297,
      "grad_norm": 0.11569472402334213,
      "learning_rate": 0.00017480419915580356,
      "loss": 1.1974,
      "step": 235
    },
    {
      "epoch": 0.31537450722733246,
      "grad_norm": 0.1137891337275505,
      "learning_rate": 0.00017326054263305847,
      "loss": 1.2236,
      "step": 240
    },
    {
      "epoch": 0.3219448094612352,
      "grad_norm": 0.1182858943939209,
      "learning_rate": 0.00017167825131684513,
      "loss": 1.2156,
      "step": 245
    },
    {
      "epoch": 0.328515111695138,
      "grad_norm": 0.11363399028778076,
      "learning_rate": 0.00017005815964676787,
      "loss": 1.2398,
      "step": 250
    },
    {
      "epoch": 0.33508541392904073,
      "grad_norm": 0.11693256348371506,
      "learning_rate": 0.00016840112199688432,
      "loss": 1.2672,
      "step": 255
    },
    {
      "epoch": 0.3416557161629435,
      "grad_norm": 0.10189284384250641,
      "learning_rate": 0.00016670801222514134,
      "loss": 1.2125,
      "step": 260
    },
    {
      "epoch": 0.34822601839684625,
      "grad_norm": 0.11468140780925751,
      "learning_rate": 0.000164979723212536,
      "loss": 1.2938,
      "step": 265
    },
    {
      "epoch": 0.354796320630749,
      "grad_norm": 0.11262702941894531,
      "learning_rate": 0.00016321716639224434,
      "loss": 1.1908,
      "step": 270
    },
    {
      "epoch": 0.36136662286465177,
      "grad_norm": 0.11424372345209122,
      "learning_rate": 0.0001614212712689668,
      "loss": 1.2362,
      "step": 275
    },
    {
      "epoch": 0.3679369250985545,
      "grad_norm": 0.10987983644008636,
      "learning_rate": 0.00015959298492874288,
      "loss": 1.2357,
      "step": 280
    },
    {
      "epoch": 0.3745072273324573,
      "grad_norm": 0.1153380423784256,
      "learning_rate": 0.00015773327153949465,
      "loss": 1.2273,
      "step": 285
    },
    {
      "epoch": 0.38107752956636004,
      "grad_norm": 0.1153273954987526,
      "learning_rate": 0.0001558431118425614,
      "loss": 1.2516,
      "step": 290
    },
    {
      "epoch": 0.3876478318002628,
      "grad_norm": 0.1217270940542221,
      "learning_rate": 0.0001539235026354946,
      "loss": 1.2697,
      "step": 295
    },
    {
      "epoch": 0.39421813403416556,
      "grad_norm": 0.1220201700925827,
      "learning_rate": 0.00015197545624638504,
      "loss": 1.2384,
      "step": 300
    },
    {
      "epoch": 0.4007884362680683,
      "grad_norm": 0.10586865991353989,
      "learning_rate": 0.00015000000000000001,
      "loss": 1.1821,
      "step": 305
    },
    {
      "epoch": 0.4073587385019711,
      "grad_norm": 0.12206688523292542,
      "learning_rate": 0.00014799817567601157,
      "loss": 1.2562,
      "step": 310
    },
    {
      "epoch": 0.41392904073587383,
      "grad_norm": 0.1146216094493866,
      "learning_rate": 0.00014597103895960226,
      "loss": 1.2326,
      "step": 315
    },
    {
      "epoch": 0.4204993429697766,
      "grad_norm": 0.11471971124410629,
      "learning_rate": 0.00014391965888473703,
      "loss": 1.2526,
      "step": 320
    },
    {
      "epoch": 0.42706964520367935,
      "grad_norm": 0.11247406154870987,
      "learning_rate": 0.00014184511727039612,
      "loss": 1.2698,
      "step": 325
    },
    {
      "epoch": 0.4336399474375821,
      "grad_norm": 0.11814497411251068,
      "learning_rate": 0.00013974850815006503,
      "loss": 1.1875,
      "step": 330
    },
    {
      "epoch": 0.44021024967148487,
      "grad_norm": 0.11835132539272308,
      "learning_rate": 0.00013763093719478358,
      "loss": 1.2311,
      "step": 335
    },
    {
      "epoch": 0.4467805519053876,
      "grad_norm": 0.1311633437871933,
      "learning_rate": 0.00013549352113005728,
      "loss": 1.2106,
      "step": 340
    },
    {
      "epoch": 0.4533508541392904,
      "grad_norm": 0.12060287594795227,
      "learning_rate": 0.00013333738714693956,
      "loss": 1.1733,
      "step": 345
    },
    {
      "epoch": 0.45992115637319314,
      "grad_norm": 0.115968257188797,
      "learning_rate": 0.00013116367230759415,
      "loss": 1.1933,
      "step": 350
    },
    {
      "epoch": 0.4664914586070959,
      "grad_norm": 0.1167786568403244,
      "learning_rate": 0.0001289735229456525,
      "loss": 1.2408,
      "step": 355
    },
    {
      "epoch": 0.4730617608409987,
      "grad_norm": 0.1175895482301712,
      "learning_rate": 0.00012676809406168133,
      "loss": 1.2432,
      "step": 360
    },
    {
      "epoch": 0.47963206307490147,
      "grad_norm": 0.1053074523806572,
      "learning_rate": 0.00012454854871407994,
      "loss": 1.1954,
      "step": 365
    },
    {
      "epoch": 0.48620236530880423,
      "grad_norm": 0.1283629983663559,
      "learning_rate": 0.00012231605740572766,
      "loss": 1.23,
      "step": 370
    },
    {
      "epoch": 0.492772667542707,
      "grad_norm": 0.11125301569700241,
      "learning_rate": 0.00012007179746670592,
      "loss": 1.1961,
      "step": 375
    },
    {
      "epoch": 0.49934296977660975,
      "grad_norm": 0.12729716300964355,
      "learning_rate": 0.00011781695243341932,
      "loss": 1.2646,
      "step": 380
    },
    {
      "epoch": 0.5059132720105125,
      "grad_norm": 0.11228667199611664,
      "learning_rate": 0.00011555271142444433,
      "loss": 1.2453,
      "step": 385
    },
    {
      "epoch": 0.5124835742444153,
      "grad_norm": 0.11379769444465637,
      "learning_rate": 0.00011328026851343367,
      "loss": 1.2444,
      "step": 390
    },
    {
      "epoch": 0.519053876478318,
      "grad_norm": 0.11798006296157837,
      "learning_rate": 0.00011100082209940795,
      "loss": 1.2136,
      "step": 395
    },
    {
      "epoch": 0.5256241787122208,
      "grad_norm": 0.11399506032466888,
      "learning_rate": 0.00010871557427476583,
      "loss": 1.188,
      "step": 400
    },
    {
      "epoch": 0.5321944809461235,
      "grad_norm": 0.11886433511972427,
      "learning_rate": 0.00010642573019134703,
      "loss": 1.2405,
      "step": 405
    },
    {
      "epoch": 0.5387647831800263,
      "grad_norm": 0.11915737390518188,
      "learning_rate": 0.00010413249742488131,
      "loss": 1.2365,
      "step": 410
    },
    {
      "epoch": 0.545335085413929,
      "grad_norm": 0.1189686506986618,
      "learning_rate": 0.00010183708533815974,
      "loss": 1.2662,
      "step": 415
    },
    {
      "epoch": 0.5519053876478318,
      "grad_norm": 0.11237179487943649,
      "learning_rate": 9.954070444326293e-05,
      "loss": 1.2118,
      "step": 420
    },
    {
      "epoch": 0.5584756898817346,
      "grad_norm": 0.11251773685216904,
      "learning_rate": 9.724456576318381e-05,
      "loss": 1.2266,
      "step": 425
    },
    {
      "epoch": 0.5650459921156373,
      "grad_norm": 0.1128716990351677,
      "learning_rate": 9.49498801931804e-05,
      "loss": 1.2424,
      "step": 430
    },
    {
      "epoch": 0.5716162943495401,
      "grad_norm": 0.11235509812831879,
      "learning_rate": 9.265785786219647e-05,
      "loss": 1.1903,
      "step": 435
    },
    {
      "epoch": 0.5781865965834428,
      "grad_norm": 0.11456587165594101,
      "learning_rate": 9.036970749468584e-05,
      "loss": 1.168,
      "step": 440
    },
    {
      "epoch": 0.5847568988173456,
      "grad_norm": 0.11781252175569534,
      "learning_rate": 8.808663577317764e-05,
      "loss": 1.3052,
      "step": 445
    },
    {
      "epoch": 0.5913272010512484,
      "grad_norm": 0.1393735557794571,
      "learning_rate": 8.580984670191848e-05,
      "loss": 1.1935,
      "step": 450
    },
    {
      "epoch": 0.5978975032851511,
      "grad_norm": 0.11526591330766678,
      "learning_rate": 8.35405409719266e-05,
      "loss": 1.1925,
      "step": 455
    },
    {
      "epoch": 0.6044678055190539,
      "grad_norm": 0.11947058141231537,
      "learning_rate": 8.127991532779401e-05,
      "loss": 1.2224,
      "step": 460
    },
    {
      "epoch": 0.6110381077529566,
      "grad_norm": 0.11296655237674713,
      "learning_rate": 7.902916193656898e-05,
      "loss": 1.1984,
      "step": 465
    },
    {
      "epoch": 0.6176084099868594,
      "grad_norm": 0.11227685958147049,
      "learning_rate": 7.678946775905324e-05,
      "loss": 1.1633,
      "step": 470
    },
    {
      "epoch": 0.6241787122207622,
      "grad_norm": 0.11681170761585236,
      "learning_rate": 7.456201392384436e-05,
      "loss": 1.2832,
      "step": 475
    },
    {
      "epoch": 0.6307490144546649,
      "grad_norm": 0.11576228588819504,
      "learning_rate": 7.234797510445411e-05,
      "loss": 1.1869,
      "step": 480
    },
    {
      "epoch": 0.6373193166885677,
      "grad_norm": 0.11033356189727783,
      "learning_rate": 7.014851889983057e-05,
      "loss": 1.2293,
      "step": 485
    },
    {
      "epoch": 0.6438896189224704,
      "grad_norm": 0.12529700994491577,
      "learning_rate": 6.79648052186115e-05,
      "loss": 1.2044,
      "step": 490
    },
    {
      "epoch": 0.6504599211563732,
      "grad_norm": 0.11405730992555618,
      "learning_rate": 6.579798566743314e-05,
      "loss": 1.2509,
      "step": 495
    },
    {
      "epoch": 0.657030223390276,
      "grad_norm": 0.11706886440515518,
      "learning_rate": 6.3649202943617e-05,
      "loss": 1.1843,
      "step": 500
    },
    {
      "epoch": 0.6636005256241787,
      "grad_norm": 0.11757861822843552,
      "learning_rate": 6.151959023255545e-05,
      "loss": 1.1951,
      "step": 505
    },
    {
      "epoch": 0.6701708278580815,
      "grad_norm": 0.1282133162021637,
      "learning_rate": 5.941027061011303e-05,
      "loss": 1.1663,
      "step": 510
    },
    {
      "epoch": 0.6767411300919842,
      "grad_norm": 0.11773235350847244,
      "learning_rate": 5.732235645035964e-05,
      "loss": 1.1371,
      "step": 515
    },
    {
      "epoch": 0.683311432325887,
      "grad_norm": 0.1168578639626503,
      "learning_rate": 5.52569488389472e-05,
      "loss": 1.2345,
      "step": 520
    },
    {
      "epoch": 0.6898817345597897,
      "grad_norm": 0.14125367999076843,
      "learning_rate": 5.321513699243924e-05,
      "loss": 1.2284,
      "step": 525
    },
    {
      "epoch": 0.6964520367936925,
      "grad_norm": 0.11345361173152924,
      "learning_rate": 5.1197997683900214e-05,
      "loss": 1.2032,
      "step": 530
    },
    {
      "epoch": 0.7030223390275953,
      "grad_norm": 0.11965469270944595,
      "learning_rate": 4.920659467504659e-05,
      "loss": 1.2142,
      "step": 535
    },
    {
      "epoch": 0.709592641261498,
      "grad_norm": 0.11439390480518341,
      "learning_rate": 4.7241978155259925e-05,
      "loss": 1.2083,
      "step": 540
    },
    {
      "epoch": 0.7161629434954008,
      "grad_norm": 0.11237988620996475,
      "learning_rate": 4.530518418775733e-05,
      "loss": 1.2324,
      "step": 545
    },
    {
      "epoch": 0.7227332457293035,
      "grad_norm": 0.1146211326122284,
      "learning_rate": 4.3397234163211483e-05,
      "loss": 1.1968,
      "step": 550
    },
    {
      "epoch": 0.7293035479632063,
      "grad_norm": 0.11229883134365082,
      "learning_rate": 4.151913426110864e-05,
      "loss": 1.2254,
      "step": 555
    },
    {
      "epoch": 0.735873850197109,
      "grad_norm": 0.11867476254701614,
      "learning_rate": 3.967187491912813e-05,
      "loss": 1.1785,
      "step": 560
    },
    {
      "epoch": 0.7424441524310118,
      "grad_norm": 0.1171465814113617,
      "learning_rate": 3.7856430310823545e-05,
      "loss": 1.1859,
      "step": 565
    },
    {
      "epoch": 0.7490144546649146,
      "grad_norm": 0.11713968962430954,
      "learning_rate": 3.607375783188125e-05,
      "loss": 1.2049,
      "step": 570
    },
    {
      "epoch": 0.7555847568988173,
      "grad_norm": 0.11092919111251831,
      "learning_rate": 3.4324797595226565e-05,
      "loss": 1.2334,
      "step": 575
    },
    {
      "epoch": 0.7621550591327201,
      "grad_norm": 0.11737989634275436,
      "learning_rate": 3.261047193524439e-05,
      "loss": 1.2249,
      "step": 580
    },
    {
      "epoch": 0.7687253613666228,
      "grad_norm": 0.11774250119924545,
      "learning_rate": 3.093168492137557e-05,
      "loss": 1.2134,
      "step": 585
    },
    {
      "epoch": 0.7752956636005256,
      "grad_norm": 0.11658301949501038,
      "learning_rate": 2.9289321881345254e-05,
      "loss": 1.2749,
      "step": 590
    },
    {
      "epoch": 0.7818659658344284,
      "grad_norm": 0.11728201061487198,
      "learning_rate": 2.7684248934275325e-05,
      "loss": 1.2288,
      "step": 595
    },
    {
      "epoch": 0.7884362680683311,
      "grad_norm": 0.11490175873041153,
      "learning_rate": 2.6117312533926362e-05,
      "loss": 1.1958,
      "step": 600
    },
    {
      "epoch": 0.7950065703022339,
      "grad_norm": 0.12242759019136429,
      "learning_rate": 2.4589339022310386e-05,
      "loss": 1.2922,
      "step": 605
    },
    {
      "epoch": 0.8015768725361366,
      "grad_norm": 0.11296830326318741,
      "learning_rate": 2.3101134193910024e-05,
      "loss": 1.1516,
      "step": 610
    },
    {
      "epoch": 0.8081471747700394,
      "grad_norm": 0.11919889599084854,
      "learning_rate": 2.165348287073339e-05,
      "loss": 1.1924,
      "step": 615
    },
    {
      "epoch": 0.8147174770039421,
      "grad_norm": 0.14579623937606812,
      "learning_rate": 2.02471484884291e-05,
      "loss": 1.2701,
      "step": 620
    },
    {
      "epoch": 0.8212877792378449,
      "grad_norm": 0.13354499638080597,
      "learning_rate": 1.888287269367979e-05,
      "loss": 1.1735,
      "step": 625
    },
    {
      "epoch": 0.8278580814717477,
      "grad_norm": 0.12233620136976242,
      "learning_rate": 1.756137495308594e-05,
      "loss": 1.2578,
      "step": 630
    },
    {
      "epoch": 0.8344283837056504,
      "grad_norm": 0.11252877116203308,
      "learning_rate": 1.6283352173747145e-05,
      "loss": 1.1774,
      "step": 635
    },
    {
      "epoch": 0.8409986859395532,
      "grad_norm": 0.145619198679924,
      "learning_rate": 1.5049478335739886e-05,
      "loss": 1.1973,
      "step": 640
    },
    {
      "epoch": 0.8475689881734559,
      "grad_norm": 0.17927619814872742,
      "learning_rate": 1.3860404136686411e-05,
      "loss": 1.237,
      "step": 645
    },
    {
      "epoch": 0.8541392904073587,
      "grad_norm": 0.1277463138103485,
      "learning_rate": 1.2716756648601857e-05,
      "loss": 1.1906,
      "step": 650
    },
    {
      "epoch": 0.8607095926412615,
      "grad_norm": 0.11752137541770935,
      "learning_rate": 1.1619138987200562e-05,
      "loss": 1.2196,
      "step": 655
    },
    {
      "epoch": 0.8672798948751642,
      "grad_norm": 0.1263829469680786,
      "learning_rate": 1.056812999383604e-05,
      "loss": 1.2017,
      "step": 660
    },
    {
      "epoch": 0.873850197109067,
      "grad_norm": 0.11384302377700806,
      "learning_rate": 9.564283930242257e-06,
      "loss": 1.2104,
      "step": 665
    },
    {
      "epoch": 0.8804204993429697,
      "grad_norm": 0.12480375170707703,
      "learning_rate": 8.608130186237329e-06,
      "loss": 1.2602,
      "step": 670
    },
    {
      "epoch": 0.8869908015768725,
      "grad_norm": 0.1103396862745285,
      "learning_rate": 7.700173000543742e-06,
      "loss": 1.2786,
      "step": 675
    },
    {
      "epoch": 0.8935611038107752,
      "grad_norm": 0.11293721199035645,
      "learning_rate": 6.840891194872112e-06,
      "loss": 1.182,
      "step": 680
    },
    {
      "epoch": 0.900131406044678,
      "grad_norm": 0.11655032634735107,
      "learning_rate": 6.030737921409169e-06,
      "loss": 1.2231,
      "step": 685
    },
    {
      "epoch": 0.9067017082785808,
      "grad_norm": 0.11646699905395508,
      "learning_rate": 5.270140423842607e-06,
      "loss": 1.2144,
      "step": 690
    },
    {
      "epoch": 0.9132720105124835,
      "grad_norm": 0.1152067482471466,
      "learning_rate": 4.559499812049251e-06,
      "loss": 1.2346,
      "step": 695
    },
    {
      "epoch": 0.9198423127463863,
      "grad_norm": 0.1144418865442276,
      "learning_rate": 3.899190850565115e-06,
      "loss": 1.1944,
      "step": 700
    },
    {
      "epoch": 0.926412614980289,
      "grad_norm": 0.12021008878946304,
      "learning_rate": 3.2895617609489336e-06,
      "loss": 1.2049,
      "step": 705
    },
    {
      "epoch": 0.9329829172141918,
      "grad_norm": 0.11006554961204529,
      "learning_rate": 2.730934038143607e-06,
      "loss": 1.1771,
      "step": 710
    },
    {
      "epoch": 0.9395532194480947,
      "grad_norm": 0.12349164485931396,
      "learning_rate": 2.22360228093208e-06,
      "loss": 1.2575,
      "step": 715
    },
    {
      "epoch": 0.9461235216819974,
      "grad_norm": 0.11703501641750336,
      "learning_rate": 1.7678340365772206e-06,
      "loss": 1.2332,
      "step": 720
    },
    {
      "epoch": 0.9526938239159002,
      "grad_norm": 0.12201967090368271,
      "learning_rate": 1.3638696597277679e-06,
      "loss": 1.2461,
      "step": 725
    },
    {
      "epoch": 0.9592641261498029,
      "grad_norm": 0.10865501314401627,
      "learning_rate": 1.0119221856644712e-06,
      "loss": 1.2051,
      "step": 730
    },
    {
      "epoch": 0.9658344283837057,
      "grad_norm": 0.1180211752653122,
      "learning_rate": 7.121772179535135e-07,
      "loss": 1.222,
      "step": 735
    },
    {
      "epoch": 0.9724047306176085,
      "grad_norm": 0.11063376069068909,
      "learning_rate": 4.647928305662852e-07,
      "loss": 1.1723,
      "step": 740
    },
    {
      "epoch": 0.9789750328515112,
      "grad_norm": 0.1198456659913063,
      "learning_rate": 2.6989948451726643e-07,
      "loss": 1.2553,
      "step": 745
    },
    {
      "epoch": 0.985545335085414,
      "grad_norm": 0.1216738149523735,
      "learning_rate": 1.2759995906392874e-07,
      "loss": 1.238,
      "step": 750
    },
    {
      "epoch": 0.9921156373193167,
      "grad_norm": 0.11744906008243561,
      "learning_rate": 3.796929750485845e-08,
      "loss": 1.1762,
      "step": 755
    },
    {
      "epoch": 0.9986859395532195,
      "grad_norm": 0.12220453470945358,
      "learning_rate": 1.0547676048688892e-09,
      "loss": 1.2372,
      "step": 760
    },
    {
      "epoch": 1.0,
      "eval_loss": 1.2166202068328857,
      "eval_runtime": 482.0207,
      "eval_samples_per_second": 27.918,
      "eval_steps_per_second": 1.747,
      "step": 761
    },
    {
      "epoch": 1.0,
      "step": 761,
      "total_flos": 4.2537508369596416e+17,
      "train_loss": 1.2369335692439536,
      "train_runtime": 2094.6914,
      "train_samples_per_second": 5.81,
      "train_steps_per_second": 0.363
    }
  ],
  "logging_steps": 5,
  "max_steps": 761,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 200,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 4.2537508369596416e+17,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}