{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.04523181304183943,
  "eval_steps": 500,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0011307953260459858,
      "grad_norm": 0.205332413315773,
      "learning_rate": 2.5e-06,
      "loss": 0.9778,
      "step": 5
    },
    {
      "epoch": 0.0022615906520919715,
      "grad_norm": 0.2380959391593933,
      "learning_rate": 5e-06,
      "loss": 0.9816,
      "step": 10
    },
    {
      "epoch": 0.003392385978137957,
      "grad_norm": 0.22828762233257294,
      "learning_rate": 7.5e-06,
      "loss": 1.0123,
      "step": 15
    },
    {
      "epoch": 0.004523181304183943,
      "grad_norm": 0.1957542896270752,
      "learning_rate": 1e-05,
      "loss": 0.9404,
      "step": 20
    },
    {
      "epoch": 0.005653976630229929,
      "grad_norm": 0.2502771019935608,
      "learning_rate": 1.25e-05,
      "loss": 0.9604,
      "step": 25
    },
    {
      "epoch": 0.006784771956275914,
      "grad_norm": 0.24806493520736694,
      "learning_rate": 1.5e-05,
      "loss": 1.0407,
      "step": 30
    },
    {
      "epoch": 0.0079155672823219,
      "grad_norm": 0.28463977575302124,
      "learning_rate": 1.75e-05,
      "loss": 1.0461,
      "step": 35
    },
    {
      "epoch": 0.009046362608367886,
      "grad_norm": 0.2142462134361267,
      "learning_rate": 2e-05,
      "loss": 0.9104,
      "step": 40
    },
    {
      "epoch": 0.010177157934413872,
      "grad_norm": 0.21732334792613983,
      "learning_rate": 2.25e-05,
      "loss": 0.8991,
      "step": 45
    },
    {
      "epoch": 0.011307953260459858,
      "grad_norm": 0.2227325588464737,
      "learning_rate": 2.5e-05,
      "loss": 0.8901,
      "step": 50
    },
    {
      "epoch": 0.012438748586505842,
      "grad_norm": 0.19881105422973633,
      "learning_rate": 2.7500000000000004e-05,
      "loss": 0.8378,
      "step": 55
    },
    {
      "epoch": 0.013569543912551827,
      "grad_norm": 0.21935518085956573,
      "learning_rate": 3e-05,
      "loss": 0.8743,
      "step": 60
    },
    {
      "epoch": 0.014700339238597813,
      "grad_norm": 0.21730449795722961,
      "learning_rate": 3.2500000000000004e-05,
      "loss": 0.8588,
      "step": 65
    },
    {
      "epoch": 0.0158311345646438,
      "grad_norm": 0.23200418055057526,
      "learning_rate": 3.5e-05,
      "loss": 0.7527,
      "step": 70
    },
    {
      "epoch": 0.016961929890689786,
      "grad_norm": 0.20900775492191315,
      "learning_rate": 3.7500000000000003e-05,
      "loss": 0.8365,
      "step": 75
    },
    {
      "epoch": 0.018092725216735772,
      "grad_norm": 0.31192561984062195,
      "learning_rate": 4e-05,
      "loss": 0.7791,
      "step": 80
    },
    {
      "epoch": 0.019223520542781758,
      "grad_norm": 0.25915804505348206,
      "learning_rate": 4.25e-05,
      "loss": 0.8506,
      "step": 85
    },
    {
      "epoch": 0.020354315868827744,
      "grad_norm": 0.20527321100234985,
      "learning_rate": 4.5e-05,
      "loss": 0.8062,
      "step": 90
    },
    {
      "epoch": 0.02148511119487373,
      "grad_norm": 0.2385016530752182,
      "learning_rate": 4.75e-05,
      "loss": 0.7525,
      "step": 95
    },
    {
      "epoch": 0.022615906520919715,
      "grad_norm": 0.2394818663597107,
      "learning_rate": 5e-05,
      "loss": 0.7416,
      "step": 100
    },
    {
      "epoch": 0.023746701846965697,
      "grad_norm": 0.269607275724411,
      "learning_rate": 4.999983481113995e-05,
      "loss": 0.7653,
      "step": 105
    },
    {
      "epoch": 0.024877497173011683,
      "grad_norm": 0.21368731558322906,
      "learning_rate": 4.9999339246742786e-05,
      "loss": 0.75,
      "step": 110
    },
    {
      "epoch": 0.02600829249905767,
      "grad_norm": 0.25945496559143066,
      "learning_rate": 4.9998513313357435e-05,
      "loss": 0.7693,
      "step": 115
    },
    {
      "epoch": 0.027139087825103655,
      "grad_norm": 0.2617523968219757,
      "learning_rate": 4.999735702189871e-05,
      "loss": 0.7995,
      "step": 120
    },
    {
      "epoch": 0.02826988315114964,
      "grad_norm": 0.26992905139923096,
      "learning_rate": 4.999587038764713e-05,
      "loss": 0.7784,
      "step": 125
    },
    {
      "epoch": 0.029400678477195626,
      "grad_norm": 0.23823940753936768,
      "learning_rate": 4.999405343024871e-05,
      "loss": 0.7316,
      "step": 130
    },
    {
      "epoch": 0.030531473803241612,
      "grad_norm": 0.2858569920063019,
      "learning_rate": 4.9991906173714756e-05,
      "loss": 0.7796,
      "step": 135
    },
    {
      "epoch": 0.0316622691292876,
      "grad_norm": 0.25298023223876953,
      "learning_rate": 4.99894286464215e-05,
      "loss": 0.7169,
      "step": 140
    },
    {
      "epoch": 0.03279306445533359,
      "grad_norm": 0.35693949460983276,
      "learning_rate": 4.998662088110972e-05,
      "loss": 0.8062,
      "step": 145
    },
    {
      "epoch": 0.03392385978137957,
      "grad_norm": 0.42634308338165283,
      "learning_rate": 4.998348291488435e-05,
      "loss": 0.7035,
      "step": 150
    },
    {
      "epoch": 0.03505465510742556,
      "grad_norm": 0.34167715907096863,
      "learning_rate": 4.998001478921395e-05,
      "loss": 0.7683,
      "step": 155
    },
    {
      "epoch": 0.036185450433471544,
      "grad_norm": 0.2687824070453644,
      "learning_rate": 4.997621654993018e-05,
      "loss": 0.7816,
      "step": 160
    },
    {
      "epoch": 0.03731624575951753,
      "grad_norm": 0.2919199764728546,
      "learning_rate": 4.997208824722719e-05,
      "loss": 0.7392,
      "step": 165
    },
    {
      "epoch": 0.038447041085563516,
      "grad_norm": 0.24317045509815216,
      "learning_rate": 4.9967629935660944e-05,
      "loss": 0.6972,
      "step": 170
    },
    {
      "epoch": 0.0395778364116095,
      "grad_norm": 0.2556512951850891,
      "learning_rate": 4.9962841674148516e-05,
      "loss": 0.7431,
      "step": 175
    },
    {
      "epoch": 0.04070863173765549,
      "grad_norm": 0.35918310284614563,
      "learning_rate": 4.99577235259673e-05,
      "loss": 0.78,
      "step": 180
    },
    {
      "epoch": 0.04183942706370147,
      "grad_norm": 0.28553536534309387,
      "learning_rate": 4.9952275558754185e-05,
      "loss": 0.7467,
      "step": 185
    },
    {
      "epoch": 0.04297022238974746,
      "grad_norm": 0.25147977471351624,
      "learning_rate": 4.994649784450465e-05,
      "loss": 0.7579,
      "step": 190
    },
    {
      "epoch": 0.044101017715793445,
      "grad_norm": 0.3088456690311432,
      "learning_rate": 4.994039045957182e-05,
      "loss": 0.752,
      "step": 195
    },
    {
      "epoch": 0.04523181304183943,
      "grad_norm": 0.32329487800598145,
      "learning_rate": 4.993395348466544e-05,
      "loss": 0.7012,
      "step": 200
    }
  ],
  "logging_steps": 5,
  "max_steps": 4421,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 200,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2.1310339876808294e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}