{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.96,
  "eval_steps": 500,
  "global_step": 30,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.128,
      "grad_norm": 13.701993942260742,
      "learning_rate": 0.0001866666666666667,
      "loss": 68.1376,
      "step": 2
    },
    {
      "epoch": 0.256,
      "grad_norm": 14.573441505432129,
      "learning_rate": 0.00017333333333333334,
      "loss": 63.6084,
      "step": 4
    },
    {
      "epoch": 0.384,
      "grad_norm": 12.23367977142334,
      "learning_rate": 0.00016,
      "loss": 60.5103,
      "step": 6
    },
    {
      "epoch": 0.512,
      "grad_norm": 11.008517265319824,
      "learning_rate": 0.00014666666666666666,
      "loss": 58.3959,
      "step": 8
    },
    {
      "epoch": 0.64,
      "grad_norm": 9.491394996643066,
      "learning_rate": 0.00013333333333333334,
      "loss": 57.2558,
      "step": 10
    },
    {
      "epoch": 0.768,
      "grad_norm": 7.618191719055176,
      "learning_rate": 0.00012,
      "loss": 55.359,
      "step": 12
    },
    {
      "epoch": 0.896,
      "grad_norm": 6.041190147399902,
      "learning_rate": 0.00010666666666666667,
      "loss": 55.4167,
      "step": 14
    },
    {
      "epoch": 1.064,
      "grad_norm": 11.27497673034668,
      "learning_rate": 9.333333333333334e-05,
      "loss": 70.263,
      "step": 16
    },
    {
      "epoch": 1.192,
      "grad_norm": 7.6353678703308105,
      "learning_rate": 8e-05,
      "loss": 55.4048,
      "step": 18
    },
    {
      "epoch": 1.32,
      "grad_norm": 8.388099670410156,
      "learning_rate": 6.666666666666667e-05,
      "loss": 55.5403,
      "step": 20
    },
    {
      "epoch": 1.448,
      "grad_norm": 5.153501987457275,
      "learning_rate": 5.333333333333333e-05,
      "loss": 54.9104,
      "step": 22
    },
    {
      "epoch": 1.576,
      "grad_norm": 6.167812824249268,
      "learning_rate": 4e-05,
      "loss": 55.303,
      "step": 24
    },
    {
      "epoch": 1.704,
      "grad_norm": 5.262896537780762,
      "learning_rate": 2.6666666666666667e-05,
      "loss": 54.783,
      "step": 26
    },
    {
      "epoch": 1.8319999999999999,
      "grad_norm": 4.895148277282715,
      "learning_rate": 1.3333333333333333e-05,
      "loss": 53.8364,
      "step": 28
    },
    {
      "epoch": 1.96,
      "grad_norm": 9.489398956298828,
      "learning_rate": 0.0,
      "loss": 55.1209,
      "step": 30
    },
    {
      "epoch": 1.96,
      "step": 30,
      "total_flos": 470062485745920.0,
      "train_loss": 58.2563720703125,
      "train_runtime": 1682.7296,
      "train_samples_per_second": 0.594,
      "train_steps_per_second": 0.018
    }
  ],
  "logging_steps": 2,
  "max_steps": 30,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 470062485745920.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}