{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 6.0,
"eval_steps": 500,
"global_step": 1650,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.9090909090909091,
"grad_norm": 1.031325101852417,
"learning_rate": 0.0002,
"loss": 0.8838,
"step": 250
},
{
"epoch": 1.8181818181818183,
"grad_norm": 1.0007693767547607,
"learning_rate": 0.0002,
"loss": 0.4622,
"step": 500
},
{
"epoch": 2.7272727272727275,
"grad_norm": 2.0547432899475098,
"learning_rate": 0.0002,
"loss": 0.3032,
"step": 750
},
{
"epoch": 3.6363636363636362,
"grad_norm": 0.6284219026565552,
"learning_rate": 0.0002,
"loss": 0.2312,
"step": 1000
},
{
"epoch": 4.545454545454545,
"grad_norm": 1.0567408800125122,
"learning_rate": 0.0002,
"loss": 0.1959,
"step": 1250
},
{
"epoch": 5.454545454545454,
"grad_norm": 0.54230135679245,
"learning_rate": 0.0002,
"loss": 0.1746,
"step": 1500
}
],
"logging_steps": 250,
"max_steps": 1650,
"num_input_tokens_seen": 0,
"num_train_epochs": 6,
"save_steps": 250,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.103034997322547e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}