File size: 1,853 Bytes

75c4c46
 
 
44be9ef
75c4c46
44be9ef
75c4c46
 
 
 
 
 
44be9ef
75c4c46
 
 
 
 
44be9ef
 
75c4c46
 
 
 
44be9ef
 
75c4c46
 
 
 
44be9ef
 
75c4c46
 
 
 
44be9ef
 
75c4c46
 
 
 
44be9ef
 
75c4c46
 
 
 
44be9ef
 
75c4c46
 
 
 
44be9ef
 
75c4c46
 
 
 
44be9ef
 
 
 
75c4c46
 
 
44be9ef
 
 
 
 
 
 
75c4c46
 
 
44be9ef
75c4c46
44be9ef
75c4c46
44be9ef
75c4c46

{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9866666666666667,
  "eval_steps": 500,
  "global_step": 37,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03,
      "learning_rate": 5e-05,
      "loss": 2.2275,
      "step": 1
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.00019954719225730847,
      "loss": 1.9848,
      "step": 5
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.00018412535328311814,
      "loss": 0.6529,
      "step": 10
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.00015000000000000001,
      "loss": 0.2662,
      "step": 15
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.00010475819158237425,
      "loss": 0.238,
      "step": 20
    },
    {
      "epoch": 0.67,
      "learning_rate": 5.845849869981137e-05,
      "loss": 0.2332,
      "step": 25
    },
    {
      "epoch": 0.8,
      "learning_rate": 2.139469052572127e-05,
      "loss": 0.2287,
      "step": 30
    },
    {
      "epoch": 0.93,
      "learning_rate": 1.8071302737293295e-06,
      "loss": 0.203,
      "step": 35
    },
    {
      "epoch": 0.99,
      "eval_loss": 0.22467635571956635,
      "eval_runtime": 9.0559,
      "eval_samples_per_second": 22.085,
      "eval_steps_per_second": 2.761,
      "step": 37
    },
    {
      "epoch": 0.99,
      "step": 37,
      "total_flos": 20586009395200.0,
      "train_loss": 0.5330296472923176,
      "train_runtime": 203.4112,
      "train_samples_per_second": 2.95,
      "train_steps_per_second": 0.182
    }
  ],
  "logging_steps": 5,
  "max_steps": 37,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "total_flos": 20586009395200.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}