File size: 1,693 Bytes
abc73ca
 
 
 
 
c10fcd0
abc73ca
 
 
 
 
c10fcd0
 
abc73ca
c10fcd0
abc73ca
 
 
c10fcd0
 
abc73ca
c10fcd0
abc73ca
 
 
c10fcd0
 
abc73ca
c10fcd0
abc73ca
 
 
c10fcd0
 
abc73ca
c10fcd0
abc73ca
 
 
c10fcd0
 
abc73ca
c10fcd0
abc73ca
 
 
c10fcd0
 
abc73ca
c10fcd0
abc73ca
 
 
 
c10fcd0
abc73ca
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c10fcd0
abc73ca
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 6.0,
  "eval_steps": 500,
  "global_step": 1650,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.9090909090909091,
      "grad_norm": 1.031325101852417,
      "learning_rate": 0.0002,
      "loss": 0.8838,
      "step": 250
    },
    {
      "epoch": 1.8181818181818183,
      "grad_norm": 1.0007693767547607,
      "learning_rate": 0.0002,
      "loss": 0.4622,
      "step": 500
    },
    {
      "epoch": 2.7272727272727275,
      "grad_norm": 2.0547432899475098,
      "learning_rate": 0.0002,
      "loss": 0.3032,
      "step": 750
    },
    {
      "epoch": 3.6363636363636362,
      "grad_norm": 0.6284219026565552,
      "learning_rate": 0.0002,
      "loss": 0.2312,
      "step": 1000
    },
    {
      "epoch": 4.545454545454545,
      "grad_norm": 1.0567408800125122,
      "learning_rate": 0.0002,
      "loss": 0.1959,
      "step": 1250
    },
    {
      "epoch": 5.454545454545454,
      "grad_norm": 0.54230135679245,
      "learning_rate": 0.0002,
      "loss": 0.1746,
      "step": 1500
    }
  ],
  "logging_steps": 250,
  "max_steps": 1650,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 6,
  "save_steps": 250,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.103034997322547e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}