File size: 2,384 Bytes
fe6f2b3
 
 
c5c3a79
fe6f2b3
c5c3a79
fe6f2b3
 
 
534a162
 
 
fb19a6f
c5c3a79
fb19a6f
534a162
 
 
 
fb19a6f
c5c3a79
fb19a6f
534a162
 
 
 
fb19a6f
c5c3a79
fb19a6f
534a162
 
 
 
fb19a6f
c5c3a79
fb19a6f
534a162
 
 
c5c3a79
fb19a6f
c5c3a79
fb19a6f
c5c3a79
 
 
 
fb19a6f
c5c3a79
fb19a6f
c5c3a79
 
 
 
fb19a6f
c5c3a79
fb19a6f
c5c3a79
 
 
 
fb19a6f
c5c3a79
fb19a6f
c5c3a79
 
 
 
 
fb19a6f
 
 
 
 
534a162
 
 
c5c3a79
fe6f2b3
c5c3a79
fe6f2b3
 
 
 
 
 
 
 
 
 
 
 
 
fb19a6f
534a162
fe6f2b3
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 4.0,
  "eval_steps": 500,
  "global_step": 2060,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.4854368932038835,
      "grad_norm": 1.9190031290054321,
      "learning_rate": 8.794946550048592e-05,
      "loss": 0.6452,
      "step": 250
    },
    {
      "epoch": 0.970873786407767,
      "grad_norm": 1.3683266639709473,
      "learning_rate": 7.580174927113704e-05,
      "loss": 0.3863,
      "step": 500
    },
    {
      "epoch": 1.4563106796116505,
      "grad_norm": 1.4516526460647583,
      "learning_rate": 6.365403304178815e-05,
      "loss": 0.3609,
      "step": 750
    },
    {
      "epoch": 1.941747572815534,
      "grad_norm": 1.76002836227417,
      "learning_rate": 5.150631681243926e-05,
      "loss": 0.3382,
      "step": 1000
    },
    {
      "epoch": 2.4271844660194173,
      "grad_norm": 1.7399638891220093,
      "learning_rate": 3.9358600583090386e-05,
      "loss": 0.3105,
      "step": 1250
    },
    {
      "epoch": 2.912621359223301,
      "grad_norm": 1.924517035484314,
      "learning_rate": 2.72108843537415e-05,
      "loss": 0.2913,
      "step": 1500
    },
    {
      "epoch": 3.3980582524271843,
      "grad_norm": 1.8956573009490967,
      "learning_rate": 1.5063168124392615e-05,
      "loss": 0.2694,
      "step": 1750
    },
    {
      "epoch": 3.883495145631068,
      "grad_norm": 1.7733403444290161,
      "learning_rate": 2.915451895043732e-06,
      "loss": 0.2591,
      "step": 2000
    },
    {
      "epoch": 4.0,
      "step": 2060,
      "total_flos": 4.996544770678579e+17,
      "train_loss": 0.35469038972576844,
      "train_runtime": 73403.6809,
      "train_samples_per_second": 7.184,
      "train_steps_per_second": 0.028
    }
  ],
  "logging_steps": 250,
  "max_steps": 2060,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": false,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 4.996544770678579e+17,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}