File size: 1,450 Bytes
efe839e
 
 
 
 
7efda69
efe839e
 
 
 
 
 
62d65b0
7efda69
 
efe839e
 
 
62d65b0
7efda69
 
efe839e
 
 
62d65b0
7efda69
 
efe839e
 
 
62d65b0
7efda69
 
efe839e
 
 
62d65b0
7efda69
 
efe839e
 
 
62d65b0
7efda69
 
efe839e
 
 
7efda69
62d65b0
7efda69
 
 
 
efe839e
 
 
7efda69
efe839e
 
 
62d65b0
efe839e
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 3468,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.43,
      "learning_rate": 4.2791234140715114e-05,
      "loss": 0.725,
      "step": 500
    },
    {
      "epoch": 0.87,
      "learning_rate": 3.558246828143022e-05,
      "loss": 0.3879,
      "step": 1000
    },
    {
      "epoch": 1.3,
      "learning_rate": 2.8373702422145332e-05,
      "loss": 0.2897,
      "step": 1500
    },
    {
      "epoch": 1.73,
      "learning_rate": 2.116493656286044e-05,
      "loss": 0.2463,
      "step": 2000
    },
    {
      "epoch": 2.16,
      "learning_rate": 1.395617070357555e-05,
      "loss": 0.2074,
      "step": 2500
    },
    {
      "epoch": 2.6,
      "learning_rate": 6.747404844290659e-06,
      "loss": 0.1688,
      "step": 3000
    },
    {
      "epoch": 3.0,
      "step": 3468,
      "total_flos": 2.370754172808069e+17,
      "train_loss": 0.31381386621600615,
      "train_runtime": 2692.8552,
      "train_samples_per_second": 659.248,
      "train_steps_per_second": 1.288
    }
  ],
  "logging_steps": 500,
  "max_steps": 3468,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "total_flos": 2.370754172808069e+17,
  "train_batch_size": 64,
  "trial_name": null,
  "trial_params": null
}