File size: 2,300 Bytes
efe839e
 
 
 
 
62d65b0
efe839e
 
 
 
 
 
62d65b0
 
 
efe839e
 
 
62d65b0
 
 
efe839e
 
 
62d65b0
 
 
efe839e
 
 
62d65b0
 
 
efe839e
 
 
62d65b0
 
 
efe839e
 
 
62d65b0
 
 
efe839e
 
 
62d65b0
 
 
efe839e
 
 
62d65b0
 
 
efe839e
 
 
62d65b0
 
 
efe839e
 
 
62d65b0
 
 
efe839e
 
 
62d65b0
 
 
efe839e
 
 
62d65b0
 
 
efe839e
 
 
62d65b0
 
 
efe839e
 
 
62d65b0
 
 
 
 
 
efe839e
 
 
62d65b0
efe839e
 
 
62d65b0
efe839e
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 6936,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.22,
      "learning_rate": 4.639561707035756e-05,
      "loss": 0.8543,
      "step": 500
    },
    {
      "epoch": 0.43,
      "learning_rate": 4.2791234140715114e-05,
      "loss": 0.512,
      "step": 1000
    },
    {
      "epoch": 0.65,
      "learning_rate": 3.9186851211072664e-05,
      "loss": 0.4291,
      "step": 1500
    },
    {
      "epoch": 0.87,
      "learning_rate": 3.558246828143022e-05,
      "loss": 0.3857,
      "step": 2000
    },
    {
      "epoch": 1.08,
      "learning_rate": 3.1978085351787776e-05,
      "loss": 0.3264,
      "step": 2500
    },
    {
      "epoch": 1.3,
      "learning_rate": 2.8373702422145332e-05,
      "loss": 0.2763,
      "step": 3000
    },
    {
      "epoch": 1.51,
      "learning_rate": 2.4769319492502884e-05,
      "loss": 0.2598,
      "step": 3500
    },
    {
      "epoch": 1.73,
      "learning_rate": 2.116493656286044e-05,
      "loss": 0.2409,
      "step": 4000
    },
    {
      "epoch": 1.95,
      "learning_rate": 1.7560553633217993e-05,
      "loss": 0.2283,
      "step": 4500
    },
    {
      "epoch": 2.16,
      "learning_rate": 1.395617070357555e-05,
      "loss": 0.185,
      "step": 5000
    },
    {
      "epoch": 2.38,
      "learning_rate": 1.0351787773933102e-05,
      "loss": 0.1686,
      "step": 5500
    },
    {
      "epoch": 2.6,
      "learning_rate": 6.747404844290659e-06,
      "loss": 0.1595,
      "step": 6000
    },
    {
      "epoch": 2.81,
      "learning_rate": 3.143021914648212e-06,
      "loss": 0.1537,
      "step": 6500
    },
    {
      "epoch": 3.0,
      "step": 6936,
      "total_flos": 2.370754172808069e+17,
      "train_loss": 0.3107225017701603,
      "train_runtime": 5244.1454,
      "train_samples_per_second": 338.522,
      "train_steps_per_second": 1.323
    }
  ],
  "logging_steps": 500,
  "max_steps": 6936,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "total_flos": 2.370754172808069e+17,
  "train_batch_size": 64,
  "trial_name": null,
  "trial_params": null
}