File size: 2,179 Bytes
b146c1a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.1261261261261262,
  "eval_steps": 500,
  "global_step": 250,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.11261261261261261,
      "grad_norm": 0.618191659450531,
      "learning_rate": 0.00019510565162951537,
      "loss": 1.2486,
      "step": 25
    },
    {
      "epoch": 0.22522522522522523,
      "grad_norm": 0.8095678687095642,
      "learning_rate": 0.00018090169943749476,
      "loss": 1.2557,
      "step": 50
    },
    {
      "epoch": 0.33783783783783783,
      "grad_norm": 0.578891396522522,
      "learning_rate": 0.00015877852522924732,
      "loss": 1.257,
      "step": 75
    },
    {
      "epoch": 0.45045045045045046,
      "grad_norm": 0.6523413062095642,
      "learning_rate": 0.00013090169943749476,
      "loss": 1.2573,
      "step": 100
    },
    {
      "epoch": 0.5630630630630631,
      "grad_norm": 0.5457090735435486,
      "learning_rate": 0.0001,
      "loss": 1.2009,
      "step": 125
    },
    {
      "epoch": 0.6756756756756757,
      "grad_norm": 0.5979616045951843,
      "learning_rate": 6.909830056250527e-05,
      "loss": 1.2826,
      "step": 150
    },
    {
      "epoch": 0.7882882882882883,
      "grad_norm": 0.6098126173019409,
      "learning_rate": 4.12214747707527e-05,
      "loss": 1.3606,
      "step": 175
    },
    {
      "epoch": 0.9009009009009009,
      "grad_norm": 0.7838721871376038,
      "learning_rate": 1.9098300562505266e-05,
      "loss": 1.3275,
      "step": 200
    },
    {
      "epoch": 1.0135135135135136,
      "grad_norm": 0.5452545881271362,
      "learning_rate": 4.8943483704846475e-06,
      "loss": 1.31,
      "step": 225
    },
    {
      "epoch": 1.1261261261261262,
      "grad_norm": 0.5899876356124878,
      "learning_rate": 0.0,
      "loss": 1.2133,
      "step": 250
    }
  ],
  "logging_steps": 25,
  "max_steps": 250,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 500,
  "total_flos": 814586836156416.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}