File size: 1,853 Bytes
75c4c46
 
 
44be9ef
75c4c46
44be9ef
75c4c46
 
 
 
 
 
44be9ef
75c4c46
 
 
 
 
44be9ef
 
75c4c46
 
 
 
44be9ef
 
75c4c46
 
 
 
44be9ef
 
75c4c46
 
 
 
44be9ef
 
75c4c46
 
 
 
44be9ef
 
75c4c46
 
 
 
44be9ef
 
75c4c46
 
 
 
44be9ef
 
75c4c46
 
 
 
44be9ef
 
 
 
75c4c46
 
 
44be9ef
 
 
 
 
 
 
75c4c46
 
 
44be9ef
75c4c46
44be9ef
75c4c46
44be9ef
75c4c46
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9866666666666667,
  "eval_steps": 500,
  "global_step": 37,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03,
      "learning_rate": 5e-05,
      "loss": 2.2275,
      "step": 1
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.00019954719225730847,
      "loss": 1.9848,
      "step": 5
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.00018412535328311814,
      "loss": 0.6529,
      "step": 10
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.00015000000000000001,
      "loss": 0.2662,
      "step": 15
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.00010475819158237425,
      "loss": 0.238,
      "step": 20
    },
    {
      "epoch": 0.67,
      "learning_rate": 5.845849869981137e-05,
      "loss": 0.2332,
      "step": 25
    },
    {
      "epoch": 0.8,
      "learning_rate": 2.139469052572127e-05,
      "loss": 0.2287,
      "step": 30
    },
    {
      "epoch": 0.93,
      "learning_rate": 1.8071302737293295e-06,
      "loss": 0.203,
      "step": 35
    },
    {
      "epoch": 0.99,
      "eval_loss": 0.22467635571956635,
      "eval_runtime": 9.0559,
      "eval_samples_per_second": 22.085,
      "eval_steps_per_second": 2.761,
      "step": 37
    },
    {
      "epoch": 0.99,
      "step": 37,
      "total_flos": 20586009395200.0,
      "train_loss": 0.5330296472923176,
      "train_runtime": 203.4112,
      "train_samples_per_second": 2.95,
      "train_steps_per_second": 0.182
    }
  ],
  "logging_steps": 5,
  "max_steps": 37,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "total_flos": 20586009395200.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}