AlekseyKorshuk commited on
Commit
f842923
1 Parent(s): 33fafd4

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +12 -6
  2. eval_results.json +7 -6
  3. train_results.json +8 -0
  4. trainer_state.json +265 -0
all_results.json CHANGED
@@ -1,9 +1,15 @@
1
  {
2
- "eval_accuracy": 0.040365853658536587,
3
- "eval_loss": 2.744140625,
4
- "eval_runtime": 8.8422,
 
5
  "eval_samples": 100,
6
- "eval_samples_per_second": 11.309,
7
- "eval_steps_per_second": 0.226,
8
- "perplexity": 15.551243837871848
 
 
 
 
 
9
  }
 
1
  {
2
+ "epoch": 1.0,
3
+ "eval_accuracy": 0.040325203252032524,
4
+ "eval_loss": 2.6953125,
5
+ "eval_runtime": 5.8054,
6
  "eval_samples": 100,
7
+ "eval_samples_per_second": 17.225,
8
+ "eval_steps_per_second": 0.345,
9
+ "perplexity": 14.81014620089916,
10
+ "train_loss": 2.8766326904296875,
11
+ "train_runtime": 267.0978,
12
+ "train_samples": 1000,
13
+ "train_samples_per_second": 3.744,
14
+ "train_steps_per_second": 0.06
15
  }
eval_results.json CHANGED
@@ -1,9 +1,10 @@
1
  {
2
- "eval_accuracy": 0.040365853658536587,
3
- "eval_loss": 2.744140625,
4
- "eval_runtime": 8.8422,
 
5
  "eval_samples": 100,
6
- "eval_samples_per_second": 11.309,
7
- "eval_steps_per_second": 0.226,
8
- "perplexity": 15.551243837871848
9
  }
 
1
  {
2
+ "epoch": 1.0,
3
+ "eval_accuracy": 0.040325203252032524,
4
+ "eval_loss": 2.6953125,
5
+ "eval_runtime": 5.8054,
6
  "eval_samples": 100,
7
+ "eval_samples_per_second": 17.225,
8
+ "eval_steps_per_second": 0.345,
9
+ "perplexity": 14.81014620089916
10
  }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "train_loss": 2.8766326904296875,
4
+ "train_runtime": 267.0978,
5
+ "train_samples": 1000,
6
+ "train_samples_per_second": 3.744,
7
+ "train_steps_per_second": 0.06
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,265 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "global_step": 16,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.06,
12
+ "learning_rate": 0,
13
+ "loss": 2.9912,
14
+ "step": 1
15
+ },
16
+ {
17
+ "epoch": 0.06,
18
+ "eval_accuracy": 0.040365853658536587,
19
+ "eval_loss": 2.744140625,
20
+ "eval_runtime": 4.9375,
21
+ "eval_samples_per_second": 20.253,
22
+ "eval_steps_per_second": 0.405,
23
+ "step": 1
24
+ },
25
+ {
26
+ "epoch": 0.12,
27
+ "learning_rate": 0,
28
+ "loss": 2.9329,
29
+ "step": 2
30
+ },
31
+ {
32
+ "epoch": 0.12,
33
+ "eval_accuracy": 0.040365853658536587,
34
+ "eval_loss": 2.744140625,
35
+ "eval_runtime": 6.0216,
36
+ "eval_samples_per_second": 16.607,
37
+ "eval_steps_per_second": 0.332,
38
+ "step": 2
39
+ },
40
+ {
41
+ "epoch": 0.19,
42
+ "learning_rate": 0.0,
43
+ "loss": 2.9138,
44
+ "step": 3
45
+ },
46
+ {
47
+ "epoch": 0.19,
48
+ "eval_accuracy": 0.038902439024390244,
49
+ "eval_loss": 2.826171875,
50
+ "eval_runtime": 6.6113,
51
+ "eval_samples_per_second": 15.126,
52
+ "eval_steps_per_second": 0.303,
53
+ "step": 3
54
+ },
55
+ {
56
+ "epoch": 0.25,
57
+ "learning_rate": 5e-05,
58
+ "loss": 2.9395,
59
+ "step": 4
60
+ },
61
+ {
62
+ "epoch": 0.25,
63
+ "eval_accuracy": 0.038902439024390244,
64
+ "eval_loss": 2.826171875,
65
+ "eval_runtime": 7.2265,
66
+ "eval_samples_per_second": 13.838,
67
+ "eval_steps_per_second": 0.277,
68
+ "step": 4
69
+ },
70
+ {
71
+ "epoch": 0.31,
72
+ "learning_rate": 5e-05,
73
+ "loss": 2.9109,
74
+ "step": 5
75
+ },
76
+ {
77
+ "epoch": 0.31,
78
+ "eval_accuracy": 0.039878048780487806,
79
+ "eval_loss": 2.794921875,
80
+ "eval_runtime": 7.453,
81
+ "eval_samples_per_second": 13.417,
82
+ "eval_steps_per_second": 0.268,
83
+ "step": 5
84
+ },
85
+ {
86
+ "epoch": 0.38,
87
+ "learning_rate": 5e-05,
88
+ "loss": 2.8391,
89
+ "step": 6
90
+ },
91
+ {
92
+ "epoch": 0.38,
93
+ "eval_accuracy": 0.040284552845528454,
94
+ "eval_loss": 2.74609375,
95
+ "eval_runtime": 6.8467,
96
+ "eval_samples_per_second": 14.606,
97
+ "eval_steps_per_second": 0.292,
98
+ "step": 6
99
+ },
100
+ {
101
+ "epoch": 0.44,
102
+ "learning_rate": 5e-05,
103
+ "loss": 2.9368,
104
+ "step": 7
105
+ },
106
+ {
107
+ "epoch": 0.44,
108
+ "eval_accuracy": 0.03983739837398374,
109
+ "eval_loss": 2.720703125,
110
+ "eval_runtime": 7.2744,
111
+ "eval_samples_per_second": 13.747,
112
+ "eval_steps_per_second": 0.275,
113
+ "step": 7
114
+ },
115
+ {
116
+ "epoch": 0.5,
117
+ "learning_rate": 5e-05,
118
+ "loss": 2.7583,
119
+ "step": 8
120
+ },
121
+ {
122
+ "epoch": 0.5,
123
+ "eval_accuracy": 0.040325203252032524,
124
+ "eval_loss": 2.70703125,
125
+ "eval_runtime": 6.8783,
126
+ "eval_samples_per_second": 14.539,
127
+ "eval_steps_per_second": 0.291,
128
+ "step": 8
129
+ },
130
+ {
131
+ "epoch": 0.56,
132
+ "learning_rate": 5e-05,
133
+ "loss": 2.9756,
134
+ "step": 9
135
+ },
136
+ {
137
+ "epoch": 0.56,
138
+ "eval_accuracy": 0.04083333333333333,
139
+ "eval_loss": 2.68359375,
140
+ "eval_runtime": 7.0122,
141
+ "eval_samples_per_second": 14.261,
142
+ "eval_steps_per_second": 0.285,
143
+ "step": 9
144
+ },
145
+ {
146
+ "epoch": 0.62,
147
+ "learning_rate": 5e-05,
148
+ "loss": 2.8442,
149
+ "step": 10
150
+ },
151
+ {
152
+ "epoch": 0.62,
153
+ "eval_accuracy": 0.04034552845528455,
154
+ "eval_loss": 2.673828125,
155
+ "eval_runtime": 7.0273,
156
+ "eval_samples_per_second": 14.23,
157
+ "eval_steps_per_second": 0.285,
158
+ "step": 10
159
+ },
160
+ {
161
+ "epoch": 0.69,
162
+ "learning_rate": 5e-05,
163
+ "loss": 2.7312,
164
+ "step": 11
165
+ },
166
+ {
167
+ "epoch": 0.69,
168
+ "eval_accuracy": 0.04054878048780488,
169
+ "eval_loss": 2.66796875,
170
+ "eval_runtime": 6.8584,
171
+ "eval_samples_per_second": 14.581,
172
+ "eval_steps_per_second": 0.292,
173
+ "step": 11
174
+ },
175
+ {
176
+ "epoch": 0.75,
177
+ "learning_rate": 5e-05,
178
+ "loss": 2.7439,
179
+ "step": 12
180
+ },
181
+ {
182
+ "epoch": 0.75,
183
+ "eval_accuracy": 0.040365853658536587,
184
+ "eval_loss": 2.669921875,
185
+ "eval_runtime": 6.8135,
186
+ "eval_samples_per_second": 14.677,
187
+ "eval_steps_per_second": 0.294,
188
+ "step": 12
189
+ },
190
+ {
191
+ "epoch": 0.81,
192
+ "learning_rate": 5e-05,
193
+ "loss": 2.9075,
194
+ "step": 13
195
+ },
196
+ {
197
+ "epoch": 0.81,
198
+ "eval_accuracy": 0.04034552845528455,
199
+ "eval_loss": 2.6796875,
200
+ "eval_runtime": 6.1497,
201
+ "eval_samples_per_second": 16.261,
202
+ "eval_steps_per_second": 0.325,
203
+ "step": 13
204
+ },
205
+ {
206
+ "epoch": 0.88,
207
+ "learning_rate": 5e-05,
208
+ "loss": 2.8518,
209
+ "step": 14
210
+ },
211
+ {
212
+ "epoch": 0.88,
213
+ "eval_accuracy": 0.04034552845528455,
214
+ "eval_loss": 2.6796875,
215
+ "eval_runtime": 6.4271,
216
+ "eval_samples_per_second": 15.559,
217
+ "eval_steps_per_second": 0.311,
218
+ "step": 14
219
+ },
220
+ {
221
+ "epoch": 0.94,
222
+ "learning_rate": 5e-05,
223
+ "loss": 2.8579,
224
+ "step": 15
225
+ },
226
+ {
227
+ "epoch": 0.94,
228
+ "eval_accuracy": 0.04044715447154471,
229
+ "eval_loss": 2.677734375,
230
+ "eval_runtime": 7.2159,
231
+ "eval_samples_per_second": 13.858,
232
+ "eval_steps_per_second": 0.277,
233
+ "step": 15
234
+ },
235
+ {
236
+ "epoch": 1.0,
237
+ "learning_rate": 5e-05,
238
+ "loss": 2.8916,
239
+ "step": 16
240
+ },
241
+ {
242
+ "epoch": 1.0,
243
+ "eval_accuracy": 0.040325203252032524,
244
+ "eval_loss": 2.6953125,
245
+ "eval_runtime": 6.3862,
246
+ "eval_samples_per_second": 15.659,
247
+ "eval_steps_per_second": 0.313,
248
+ "step": 16
249
+ },
250
+ {
251
+ "epoch": 1.0,
252
+ "step": 16,
253
+ "total_flos": 4999961640960.0,
254
+ "train_loss": 2.8766326904296875,
255
+ "train_runtime": 267.0978,
256
+ "train_samples_per_second": 3.744,
257
+ "train_steps_per_second": 0.06
258
+ }
259
+ ],
260
+ "max_steps": 16,
261
+ "num_train_epochs": 1,
262
+ "total_flos": 4999961640960.0,
263
+ "trial_name": null,
264
+ "trial_params": null
265
+ }