Coelhomatias commited on
Commit
4907e7d
1 Parent(s): ae7dad9

End of training

Browse files
Files changed (3) hide show
  1. all_results.json +7 -0
  2. train_results.json +7 -0
  3. trainer_state.json +1020 -0
all_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 14.98,
3
+ "train_loss": 0.2219778089443193,
4
+ "train_runtime": 55065.7639,
5
+ "train_samples_per_second": 9.607,
6
+ "train_steps_per_second": 0.1
7
+ }
train_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 14.98,
3
+ "train_loss": 0.2219778089443193,
4
+ "train_runtime": 55065.7639,
5
+ "train_samples_per_second": 9.607,
6
+ "train_steps_per_second": 0.1
7
+ }
trainer_state.json ADDED
@@ -0,0 +1,1020 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.29125073552131653,
3
+ "best_model_checkpoint": "deit-cvc-drop-aug/checkpoint-1800",
4
+ "epoch": 14.979591836734693,
5
+ "eval_steps": 100,
6
+ "global_step": 5505,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.27,
13
+ "learning_rate": 9.991860321746833e-05,
14
+ "loss": 0.5453,
15
+ "step": 100
16
+ },
17
+ {
18
+ "epoch": 0.27,
19
+ "eval_accuracy": 0.7695840775708089,
20
+ "eval_f1": 0.7750933997509339,
21
+ "eval_loss": 0.48243534564971924,
22
+ "eval_precision": 0.7776111944027986,
23
+ "eval_recall": 0.7725918570009931,
24
+ "eval_runtime": 15.4841,
25
+ "eval_samples_per_second": 253.098,
26
+ "eval_steps_per_second": 15.823,
27
+ "step": 100
28
+ },
29
+ {
30
+ "epoch": 0.54,
31
+ "learning_rate": 9.967467788732156e-05,
32
+ "loss": 0.4324,
33
+ "step": 200
34
+ },
35
+ {
36
+ "epoch": 0.54,
37
+ "eval_accuracy": 0.7685634090329165,
38
+ "eval_f1": 0.7637405574368324,
39
+ "eval_loss": 0.4796455502510071,
40
+ "eval_precision": 0.8032876712328767,
41
+ "eval_recall": 0.7279046673286991,
42
+ "eval_runtime": 15.2008,
43
+ "eval_samples_per_second": 257.815,
44
+ "eval_steps_per_second": 16.118,
45
+ "step": 200
46
+ },
47
+ {
48
+ "epoch": 0.82,
49
+ "learning_rate": 9.926901819904178e-05,
50
+ "loss": 0.4042,
51
+ "step": 300
52
+ },
53
+ {
54
+ "epoch": 0.82,
55
+ "eval_accuracy": 0.8321000255167135,
56
+ "eval_f1": 0.8546819787985865,
57
+ "eval_loss": 0.3790486454963684,
58
+ "eval_precision": 0.7696897374701671,
59
+ "eval_recall": 0.9607745779543198,
60
+ "eval_runtime": 15.77,
61
+ "eval_samples_per_second": 248.509,
62
+ "eval_steps_per_second": 15.536,
63
+ "step": 300
64
+ },
65
+ {
66
+ "epoch": 1.09,
67
+ "learning_rate": 9.870294492836612e-05,
68
+ "loss": 0.3849,
69
+ "step": 400
70
+ },
71
+ {
72
+ "epoch": 1.09,
73
+ "eval_accuracy": 0.8101556519520285,
74
+ "eval_f1": 0.8161146811665843,
75
+ "eval_loss": 0.4099680185317993,
76
+ "eval_precision": 0.8125,
77
+ "eval_recall": 0.8197616683217478,
78
+ "eval_runtime": 13.9661,
79
+ "eval_samples_per_second": 280.608,
80
+ "eval_steps_per_second": 17.542,
81
+ "step": 400
82
+ },
83
+ {
84
+ "epoch": 1.36,
85
+ "learning_rate": 9.797830113701101e-05,
86
+ "loss": 0.3621,
87
+ "step": 500
88
+ },
89
+ {
90
+ "epoch": 1.36,
91
+ "eval_accuracy": 0.8387343710130135,
92
+ "eval_f1": 0.8510838831291235,
93
+ "eval_loss": 0.36887437105178833,
94
+ "eval_precision": 0.809865470852018,
95
+ "eval_recall": 0.8967229394240318,
96
+ "eval_runtime": 14.7142,
97
+ "eval_samples_per_second": 266.341,
98
+ "eval_steps_per_second": 16.651,
99
+ "step": 500
100
+ },
101
+ {
102
+ "epoch": 1.63,
103
+ "learning_rate": 9.709744617190038e-05,
104
+ "loss": 0.3457,
105
+ "step": 600
106
+ },
107
+ {
108
+ "epoch": 1.63,
109
+ "eval_accuracy": 0.8458790507782598,
110
+ "eval_f1": 0.8642086330935251,
111
+ "eval_loss": 0.3312939703464508,
112
+ "eval_precision": 0.7896466721446179,
113
+ "eval_recall": 0.9543197616683218,
114
+ "eval_runtime": 15.9456,
115
+ "eval_samples_per_second": 245.773,
116
+ "eval_steps_per_second": 15.365,
117
+ "step": 600
118
+ },
119
+ {
120
+ "epoch": 1.9,
121
+ "learning_rate": 9.606324798343567e-05,
122
+ "loss": 0.3443,
123
+ "step": 700
124
+ },
125
+ {
126
+ "epoch": 1.9,
127
+ "eval_accuracy": 0.840520540954325,
128
+ "eval_f1": 0.8599596683844947,
129
+ "eval_loss": 0.3424055874347687,
130
+ "eval_precision": 0.7835851367905268,
131
+ "eval_recall": 0.9528301886792453,
132
+ "eval_runtime": 15.7574,
133
+ "eval_samples_per_second": 248.708,
134
+ "eval_steps_per_second": 15.548,
135
+ "step": 700
136
+ },
137
+ {
138
+ "epoch": 2.18,
139
+ "learning_rate": 9.487907378781853e-05,
140
+ "loss": 0.3287,
141
+ "step": 800
142
+ },
143
+ {
144
+ "epoch": 2.18,
145
+ "eval_accuracy": 0.8453687165093136,
146
+ "eval_f1": 0.8560570071258907,
147
+ "eval_loss": 0.33078569173812866,
148
+ "eval_precision": 0.8205828779599271,
149
+ "eval_recall": 0.8947368421052632,
150
+ "eval_runtime": 14.597,
151
+ "eval_samples_per_second": 268.479,
152
+ "eval_steps_per_second": 16.784,
153
+ "step": 800
154
+ },
155
+ {
156
+ "epoch": 2.45,
157
+ "learning_rate": 9.35487791038282e-05,
158
+ "loss": 0.3224,
159
+ "step": 900
160
+ },
161
+ {
162
+ "epoch": 2.45,
163
+ "eval_accuracy": 0.7655014034192396,
164
+ "eval_f1": 0.7437970448843045,
165
+ "eval_loss": 0.454572468996048,
166
+ "eval_precision": 0.8480610298792117,
167
+ "eval_recall": 0.6623634558093346,
168
+ "eval_runtime": 15.6304,
169
+ "eval_samples_per_second": 250.73,
170
+ "eval_steps_per_second": 15.675,
171
+ "step": 900
172
+ },
173
+ {
174
+ "epoch": 2.72,
175
+ "learning_rate": 9.207669519974851e-05,
176
+ "loss": 0.3096,
177
+ "step": 1000
178
+ },
179
+ {
180
+ "epoch": 2.72,
181
+ "eval_accuracy": 0.8438377137024751,
182
+ "eval_f1": 0.8521024649589173,
183
+ "eval_loss": 0.34022626280784607,
184
+ "eval_precision": 0.8300376647834274,
185
+ "eval_recall": 0.8753723932472691,
186
+ "eval_runtime": 15.4283,
187
+ "eval_samples_per_second": 254.014,
188
+ "eval_steps_per_second": 15.88,
189
+ "step": 1000
190
+ },
191
+ {
192
+ "epoch": 2.99,
193
+ "learning_rate": 9.046761499131578e-05,
194
+ "loss": 0.3095,
195
+ "step": 1100
196
+ },
197
+ {
198
+ "epoch": 2.99,
199
+ "eval_accuracy": 0.8384792038785405,
200
+ "eval_f1": 0.8524131499183958,
201
+ "eval_loss": 0.36910393834114075,
202
+ "eval_precision": 0.8035164835164835,
203
+ "eval_recall": 0.9076464746772592,
204
+ "eval_runtime": 15.4854,
205
+ "eval_samples_per_second": 253.077,
206
+ "eval_steps_per_second": 15.821,
207
+ "step": 1100
208
+ },
209
+ {
210
+ "epoch": 3.27,
211
+ "learning_rate": 8.872677743660209e-05,
212
+ "loss": 0.2901,
213
+ "step": 1200
214
+ },
215
+ {
216
+ "epoch": 3.27,
217
+ "eval_accuracy": 0.8328655269201327,
218
+ "eval_f1": 0.8467119120056167,
219
+ "eval_loss": 0.36432939767837524,
220
+ "eval_precision": 0.8007968127490039,
221
+ "eval_recall": 0.8982125124131083,
222
+ "eval_runtime": 14.6332,
223
+ "eval_samples_per_second": 267.816,
224
+ "eval_steps_per_second": 16.743,
225
+ "step": 1200
226
+ },
227
+ {
228
+ "epoch": 3.54,
229
+ "learning_rate": 8.685985047864204e-05,
230
+ "loss": 0.2939,
231
+ "step": 1300
232
+ },
233
+ {
234
+ "epoch": 3.54,
235
+ "eval_accuracy": 0.8601684103087522,
236
+ "eval_f1": 0.8760180995475113,
237
+ "eval_loss": 0.30208107829093933,
238
+ "eval_precision": 0.8046550290939318,
239
+ "eval_recall": 0.9612711022840119,
240
+ "eval_runtime": 14.6512,
241
+ "eval_samples_per_second": 267.487,
242
+ "eval_steps_per_second": 16.722,
243
+ "step": 1300
244
+ },
245
+ {
246
+ "epoch": 3.81,
247
+ "learning_rate": 8.487291259133956e-05,
248
+ "loss": 0.2946,
249
+ "step": 1400
250
+ },
251
+ {
252
+ "epoch": 3.81,
253
+ "eval_accuracy": 0.8300586884409288,
254
+ "eval_f1": 0.8342458934793429,
255
+ "eval_loss": 0.36171799898147583,
256
+ "eval_precision": 0.8363273453093812,
257
+ "eval_recall": 0.8321747765640516,
258
+ "eval_runtime": 15.3842,
259
+ "eval_samples_per_second": 254.741,
260
+ "eval_steps_per_second": 15.925,
261
+ "step": 1400
262
+ },
263
+ {
264
+ "epoch": 4.08,
265
+ "learning_rate": 8.277243298873936e-05,
266
+ "loss": 0.2856,
267
+ "step": 1500
268
+ },
269
+ {
270
+ "epoch": 4.08,
271
+ "eval_accuracy": 0.8127073232967594,
272
+ "eval_f1": 0.8116016427104723,
273
+ "eval_loss": 0.48843976855278015,
274
+ "eval_precision": 0.8400637619553666,
275
+ "eval_recall": 0.7850049652432969,
276
+ "eval_runtime": 14.4792,
277
+ "eval_samples_per_second": 270.663,
278
+ "eval_steps_per_second": 16.921,
279
+ "step": 1500
280
+ },
281
+ {
282
+ "epoch": 4.35,
283
+ "learning_rate": 8.056525056209841e-05,
284
+ "loss": 0.2683,
285
+ "step": 1600
286
+ },
287
+ {
288
+ "epoch": 4.35,
289
+ "eval_accuracy": 0.834141362592498,
290
+ "eval_f1": 0.8380667663178873,
291
+ "eval_loss": 0.3540255129337311,
292
+ "eval_precision": 0.841,
293
+ "eval_recall": 0.8351539225422046,
294
+ "eval_runtime": 15.1561,
295
+ "eval_samples_per_second": 258.575,
296
+ "eval_steps_per_second": 16.165,
297
+ "step": 1600
298
+ },
299
+ {
300
+ "epoch": 4.63,
301
+ "learning_rate": 7.82585516133363e-05,
302
+ "loss": 0.2724,
303
+ "step": 1700
304
+ },
305
+ {
306
+ "epoch": 4.63,
307
+ "eval_accuracy": 0.8581270732329676,
308
+ "eval_f1": 0.8664745437079732,
309
+ "eval_loss": 0.30780917406082153,
310
+ "eval_precision": 0.8390697674418605,
311
+ "eval_recall": 0.8957298907646475,
312
+ "eval_runtime": 14.446,
313
+ "eval_samples_per_second": 271.286,
314
+ "eval_steps_per_second": 16.96,
315
+ "step": 1700
316
+ },
317
+ {
318
+ "epoch": 4.9,
319
+ "learning_rate": 7.585984645736096e-05,
320
+ "loss": 0.2685,
321
+ "step": 1800
322
+ },
323
+ {
324
+ "epoch": 4.9,
325
+ "eval_accuracy": 0.862720081653483,
326
+ "eval_f1": 0.8703614457831326,
327
+ "eval_loss": 0.29125073552131653,
328
+ "eval_precision": 0.8455056179775281,
329
+ "eval_recall": 0.8967229394240318,
330
+ "eval_runtime": 15.5954,
331
+ "eval_samples_per_second": 251.291,
332
+ "eval_steps_per_second": 15.71,
333
+ "step": 1800
334
+ },
335
+ {
336
+ "epoch": 5.17,
337
+ "learning_rate": 7.33769449694499e-05,
338
+ "loss": 0.2449,
339
+ "step": 1900
340
+ },
341
+ {
342
+ "epoch": 5.17,
343
+ "eval_accuracy": 0.8443480479714213,
344
+ "eval_f1": 0.8490099009900989,
345
+ "eval_loss": 0.38658004999160767,
346
+ "eval_precision": 0.8464955577492597,
347
+ "eval_recall": 0.8515392254220456,
348
+ "eval_runtime": 16.0186,
349
+ "eval_samples_per_second": 244.653,
350
+ "eval_steps_per_second": 15.295,
351
+ "step": 1900
352
+ },
353
+ {
354
+ "epoch": 5.44,
355
+ "learning_rate": 7.081793115730153e-05,
356
+ "loss": 0.2468,
357
+ "step": 2000
358
+ },
359
+ {
360
+ "epoch": 5.44,
361
+ "eval_accuracy": 0.8588925746363868,
362
+ "eval_f1": 0.8670353450348642,
363
+ "eval_loss": 0.30722111463546753,
364
+ "eval_precision": 0.8405594405594405,
365
+ "eval_recall": 0.8952333664349553,
366
+ "eval_runtime": 15.614,
367
+ "eval_samples_per_second": 250.993,
368
+ "eval_steps_per_second": 15.691,
369
+ "step": 2000
370
+ },
371
+ {
372
+ "epoch": 5.71,
373
+ "learning_rate": 6.819113684054634e-05,
374
+ "loss": 0.2557,
375
+ "step": 2100
376
+ },
377
+ {
378
+ "epoch": 5.71,
379
+ "eval_accuracy": 0.8257208471548865,
380
+ "eval_f1": 0.8232858990944373,
381
+ "eval_loss": 0.3735339641571045,
382
+ "eval_precision": 0.8595353862776878,
383
+ "eval_recall": 0.7899702085402185,
384
+ "eval_runtime": 15.378,
385
+ "eval_samples_per_second": 254.845,
386
+ "eval_steps_per_second": 15.932,
387
+ "step": 2100
388
+ },
389
+ {
390
+ "epoch": 5.99,
391
+ "learning_rate": 6.550511452341459e-05,
392
+ "loss": 0.25,
393
+ "step": 2200
394
+ },
395
+ {
396
+ "epoch": 5.99,
397
+ "eval_accuracy": 0.8553202347537637,
398
+ "eval_f1": 0.8561278863232682,
399
+ "eval_loss": 0.3116500973701477,
400
+ "eval_precision": 0.8754540736896731,
401
+ "eval_recall": 0.8376365441906654,
402
+ "eval_runtime": 15.151,
403
+ "eval_samples_per_second": 258.663,
404
+ "eval_steps_per_second": 16.171,
405
+ "step": 2200
406
+ },
407
+ {
408
+ "epoch": 6.26,
409
+ "learning_rate": 6.276860954888322e-05,
410
+ "loss": 0.2256,
411
+ "step": 2300
412
+ },
413
+ {
414
+ "epoch": 6.26,
415
+ "eval_accuracy": 0.8573615718295483,
416
+ "eval_f1": 0.8652687394552905,
417
+ "eval_loss": 0.3264216184616089,
418
+ "eval_precision": 0.8407494145199064,
419
+ "eval_recall": 0.891261171797418,
420
+ "eval_runtime": 13.8247,
421
+ "eval_samples_per_second": 283.478,
422
+ "eval_steps_per_second": 17.722,
423
+ "step": 2300
424
+ },
425
+ {
426
+ "epoch": 6.53,
427
+ "learning_rate": 5.999053162496453e-05,
428
+ "loss": 0.234,
429
+ "step": 2400
430
+ },
431
+ {
432
+ "epoch": 6.53,
433
+ "eval_accuracy": 0.8295483541719827,
434
+ "eval_f1": 0.8203335126412049,
435
+ "eval_loss": 0.3617473244667053,
436
+ "eval_precision": 0.8949530516431925,
437
+ "eval_recall": 0.7571996027805362,
438
+ "eval_runtime": 14.3944,
439
+ "eval_samples_per_second": 272.259,
440
+ "eval_steps_per_second": 17.02,
441
+ "step": 2400
442
+ },
443
+ {
444
+ "epoch": 6.8,
445
+ "learning_rate": 5.717992581584373e-05,
446
+ "loss": 0.2259,
447
+ "step": 2500
448
+ },
449
+ {
450
+ "epoch": 6.8,
451
+ "eval_accuracy": 0.8609339117121715,
452
+ "eval_f1": 0.8679428156045553,
453
+ "eval_loss": 0.32836005091667175,
454
+ "eval_precision": 0.8476100331282537,
455
+ "eval_recall": 0.8892750744786495,
456
+ "eval_runtime": 15.9578,
457
+ "eval_samples_per_second": 245.585,
458
+ "eval_steps_per_second": 15.353,
459
+ "step": 2500
460
+ },
461
+ {
462
+ "epoch": 7.07,
463
+ "learning_rate": 5.434594309231389e-05,
464
+ "loss": 0.2261,
465
+ "step": 2600
466
+ },
467
+ {
468
+ "epoch": 7.07,
469
+ "eval_accuracy": 0.8443480479714213,
470
+ "eval_f1": 0.8375066595631327,
471
+ "eval_loss": 0.34862977266311646,
472
+ "eval_precision": 0.903448275862069,
473
+ "eval_recall": 0.7805362462760675,
474
+ "eval_runtime": 14.7094,
475
+ "eval_samples_per_second": 266.428,
476
+ "eval_steps_per_second": 16.656,
477
+ "step": 2600
478
+ },
479
+ {
480
+ "epoch": 7.35,
481
+ "learning_rate": 5.1497810537392844e-05,
482
+ "loss": 0.2087,
483
+ "step": 2700
484
+ },
485
+ {
486
+ "epoch": 7.35,
487
+ "eval_accuracy": 0.836948201071702,
488
+ "eval_f1": 0.8365310821181888,
489
+ "eval_loss": 0.3970935046672821,
490
+ "eval_precision": 0.862796833773087,
491
+ "eval_recall": 0.8118172790466733,
492
+ "eval_runtime": 15.0606,
493
+ "eval_samples_per_second": 260.215,
494
+ "eval_steps_per_second": 16.268,
495
+ "step": 2700
496
+ },
497
+ {
498
+ "epoch": 7.62,
499
+ "learning_rate": 4.8644801304128374e-05,
500
+ "loss": 0.2035,
501
+ "step": 2800
502
+ },
503
+ {
504
+ "epoch": 7.62,
505
+ "eval_accuracy": 0.878285276856341,
506
+ "eval_f1": 0.8830595734248591,
507
+ "eval_loss": 0.31056222319602966,
508
+ "eval_precision": 0.8721549636803874,
509
+ "eval_recall": 0.894240317775571,
510
+ "eval_runtime": 15.0478,
511
+ "eval_samples_per_second": 260.437,
512
+ "eval_steps_per_second": 16.281,
513
+ "step": 2800
514
+ },
515
+ {
516
+ "epoch": 7.89,
517
+ "learning_rate": 4.57962044234053e-05,
518
+ "loss": 0.2116,
519
+ "step": 2900
520
+ },
521
+ {
522
+ "epoch": 7.89,
523
+ "eval_accuracy": 0.8451135493748405,
524
+ "eval_f1": 0.8428682371214082,
525
+ "eval_loss": 0.3734387159347534,
526
+ "eval_precision": 0.8804759329367225,
527
+ "eval_recall": 0.8083416087388282,
528
+ "eval_runtime": 15.0966,
529
+ "eval_samples_per_second": 259.594,
530
+ "eval_steps_per_second": 16.229,
531
+ "step": 2900
532
+ },
533
+ {
534
+ "epoch": 8.16,
535
+ "learning_rate": 4.2961294560056445e-05,
536
+ "loss": 0.1956,
537
+ "step": 3000
538
+ },
539
+ {
540
+ "epoch": 8.16,
541
+ "eval_accuracy": 0.85914774177086,
542
+ "eval_f1": 0.863298662704309,
543
+ "eval_loss": 0.3442569077014923,
544
+ "eval_precision": 0.8611660079051383,
545
+ "eval_recall": 0.865441906653426,
546
+ "eval_runtime": 15.4752,
547
+ "eval_samples_per_second": 253.245,
548
+ "eval_steps_per_second": 15.832,
549
+ "step": 3000
550
+ },
551
+ {
552
+ "epoch": 8.44,
553
+ "learning_rate": 4.01493018157476e-05,
554
+ "loss": 0.1826,
555
+ "step": 3100
556
+ },
557
+ {
558
+ "epoch": 8.44,
559
+ "eval_accuracy": 0.8423067108956367,
560
+ "eval_f1": 0.8373684210526314,
561
+ "eval_loss": 0.3795164227485657,
562
+ "eval_precision": 0.8908174692049272,
563
+ "eval_recall": 0.7899702085402185,
564
+ "eval_runtime": 15.3206,
565
+ "eval_samples_per_second": 255.8,
566
+ "eval_steps_per_second": 15.992,
567
+ "step": 3100
568
+ },
569
+ {
570
+ "epoch": 8.71,
571
+ "learning_rate": 3.7369381676954284e-05,
572
+ "loss": 0.1918,
573
+ "step": 3200
574
+ },
575
+ {
576
+ "epoch": 8.71,
577
+ "eval_accuracy": 0.8581270732329676,
578
+ "eval_f1": 0.8569222851260936,
579
+ "eval_loss": 0.33620110154151917,
580
+ "eval_precision": 0.8894230769230769,
581
+ "eval_recall": 0.8267130089374379,
582
+ "eval_runtime": 14.574,
583
+ "eval_samples_per_second": 268.904,
584
+ "eval_steps_per_second": 16.811,
585
+ "step": 3200
586
+ },
587
+ {
588
+ "epoch": 8.98,
589
+ "learning_rate": 3.463058520587625e-05,
590
+ "loss": 0.1886,
591
+ "step": 3300
592
+ },
593
+ {
594
+ "epoch": 8.98,
595
+ "eval_accuracy": 0.8639959173258485,
596
+ "eval_f1": 0.8692666176109884,
597
+ "eval_loss": 0.3259018063545227,
598
+ "eval_precision": 0.8589432864760058,
599
+ "eval_recall": 0.8798411122144985,
600
+ "eval_runtime": 14.5325,
601
+ "eval_samples_per_second": 269.671,
602
+ "eval_steps_per_second": 16.859,
603
+ "step": 3300
604
+ },
605
+ {
606
+ "epoch": 9.25,
607
+ "learning_rate": 3.194182957134365e-05,
608
+ "loss": 0.1716,
609
+ "step": 3400
610
+ },
611
+ {
612
+ "epoch": 9.25,
613
+ "eval_accuracy": 0.846389385047206,
614
+ "eval_f1": 0.8481331987891019,
615
+ "eval_loss": 0.4269343912601471,
616
+ "eval_precision": 0.862051282051282,
617
+ "eval_recall": 0.8346573982125124,
618
+ "eval_runtime": 15.0706,
619
+ "eval_samples_per_second": 260.043,
620
+ "eval_steps_per_second": 16.257,
621
+ "step": 3400
622
+ },
623
+ {
624
+ "epoch": 9.52,
625
+ "learning_rate": 2.9311869015663125e-05,
626
+ "loss": 0.1654,
627
+ "step": 3500
628
+ },
629
+ {
630
+ "epoch": 9.52,
631
+ "eval_accuracy": 0.859658076039806,
632
+ "eval_f1": 0.858974358974359,
633
+ "eval_loss": 0.40663468837738037,
634
+ "eval_precision": 0.8881230116648993,
635
+ "eval_recall": 0.8316782522343595,
636
+ "eval_runtime": 13.6261,
637
+ "eval_samples_per_second": 287.609,
638
+ "eval_steps_per_second": 17.98,
639
+ "step": 3500
640
+ },
641
+ {
642
+ "epoch": 9.8,
643
+ "learning_rate": 2.674926635193134e-05,
644
+ "loss": 0.1625,
645
+ "step": 3600
646
+ },
647
+ {
648
+ "epoch": 9.8,
649
+ "eval_accuracy": 0.8512375606021945,
650
+ "eval_f1": 0.8488462535649469,
651
+ "eval_loss": 0.3926689624786377,
652
+ "eval_precision": 0.8882257189365166,
653
+ "eval_recall": 0.8128103277060575,
654
+ "eval_runtime": 15.0764,
655
+ "eval_samples_per_second": 259.943,
656
+ "eval_steps_per_second": 16.251,
657
+ "step": 3600
658
+ },
659
+ {
660
+ "epoch": 10.07,
661
+ "learning_rate": 2.42623650846177e-05,
662
+ "loss": 0.1659,
663
+ "step": 3700
664
+ },
665
+ {
666
+ "epoch": 10.07,
667
+ "eval_accuracy": 0.8548099004848175,
668
+ "eval_f1": 0.8529335745670716,
669
+ "eval_loss": 0.37967267632484436,
670
+ "eval_precision": 0.889487870619946,
671
+ "eval_recall": 0.8192651439920556,
672
+ "eval_runtime": 14.8402,
673
+ "eval_samples_per_second": 264.08,
674
+ "eval_steps_per_second": 16.509,
675
+ "step": 3700
676
+ },
677
+ {
678
+ "epoch": 10.34,
679
+ "learning_rate": 2.1859262244187556e-05,
680
+ "loss": 0.1519,
681
+ "step": 3800
682
+ },
683
+ {
684
+ "epoch": 10.34,
685
+ "eval_accuracy": 0.8512375606021945,
686
+ "eval_f1": 0.8502440277421012,
687
+ "eval_loss": 0.4088890254497528,
688
+ "eval_precision": 0.8807876530069185,
689
+ "eval_recall": 0.8217477656405164,
690
+ "eval_runtime": 15.4244,
691
+ "eval_samples_per_second": 254.079,
692
+ "eval_steps_per_second": 15.884,
693
+ "step": 3800
694
+ },
695
+ {
696
+ "epoch": 10.61,
697
+ "learning_rate": 1.9547782024213047e-05,
698
+ "loss": 0.1484,
699
+ "step": 3900
700
+ },
701
+ {
702
+ "epoch": 10.61,
703
+ "eval_accuracy": 0.8545547333503445,
704
+ "eval_f1": 0.8533950617283951,
705
+ "eval_loss": 0.3864934742450714,
706
+ "eval_precision": 0.8852721451440768,
707
+ "eval_recall": 0.823733862959285,
708
+ "eval_runtime": 14.9541,
709
+ "eval_samples_per_second": 262.069,
710
+ "eval_steps_per_second": 16.383,
711
+ "step": 3900
712
+ },
713
+ {
714
+ "epoch": 10.88,
715
+ "learning_rate": 1.7335450306805827e-05,
716
+ "loss": 0.1427,
717
+ "step": 4000
718
+ },
719
+ {
720
+ "epoch": 10.88,
721
+ "eval_accuracy": 0.846389385047206,
722
+ "eval_f1": 0.8427377220480669,
723
+ "eval_loss": 0.4346730411052704,
724
+ "eval_precision": 0.8891951488423374,
725
+ "eval_recall": 0.8008937437934459,
726
+ "eval_runtime": 15.9395,
727
+ "eval_samples_per_second": 245.867,
728
+ "eval_steps_per_second": 15.371,
729
+ "step": 4000
730
+ },
731
+ {
732
+ "epoch": 11.16,
733
+ "learning_rate": 1.522947015931348e-05,
734
+ "loss": 0.1375,
735
+ "step": 4100
736
+ },
737
+ {
738
+ "epoch": 11.16,
739
+ "eval_accuracy": 0.8548099004848175,
740
+ "eval_f1": 0.8532370389476398,
741
+ "eval_loss": 0.4687642753124237,
742
+ "eval_precision": 0.8878153515834675,
743
+ "eval_recall": 0.8212512413108243,
744
+ "eval_runtime": 14.957,
745
+ "eval_samples_per_second": 262.017,
746
+ "eval_steps_per_second": 16.38,
747
+ "step": 4100
748
+ },
749
+ {
750
+ "epoch": 11.43,
751
+ "learning_rate": 1.3236698382059287e-05,
752
+ "loss": 0.1276,
753
+ "step": 4200
754
+ },
755
+ {
756
+ "epoch": 11.43,
757
+ "eval_accuracy": 0.846899719316152,
758
+ "eval_f1": 0.8426023084994753,
759
+ "eval_loss": 0.4686568081378937,
760
+ "eval_precision": 0.8932146829810901,
761
+ "eval_recall": 0.7974180734856008,
762
+ "eval_runtime": 13.9054,
763
+ "eval_samples_per_second": 281.833,
764
+ "eval_steps_per_second": 17.619,
765
+ "step": 4200
766
+ },
767
+ {
768
+ "epoch": 11.7,
769
+ "learning_rate": 1.1363623183482775e-05,
770
+ "loss": 0.1275,
771
+ "step": 4300
772
+ },
773
+ {
774
+ "epoch": 11.7,
775
+ "eval_accuracy": 0.8486858892574637,
776
+ "eval_f1": 0.8447237496726892,
777
+ "eval_loss": 0.4493071436882019,
778
+ "eval_precision": 0.8936288088642659,
779
+ "eval_recall": 0.8008937437934459,
780
+ "eval_runtime": 14.5625,
781
+ "eval_samples_per_second": 269.116,
782
+ "eval_steps_per_second": 16.824,
783
+ "step": 4300
784
+ },
785
+ {
786
+ "epoch": 11.97,
787
+ "learning_rate": 9.616343055368083e-06,
788
+ "loss": 0.1349,
789
+ "step": 4400
790
+ },
791
+ {
792
+ "epoch": 11.97,
793
+ "eval_accuracy": 0.8423067108956367,
794
+ "eval_f1": 0.8360742705570292,
795
+ "eval_loss": 0.4618338346481323,
796
+ "eval_precision": 0.89749430523918,
797
+ "eval_recall": 0.7825223435948362,
798
+ "eval_runtime": 14.9603,
799
+ "eval_samples_per_second": 261.96,
800
+ "eval_steps_per_second": 16.377,
801
+ "step": 4400
802
+ },
803
+ {
804
+ "epoch": 12.24,
805
+ "learning_rate": 8.000546916939644e-06,
806
+ "loss": 0.1217,
807
+ "step": 4500
808
+ },
809
+ {
810
+ "epoch": 12.24,
811
+ "eval_accuracy": 0.849706557795356,
812
+ "eval_f1": 0.8450407787424362,
813
+ "eval_loss": 0.4635533094406128,
814
+ "eval_precision": 0.8987129266927812,
815
+ "eval_recall": 0.7974180734856008,
816
+ "eval_runtime": 15.9792,
817
+ "eval_samples_per_second": 245.256,
818
+ "eval_steps_per_second": 15.332,
819
+ "step": 4500
820
+ },
821
+ {
822
+ "epoch": 12.52,
823
+ "learning_rate": 6.521495592473259e-06,
824
+ "loss": 0.1211,
825
+ "step": 4600
826
+ },
827
+ {
828
+ "epoch": 12.52,
829
+ "eval_accuracy": 0.8555754018882368,
830
+ "eval_f1": 0.855316973415133,
831
+ "eval_loss": 0.45266029238700867,
832
+ "eval_precision": 0.8814541622760801,
833
+ "eval_recall": 0.8306852035749752,
834
+ "eval_runtime": 15.4568,
835
+ "eval_samples_per_second": 253.545,
836
+ "eval_steps_per_second": 15.851,
837
+ "step": 4600
838
+ },
839
+ {
840
+ "epoch": 12.79,
841
+ "learning_rate": 5.184004682729348e-06,
842
+ "loss": 0.1164,
843
+ "step": 4700
844
+ },
845
+ {
846
+ "epoch": 12.79,
847
+ "eval_accuracy": 0.8545547333503445,
848
+ "eval_f1": 0.851639770952629,
849
+ "eval_loss": 0.46692270040512085,
850
+ "eval_precision": 0.8949671772428884,
851
+ "eval_recall": 0.8123138033763655,
852
+ "eval_runtime": 16.339,
853
+ "eval_samples_per_second": 239.856,
854
+ "eval_steps_per_second": 14.995,
855
+ "step": 4700
856
+ },
857
+ {
858
+ "epoch": 13.06,
859
+ "learning_rate": 3.992428885976652e-06,
860
+ "loss": 0.1119,
861
+ "step": 4800
862
+ },
863
+ {
864
+ "epoch": 13.06,
865
+ "eval_accuracy": 0.8517478948711406,
866
+ "eval_f1": 0.8495987574424021,
867
+ "eval_loss": 0.461725115776062,
868
+ "eval_precision": 0.8875067604110329,
869
+ "eval_recall": 0.8147964250248262,
870
+ "eval_runtime": 15.7126,
871
+ "eval_samples_per_second": 249.418,
872
+ "eval_steps_per_second": 15.593,
873
+ "step": 4800
874
+ },
875
+ {
876
+ "epoch": 13.33,
877
+ "learning_rate": 2.9506478196551055e-06,
878
+ "loss": 0.11,
879
+ "step": 4900
880
+ },
881
+ {
882
+ "epoch": 13.33,
883
+ "eval_accuracy": 0.8507272263332483,
884
+ "eval_f1": 0.848012470771629,
885
+ "eval_loss": 0.4718102216720581,
886
+ "eval_precision": 0.8893732970027248,
887
+ "eval_recall": 0.8103277060575969,
888
+ "eval_runtime": 15.1226,
889
+ "eval_samples_per_second": 259.149,
890
+ "eval_steps_per_second": 16.201,
891
+ "step": 4900
892
+ },
893
+ {
894
+ "epoch": 13.61,
895
+ "learning_rate": 2.062053388840768e-06,
896
+ "loss": 0.1138,
897
+ "step": 5000
898
+ },
899
+ {
900
+ "epoch": 13.61,
901
+ "eval_accuracy": 0.8479203878540444,
902
+ "eval_f1": 0.8437336130047195,
903
+ "eval_loss": 0.48920777440071106,
904
+ "eval_precision": 0.8938888888888888,
905
+ "eval_recall": 0.7989076464746773,
906
+ "eval_runtime": 15.5429,
907
+ "eval_samples_per_second": 252.14,
908
+ "eval_steps_per_second": 15.763,
909
+ "step": 5000
910
+ },
911
+ {
912
+ "epoch": 13.88,
913
+ "learning_rate": 1.329538742639358e-06,
914
+ "loss": 0.1058,
915
+ "step": 5100
916
+ },
917
+ {
918
+ "epoch": 13.88,
919
+ "eval_accuracy": 0.849961724929829,
920
+ "eval_f1": 0.8474312402698495,
921
+ "eval_loss": 0.47247427701950073,
922
+ "eval_precision": 0.8875,
923
+ "eval_recall": 0.810824230387289,
924
+ "eval_runtime": 16.0011,
925
+ "eval_samples_per_second": 244.92,
926
+ "eval_steps_per_second": 15.311,
927
+ "step": 5100
928
+ },
929
+ {
930
+ "epoch": 14.15,
931
+ "learning_rate": 7.554888544652305e-07,
932
+ "loss": 0.1042,
933
+ "step": 5200
934
+ },
935
+ {
936
+ "epoch": 14.15,
937
+ "eval_accuracy": 0.849706557795356,
938
+ "eval_f1": 0.8464946572843368,
939
+ "eval_loss": 0.47878143191337585,
940
+ "eval_precision": 0.8908392759188152,
941
+ "eval_recall": 0.8063555114200596,
942
+ "eval_runtime": 15.3441,
943
+ "eval_samples_per_second": 255.408,
944
+ "eval_steps_per_second": 15.967,
945
+ "step": 5200
946
+ },
947
+ {
948
+ "epoch": 14.42,
949
+ "learning_rate": 3.4177275687476974e-07,
950
+ "loss": 0.107,
951
+ "step": 5300
952
+ },
953
+ {
954
+ "epoch": 14.42,
955
+ "eval_accuracy": 0.849706557795356,
956
+ "eval_f1": 0.8466545170528509,
957
+ "eval_loss": 0.4759483337402344,
958
+ "eval_precision": 0.8899835796387521,
959
+ "eval_recall": 0.8073485600794439,
960
+ "eval_runtime": 15.151,
961
+ "eval_samples_per_second": 258.663,
962
+ "eval_steps_per_second": 16.171,
963
+ "step": 5300
964
+ },
965
+ {
966
+ "epoch": 14.69,
967
+ "learning_rate": 8.973745623699903e-08,
968
+ "loss": 0.1047,
969
+ "step": 5400
970
+ },
971
+ {
972
+ "epoch": 14.69,
973
+ "eval_accuracy": 0.8489410563919367,
974
+ "eval_f1": 0.8458333333333334,
975
+ "eval_loss": 0.4766782820224762,
976
+ "eval_precision": 0.8893756845564075,
977
+ "eval_recall": 0.8063555114200596,
978
+ "eval_runtime": 15.3016,
979
+ "eval_samples_per_second": 256.118,
980
+ "eval_steps_per_second": 16.011,
981
+ "step": 5400
982
+ },
983
+ {
984
+ "epoch": 14.97,
985
+ "learning_rate": 2.0354705417280351e-10,
986
+ "loss": 0.1085,
987
+ "step": 5500
988
+ },
989
+ {
990
+ "epoch": 14.97,
991
+ "eval_accuracy": 0.8489410563919367,
992
+ "eval_f1": 0.8458333333333334,
993
+ "eval_loss": 0.47687891125679016,
994
+ "eval_precision": 0.8893756845564075,
995
+ "eval_recall": 0.8063555114200596,
996
+ "eval_runtime": 15.647,
997
+ "eval_samples_per_second": 250.463,
998
+ "eval_steps_per_second": 15.658,
999
+ "step": 5500
1000
+ },
1001
+ {
1002
+ "epoch": 14.98,
1003
+ "step": 5505,
1004
+ "total_flos": 2.046979800964418e+20,
1005
+ "train_loss": 0.2219778089443193,
1006
+ "train_runtime": 55065.7639,
1007
+ "train_samples_per_second": 9.607,
1008
+ "train_steps_per_second": 0.1
1009
+ }
1010
+ ],
1011
+ "logging_steps": 100,
1012
+ "max_steps": 5505,
1013
+ "num_input_tokens_seen": 0,
1014
+ "num_train_epochs": 15,
1015
+ "save_steps": 100,
1016
+ "total_flos": 2.046979800964418e+20,
1017
+ "train_batch_size": 16,
1018
+ "trial_name": null,
1019
+ "trial_params": null
1020
+ }