GlycerinLOL committed on
Commit
5724555
1 Parent(s): c8ee3bb

End of training

Browse files
Files changed (3) hide show
  1. all_results.json +7 -0
  2. train_results.json +7 -0
  3. trainer_state.json +484 -0
all_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 15.99,
3
+ "train_loss": 1.050457198154915,
4
+ "train_runtime": 71670.9422,
5
+ "train_samples_per_second": 22.324,
6
+ "train_steps_per_second": 0.232
7
+ }
train_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 15.99,
3
+ "train_loss": 1.050457198154915,
4
+ "train_runtime": 71670.9422,
5
+ "train_samples_per_second": 22.324,
6
+ "train_steps_per_second": 0.232
7
+ }
trainer_state.json ADDED
@@ -0,0 +1,484 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 15.988480921526278,
5
+ "eval_steps": 500,
6
+ "global_step": 16656,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.48,
13
+ "learning_rate": 1.9399615754082615e-05,
14
+ "loss": 1.8112,
15
+ "step": 500
16
+ },
17
+ {
18
+ "epoch": 0.96,
19
+ "learning_rate": 1.879923150816523e-05,
20
+ "loss": 1.6328,
21
+ "step": 1000
22
+ },
23
+ {
24
+ "epoch": 1.0,
25
+ "eval_f1": 0.9029,
26
+ "eval_gen_len": 19.87818181818182,
27
+ "eval_loss": 1.4800708293914795,
28
+ "eval_precision": 0.9134,
29
+ "eval_recall": 0.893,
30
+ "eval_rouge1": 0.448,
31
+ "eval_rouge2": 0.2243,
32
+ "eval_rougeL": 0.385,
33
+ "eval_rougeLsum": 0.385,
34
+ "eval_runtime": 603.3554,
35
+ "eval_samples_per_second": 9.116,
36
+ "eval_steps_per_second": 0.57,
37
+ "step": 1041
38
+ },
39
+ {
40
+ "epoch": 1.44,
41
+ "learning_rate": 1.819884726224784e-05,
42
+ "loss": 1.4991,
43
+ "step": 1500
44
+ },
45
+ {
46
+ "epoch": 1.92,
47
+ "learning_rate": 1.7598463016330453e-05,
48
+ "loss": 1.4598,
49
+ "step": 2000
50
+ },
51
+ {
52
+ "epoch": 2.0,
53
+ "eval_f1": 0.9022,
54
+ "eval_gen_len": 19.934363636363635,
55
+ "eval_loss": 1.405110478401184,
56
+ "eval_precision": 0.9147,
57
+ "eval_recall": 0.8903,
58
+ "eval_rouge1": 0.4428,
59
+ "eval_rouge2": 0.2273,
60
+ "eval_rougeL": 0.3851,
61
+ "eval_rougeLsum": 0.385,
62
+ "eval_runtime": 669.8531,
63
+ "eval_samples_per_second": 8.211,
64
+ "eval_steps_per_second": 0.514,
65
+ "step": 2083
66
+ },
67
+ {
68
+ "epoch": 2.4,
69
+ "learning_rate": 1.6998078770413066e-05,
70
+ "loss": 1.3652,
71
+ "step": 2500
72
+ },
73
+ {
74
+ "epoch": 2.88,
75
+ "learning_rate": 1.6397694524495677e-05,
76
+ "loss": 1.3402,
77
+ "step": 3000
78
+ },
79
+ {
80
+ "epoch": 3.0,
81
+ "eval_f1": 0.9034,
82
+ "eval_gen_len": 19.95,
83
+ "eval_loss": 1.3839877843856812,
84
+ "eval_precision": 0.9158,
85
+ "eval_recall": 0.8918,
86
+ "eval_rouge1": 0.4498,
87
+ "eval_rouge2": 0.2318,
88
+ "eval_rougeL": 0.3921,
89
+ "eval_rougeLsum": 0.392,
90
+ "eval_runtime": 670.3562,
91
+ "eval_samples_per_second": 8.205,
92
+ "eval_steps_per_second": 0.513,
93
+ "step": 3125
94
+ },
95
+ {
96
+ "epoch": 3.36,
97
+ "learning_rate": 1.579731027857829e-05,
98
+ "loss": 1.2679,
99
+ "step": 3500
100
+ },
101
+ {
102
+ "epoch": 3.84,
103
+ "learning_rate": 1.5196926032660904e-05,
104
+ "loss": 1.2446,
105
+ "step": 4000
106
+ },
107
+ {
108
+ "epoch": 4.0,
109
+ "eval_f1": 0.9054,
110
+ "eval_gen_len": 19.884,
111
+ "eval_loss": 1.3682185411453247,
112
+ "eval_precision": 0.9169,
113
+ "eval_recall": 0.8944,
114
+ "eval_rouge1": 0.4604,
115
+ "eval_rouge2": 0.2405,
116
+ "eval_rougeL": 0.4014,
117
+ "eval_rougeLsum": 0.4014,
118
+ "eval_runtime": 577.6339,
119
+ "eval_samples_per_second": 9.522,
120
+ "eval_steps_per_second": 0.596,
121
+ "step": 4167
122
+ },
123
+ {
124
+ "epoch": 4.32,
125
+ "learning_rate": 1.4596541786743516e-05,
126
+ "loss": 1.1877,
127
+ "step": 4500
128
+ },
129
+ {
130
+ "epoch": 4.8,
131
+ "learning_rate": 1.399615754082613e-05,
132
+ "loss": 1.1651,
133
+ "step": 5000
134
+ },
135
+ {
136
+ "epoch": 5.0,
137
+ "eval_f1": 0.9055,
138
+ "eval_gen_len": 19.894,
139
+ "eval_loss": 1.3695330619812012,
140
+ "eval_precision": 0.9173,
141
+ "eval_recall": 0.8942,
142
+ "eval_rouge1": 0.4594,
143
+ "eval_rouge2": 0.2401,
144
+ "eval_rougeL": 0.3995,
145
+ "eval_rougeLsum": 0.3995,
146
+ "eval_runtime": 669.362,
147
+ "eval_samples_per_second": 8.217,
148
+ "eval_steps_per_second": 0.514,
149
+ "step": 5208
150
+ },
151
+ {
152
+ "epoch": 5.28,
153
+ "learning_rate": 1.3395773294908743e-05,
154
+ "loss": 1.1201,
155
+ "step": 5500
156
+ },
157
+ {
158
+ "epoch": 5.76,
159
+ "learning_rate": 1.2795389048991355e-05,
160
+ "loss": 1.1002,
161
+ "step": 6000
162
+ },
163
+ {
164
+ "epoch": 6.0,
165
+ "eval_f1": 0.9053,
166
+ "eval_gen_len": 19.91181818181818,
167
+ "eval_loss": 1.3782570362091064,
168
+ "eval_precision": 0.9166,
169
+ "eval_recall": 0.8945,
170
+ "eval_rouge1": 0.4607,
171
+ "eval_rouge2": 0.2423,
172
+ "eval_rougeL": 0.4014,
173
+ "eval_rougeLsum": 0.4014,
174
+ "eval_runtime": 671.1543,
175
+ "eval_samples_per_second": 8.195,
176
+ "eval_steps_per_second": 0.513,
177
+ "step": 6250
178
+ },
179
+ {
180
+ "epoch": 6.24,
181
+ "learning_rate": 1.2195004803073969e-05,
182
+ "loss": 1.0653,
183
+ "step": 6500
184
+ },
185
+ {
186
+ "epoch": 6.72,
187
+ "learning_rate": 1.1594620557156582e-05,
188
+ "loss": 1.0427,
189
+ "step": 7000
190
+ },
191
+ {
192
+ "epoch": 7.0,
193
+ "eval_f1": 0.9056,
194
+ "eval_gen_len": 19.907454545454545,
195
+ "eval_loss": 1.3850913047790527,
196
+ "eval_precision": 0.9172,
197
+ "eval_recall": 0.8946,
198
+ "eval_rouge1": 0.462,
199
+ "eval_rouge2": 0.2432,
200
+ "eval_rougeL": 0.4028,
201
+ "eval_rougeLsum": 0.4028,
202
+ "eval_runtime": 669.8936,
203
+ "eval_samples_per_second": 8.21,
204
+ "eval_steps_per_second": 0.514,
205
+ "step": 7292
206
+ },
207
+ {
208
+ "epoch": 7.2,
209
+ "learning_rate": 1.0994236311239194e-05,
210
+ "loss": 1.0163,
211
+ "step": 7500
212
+ },
213
+ {
214
+ "epoch": 7.68,
215
+ "learning_rate": 1.0393852065321808e-05,
216
+ "loss": 0.9881,
217
+ "step": 8000
218
+ },
219
+ {
220
+ "epoch": 8.0,
221
+ "eval_f1": 0.9059,
222
+ "eval_gen_len": 19.907090909090908,
223
+ "eval_loss": 1.3910883665084839,
224
+ "eval_precision": 0.9177,
225
+ "eval_recall": 0.8947,
226
+ "eval_rouge1": 0.4635,
227
+ "eval_rouge2": 0.2442,
228
+ "eval_rougeL": 0.4038,
229
+ "eval_rougeLsum": 0.4037,
230
+ "eval_runtime": 573.3321,
231
+ "eval_samples_per_second": 9.593,
232
+ "eval_steps_per_second": 0.6,
233
+ "step": 8334
234
+ },
235
+ {
236
+ "epoch": 8.16,
237
+ "learning_rate": 9.79346781940442e-06,
238
+ "loss": 0.9742,
239
+ "step": 8500
240
+ },
241
+ {
242
+ "epoch": 8.64,
243
+ "learning_rate": 9.193083573487034e-06,
244
+ "loss": 0.9435,
245
+ "step": 9000
246
+ },
247
+ {
248
+ "epoch": 9.0,
249
+ "eval_f1": 0.9067,
250
+ "eval_gen_len": 19.880545454545455,
251
+ "eval_loss": 1.4075220823287964,
252
+ "eval_precision": 0.918,
253
+ "eval_recall": 0.8959,
254
+ "eval_rouge1": 0.468,
255
+ "eval_rouge2": 0.2471,
256
+ "eval_rougeL": 0.4085,
257
+ "eval_rougeLsum": 0.4084,
258
+ "eval_runtime": 599.9366,
259
+ "eval_samples_per_second": 9.168,
260
+ "eval_steps_per_second": 0.573,
261
+ "step": 9375
262
+ },
263
+ {
264
+ "epoch": 9.12,
265
+ "learning_rate": 8.592699327569645e-06,
266
+ "loss": 0.9362,
267
+ "step": 9500
268
+ },
269
+ {
270
+ "epoch": 9.6,
271
+ "learning_rate": 7.992315081652257e-06,
272
+ "loss": 0.9035,
273
+ "step": 10000
274
+ },
275
+ {
276
+ "epoch": 10.0,
277
+ "eval_f1": 0.9064,
278
+ "eval_gen_len": 19.881090909090908,
279
+ "eval_loss": 1.412468671798706,
280
+ "eval_precision": 0.9178,
281
+ "eval_recall": 0.8957,
282
+ "eval_rouge1": 0.4675,
283
+ "eval_rouge2": 0.248,
284
+ "eval_rougeL": 0.4085,
285
+ "eval_rougeLsum": 0.4086,
286
+ "eval_runtime": 566.1377,
287
+ "eval_samples_per_second": 9.715,
288
+ "eval_steps_per_second": 0.608,
289
+ "step": 10417
290
+ },
291
+ {
292
+ "epoch": 10.08,
293
+ "learning_rate": 7.391930835734871e-06,
294
+ "loss": 0.9014,
295
+ "step": 10500
296
+ },
297
+ {
298
+ "epoch": 10.56,
299
+ "learning_rate": 6.791546589817484e-06,
300
+ "loss": 0.8702,
301
+ "step": 11000
302
+ },
303
+ {
304
+ "epoch": 11.0,
305
+ "eval_f1": 0.9063,
306
+ "eval_gen_len": 19.894727272727273,
307
+ "eval_loss": 1.4218909740447998,
308
+ "eval_precision": 0.9181,
309
+ "eval_recall": 0.895,
310
+ "eval_rouge1": 0.4646,
311
+ "eval_rouge2": 0.2455,
312
+ "eval_rougeL": 0.405,
313
+ "eval_rougeLsum": 0.4051,
314
+ "eval_runtime": 670.3799,
315
+ "eval_samples_per_second": 8.204,
316
+ "eval_steps_per_second": 0.513,
317
+ "step": 11459
318
+ },
319
+ {
320
+ "epoch": 11.04,
321
+ "learning_rate": 6.191162343900097e-06,
322
+ "loss": 0.8741,
323
+ "step": 11500
324
+ },
325
+ {
326
+ "epoch": 11.52,
327
+ "learning_rate": 5.590778097982709e-06,
328
+ "loss": 0.8395,
329
+ "step": 12000
330
+ },
331
+ {
332
+ "epoch": 12.0,
333
+ "learning_rate": 4.990393852065322e-06,
334
+ "loss": 0.8458,
335
+ "step": 12500
336
+ },
337
+ {
338
+ "epoch": 12.0,
339
+ "eval_f1": 0.9061,
340
+ "eval_gen_len": 19.898545454545456,
341
+ "eval_loss": 1.4338867664337158,
342
+ "eval_precision": 0.9177,
343
+ "eval_recall": 0.8952,
344
+ "eval_rouge1": 0.4643,
345
+ "eval_rouge2": 0.2447,
346
+ "eval_rougeL": 0.4055,
347
+ "eval_rougeLsum": 0.4055,
348
+ "eval_runtime": 670.6829,
349
+ "eval_samples_per_second": 8.201,
350
+ "eval_steps_per_second": 0.513,
351
+ "step": 12501
352
+ },
353
+ {
354
+ "epoch": 12.48,
355
+ "learning_rate": 4.390009606147935e-06,
356
+ "loss": 0.8172,
357
+ "step": 13000
358
+ },
359
+ {
360
+ "epoch": 12.96,
361
+ "learning_rate": 3.7896253602305477e-06,
362
+ "loss": 0.8207,
363
+ "step": 13500
364
+ },
365
+ {
366
+ "epoch": 13.0,
367
+ "eval_f1": 0.9064,
368
+ "eval_gen_len": 19.905272727272727,
369
+ "eval_loss": 1.44303560256958,
370
+ "eval_precision": 0.9182,
371
+ "eval_recall": 0.8952,
372
+ "eval_rouge1": 0.4671,
373
+ "eval_rouge2": 0.2463,
374
+ "eval_rougeL": 0.4068,
375
+ "eval_rougeLsum": 0.4069,
376
+ "eval_runtime": 650.7057,
377
+ "eval_samples_per_second": 8.452,
378
+ "eval_steps_per_second": 0.529,
379
+ "step": 13542
380
+ },
381
+ {
382
+ "epoch": 13.44,
383
+ "learning_rate": 3.189241114313161e-06,
384
+ "loss": 0.8006,
385
+ "step": 14000
386
+ },
387
+ {
388
+ "epoch": 13.92,
389
+ "learning_rate": 2.5888568683957737e-06,
390
+ "loss": 0.7987,
391
+ "step": 14500
392
+ },
393
+ {
394
+ "epoch": 14.0,
395
+ "eval_f1": 0.9059,
396
+ "eval_gen_len": 19.918,
397
+ "eval_loss": 1.449475646018982,
398
+ "eval_precision": 0.9179,
399
+ "eval_recall": 0.8944,
400
+ "eval_rouge1": 0.4633,
401
+ "eval_rouge2": 0.2455,
402
+ "eval_rougeL": 0.4046,
403
+ "eval_rougeLsum": 0.4047,
404
+ "eval_runtime": 661.0314,
405
+ "eval_samples_per_second": 8.32,
406
+ "eval_steps_per_second": 0.52,
407
+ "step": 14584
408
+ },
409
+ {
410
+ "epoch": 14.4,
411
+ "learning_rate": 1.988472622478386e-06,
412
+ "loss": 0.7843,
413
+ "step": 15000
414
+ },
415
+ {
416
+ "epoch": 14.88,
417
+ "learning_rate": 1.3880883765609993e-06,
418
+ "loss": 0.787,
419
+ "step": 15500
420
+ },
421
+ {
422
+ "epoch": 15.0,
423
+ "eval_f1": 0.9064,
424
+ "eval_gen_len": 19.895636363636363,
425
+ "eval_loss": 1.4560260772705078,
426
+ "eval_precision": 0.9182,
427
+ "eval_recall": 0.8953,
428
+ "eval_rouge1": 0.4666,
429
+ "eval_rouge2": 0.2471,
430
+ "eval_rougeL": 0.407,
431
+ "eval_rougeLsum": 0.4072,
432
+ "eval_runtime": 670.9962,
433
+ "eval_samples_per_second": 8.197,
434
+ "eval_steps_per_second": 0.513,
435
+ "step": 15626
436
+ },
437
+ {
438
+ "epoch": 15.36,
439
+ "learning_rate": 7.87704130643612e-07,
440
+ "loss": 0.7775,
441
+ "step": 16000
442
+ },
443
+ {
444
+ "epoch": 15.84,
445
+ "learning_rate": 1.8731988472622478e-07,
446
+ "loss": 0.772,
447
+ "step": 16500
448
+ },
449
+ {
450
+ "epoch": 15.99,
451
+ "eval_f1": 0.9068,
452
+ "eval_gen_len": 19.881636363636364,
453
+ "eval_loss": 1.4622657299041748,
454
+ "eval_precision": 0.9185,
455
+ "eval_recall": 0.8957,
456
+ "eval_rouge1": 0.4678,
457
+ "eval_rouge2": 0.2472,
458
+ "eval_rougeL": 0.4081,
459
+ "eval_rougeLsum": 0.4082,
460
+ "eval_runtime": 669.6134,
461
+ "eval_samples_per_second": 8.214,
462
+ "eval_steps_per_second": 0.514,
463
+ "step": 16656
464
+ },
465
+ {
466
+ "epoch": 15.99,
467
+ "step": 16656,
468
+ "total_flos": 3.421567656204632e+18,
469
+ "train_loss": 1.050457198154915,
470
+ "train_runtime": 71670.9422,
471
+ "train_samples_per_second": 22.324,
472
+ "train_steps_per_second": 0.232
473
+ }
474
+ ],
475
+ "logging_steps": 500,
476
+ "max_steps": 16656,
477
+ "num_input_tokens_seen": 0,
478
+ "num_train_epochs": 16,
479
+ "save_steps": 500,
480
+ "total_flos": 3.421567656204632e+18,
481
+ "train_batch_size": 24,
482
+ "trial_name": null,
483
+ "trial_params": null
484
+ }