PlutoG99001 committed on
Commit
711c41b
1 Parent(s): 3c5a737

End of training

Files changed (2)
  1. README.md +3 -1
  2. trainer_state.json +83 -1035
README.md CHANGED
@@ -3,6 +3,8 @@ base_model: facebook/musicgen-melody
  library_name: peft
  license: cc-by-nc-4.0
  tags:
+ - text-to-audio
+ - Punk
  - generated_from_trainer
  model-index:
  - name: Contemporary-Model
@@ -14,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
 
  # Contemporary-Model
 
- This model is a fine-tuned version of [facebook/musicgen-melody](https://huggingface.co/facebook/musicgen-melody) on an unknown dataset.
+ This model is a fine-tuned version of [facebook/musicgen-melody](https://huggingface.co/facebook/musicgen-melody) on the PlutoG99001/Contemporary dataset.
 
  ## Model description
 
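The card metadata above (library_name: peft, base_model: facebook/musicgen-melody, and the new text-to-audio tag) indicates that this repository carries a PEFT adapter rather than full model weights. Below is a minimal usage sketch, not part of the commit: it assumes the adapter is published as PlutoG99001/Contemporary-Model (inferred from the model-index name; the diff does not state the repo id) and uses the standard transformers + peft loading path.

```python
# Minimal sketch under the assumptions stated above: load the base MusicGen Melody
# checkpoint, attach the fine-tuned PEFT adapter, and generate audio from a text prompt.
import torch
from peft import PeftModel
from transformers import AutoProcessor, MusicgenMelodyForConditionalGeneration

base = MusicgenMelodyForConditionalGeneration.from_pretrained("facebook/musicgen-melody")
model = PeftModel.from_pretrained(base, "PlutoG99001/Contemporary-Model")  # assumed repo id
processor = AutoProcessor.from_pretrained("facebook/musicgen-melody")

inputs = processor(text=["fast punk riff with driving drums"], padding=True, return_tensors="pt")
with torch.no_grad():
    audio_values = model.generate(**inputs, max_new_tokens=256)
# audio_values: (batch, channels, samples) at the audio encoder's native sampling rate.
```

The prompt and generation parameters are illustrative only; nothing in this commit prescribes them.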
trainer_state.json CHANGED
@@ -1,1103 +1,151 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.9741935483870967,
5
  "eval_steps": 500,
6
- "global_step": 308,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.025806451612903226,
13
- "grad_norm": 3.5142855644226074,
14
- "learning_rate": 0.00019870129870129872,
15
- "loss": 8.9214,
16
  "step": 2
17
  },
18
  {
19
- "epoch": 0.05161290322580645,
20
- "grad_norm": 3.772717237472534,
21
- "learning_rate": 0.00019740259740259742,
22
- "loss": 8.3405,
23
  "step": 4
24
  },
25
  {
26
- "epoch": 0.07741935483870968,
27
- "grad_norm": 6.7029924392700195,
28
- "learning_rate": 0.00019610389610389613,
29
- "loss": 8.1379,
30
  "step": 6
31
  },
32
  {
33
- "epoch": 0.1032258064516129,
34
- "grad_norm": 3.7924954891204834,
35
- "learning_rate": 0.0001948051948051948,
36
- "loss": 5.913,
37
  "step": 8
38
  },
39
  {
40
- "epoch": 0.12903225806451613,
41
- "grad_norm": 4.214364051818848,
42
- "learning_rate": 0.00019350649350649354,
43
- "loss": 7.0624,
44
  "step": 10
45
  },
46
  {
47
- "epoch": 0.15483870967741936,
48
- "grad_norm": 3.0830295085906982,
49
- "learning_rate": 0.00019220779220779222,
50
- "loss": 6.6031,
51
  "step": 12
52
  },
53
  {
54
- "epoch": 0.18064516129032257,
55
- "grad_norm": 2.293950080871582,
56
- "learning_rate": 0.00019090909090909092,
57
- "loss": 6.1543,
58
  "step": 14
59
  },
60
  {
61
- "epoch": 0.2064516129032258,
62
- "grad_norm": 2.8121275901794434,
63
- "learning_rate": 0.00018961038961038963,
64
- "loss": 5.6952,
65
  "step": 16
66
  },
67
  {
68
- "epoch": 0.23225806451612904,
69
- "grad_norm": 3.2042553424835205,
70
- "learning_rate": 0.00018831168831168833,
71
- "loss": 5.6916,
72
  "step": 18
73
  },
74
  {
75
- "epoch": 0.25806451612903225,
76
- "grad_norm": 3.4788341522216797,
77
- "learning_rate": 0.000187012987012987,
78
- "loss": 5.575,
79
  "step": 20
80
  },
81
  {
82
- "epoch": 0.2838709677419355,
83
- "grad_norm": 1.1461117267608643,
84
- "learning_rate": 0.00018571428571428572,
85
- "loss": 5.7922,
86
  "step": 22
87
  },
88
  {
89
- "epoch": 0.3096774193548387,
90
- "grad_norm": 1.7745267152786255,
91
- "learning_rate": 0.00018441558441558442,
92
- "loss": 5.5945,
93
  "step": 24
94
  },
95
  {
96
- "epoch": 0.33548387096774196,
97
- "grad_norm": 1.0893975496292114,
98
- "learning_rate": 0.00018311688311688313,
99
- "loss": 6.1593,
100
  "step": 26
101
  },
102
  {
103
- "epoch": 0.36129032258064514,
104
- "grad_norm": 2.169384241104126,
105
- "learning_rate": 0.00018181818181818183,
106
- "loss": 5.8243,
107
  "step": 28
108
  },
109
  {
110
- "epoch": 0.3870967741935484,
111
- "grad_norm": 1.8578031063079834,
112
- "learning_rate": 0.00018051948051948054,
113
- "loss": 5.5041,
114
  "step": 30
115
  },
116
  {
117
- "epoch": 0.4129032258064516,
118
- "grad_norm": 1.448111653327942,
119
- "learning_rate": 0.00017922077922077922,
120
- "loss": 5.3471,
121
  "step": 32
122
  },
123
  {
124
- "epoch": 0.43870967741935485,
125
- "grad_norm": 1.828591227531433,
126
- "learning_rate": 0.00017792207792207792,
127
- "loss": 5.0863,
128
  "step": 34
129
  },
130
  {
131
- "epoch": 0.4645161290322581,
132
- "grad_norm": 4.83806848526001,
133
- "learning_rate": 0.00017662337662337663,
134
- "loss": 6.5823,
135
- "step": 36
136
- },
137
- {
138
- "epoch": 0.49032258064516127,
139
- "grad_norm": 2.1270275115966797,
140
- "learning_rate": 0.00017532467532467534,
141
- "loss": 5.3268,
142
- "step": 38
143
- },
144
- {
145
- "epoch": 0.5161290322580645,
146
- "grad_norm": 1.42393958568573,
147
- "learning_rate": 0.00017402597402597401,
148
- "loss": 5.4203,
149
- "step": 40
150
- },
151
- {
152
- "epoch": 0.5419354838709678,
153
- "grad_norm": 2.768852472305298,
154
- "learning_rate": 0.00017272727272727275,
155
- "loss": 5.3403,
156
- "step": 42
157
- },
158
- {
159
- "epoch": 0.567741935483871,
160
- "grad_norm": 0.7816085815429688,
161
- "learning_rate": 0.00017142857142857143,
162
- "loss": 5.6736,
163
- "step": 44
164
- },
165
- {
166
- "epoch": 0.5935483870967742,
167
- "grad_norm": 0.8368009924888611,
168
- "learning_rate": 0.00017012987012987013,
169
- "loss": 5.4177,
170
- "step": 46
171
- },
172
- {
173
- "epoch": 0.6193548387096774,
174
- "grad_norm": 1.7966578006744385,
175
- "learning_rate": 0.00016883116883116884,
176
- "loss": 5.8706,
177
- "step": 48
178
- },
179
- {
180
- "epoch": 0.6451612903225806,
181
- "grad_norm": 0.694948673248291,
182
- "learning_rate": 0.00016753246753246754,
183
- "loss": 5.2186,
184
- "step": 50
185
- },
186
- {
187
- "epoch": 0.6709677419354839,
188
- "grad_norm": 2.0851364135742188,
189
- "learning_rate": 0.00016623376623376625,
190
- "loss": 5.452,
191
- "step": 52
192
- },
193
- {
194
- "epoch": 0.6967741935483871,
195
- "grad_norm": 1.705665946006775,
196
- "learning_rate": 0.00016493506493506495,
197
- "loss": 5.5373,
198
- "step": 54
199
- },
200
- {
201
- "epoch": 0.7225806451612903,
202
- "grad_norm": 1.5386658906936646,
203
- "learning_rate": 0.00016363636363636366,
204
- "loss": 5.8638,
205
- "step": 56
206
- },
207
- {
208
- "epoch": 0.7483870967741936,
209
- "grad_norm": 1.3625737428665161,
210
- "learning_rate": 0.00016233766233766234,
211
- "loss": 5.2913,
212
- "step": 58
213
- },
214
- {
215
- "epoch": 0.7741935483870968,
216
- "grad_norm": 1.357426643371582,
217
- "learning_rate": 0.00016103896103896104,
218
- "loss": 5.7317,
219
- "step": 60
220
- },
221
- {
222
- "epoch": 0.8,
223
- "grad_norm": 1.1882686614990234,
224
- "learning_rate": 0.00015974025974025975,
225
- "loss": 5.9749,
226
- "step": 62
227
- },
228
- {
229
- "epoch": 0.8258064516129032,
230
- "grad_norm": 1.0777816772460938,
231
- "learning_rate": 0.00015844155844155845,
232
- "loss": 5.4791,
233
- "step": 64
234
- },
235
- {
236
- "epoch": 0.8516129032258064,
237
- "grad_norm": 0.7732660174369812,
238
- "learning_rate": 0.00015714285714285716,
239
- "loss": 5.9732,
240
- "step": 66
241
- },
242
- {
243
- "epoch": 0.8774193548387097,
244
- "grad_norm": 1.0730479955673218,
245
- "learning_rate": 0.00015584415584415587,
246
- "loss": 5.4338,
247
- "step": 68
248
- },
249
- {
250
- "epoch": 0.9032258064516129,
251
- "grad_norm": 1.4834214448928833,
252
- "learning_rate": 0.00015454545454545454,
253
- "loss": 5.1802,
254
- "step": 70
255
- },
256
- {
257
- "epoch": 0.9290322580645162,
258
- "grad_norm": 2.2870066165924072,
259
- "learning_rate": 0.00015324675324675325,
260
- "loss": 6.3573,
261
- "step": 72
262
- },
263
- {
264
- "epoch": 0.9548387096774194,
265
- "grad_norm": 0.935196578502655,
266
- "learning_rate": 0.00015194805194805196,
267
- "loss": 5.0633,
268
- "step": 74
269
- },
270
- {
271
- "epoch": 0.9806451612903225,
272
- "grad_norm": 0.994687020778656,
273
- "learning_rate": 0.00015064935064935066,
274
- "loss": 5.8048,
275
- "step": 76
276
- },
277
- {
278
- "epoch": 1.0064516129032257,
279
- "grad_norm": 2.274411201477051,
280
- "learning_rate": 0.00014935064935064934,
281
- "loss": 4.6835,
282
- "step": 78
283
- },
284
- {
285
- "epoch": 1.032258064516129,
286
- "grad_norm": 1.2067323923110962,
287
- "learning_rate": 0.00014805194805194807,
288
- "loss": 5.4713,
289
- "step": 80
290
- },
291
- {
292
- "epoch": 1.0580645161290323,
293
- "grad_norm": 0.9473636150360107,
294
- "learning_rate": 0.00014675324675324675,
295
- "loss": 5.8508,
296
- "step": 82
297
- },
298
- {
299
- "epoch": 1.0838709677419356,
300
- "grad_norm": 5.7191996574401855,
301
- "learning_rate": 0.00014545454545454546,
302
- "loss": 4.9011,
303
- "step": 84
304
- },
305
- {
306
- "epoch": 1.1096774193548387,
307
- "grad_norm": 2.201740026473999,
308
- "learning_rate": 0.00014415584415584416,
309
- "loss": 5.4717,
310
- "step": 86
311
- },
312
- {
313
- "epoch": 1.135483870967742,
314
- "grad_norm": 1.0441229343414307,
315
- "learning_rate": 0.00014285714285714287,
316
- "loss": 5.5206,
317
- "step": 88
318
- },
319
- {
320
- "epoch": 1.1612903225806452,
321
- "grad_norm": 1.0501593351364136,
322
- "learning_rate": 0.00014155844155844155,
323
- "loss": 5.2199,
324
- "step": 90
325
- },
326
- {
327
- "epoch": 1.1870967741935483,
328
- "grad_norm": 1.1907446384429932,
329
- "learning_rate": 0.00014025974025974028,
330
- "loss": 4.8319,
331
- "step": 92
332
- },
333
- {
334
- "epoch": 1.2129032258064516,
335
- "grad_norm": 4.133155345916748,
336
- "learning_rate": 0.00013896103896103896,
337
- "loss": 5.1049,
338
- "step": 94
339
- },
340
- {
341
- "epoch": 1.238709677419355,
342
- "grad_norm": 1.399916172027588,
343
- "learning_rate": 0.00013766233766233766,
344
- "loss": 5.5704,
345
- "step": 96
346
- },
347
- {
348
- "epoch": 1.2645161290322582,
349
- "grad_norm": 1.0567469596862793,
350
- "learning_rate": 0.00013636363636363637,
351
- "loss": 5.3263,
352
- "step": 98
353
- },
354
- {
355
- "epoch": 1.2903225806451613,
356
- "grad_norm": 1.2196253538131714,
357
- "learning_rate": 0.00013506493506493507,
358
- "loss": 5.2236,
359
- "step": 100
360
- },
361
- {
362
- "epoch": 1.3161290322580645,
363
- "grad_norm": 2.465505838394165,
364
- "learning_rate": 0.00013376623376623375,
365
- "loss": 5.0547,
366
- "step": 102
367
- },
368
- {
369
- "epoch": 1.3419354838709676,
370
- "grad_norm": 0.9022129774093628,
371
- "learning_rate": 0.00013246753246753249,
372
- "loss": 5.8387,
373
- "step": 104
374
- },
375
- {
376
- "epoch": 1.367741935483871,
377
- "grad_norm": 1.6524704694747925,
378
- "learning_rate": 0.0001311688311688312,
379
- "loss": 5.4351,
380
- "step": 106
381
- },
382
- {
383
- "epoch": 1.3935483870967742,
384
- "grad_norm": 1.2133516073226929,
385
- "learning_rate": 0.00012987012987012987,
386
- "loss": 5.4708,
387
- "step": 108
388
- },
389
- {
390
- "epoch": 1.4193548387096775,
391
- "grad_norm": 1.019903302192688,
392
- "learning_rate": 0.00012857142857142858,
393
- "loss": 5.8655,
394
- "step": 110
395
- },
396
- {
397
- "epoch": 1.4451612903225808,
398
- "grad_norm": 1.1822901964187622,
399
- "learning_rate": 0.00012727272727272728,
400
- "loss": 5.3089,
401
- "step": 112
402
- },
403
- {
404
- "epoch": 1.4709677419354839,
405
- "grad_norm": 0.8840041160583496,
406
- "learning_rate": 0.000125974025974026,
407
- "loss": 5.0657,
408
- "step": 114
409
- },
410
- {
411
- "epoch": 1.4967741935483871,
412
- "grad_norm": 0.6745492219924927,
413
- "learning_rate": 0.00012467532467532467,
414
- "loss": 4.8004,
415
- "step": 116
416
- },
417
- {
418
- "epoch": 1.5225806451612902,
419
- "grad_norm": 1.4586549997329712,
420
- "learning_rate": 0.0001233766233766234,
421
- "loss": 5.0572,
422
- "step": 118
423
- },
424
- {
425
- "epoch": 1.5483870967741935,
426
- "grad_norm": 1.5153924226760864,
427
- "learning_rate": 0.00012207792207792208,
428
- "loss": 4.4382,
429
- "step": 120
430
- },
431
- {
432
- "epoch": 1.5741935483870968,
433
- "grad_norm": 0.9981886148452759,
434
- "learning_rate": 0.0001207792207792208,
435
- "loss": 4.8087,
436
- "step": 122
437
- },
438
- {
439
- "epoch": 1.6,
440
- "grad_norm": 0.7403278350830078,
441
- "learning_rate": 0.00011948051948051949,
442
- "loss": 5.2025,
443
- "step": 124
444
- },
445
- {
446
- "epoch": 1.6258064516129034,
447
- "grad_norm": 1.1075459718704224,
448
- "learning_rate": 0.0001181818181818182,
449
- "loss": 4.946,
450
- "step": 126
451
- },
452
- {
453
- "epoch": 1.6516129032258065,
454
- "grad_norm": 0.6300385594367981,
455
- "learning_rate": 0.00011688311688311689,
456
- "loss": 5.6817,
457
- "step": 128
458
- },
459
- {
460
- "epoch": 1.6774193548387095,
461
- "grad_norm": 0.8701953291893005,
462
- "learning_rate": 0.00011558441558441559,
463
- "loss": 5.6561,
464
- "step": 130
465
- },
466
- {
467
- "epoch": 1.7032258064516128,
468
- "grad_norm": 1.133817434310913,
469
- "learning_rate": 0.00011428571428571428,
470
- "loss": 5.6528,
471
- "step": 132
472
- },
473
- {
474
- "epoch": 1.729032258064516,
475
- "grad_norm": 1.6048352718353271,
476
- "learning_rate": 0.000112987012987013,
477
- "loss": 4.8602,
478
- "step": 134
479
- },
480
- {
481
- "epoch": 1.7548387096774194,
482
- "grad_norm": 1.2783055305480957,
483
- "learning_rate": 0.00011168831168831168,
484
- "loss": 4.1662,
485
- "step": 136
486
- },
487
- {
488
- "epoch": 1.7806451612903227,
489
- "grad_norm": 1.6137133836746216,
490
- "learning_rate": 0.0001103896103896104,
491
- "loss": 5.2295,
492
- "step": 138
493
- },
494
- {
495
- "epoch": 1.8064516129032258,
496
- "grad_norm": 0.6374461054801941,
497
- "learning_rate": 0.00010909090909090909,
498
- "loss": 5.3992,
499
- "step": 140
500
- },
501
- {
502
- "epoch": 1.832258064516129,
503
- "grad_norm": 0.8323061466217041,
504
- "learning_rate": 0.0001077922077922078,
505
- "loss": 5.1712,
506
- "step": 142
507
- },
508
- {
509
- "epoch": 1.8580645161290321,
510
- "grad_norm": 0.6804484724998474,
511
- "learning_rate": 0.00010649350649350649,
512
- "loss": 5.3369,
513
- "step": 144
514
- },
515
- {
516
- "epoch": 1.8838709677419354,
517
- "grad_norm": 0.9051455855369568,
518
- "learning_rate": 0.0001051948051948052,
519
- "loss": 5.3726,
520
- "step": 146
521
- },
522
- {
523
- "epoch": 1.9096774193548387,
524
- "grad_norm": 0.5890415906906128,
525
- "learning_rate": 0.00010389610389610389,
526
- "loss": 5.4903,
527
- "step": 148
528
- },
529
- {
530
- "epoch": 1.935483870967742,
531
- "grad_norm": 0.5433252453804016,
532
- "learning_rate": 0.00010259740259740261,
533
- "loss": 5.4437,
534
- "step": 150
535
- },
536
- {
537
- "epoch": 1.9612903225806453,
538
- "grad_norm": 1.353121042251587,
539
- "learning_rate": 0.0001012987012987013,
540
- "loss": 4.1137,
541
- "step": 152
542
- },
543
- {
544
- "epoch": 1.9870967741935484,
545
- "grad_norm": 1.0199609994888306,
546
- "learning_rate": 0.0001,
547
- "loss": 5.2558,
548
- "step": 154
549
- },
550
- {
551
- "epoch": 2.0129032258064514,
552
- "grad_norm": 0.6021209359169006,
553
- "learning_rate": 9.870129870129871e-05,
554
- "loss": 5.5338,
555
- "step": 156
556
- },
557
- {
558
- "epoch": 2.0387096774193547,
559
- "grad_norm": 0.9102515578269958,
560
- "learning_rate": 9.74025974025974e-05,
561
- "loss": 4.5988,
562
- "step": 158
563
- },
564
- {
565
- "epoch": 2.064516129032258,
566
- "grad_norm": 0.7101506590843201,
567
- "learning_rate": 9.610389610389611e-05,
568
- "loss": 5.2622,
569
- "step": 160
570
- },
571
- {
572
- "epoch": 2.0903225806451613,
573
- "grad_norm": 0.631308913230896,
574
- "learning_rate": 9.480519480519481e-05,
575
- "loss": 5.4718,
576
- "step": 162
577
- },
578
- {
579
- "epoch": 2.1161290322580646,
580
- "grad_norm": 0.7462102770805359,
581
- "learning_rate": 9.35064935064935e-05,
582
- "loss": 5.0117,
583
- "step": 164
584
- },
585
- {
586
- "epoch": 2.141935483870968,
587
- "grad_norm": 0.6367154717445374,
588
- "learning_rate": 9.220779220779221e-05,
589
- "loss": 4.9968,
590
- "step": 166
591
- },
592
- {
593
- "epoch": 2.167741935483871,
594
- "grad_norm": 0.6959227323532104,
595
- "learning_rate": 9.090909090909092e-05,
596
- "loss": 5.6022,
597
- "step": 168
598
- },
599
- {
600
- "epoch": 2.193548387096774,
601
- "grad_norm": 0.6119377613067627,
602
- "learning_rate": 8.961038961038961e-05,
603
- "loss": 4.9405,
604
- "step": 170
605
- },
606
- {
607
- "epoch": 2.2193548387096773,
608
- "grad_norm": 0.6411863565444946,
609
- "learning_rate": 8.831168831168831e-05,
610
- "loss": 5.1413,
611
- "step": 172
612
- },
613
- {
614
- "epoch": 2.2451612903225806,
615
- "grad_norm": 1.615324854850769,
616
- "learning_rate": 8.701298701298701e-05,
617
- "loss": 4.7308,
618
- "step": 174
619
- },
620
- {
621
- "epoch": 2.270967741935484,
622
- "grad_norm": 0.5708146691322327,
623
- "learning_rate": 8.571428571428571e-05,
624
- "loss": 5.4497,
625
- "step": 176
626
- },
627
- {
628
- "epoch": 2.296774193548387,
629
- "grad_norm": 0.8313891291618347,
630
- "learning_rate": 8.441558441558442e-05,
631
- "loss": 5.3687,
632
- "step": 178
633
- },
634
- {
635
- "epoch": 2.3225806451612905,
636
- "grad_norm": 1.1731419563293457,
637
- "learning_rate": 8.311688311688312e-05,
638
- "loss": 5.3314,
639
- "step": 180
640
- },
641
- {
642
- "epoch": 2.3483870967741938,
643
- "grad_norm": 0.7381497025489807,
644
- "learning_rate": 8.181818181818183e-05,
645
- "loss": 5.1178,
646
- "step": 182
647
- },
648
- {
649
- "epoch": 2.3741935483870966,
650
- "grad_norm": 0.6883618831634521,
651
- "learning_rate": 8.051948051948052e-05,
652
- "loss": 5.9266,
653
- "step": 184
654
- },
655
- {
656
- "epoch": 2.4,
657
- "grad_norm": 0.735080361366272,
658
- "learning_rate": 7.922077922077923e-05,
659
- "loss": 4.7453,
660
- "step": 186
661
- },
662
- {
663
- "epoch": 2.425806451612903,
664
- "grad_norm": 0.6184589862823486,
665
- "learning_rate": 7.792207792207793e-05,
666
- "loss": 5.0924,
667
- "step": 188
668
- },
669
- {
670
- "epoch": 2.4516129032258065,
671
- "grad_norm": 0.6305899024009705,
672
- "learning_rate": 7.662337662337662e-05,
673
- "loss": 5.1167,
674
- "step": 190
675
- },
676
- {
677
- "epoch": 2.47741935483871,
678
- "grad_norm": 0.815334677696228,
679
- "learning_rate": 7.532467532467533e-05,
680
- "loss": 5.4719,
681
- "step": 192
682
- },
683
- {
684
- "epoch": 2.5032258064516126,
685
- "grad_norm": 0.6805923581123352,
686
- "learning_rate": 7.402597402597404e-05,
687
- "loss": 4.4392,
688
- "step": 194
689
- },
690
- {
691
- "epoch": 2.5290322580645164,
692
- "grad_norm": 0.6633741855621338,
693
- "learning_rate": 7.272727272727273e-05,
694
- "loss": 5.1926,
695
- "step": 196
696
- },
697
- {
698
- "epoch": 2.554838709677419,
699
- "grad_norm": 0.6042450666427612,
700
- "learning_rate": 7.142857142857143e-05,
701
- "loss": 4.7079,
702
- "step": 198
703
- },
704
- {
705
- "epoch": 2.5806451612903225,
706
- "grad_norm": 0.5892207622528076,
707
- "learning_rate": 7.012987012987014e-05,
708
- "loss": 4.9938,
709
- "step": 200
710
- },
711
- {
712
- "epoch": 2.606451612903226,
713
- "grad_norm": 0.6753908395767212,
714
- "learning_rate": 6.883116883116883e-05,
715
- "loss": 5.0097,
716
- "step": 202
717
- },
718
- {
719
- "epoch": 2.632258064516129,
720
- "grad_norm": 0.5345973968505859,
721
- "learning_rate": 6.753246753246754e-05,
722
- "loss": 5.1074,
723
- "step": 204
724
- },
725
- {
726
- "epoch": 2.6580645161290324,
727
- "grad_norm": 0.4924313724040985,
728
- "learning_rate": 6.623376623376624e-05,
729
- "loss": 5.0817,
730
- "step": 206
731
- },
732
- {
733
- "epoch": 2.6838709677419352,
734
- "grad_norm": 0.5989976525306702,
735
- "learning_rate": 6.493506493506494e-05,
736
- "loss": 4.2039,
737
- "step": 208
738
- },
739
- {
740
- "epoch": 2.709677419354839,
741
- "grad_norm": 1.0730255842208862,
742
- "learning_rate": 6.363636363636364e-05,
743
- "loss": 4.2224,
744
- "step": 210
745
- },
746
- {
747
- "epoch": 2.735483870967742,
748
- "grad_norm": 0.991038978099823,
749
- "learning_rate": 6.233766233766233e-05,
750
- "loss": 4.8869,
751
- "step": 212
752
- },
753
- {
754
- "epoch": 2.761290322580645,
755
- "grad_norm": 0.5952357053756714,
756
- "learning_rate": 6.103896103896104e-05,
757
- "loss": 5.1753,
758
- "step": 214
759
- },
760
- {
761
- "epoch": 2.7870967741935484,
762
- "grad_norm": 0.6798732876777649,
763
- "learning_rate": 5.9740259740259744e-05,
764
- "loss": 4.3396,
765
- "step": 216
766
- },
767
- {
768
- "epoch": 2.8129032258064517,
769
- "grad_norm": 0.5361295342445374,
770
- "learning_rate": 5.844155844155844e-05,
771
- "loss": 5.2857,
772
- "step": 218
773
- },
774
- {
775
- "epoch": 2.838709677419355,
776
- "grad_norm": 0.6326772570610046,
777
- "learning_rate": 5.714285714285714e-05,
778
- "loss": 4.7385,
779
- "step": 220
780
- },
781
- {
782
- "epoch": 2.864516129032258,
783
- "grad_norm": 0.639589786529541,
784
- "learning_rate": 5.584415584415584e-05,
785
- "loss": 4.3803,
786
- "step": 222
787
- },
788
- {
789
- "epoch": 2.8903225806451616,
790
- "grad_norm": 0.7248474955558777,
791
- "learning_rate": 5.4545454545454546e-05,
792
- "loss": 4.6668,
793
- "step": 224
794
- },
795
- {
796
- "epoch": 2.9161290322580644,
797
- "grad_norm": 0.7551538348197937,
798
- "learning_rate": 5.3246753246753245e-05,
799
- "loss": 5.016,
800
- "step": 226
801
- },
802
- {
803
- "epoch": 2.9419354838709677,
804
- "grad_norm": 0.4990728497505188,
805
- "learning_rate": 5.1948051948051944e-05,
806
- "loss": 5.0843,
807
- "step": 228
808
- },
809
- {
810
- "epoch": 2.967741935483871,
811
- "grad_norm": 0.5729503035545349,
812
- "learning_rate": 5.064935064935065e-05,
813
- "loss": 4.573,
814
- "step": 230
815
- },
816
- {
817
- "epoch": 2.9935483870967743,
818
- "grad_norm": 1.4705737829208374,
819
- "learning_rate": 4.9350649350649355e-05,
820
- "loss": 4.9812,
821
- "step": 232
822
- },
823
- {
824
- "epoch": 3.0193548387096776,
825
- "grad_norm": 0.7053755521774292,
826
- "learning_rate": 4.8051948051948054e-05,
827
- "loss": 4.7651,
828
- "step": 234
829
- },
830
- {
831
- "epoch": 3.0451612903225804,
832
- "grad_norm": 0.6120907664299011,
833
- "learning_rate": 4.675324675324675e-05,
834
- "loss": 5.2057,
835
- "step": 236
836
- },
837
- {
838
- "epoch": 3.0709677419354837,
839
- "grad_norm": 0.6173492074012756,
840
- "learning_rate": 4.545454545454546e-05,
841
- "loss": 5.0342,
842
- "step": 238
843
- },
844
- {
845
- "epoch": 3.096774193548387,
846
- "grad_norm": 0.5435605049133301,
847
- "learning_rate": 4.415584415584416e-05,
848
- "loss": 4.9538,
849
- "step": 240
850
- },
851
- {
852
- "epoch": 3.1225806451612903,
853
- "grad_norm": 1.4004778861999512,
854
- "learning_rate": 4.2857142857142856e-05,
855
- "loss": 4.3856,
856
- "step": 242
857
- },
858
- {
859
- "epoch": 3.1483870967741936,
860
- "grad_norm": 1.1300957202911377,
861
- "learning_rate": 4.155844155844156e-05,
862
- "loss": 5.0085,
863
- "step": 244
864
- },
865
- {
866
- "epoch": 3.174193548387097,
867
- "grad_norm": 0.6826758980751038,
868
- "learning_rate": 4.025974025974026e-05,
869
- "loss": 5.3525,
870
- "step": 246
871
- },
872
- {
873
- "epoch": 3.2,
874
- "grad_norm": 0.6162336468696594,
875
- "learning_rate": 3.8961038961038966e-05,
876
- "loss": 5.1639,
877
- "step": 248
878
- },
879
- {
880
- "epoch": 3.225806451612903,
881
- "grad_norm": 0.7551366686820984,
882
- "learning_rate": 3.7662337662337665e-05,
883
- "loss": 5.1913,
884
- "step": 250
885
- },
886
- {
887
- "epoch": 3.2516129032258063,
888
- "grad_norm": 0.5899360775947571,
889
- "learning_rate": 3.6363636363636364e-05,
890
- "loss": 5.022,
891
- "step": 252
892
- },
893
- {
894
- "epoch": 3.2774193548387096,
895
- "grad_norm": 0.6666110157966614,
896
- "learning_rate": 3.506493506493507e-05,
897
- "loss": 4.697,
898
- "step": 254
899
- },
900
- {
901
- "epoch": 3.303225806451613,
902
- "grad_norm": 0.5686184167861938,
903
- "learning_rate": 3.376623376623377e-05,
904
- "loss": 4.8758,
905
- "step": 256
906
- },
907
- {
908
- "epoch": 3.329032258064516,
909
- "grad_norm": 0.7564727663993835,
910
- "learning_rate": 3.246753246753247e-05,
911
- "loss": 4.9468,
912
- "step": 258
913
- },
914
- {
915
- "epoch": 3.3548387096774195,
916
- "grad_norm": 0.5773691534996033,
917
- "learning_rate": 3.1168831168831166e-05,
918
- "loss": 5.1122,
919
- "step": 260
920
- },
921
- {
922
- "epoch": 3.3806451612903228,
923
- "grad_norm": 0.642393171787262,
924
- "learning_rate": 2.9870129870129872e-05,
925
- "loss": 5.233,
926
- "step": 262
927
- },
928
- {
929
- "epoch": 3.4064516129032256,
930
- "grad_norm": 0.6513245701789856,
931
- "learning_rate": 2.857142857142857e-05,
932
- "loss": 4.2779,
933
- "step": 264
934
- },
935
- {
936
- "epoch": 3.432258064516129,
937
- "grad_norm": 0.8267136812210083,
938
- "learning_rate": 2.7272727272727273e-05,
939
- "loss": 5.2998,
940
- "step": 266
941
- },
942
- {
943
- "epoch": 3.458064516129032,
944
- "grad_norm": 0.5494163632392883,
945
- "learning_rate": 2.5974025974025972e-05,
946
- "loss": 5.4667,
947
- "step": 268
948
- },
949
- {
950
- "epoch": 3.4838709677419355,
951
- "grad_norm": 0.7057967782020569,
952
- "learning_rate": 2.4675324675324678e-05,
953
- "loss": 5.1893,
954
- "step": 270
955
- },
956
- {
957
- "epoch": 3.509677419354839,
958
- "grad_norm": 0.5400364398956299,
959
- "learning_rate": 2.3376623376623376e-05,
960
- "loss": 4.9972,
961
- "step": 272
962
- },
963
- {
964
- "epoch": 3.535483870967742,
965
- "grad_norm": 0.4732670485973358,
966
- "learning_rate": 2.207792207792208e-05,
967
- "loss": 4.896,
968
- "step": 274
969
- },
970
- {
971
- "epoch": 3.5612903225806454,
972
- "grad_norm": 0.5432953834533691,
973
- "learning_rate": 2.077922077922078e-05,
974
- "loss": 5.0452,
975
- "step": 276
976
- },
977
- {
978
- "epoch": 3.587096774193548,
979
- "grad_norm": 0.5461270213127136,
980
- "learning_rate": 1.9480519480519483e-05,
981
- "loss": 4.7124,
982
- "step": 278
983
- },
984
- {
985
- "epoch": 3.6129032258064515,
986
- "grad_norm": 0.6231604218482971,
987
- "learning_rate": 1.8181818181818182e-05,
988
- "loss": 4.3152,
989
- "step": 280
990
- },
991
- {
992
- "epoch": 3.638709677419355,
993
- "grad_norm": 0.42820078134536743,
994
- "learning_rate": 1.6883116883116884e-05,
995
- "loss": 4.9029,
996
- "step": 282
997
- },
998
- {
999
- "epoch": 3.664516129032258,
1000
- "grad_norm": 0.7605751752853394,
1001
- "learning_rate": 1.5584415584415583e-05,
1002
- "loss": 4.8563,
1003
- "step": 284
1004
- },
1005
- {
1006
- "epoch": 3.6903225806451614,
1007
- "grad_norm": 0.8210684061050415,
1008
- "learning_rate": 1.4285714285714285e-05,
1009
- "loss": 3.7461,
1010
- "step": 286
1011
- },
1012
- {
1013
- "epoch": 3.7161290322580647,
1014
- "grad_norm": 0.6016200184822083,
1015
- "learning_rate": 1.2987012987012986e-05,
1016
- "loss": 4.7194,
1017
- "step": 288
1018
- },
1019
- {
1020
- "epoch": 3.741935483870968,
1021
- "grad_norm": 0.7171183228492737,
1022
- "learning_rate": 1.1688311688311688e-05,
1023
- "loss": 4.5957,
1024
- "step": 290
1025
- },
1026
- {
1027
- "epoch": 3.767741935483871,
1028
- "grad_norm": 0.6558433175086975,
1029
- "learning_rate": 1.038961038961039e-05,
1030
- "loss": 4.5994,
1031
- "step": 292
1032
- },
1033
- {
1034
- "epoch": 3.793548387096774,
1035
- "grad_norm": 0.5295835733413696,
1036
- "learning_rate": 9.090909090909091e-06,
1037
- "loss": 5.5671,
1038
- "step": 294
1039
- },
1040
- {
1041
- "epoch": 3.8193548387096774,
1042
- "grad_norm": 0.6055201292037964,
1043
- "learning_rate": 7.792207792207792e-06,
1044
- "loss": 5.0694,
1045
- "step": 296
1046
- },
1047
- {
1048
- "epoch": 3.8451612903225807,
1049
- "grad_norm": 0.6041186451911926,
1050
- "learning_rate": 6.493506493506493e-06,
1051
- "loss": 5.0007,
1052
- "step": 298
1053
- },
1054
- {
1055
- "epoch": 3.870967741935484,
1056
- "grad_norm": 0.7281818985939026,
1057
- "learning_rate": 5.194805194805195e-06,
1058
- "loss": 4.3153,
1059
- "step": 300
1060
- },
1061
- {
1062
- "epoch": 3.896774193548387,
1063
- "grad_norm": 0.4925851821899414,
1064
- "learning_rate": 3.896103896103896e-06,
1065
- "loss": 5.5631,
1066
- "step": 302
1067
- },
1068
- {
1069
- "epoch": 3.9225806451612906,
1070
- "grad_norm": 0.6916934847831726,
1071
- "learning_rate": 2.5974025974025976e-06,
1072
- "loss": 5.3932,
1073
- "step": 304
1074
- },
1075
- {
1076
- "epoch": 3.9483870967741934,
1077
- "grad_norm": 0.6561426520347595,
1078
- "learning_rate": 1.2987012987012988e-06,
1079
- "loss": 5.3373,
1080
- "step": 306
1081
- },
1082
- {
1083
- "epoch": 3.9741935483870967,
1084
- "grad_norm": 0.6102042198181152,
1085
  "learning_rate": 0.0,
1086
- "loss": 5.1064,
1087
- "step": 308
1088
  },
1089
  {
1090
- "epoch": 3.9741935483870967,
1091
- "step": 308,
1092
- "total_flos": 723765039226044.0,
1093
- "train_loss": 5.270187915145577,
1094
- "train_runtime": 746.2778,
1095
- "train_samples_per_second": 3.318,
1096
- "train_steps_per_second": 0.413
1097
  }
1098
  ],
1099
  "logging_steps": 2,
1100
- "max_steps": 308,
1101
  "num_input_tokens_seen": 0,
1102
  "num_train_epochs": 4,
1103
  "save_steps": 500,
@@ -1113,7 +161,7 @@
1113
  "attributes": {}
1114
  }
1115
  },
1116
- "total_flos": 723765039226044.0,
1117
  "train_batch_size": 2,
1118
  "trial_name": null,
1119
  "trial_params": null
 
  {
  "best_metric": null,
  "best_model_checkpoint": null,
+ "epoch": 3.6923076923076925,
  "eval_steps": 500,
+ "global_step": 36,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
  {
+ "epoch": 0.20512820512820512,
+ "grad_norm": 3.4090847969055176,
+ "learning_rate": 0.00018888888888888888,
+ "loss": 8.6815,
  "step": 2
  },
  {
+ "epoch": 0.41025641025641024,
+ "grad_norm": 3.7809109687805176,
+ "learning_rate": 0.00017777777777777779,
+ "loss": 8.2204,
  "step": 4
  },
  {
+ "epoch": 0.6153846153846154,
+ "grad_norm": 5.441247463226318,
+ "learning_rate": 0.0001666666666666667,
+ "loss": 7.2478,
  "step": 6
  },
  {
+ "epoch": 0.8205128205128205,
+ "grad_norm": 3.1511142253875732,
+ "learning_rate": 0.00015555555555555556,
+ "loss": 6.0805,
  "step": 8
  },
  {
+ "epoch": 1.0256410256410255,
+ "grad_norm": 3.6586670875549316,
+ "learning_rate": 0.00014444444444444444,
+ "loss": 5.8499,
  "step": 10
  },
  {
+ "epoch": 1.2307692307692308,
+ "grad_norm": 3.667562961578369,
+ "learning_rate": 0.00013333333333333334,
+ "loss": 5.6406,
  "step": 12
  },
  {
+ "epoch": 1.435897435897436,
+ "grad_norm": 4.54071044921875,
+ "learning_rate": 0.00012222222222222224,
+ "loss": 4.7872,
  "step": 14
  },
  {
+ "epoch": 1.641025641025641,
+ "grad_norm": 4.788601875305176,
+ "learning_rate": 0.00011111111111111112,
+ "loss": 6.0102,
  "step": 16
  },
  {
+ "epoch": 1.8461538461538463,
+ "grad_norm": 2.731595516204834,
+ "learning_rate": 0.0001,
+ "loss": 5.3443,
  "step": 18
  },
  {
+ "epoch": 2.051282051282051,
+ "grad_norm": 2.9969563484191895,
+ "learning_rate": 8.888888888888889e-05,
+ "loss": 5.0464,
  "step": 20
  },
  {
+ "epoch": 2.2564102564102564,
+ "grad_norm": 2.7965035438537598,
+ "learning_rate": 7.777777777777778e-05,
+ "loss": 5.0523,
  "step": 22
  },
  {
+ "epoch": 2.4615384615384617,
+ "grad_norm": 2.5996360778808594,
+ "learning_rate": 6.666666666666667e-05,
+ "loss": 5.1417,
  "step": 24
  },
  {
+ "epoch": 2.6666666666666665,
+ "grad_norm": 2.2516720294952393,
+ "learning_rate": 5.555555555555556e-05,
+ "loss": 4.571,
  "step": 26
  },
  {
+ "epoch": 2.871794871794872,
+ "grad_norm": 3.5645246505737305,
+ "learning_rate": 4.4444444444444447e-05,
+ "loss": 4.9004,
  "step": 28
  },
  {
+ "epoch": 3.076923076923077,
+ "grad_norm": 1.808791995048523,
+ "learning_rate": 3.3333333333333335e-05,
+ "loss": 4.6747,
  "step": 30
  },
  {
+ "epoch": 3.282051282051282,
+ "grad_norm": 2.8537707328796387,
+ "learning_rate": 2.2222222222222223e-05,
+ "loss": 3.9459,
  "step": 32
  },
  {
+ "epoch": 3.4871794871794872,
+ "grad_norm": 3.786196708679199,
+ "learning_rate": 1.1111111111111112e-05,
+ "loss": 5.4513,
  "step": 34
  },
  {
+ "epoch": 3.6923076923076925,
+ "grad_norm": 0.8137506246566772,
  "learning_rate": 0.0,
+ "loss": 4.9191,
+ "step": 36
  },
  {
+ "epoch": 3.6923076923076925,
+ "step": 36,
+ "total_flos": 73188581212248.0,
+ "train_loss": 5.642510652542114,
+ "train_runtime": 91.2089,
+ "train_samples_per_second": 3.421,
+ "train_steps_per_second": 0.395
  }
  ],
  "logging_steps": 2,
+ "max_steps": 36,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 500,

  "attributes": {}
  }
  },
+ "total_flos": 73188581212248.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null