ThePyProgrammer commited on
Commit
d036598
·
1 Parent(s): f6bed27

Upload trainer_state.json

Browse files
Files changed (1) hide show
  1. trainer_state.json +674 -0
trainer_state.json ADDED
@@ -0,0 +1,674 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.6546083688735962,
3
+ "best_model_checkpoint": "models_gitignored/distilbert-base-uncased-finetuned-sentence-classification/checkpoint-12626",
4
+ "epoch": 4.0,
5
+ "global_step": 50504,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.04,
12
+ "learning_rate": 1.984159670521147e-05,
13
+ "loss": 1.177,
14
+ "step": 500
15
+ },
16
+ {
17
+ "epoch": 0.08,
18
+ "learning_rate": 1.968319341042294e-05,
19
+ "loss": 0.9186,
20
+ "step": 1000
21
+ },
22
+ {
23
+ "epoch": 0.12,
24
+ "learning_rate": 1.9524790115634406e-05,
25
+ "loss": 0.8261,
26
+ "step": 1500
27
+ },
28
+ {
29
+ "epoch": 0.16,
30
+ "learning_rate": 1.9366386820845877e-05,
31
+ "loss": 0.8177,
32
+ "step": 2000
33
+ },
34
+ {
35
+ "epoch": 0.2,
36
+ "learning_rate": 1.9207983526057344e-05,
37
+ "loss": 0.7788,
38
+ "step": 2500
39
+ },
40
+ {
41
+ "epoch": 0.24,
42
+ "learning_rate": 1.904958023126881e-05,
43
+ "loss": 0.7519,
44
+ "step": 3000
45
+ },
46
+ {
47
+ "epoch": 0.28,
48
+ "learning_rate": 1.889117693648028e-05,
49
+ "loss": 0.7243,
50
+ "step": 3500
51
+ },
52
+ {
53
+ "epoch": 0.32,
54
+ "learning_rate": 1.8732773641691748e-05,
55
+ "loss": 0.7431,
56
+ "step": 4000
57
+ },
58
+ {
59
+ "epoch": 0.36,
60
+ "learning_rate": 1.8574370346903215e-05,
61
+ "loss": 0.7297,
62
+ "step": 4500
63
+ },
64
+ {
65
+ "epoch": 0.4,
66
+ "learning_rate": 1.8415967052114686e-05,
67
+ "loss": 0.7061,
68
+ "step": 5000
69
+ },
70
+ {
71
+ "epoch": 0.44,
72
+ "learning_rate": 1.8257563757326153e-05,
73
+ "loss": 0.698,
74
+ "step": 5500
75
+ },
76
+ {
77
+ "epoch": 0.48,
78
+ "learning_rate": 1.8099160462537623e-05,
79
+ "loss": 0.7074,
80
+ "step": 6000
81
+ },
82
+ {
83
+ "epoch": 0.51,
84
+ "learning_rate": 1.794075716774909e-05,
85
+ "loss": 0.6975,
86
+ "step": 6500
87
+ },
88
+ {
89
+ "epoch": 0.55,
90
+ "learning_rate": 1.7782353872960557e-05,
91
+ "loss": 0.6943,
92
+ "step": 7000
93
+ },
94
+ {
95
+ "epoch": 0.59,
96
+ "learning_rate": 1.7623950578172028e-05,
97
+ "loss": 0.7045,
98
+ "step": 7500
99
+ },
100
+ {
101
+ "epoch": 0.63,
102
+ "learning_rate": 1.7465547283383495e-05,
103
+ "loss": 0.6764,
104
+ "step": 8000
105
+ },
106
+ {
107
+ "epoch": 0.67,
108
+ "learning_rate": 1.730714398859496e-05,
109
+ "loss": 0.6722,
110
+ "step": 8500
111
+ },
112
+ {
113
+ "epoch": 0.71,
114
+ "learning_rate": 1.7148740693806432e-05,
115
+ "loss": 0.6884,
116
+ "step": 9000
117
+ },
118
+ {
119
+ "epoch": 0.75,
120
+ "learning_rate": 1.69903373990179e-05,
121
+ "loss": 0.6903,
122
+ "step": 9500
123
+ },
124
+ {
125
+ "epoch": 0.79,
126
+ "learning_rate": 1.683193410422937e-05,
127
+ "loss": 0.6854,
128
+ "step": 10000
129
+ },
130
+ {
131
+ "epoch": 0.83,
132
+ "learning_rate": 1.667353080944084e-05,
133
+ "loss": 0.6861,
134
+ "step": 10500
135
+ },
136
+ {
137
+ "epoch": 0.87,
138
+ "learning_rate": 1.6515127514652307e-05,
139
+ "loss": 0.6894,
140
+ "step": 11000
141
+ },
142
+ {
143
+ "epoch": 0.91,
144
+ "learning_rate": 1.6356724219863774e-05,
145
+ "loss": 0.6754,
146
+ "step": 11500
147
+ },
148
+ {
149
+ "epoch": 0.95,
150
+ "learning_rate": 1.6198320925075244e-05,
151
+ "loss": 0.6918,
152
+ "step": 12000
153
+ },
154
+ {
155
+ "epoch": 0.99,
156
+ "learning_rate": 1.603991763028671e-05,
157
+ "loss": 0.6746,
158
+ "step": 12500
159
+ },
160
+ {
161
+ "epoch": 1.0,
162
+ "eval_accuracy": 0.778547431856752,
163
+ "eval_f1": 0.7757136063142371,
164
+ "eval_kappa": 0.7014269317598121,
165
+ "eval_loss": 0.6546083688735962,
166
+ "eval_precision": 0.7762465189891471,
167
+ "eval_recall": 0.778547431856752,
168
+ "eval_runtime": 150.616,
169
+ "eval_samples_per_second": 191.699,
170
+ "eval_steps_per_second": 23.968,
171
+ "step": 12626
172
+ },
173
+ {
174
+ "epoch": 1.03,
175
+ "learning_rate": 1.5881514335498182e-05,
176
+ "loss": 0.5694,
177
+ "step": 13000
178
+ },
179
+ {
180
+ "epoch": 1.07,
181
+ "learning_rate": 1.572311104070965e-05,
182
+ "loss": 0.5509,
183
+ "step": 13500
184
+ },
185
+ {
186
+ "epoch": 1.11,
187
+ "learning_rate": 1.5564707745921116e-05,
188
+ "loss": 0.5466,
189
+ "step": 14000
190
+ },
191
+ {
192
+ "epoch": 1.15,
193
+ "learning_rate": 1.5406304451132586e-05,
194
+ "loss": 0.5544,
195
+ "step": 14500
196
+ },
197
+ {
198
+ "epoch": 1.19,
199
+ "learning_rate": 1.5247901156344053e-05,
200
+ "loss": 0.566,
201
+ "step": 15000
202
+ },
203
+ {
204
+ "epoch": 1.23,
205
+ "learning_rate": 1.5089497861555522e-05,
206
+ "loss": 0.5773,
207
+ "step": 15500
208
+ },
209
+ {
210
+ "epoch": 1.27,
211
+ "learning_rate": 1.4931094566766991e-05,
212
+ "loss": 0.6056,
213
+ "step": 16000
214
+ },
215
+ {
216
+ "epoch": 1.31,
217
+ "learning_rate": 1.4772691271978458e-05,
218
+ "loss": 0.5532,
219
+ "step": 16500
220
+ },
221
+ {
222
+ "epoch": 1.35,
223
+ "learning_rate": 1.4614287977189927e-05,
224
+ "loss": 0.5764,
225
+ "step": 17000
226
+ },
227
+ {
228
+ "epoch": 1.39,
229
+ "learning_rate": 1.4455884682401395e-05,
230
+ "loss": 0.5499,
231
+ "step": 17500
232
+ },
233
+ {
234
+ "epoch": 1.43,
235
+ "learning_rate": 1.4297481387612864e-05,
236
+ "loss": 0.5695,
237
+ "step": 18000
238
+ },
239
+ {
240
+ "epoch": 1.47,
241
+ "learning_rate": 1.4139078092824333e-05,
242
+ "loss": 0.5823,
243
+ "step": 18500
244
+ },
245
+ {
246
+ "epoch": 1.5,
247
+ "learning_rate": 1.39806747980358e-05,
248
+ "loss": 0.5329,
249
+ "step": 19000
250
+ },
251
+ {
252
+ "epoch": 1.54,
253
+ "learning_rate": 1.3822271503247269e-05,
254
+ "loss": 0.5661,
255
+ "step": 19500
256
+ },
257
+ {
258
+ "epoch": 1.58,
259
+ "learning_rate": 1.3663868208458737e-05,
260
+ "loss": 0.5851,
261
+ "step": 20000
262
+ },
263
+ {
264
+ "epoch": 1.62,
265
+ "learning_rate": 1.3505464913670206e-05,
266
+ "loss": 0.5859,
267
+ "step": 20500
268
+ },
269
+ {
270
+ "epoch": 1.66,
271
+ "learning_rate": 1.3347061618881673e-05,
272
+ "loss": 0.5569,
273
+ "step": 21000
274
+ },
275
+ {
276
+ "epoch": 1.7,
277
+ "learning_rate": 1.3188658324093142e-05,
278
+ "loss": 0.5473,
279
+ "step": 21500
280
+ },
281
+ {
282
+ "epoch": 1.74,
283
+ "learning_rate": 1.303025502930461e-05,
284
+ "loss": 0.5635,
285
+ "step": 22000
286
+ },
287
+ {
288
+ "epoch": 1.78,
289
+ "learning_rate": 1.287185173451608e-05,
290
+ "loss": 0.5848,
291
+ "step": 22500
292
+ },
293
+ {
294
+ "epoch": 1.82,
295
+ "learning_rate": 1.2713448439727546e-05,
296
+ "loss": 0.5885,
297
+ "step": 23000
298
+ },
299
+ {
300
+ "epoch": 1.86,
301
+ "learning_rate": 1.2555045144939015e-05,
302
+ "loss": 0.5502,
303
+ "step": 23500
304
+ },
305
+ {
306
+ "epoch": 1.9,
307
+ "learning_rate": 1.2396641850150484e-05,
308
+ "loss": 0.5999,
309
+ "step": 24000
310
+ },
311
+ {
312
+ "epoch": 1.94,
313
+ "learning_rate": 1.2238238555361953e-05,
314
+ "loss": 0.5639,
315
+ "step": 24500
316
+ },
317
+ {
318
+ "epoch": 1.98,
319
+ "learning_rate": 1.207983526057342e-05,
320
+ "loss": 0.5664,
321
+ "step": 25000
322
+ },
323
+ {
324
+ "epoch": 2.0,
325
+ "eval_accuracy": 0.7842274789595817,
326
+ "eval_f1": 0.7824161012799614,
327
+ "eval_kappa": 0.7102691347246439,
328
+ "eval_loss": 0.7084277868270874,
329
+ "eval_precision": 0.7870631351341026,
330
+ "eval_recall": 0.7842274789595817,
331
+ "eval_runtime": 152.0087,
332
+ "eval_samples_per_second": 189.943,
333
+ "eval_steps_per_second": 23.749,
334
+ "step": 25252
335
+ },
336
+ {
337
+ "epoch": 2.02,
338
+ "learning_rate": 1.1921431965784888e-05,
339
+ "loss": 0.4751,
340
+ "step": 25500
341
+ },
342
+ {
343
+ "epoch": 2.06,
344
+ "learning_rate": 1.1763028670996357e-05,
345
+ "loss": 0.4208,
346
+ "step": 26000
347
+ },
348
+ {
349
+ "epoch": 2.1,
350
+ "learning_rate": 1.1604625376207826e-05,
351
+ "loss": 0.4563,
352
+ "step": 26500
353
+ },
354
+ {
355
+ "epoch": 2.14,
356
+ "learning_rate": 1.1446222081419293e-05,
357
+ "loss": 0.4513,
358
+ "step": 27000
359
+ },
360
+ {
361
+ "epoch": 2.18,
362
+ "learning_rate": 1.1287818786630762e-05,
363
+ "loss": 0.4385,
364
+ "step": 27500
365
+ },
366
+ {
367
+ "epoch": 2.22,
368
+ "learning_rate": 1.112941549184223e-05,
369
+ "loss": 0.4493,
370
+ "step": 28000
371
+ },
372
+ {
373
+ "epoch": 2.26,
374
+ "learning_rate": 1.0971012197053699e-05,
375
+ "loss": 0.4533,
376
+ "step": 28500
377
+ },
378
+ {
379
+ "epoch": 2.3,
380
+ "learning_rate": 1.0812608902265168e-05,
381
+ "loss": 0.4365,
382
+ "step": 29000
383
+ },
384
+ {
385
+ "epoch": 2.34,
386
+ "learning_rate": 1.0654205607476635e-05,
387
+ "loss": 0.4205,
388
+ "step": 29500
389
+ },
390
+ {
391
+ "epoch": 2.38,
392
+ "learning_rate": 1.0495802312688104e-05,
393
+ "loss": 0.4538,
394
+ "step": 30000
395
+ },
396
+ {
397
+ "epoch": 2.42,
398
+ "learning_rate": 1.0337399017899574e-05,
399
+ "loss": 0.4653,
400
+ "step": 30500
401
+ },
402
+ {
403
+ "epoch": 2.46,
404
+ "learning_rate": 1.0178995723111043e-05,
405
+ "loss": 0.436,
406
+ "step": 31000
407
+ },
408
+ {
409
+ "epoch": 2.49,
410
+ "learning_rate": 1.0020592428322511e-05,
411
+ "loss": 0.4249,
412
+ "step": 31500
413
+ },
414
+ {
415
+ "epoch": 2.53,
416
+ "learning_rate": 9.862189133533979e-06,
417
+ "loss": 0.4441,
418
+ "step": 32000
419
+ },
420
+ {
421
+ "epoch": 2.57,
422
+ "learning_rate": 9.703785838745447e-06,
423
+ "loss": 0.4544,
424
+ "step": 32500
425
+ },
426
+ {
427
+ "epoch": 2.61,
428
+ "learning_rate": 9.545382543956914e-06,
429
+ "loss": 0.446,
430
+ "step": 33000
431
+ },
432
+ {
433
+ "epoch": 2.65,
434
+ "learning_rate": 9.386979249168383e-06,
435
+ "loss": 0.4457,
436
+ "step": 33500
437
+ },
438
+ {
439
+ "epoch": 2.69,
440
+ "learning_rate": 9.228575954379852e-06,
441
+ "loss": 0.4472,
442
+ "step": 34000
443
+ },
444
+ {
445
+ "epoch": 2.73,
446
+ "learning_rate": 9.07017265959132e-06,
447
+ "loss": 0.4728,
448
+ "step": 34500
449
+ },
450
+ {
451
+ "epoch": 2.77,
452
+ "learning_rate": 8.911769364802788e-06,
453
+ "loss": 0.4455,
454
+ "step": 35000
455
+ },
456
+ {
457
+ "epoch": 2.81,
458
+ "learning_rate": 8.753366070014258e-06,
459
+ "loss": 0.4622,
460
+ "step": 35500
461
+ },
462
+ {
463
+ "epoch": 2.85,
464
+ "learning_rate": 8.594962775225727e-06,
465
+ "loss": 0.4566,
466
+ "step": 36000
467
+ },
468
+ {
469
+ "epoch": 2.89,
470
+ "learning_rate": 8.436559480437194e-06,
471
+ "loss": 0.4185,
472
+ "step": 36500
473
+ },
474
+ {
475
+ "epoch": 2.93,
476
+ "learning_rate": 8.278156185648662e-06,
477
+ "loss": 0.4351,
478
+ "step": 37000
479
+ },
480
+ {
481
+ "epoch": 2.97,
482
+ "learning_rate": 8.119752890860131e-06,
483
+ "loss": 0.4566,
484
+ "step": 37500
485
+ },
486
+ {
487
+ "epoch": 3.0,
488
+ "eval_accuracy": 0.7792747549613827,
489
+ "eval_f1": 0.7780530572128844,
490
+ "eval_kappa": 0.7039084509242337,
491
+ "eval_loss": 0.7550320029258728,
492
+ "eval_precision": 0.7813618395098462,
493
+ "eval_recall": 0.7792747549613827,
494
+ "eval_runtime": 151.5948,
495
+ "eval_samples_per_second": 190.462,
496
+ "eval_steps_per_second": 23.813,
497
+ "step": 37878
498
+ },
499
+ {
500
+ "epoch": 3.01,
501
+ "learning_rate": 7.9613495960716e-06,
502
+ "loss": 0.4023,
503
+ "step": 38000
504
+ },
505
+ {
506
+ "epoch": 3.05,
507
+ "learning_rate": 7.802946301283067e-06,
508
+ "loss": 0.3119,
509
+ "step": 38500
510
+ },
511
+ {
512
+ "epoch": 3.09,
513
+ "learning_rate": 7.644543006494536e-06,
514
+ "loss": 0.3205,
515
+ "step": 39000
516
+ },
517
+ {
518
+ "epoch": 3.13,
519
+ "learning_rate": 7.4861397117060044e-06,
520
+ "loss": 0.35,
521
+ "step": 39500
522
+ },
523
+ {
524
+ "epoch": 3.17,
525
+ "learning_rate": 7.327736416917472e-06,
526
+ "loss": 0.3333,
527
+ "step": 40000
528
+ },
529
+ {
530
+ "epoch": 3.21,
531
+ "learning_rate": 7.169333122128941e-06,
532
+ "loss": 0.3563,
533
+ "step": 40500
534
+ },
535
+ {
536
+ "epoch": 3.25,
537
+ "learning_rate": 7.010929827340409e-06,
538
+ "loss": 0.3388,
539
+ "step": 41000
540
+ },
541
+ {
542
+ "epoch": 3.29,
543
+ "learning_rate": 6.852526532551878e-06,
544
+ "loss": 0.3422,
545
+ "step": 41500
546
+ },
547
+ {
548
+ "epoch": 3.33,
549
+ "learning_rate": 6.6941232377633456e-06,
550
+ "loss": 0.3336,
551
+ "step": 42000
552
+ },
553
+ {
554
+ "epoch": 3.37,
555
+ "learning_rate": 6.535719942974814e-06,
556
+ "loss": 0.3433,
557
+ "step": 42500
558
+ },
559
+ {
560
+ "epoch": 3.41,
561
+ "learning_rate": 6.377316648186282e-06,
562
+ "loss": 0.3574,
563
+ "step": 43000
564
+ },
565
+ {
566
+ "epoch": 3.45,
567
+ "learning_rate": 6.218913353397751e-06,
568
+ "loss": 0.3105,
569
+ "step": 43500
570
+ },
571
+ {
572
+ "epoch": 3.48,
573
+ "learning_rate": 6.060510058609219e-06,
574
+ "loss": 0.3545,
575
+ "step": 44000
576
+ },
577
+ {
578
+ "epoch": 3.52,
579
+ "learning_rate": 5.9021067638206875e-06,
580
+ "loss": 0.3469,
581
+ "step": 44500
582
+ },
583
+ {
584
+ "epoch": 3.56,
585
+ "learning_rate": 5.7437034690321554e-06,
586
+ "loss": 0.35,
587
+ "step": 45000
588
+ },
589
+ {
590
+ "epoch": 3.6,
591
+ "learning_rate": 5.585300174243625e-06,
592
+ "loss": 0.3425,
593
+ "step": 45500
594
+ },
595
+ {
596
+ "epoch": 3.64,
597
+ "learning_rate": 5.426896879455094e-06,
598
+ "loss": 0.3203,
599
+ "step": 46000
600
+ },
601
+ {
602
+ "epoch": 3.68,
603
+ "learning_rate": 5.268493584666562e-06,
604
+ "loss": 0.3549,
605
+ "step": 46500
606
+ },
607
+ {
608
+ "epoch": 3.72,
609
+ "learning_rate": 5.11009028987803e-06,
610
+ "loss": 0.3326,
611
+ "step": 47000
612
+ },
613
+ {
614
+ "epoch": 3.76,
615
+ "learning_rate": 4.951686995089498e-06,
616
+ "loss": 0.3328,
617
+ "step": 47500
618
+ },
619
+ {
620
+ "epoch": 3.8,
621
+ "learning_rate": 4.793283700300967e-06,
622
+ "loss": 0.3376,
623
+ "step": 48000
624
+ },
625
+ {
626
+ "epoch": 3.84,
627
+ "learning_rate": 4.634880405512435e-06,
628
+ "loss": 0.3285,
629
+ "step": 48500
630
+ },
631
+ {
632
+ "epoch": 3.88,
633
+ "learning_rate": 4.476477110723904e-06,
634
+ "loss": 0.361,
635
+ "step": 49000
636
+ },
637
+ {
638
+ "epoch": 3.92,
639
+ "learning_rate": 4.3180738159353715e-06,
640
+ "loss": 0.3487,
641
+ "step": 49500
642
+ },
643
+ {
644
+ "epoch": 3.96,
645
+ "learning_rate": 4.15967052114684e-06,
646
+ "loss": 0.3511,
647
+ "step": 50000
648
+ },
649
+ {
650
+ "epoch": 4.0,
651
+ "learning_rate": 4.001267226358309e-06,
652
+ "loss": 0.3528,
653
+ "step": 50500
654
+ },
655
+ {
656
+ "epoch": 4.0,
657
+ "eval_accuracy": 0.7778201087521214,
658
+ "eval_f1": 0.7760540283760836,
659
+ "eval_kappa": 0.7007853270915267,
660
+ "eval_loss": 0.9983726739883423,
661
+ "eval_precision": 0.7770574865770817,
662
+ "eval_recall": 0.7778201087521214,
663
+ "eval_runtime": 151.818,
664
+ "eval_samples_per_second": 190.182,
665
+ "eval_steps_per_second": 23.778,
666
+ "step": 50504
667
+ }
668
+ ],
669
+ "max_steps": 63130,
670
+ "num_train_epochs": 5,
671
+ "total_flos": 5.330919857118713e+16,
672
+ "trial_name": null,
673
+ "trial_params": null
674
+ }