Prikshit7766 commited on
Commit
89e47b2
·
verified ·
1 Parent(s): f36d689

Upload log_history.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. log_history.json +1301 -0
log_history.json ADDED
@@ -0,0 +1,1301 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "loss": 0.2659,
4
+ "grad_norm": 3.4286012649536133,
5
+ "learning_rate": 1.9715261958997724e-05,
6
+ "epoch": 0.2847380410022779,
7
+ "step": 500
8
+ },
9
+ {
10
+ "loss": 0.1081,
11
+ "grad_norm": 0.7319045066833496,
12
+ "learning_rate": 1.9430523917995446e-05,
13
+ "epoch": 0.5694760820045558,
14
+ "step": 1000
15
+ },
16
+ {
17
+ "loss": 0.0777,
18
+ "grad_norm": 3.7819042205810547,
19
+ "learning_rate": 1.9145785876993168e-05,
20
+ "epoch": 0.8542141230068337,
21
+ "step": 1500
22
+ },
23
+ {
24
+ "eval_loss": 0.07203580439090729,
25
+ "eval_overall": {
26
+ "precision": 0.8912693298969072,
27
+ "recall": 0.931167956916863,
28
+ "f1": 0.9107818930041152,
29
+ "accuracy": 0.9814269735680226
30
+ },
31
+ "eval_per_label": {
32
+ "LOC": {
33
+ "precision": 0.9239187076602398,
34
+ "recall": 0.965160587915079,
35
+ "f1": 0.9440894568690097,
36
+ "number": 1837
37
+ },
38
+ "MISC": {
39
+ "precision": 0.7727272727272727,
40
+ "recall": 0.8297180043383948,
41
+ "f1": 0.8002092050209204,
42
+ "number": 922
43
+ },
44
+ "ORG": {
45
+ "precision": 0.8624823695345557,
46
+ "recall": 0.9120059656972409,
47
+ "f1": 0.8865530989488946,
48
+ "number": 1341
49
+ },
50
+ "PER": {
51
+ "precision": 0.9420520999468368,
52
+ "recall": 0.9619978284473398,
53
+ "f1": 0.9519204942250873,
54
+ "number": 1842
55
+ }
56
+ },
57
+ "eval_runtime": 7.4533,
58
+ "eval_samples_per_second": 436.049,
59
+ "eval_steps_per_second": 54.607,
60
+ "epoch": 1.0,
61
+ "step": 1756
62
+ },
63
+ {
64
+ "loss": 0.0631,
65
+ "grad_norm": 0.11871737241744995,
66
+ "learning_rate": 1.886104783599089e-05,
67
+ "epoch": 1.1389521640091116,
68
+ "step": 2000
69
+ },
70
+ {
71
+ "loss": 0.0464,
72
+ "grad_norm": 0.2318667322397232,
73
+ "learning_rate": 1.8576309794988612e-05,
74
+ "epoch": 1.4236902050113895,
75
+ "step": 2500
76
+ },
77
+ {
78
+ "loss": 0.0442,
79
+ "grad_norm": 0.35079020261764526,
80
+ "learning_rate": 1.8291571753986334e-05,
81
+ "epoch": 1.7084282460136673,
82
+ "step": 3000
83
+ },
84
+ {
85
+ "loss": 0.0411,
86
+ "grad_norm": 0.06829982250928879,
87
+ "learning_rate": 1.8006833712984056e-05,
88
+ "epoch": 1.9931662870159452,
89
+ "step": 3500
90
+ },
91
+ {
92
+ "eval_loss": 0.07787470519542694,
93
+ "eval_overall": {
94
+ "precision": 0.9304723885562209,
95
+ "recall": 0.9414338606529788,
96
+ "f1": 0.9359210306173666,
97
+ "accuracy": 0.9842968152116324
98
+ },
99
+ "eval_per_label": {
100
+ "LOC": {
101
+ "precision": 0.9511015583019882,
102
+ "recall": 0.9635274904735982,
103
+ "f1": 0.9572742022714981,
104
+ "number": 1837
105
+ },
106
+ "MISC": {
107
+ "precision": 0.8725910064239829,
108
+ "recall": 0.8839479392624728,
109
+ "f1": 0.8782327586206896,
110
+ "number": 922
111
+ },
112
+ "ORG": {
113
+ "precision": 0.9050822122571002,
114
+ "recall": 0.9030574198359433,
115
+ "f1": 0.9040686823441583,
116
+ "number": 1341
117
+ },
118
+ "PER": {
119
+ "precision": 0.9568919638105375,
120
+ "recall": 0.9761129207383279,
121
+ "f1": 0.9664068798710024,
122
+ "number": 1842
123
+ }
124
+ },
125
+ "eval_runtime": 7.5011,
126
+ "eval_samples_per_second": 433.267,
127
+ "eval_steps_per_second": 54.258,
128
+ "epoch": 2.0,
129
+ "step": 3512
130
+ },
131
+ {
132
+ "loss": 0.0258,
133
+ "grad_norm": 4.330985069274902,
134
+ "learning_rate": 1.7722095671981778e-05,
135
+ "epoch": 2.277904328018223,
136
+ "step": 4000
137
+ },
138
+ {
139
+ "loss": 0.0284,
140
+ "grad_norm": 0.06637139618396759,
141
+ "learning_rate": 1.74373576309795e-05,
142
+ "epoch": 2.562642369020501,
143
+ "step": 4500
144
+ },
145
+ {
146
+ "loss": 0.0269,
147
+ "grad_norm": 11.263030052185059,
148
+ "learning_rate": 1.7152619589977222e-05,
149
+ "epoch": 2.847380410022779,
150
+ "step": 5000
151
+ },
152
+ {
153
+ "eval_loss": 0.07202505320310593,
154
+ "eval_overall": {
155
+ "precision": 0.9318106587222774,
156
+ "recall": 0.9474924267923258,
157
+ "f1": 0.9395861148197597,
158
+ "accuracy": 0.9857685288750221
159
+ },
160
+ "eval_per_label": {
161
+ "LOC": {
162
+ "precision": 0.9626218851570965,
163
+ "recall": 0.9673380511703865,
164
+ "f1": 0.9649742058104807,
165
+ "number": 1837
166
+ },
167
+ "MISC": {
168
+ "precision": 0.8943355119825708,
169
+ "recall": 0.8904555314533622,
170
+ "f1": 0.8923913043478261,
171
+ "number": 922
172
+ },
173
+ "ORG": {
174
+ "precision": 0.8772791023842917,
175
+ "recall": 0.9328859060402684,
176
+ "f1": 0.9042284062161186,
177
+ "number": 1341
178
+ },
179
+ "PER": {
180
+ "precision": 0.9616630669546437,
181
+ "recall": 0.9668838219326819,
182
+ "f1": 0.9642663779101246,
183
+ "number": 1842
184
+ }
185
+ },
186
+ "eval_runtime": 6.5043,
187
+ "eval_samples_per_second": 499.669,
188
+ "eval_steps_per_second": 62.574,
189
+ "epoch": 3.0,
190
+ "step": 5268
191
+ },
192
+ {
193
+ "loss": 0.0226,
194
+ "grad_norm": 0.7242124676704407,
195
+ "learning_rate": 1.6867881548974945e-05,
196
+ "epoch": 3.132118451025057,
197
+ "step": 5500
198
+ },
199
+ {
200
+ "loss": 0.0165,
201
+ "grad_norm": 0.006902824155986309,
202
+ "learning_rate": 1.6583143507972667e-05,
203
+ "epoch": 3.416856492027335,
204
+ "step": 6000
205
+ },
206
+ {
207
+ "loss": 0.0176,
208
+ "grad_norm": 0.031127002090215683,
209
+ "learning_rate": 1.629840546697039e-05,
210
+ "epoch": 3.7015945330296125,
211
+ "step": 6500
212
+ },
213
+ {
214
+ "loss": 0.0196,
215
+ "grad_norm": 0.0038718737196177244,
216
+ "learning_rate": 1.601366742596811e-05,
217
+ "epoch": 3.9863325740318905,
218
+ "step": 7000
219
+ },
220
+ {
221
+ "eval_loss": 0.08569859713315964,
222
+ "eval_overall": {
223
+ "precision": 0.930635838150289,
224
+ "recall": 0.9483338943116796,
225
+ "f1": 0.9394015170459282,
226
+ "accuracy": 0.9850621063165951
227
+ },
228
+ "eval_per_label": {
229
+ "LOC": {
230
+ "precision": 0.9533011272141707,
231
+ "recall": 0.9667936853565596,
232
+ "f1": 0.96,
233
+ "number": 1837
234
+ },
235
+ "MISC": {
236
+ "precision": 0.8623188405797102,
237
+ "recall": 0.903470715835141,
238
+ "f1": 0.8824152542372882,
239
+ "number": 922
240
+ },
241
+ "ORG": {
242
+ "precision": 0.8995664739884393,
243
+ "recall": 0.9284116331096197,
244
+ "f1": 0.9137614678899083,
245
+ "number": 1341
246
+ },
247
+ "PER": {
248
+ "precision": 0.9668838219326819,
249
+ "recall": 0.9668838219326819,
250
+ "f1": 0.9668838219326819,
251
+ "number": 1842
252
+ }
253
+ },
254
+ "eval_runtime": 7.5249,
255
+ "eval_samples_per_second": 431.899,
256
+ "eval_steps_per_second": 54.087,
257
+ "epoch": 4.0,
258
+ "step": 7024
259
+ },
260
+ {
261
+ "loss": 0.0138,
262
+ "grad_norm": 0.17987537384033203,
263
+ "learning_rate": 1.5728929384965833e-05,
264
+ "epoch": 4.271070615034168,
265
+ "step": 7500
266
+ },
267
+ {
268
+ "loss": 0.0134,
269
+ "grad_norm": 1.4385559558868408,
270
+ "learning_rate": 1.5444191343963555e-05,
271
+ "epoch": 4.555808656036446,
272
+ "step": 8000
273
+ },
274
+ {
275
+ "loss": 0.0139,
276
+ "grad_norm": 0.02356315404176712,
277
+ "learning_rate": 1.5159453302961277e-05,
278
+ "epoch": 4.840546697038724,
279
+ "step": 8500
280
+ },
281
+ {
282
+ "eval_loss": 0.08417785912752151,
283
+ "eval_overall": {
284
+ "precision": 0.9310686015831134,
285
+ "recall": 0.9501851228542578,
286
+ "f1": 0.9405297351324339,
287
+ "accuracy": 0.9851062577264967
288
+ },
289
+ "eval_per_label": {
290
+ "LOC": {
291
+ "precision": 0.9627228525121556,
292
+ "recall": 0.9700598802395209,
293
+ "f1": 0.9663774403470715,
294
+ "number": 1837
295
+ },
296
+ "MISC": {
297
+ "precision": 0.8386454183266933,
298
+ "recall": 0.913232104121475,
299
+ "f1": 0.8743509865005192,
300
+ "number": 922
301
+ },
302
+ "ORG": {
303
+ "precision": 0.9070110701107011,
304
+ "recall": 0.9164802386278896,
305
+ "f1": 0.9117210682492581,
306
+ "number": 1341
307
+ },
308
+ "PER": {
309
+ "precision": 0.9670981661272924,
310
+ "recall": 0.9733984799131379,
311
+ "f1": 0.9702380952380952,
312
+ "number": 1842
313
+ }
314
+ },
315
+ "eval_runtime": 7.5038,
316
+ "eval_samples_per_second": 433.114,
317
+ "eval_steps_per_second": 54.239,
318
+ "epoch": 5.0,
319
+ "step": 8780
320
+ },
321
+ {
322
+ "loss": 0.0092,
323
+ "grad_norm": 0.7477717399597168,
324
+ "learning_rate": 1.4874715261958999e-05,
325
+ "epoch": 5.125284738041002,
326
+ "step": 9000
327
+ },
328
+ {
329
+ "loss": 0.006,
330
+ "grad_norm": 0.04322722181677818,
331
+ "learning_rate": 1.4589977220956721e-05,
332
+ "epoch": 5.41002277904328,
333
+ "step": 9500
334
+ },
335
+ {
336
+ "loss": 0.0073,
337
+ "grad_norm": 0.051729559898376465,
338
+ "learning_rate": 1.4305239179954442e-05,
339
+ "epoch": 5.694760820045558,
340
+ "step": 10000
341
+ },
342
+ {
343
+ "loss": 0.0079,
344
+ "grad_norm": 0.012189100496470928,
345
+ "learning_rate": 1.4020501138952165e-05,
346
+ "epoch": 5.979498861047836,
347
+ "step": 10500
348
+ },
349
+ {
350
+ "eval_loss": 0.09725591540336609,
351
+ "eval_overall": {
352
+ "precision": 0.9291845493562232,
353
+ "recall": 0.9473241332884551,
354
+ "f1": 0.9381666666666667,
355
+ "accuracy": 0.9844734208512392
356
+ },
357
+ "eval_per_label": {
358
+ "LOC": {
359
+ "precision": 0.9546424759871932,
360
+ "recall": 0.9738704409363091,
361
+ "f1": 0.9641606036108865,
362
+ "number": 1837
363
+ },
364
+ "MISC": {
365
+ "precision": 0.858739837398374,
366
+ "recall": 0.9164859002169198,
367
+ "f1": 0.8866736621196223,
368
+ "number": 922
369
+ },
370
+ "ORG": {
371
+ "precision": 0.8921852387843705,
372
+ "recall": 0.9194630872483222,
373
+ "f1": 0.9056188027910393,
374
+ "number": 1341
375
+ },
376
+ "PER": {
377
+ "precision": 0.9691969196919692,
378
+ "recall": 0.9565689467969598,
379
+ "f1": 0.9628415300546448,
380
+ "number": 1842
381
+ }
382
+ },
383
+ "eval_runtime": 7.4822,
384
+ "eval_samples_per_second": 434.362,
385
+ "eval_steps_per_second": 54.395,
386
+ "epoch": 6.0,
387
+ "step": 10536
388
+ },
389
+ {
390
+ "loss": 0.008,
391
+ "grad_norm": 0.0029869996942579746,
392
+ "learning_rate": 1.3735763097949887e-05,
393
+ "epoch": 6.264236902050114,
394
+ "step": 11000
395
+ },
396
+ {
397
+ "loss": 0.0066,
398
+ "grad_norm": 0.018263721838593483,
399
+ "learning_rate": 1.3451025056947608e-05,
400
+ "epoch": 6.548974943052392,
401
+ "step": 11500
402
+ },
403
+ {
404
+ "loss": 0.0057,
405
+ "grad_norm": 0.020874306559562683,
406
+ "learning_rate": 1.3166287015945332e-05,
407
+ "epoch": 6.83371298405467,
408
+ "step": 12000
409
+ },
410
+ {
411
+ "eval_loss": 0.097112737596035,
412
+ "eval_overall": {
413
+ "precision": 0.9343584656084656,
414
+ "recall": 0.9510265903736116,
415
+ "f1": 0.9426188490408675,
416
+ "accuracy": 0.9859745687878966
417
+ },
418
+ "eval_per_label": {
419
+ "LOC": {
420
+ "precision": 0.9689373297002725,
421
+ "recall": 0.9678824169842134,
422
+ "f1": 0.968409586056645,
423
+ "number": 1837
424
+ },
425
+ "MISC": {
426
+ "precision": 0.8805031446540881,
427
+ "recall": 0.911062906724512,
428
+ "f1": 0.8955223880597015,
429
+ "number": 922
430
+ },
431
+ "ORG": {
432
+ "precision": 0.8851063829787233,
433
+ "recall": 0.930648769574944,
434
+ "f1": 0.9073064340239914,
435
+ "number": 1341
436
+ },
437
+ "PER": {
438
+ "precision": 0.965386695511087,
439
+ "recall": 0.9690553745928339,
440
+ "f1": 0.967217556217827,
441
+ "number": 1842
442
+ }
443
+ },
444
+ "eval_runtime": 7.4789,
445
+ "eval_samples_per_second": 434.555,
446
+ "eval_steps_per_second": 54.42,
447
+ "epoch": 7.0,
448
+ "step": 12292
449
+ },
450
+ {
451
+ "loss": 0.0031,
452
+ "grad_norm": 0.0007487820694223046,
453
+ "learning_rate": 1.2881548974943054e-05,
454
+ "epoch": 7.118451025056948,
455
+ "step": 12500
456
+ },
457
+ {
458
+ "loss": 0.0037,
459
+ "grad_norm": 15.724783897399902,
460
+ "learning_rate": 1.2596810933940776e-05,
461
+ "epoch": 7.403189066059226,
462
+ "step": 13000
463
+ },
464
+ {
465
+ "loss": 0.0061,
466
+ "grad_norm": 0.614613950252533,
467
+ "learning_rate": 1.2312072892938498e-05,
468
+ "epoch": 7.687927107061503,
469
+ "step": 13500
470
+ },
471
+ {
472
+ "loss": 0.0036,
473
+ "grad_norm": 0.0012391641503199935,
474
+ "learning_rate": 1.2027334851936218e-05,
475
+ "epoch": 7.972665148063781,
476
+ "step": 14000
477
+ },
478
+ {
479
+ "eval_loss": 0.10294844955205917,
480
+ "eval_overall": {
481
+ "precision": 0.9285831285831286,
482
+ "recall": 0.9540558734432851,
483
+ "f1": 0.9411471735701834,
484
+ "accuracy": 0.9852387119562018
485
+ },
486
+ "eval_per_label": {
487
+ "LOC": {
488
+ "precision": 0.9664864864864865,
489
+ "recall": 0.9733260751224823,
490
+ "f1": 0.9698942229454841,
491
+ "number": 1837
492
+ },
493
+ "MISC": {
494
+ "precision": 0.8497983870967742,
495
+ "recall": 0.9143167028199566,
496
+ "f1": 0.8808777429467085,
497
+ "number": 922
498
+ },
499
+ "ORG": {
500
+ "precision": 0.9019607843137255,
501
+ "recall": 0.9261744966442953,
502
+ "f1": 0.9139072847682119,
503
+ "number": 1341
504
+ },
505
+ "PER": {
506
+ "precision": 0.9522799575821845,
507
+ "recall": 0.9750271444082519,
508
+ "f1": 0.9635193133047211,
509
+ "number": 1842
510
+ }
511
+ },
512
+ "eval_runtime": 7.6218,
513
+ "eval_samples_per_second": 426.41,
514
+ "eval_steps_per_second": 53.4,
515
+ "epoch": 8.0,
516
+ "step": 14048
517
+ },
518
+ {
519
+ "loss": 0.0042,
520
+ "grad_norm": 0.15165293216705322,
521
+ "learning_rate": 1.1742596810933942e-05,
522
+ "epoch": 8.257403189066059,
523
+ "step": 14500
524
+ },
525
+ {
526
+ "loss": 0.0038,
527
+ "grad_norm": 0.01020512543618679,
528
+ "learning_rate": 1.1457858769931664e-05,
529
+ "epoch": 8.542141230068337,
530
+ "step": 15000
531
+ },
532
+ {
533
+ "loss": 0.0028,
534
+ "grad_norm": 0.00046127362293191254,
535
+ "learning_rate": 1.1173120728929384e-05,
536
+ "epoch": 8.826879271070615,
537
+ "step": 15500
538
+ },
539
+ {
540
+ "eval_loss": 0.11473368108272552,
541
+ "eval_overall": {
542
+ "precision": 0.9368072787427626,
543
+ "recall": 0.9530461124200605,
544
+ "f1": 0.944856928339034,
545
+ "accuracy": 0.9860187201977983
546
+ },
547
+ "eval_per_label": {
548
+ "LOC": {
549
+ "precision": 0.9632034632034632,
550
+ "recall": 0.9689711486118672,
551
+ "f1": 0.966078697421981,
552
+ "number": 1837
553
+ },
554
+ "MISC": {
555
+ "precision": 0.8518145161290323,
556
+ "recall": 0.9164859002169198,
557
+ "f1": 0.8829676071055382,
558
+ "number": 922
559
+ },
560
+ "ORG": {
561
+ "precision": 0.9313207547169812,
562
+ "recall": 0.9202087994034303,
563
+ "f1": 0.9257314328582145,
564
+ "number": 1341
565
+ },
566
+ "PER": {
567
+ "precision": 0.9595744680851064,
568
+ "recall": 0.9793702497285559,
569
+ "f1": 0.969371305749597,
570
+ "number": 1842
571
+ }
572
+ },
573
+ "eval_runtime": 7.5249,
574
+ "eval_samples_per_second": 431.9,
575
+ "eval_steps_per_second": 54.087,
576
+ "epoch": 9.0,
577
+ "step": 15804
578
+ },
579
+ {
580
+ "loss": 0.0049,
581
+ "grad_norm": 0.17741906642913818,
582
+ "learning_rate": 1.0888382687927108e-05,
583
+ "epoch": 9.111617312072893,
584
+ "step": 16000
585
+ },
586
+ {
587
+ "loss": 0.003,
588
+ "grad_norm": 0.0018998866435140371,
589
+ "learning_rate": 1.060364464692483e-05,
590
+ "epoch": 9.39635535307517,
591
+ "step": 16500
592
+ },
593
+ {
594
+ "loss": 0.0042,
595
+ "grad_norm": 0.0021700740326195955,
596
+ "learning_rate": 1.0318906605922552e-05,
597
+ "epoch": 9.681093394077449,
598
+ "step": 17000
599
+ },
600
+ {
601
+ "loss": 0.0026,
602
+ "grad_norm": 0.014039441011846066,
603
+ "learning_rate": 1.0034168564920275e-05,
604
+ "epoch": 9.965831435079727,
605
+ "step": 17500
606
+ },
607
+ {
608
+ "eval_loss": 0.1184961125254631,
609
+ "eval_overall": {
610
+ "precision": 0.9358889623265036,
611
+ "recall": 0.9532144059239314,
612
+ "f1": 0.9444722361180591,
613
+ "accuracy": 0.9852681462294696
614
+ },
615
+ "eval_per_label": {
616
+ "LOC": {
617
+ "precision": 0.9669197396963124,
618
+ "recall": 0.9706042460533478,
619
+ "f1": 0.9687584895408857,
620
+ "number": 1837
621
+ },
622
+ "MISC": {
623
+ "precision": 0.8826638477801269,
624
+ "recall": 0.9056399132321041,
625
+ "f1": 0.8940042826552462,
626
+ "number": 922
627
+ },
628
+ "ORG": {
629
+ "precision": 0.8961318051575932,
630
+ "recall": 0.9328859060402684,
631
+ "f1": 0.9141395688710267,
632
+ "number": 1341
633
+ },
634
+ "PER": {
635
+ "precision": 0.9619506966773848,
636
+ "recall": 0.9744842562432139,
637
+ "f1": 0.9681769147788565,
638
+ "number": 1842
639
+ }
640
+ },
641
+ "eval_runtime": 7.5803,
642
+ "eval_samples_per_second": 428.744,
643
+ "eval_steps_per_second": 53.692,
644
+ "epoch": 10.0,
645
+ "step": 17560
646
+ },
647
+ {
648
+ "loss": 0.0011,
649
+ "grad_norm": 0.00040622701635584235,
650
+ "learning_rate": 9.749430523917997e-06,
651
+ "epoch": 10.250569476082005,
652
+ "step": 18000
653
+ },
654
+ {
655
+ "loss": 0.001,
656
+ "grad_norm": 0.0004215097869746387,
657
+ "learning_rate": 9.464692482915719e-06,
658
+ "epoch": 10.535307517084282,
659
+ "step": 18500
660
+ },
661
+ {
662
+ "loss": 0.002,
663
+ "grad_norm": 0.0020023963879793882,
664
+ "learning_rate": 9.17995444191344e-06,
665
+ "epoch": 10.82004555808656,
666
+ "step": 19000
667
+ },
668
+ {
669
+ "eval_loss": 0.11947210878133774,
670
+ "eval_overall": {
671
+ "precision": 0.9384717168375786,
672
+ "recall": 0.9548973409626389,
673
+ "f1": 0.9466132799466134,
674
+ "accuracy": 0.9857685288750221
675
+ },
676
+ "eval_per_label": {
677
+ "LOC": {
678
+ "precision": 0.9658906334596643,
679
+ "recall": 0.9711486118671747,
680
+ "f1": 0.9685124864277959,
681
+ "number": 1837
682
+ },
683
+ "MISC": {
684
+ "precision": 0.8954108858057631,
685
+ "recall": 0.9099783080260304,
686
+ "f1": 0.9026358257127487,
687
+ "number": 922
688
+ },
689
+ "ORG": {
690
+ "precision": 0.9075812274368231,
691
+ "recall": 0.9373601789709173,
692
+ "f1": 0.922230374174615,
693
+ "number": 1341
694
+ },
695
+ "PER": {
696
+ "precision": 0.9557805007991476,
697
+ "recall": 0.9739413680781759,
698
+ "f1": 0.9647754772788384,
699
+ "number": 1842
700
+ }
701
+ },
702
+ "eval_runtime": 7.5556,
703
+ "eval_samples_per_second": 430.142,
704
+ "eval_steps_per_second": 53.867,
705
+ "epoch": 11.0,
706
+ "step": 19316
707
+ },
708
+ {
709
+ "loss": 0.0028,
710
+ "grad_norm": 0.030722877010703087,
711
+ "learning_rate": 8.895216400911163e-06,
712
+ "epoch": 11.104783599088838,
713
+ "step": 19500
714
+ },
715
+ {
716
+ "loss": 0.0033,
717
+ "grad_norm": 0.004747629631310701,
718
+ "learning_rate": 8.610478359908885e-06,
719
+ "epoch": 11.389521640091116,
720
+ "step": 20000
721
+ },
722
+ {
723
+ "loss": 0.0018,
724
+ "grad_norm": 0.0014466517604887486,
725
+ "learning_rate": 8.325740318906607e-06,
726
+ "epoch": 11.674259681093394,
727
+ "step": 20500
728
+ },
729
+ {
730
+ "loss": 0.0004,
731
+ "grad_norm": 0.005812987219542265,
732
+ "learning_rate": 8.041002277904329e-06,
733
+ "epoch": 11.958997722095672,
734
+ "step": 21000
735
+ },
736
+ {
737
+ "eval_loss": 0.12146918475627899,
738
+ "eval_overall": {
739
+ "precision": 0.9431799302209669,
740
+ "recall": 0.9554022214742511,
741
+ "f1": 0.9492517348047823,
742
+ "accuracy": 0.9866074056631542
743
+ },
744
+ "eval_per_label": {
745
+ "LOC": {
746
+ "precision": 0.971195652173913,
747
+ "recall": 0.9727817093086554,
748
+ "f1": 0.9719880337231438,
749
+ "number": 1837
750
+ },
751
+ "MISC": {
752
+ "precision": 0.9096844396082698,
753
+ "recall": 0.9067245119305857,
754
+ "f1": 0.9082020640956002,
755
+ "number": 922
756
+ },
757
+ "ORG": {
758
+ "precision": 0.9121522693997072,
759
+ "recall": 0.9291573452647278,
760
+ "f1": 0.9205762837089029,
761
+ "number": 1341
762
+ },
763
+ "PER": {
764
+ "precision": 0.9545934530095037,
765
+ "recall": 0.9815418023887079,
766
+ "f1": 0.9678800856531049,
767
+ "number": 1842
768
+ }
769
+ },
770
+ "eval_runtime": 7.7732,
771
+ "eval_samples_per_second": 418.106,
772
+ "eval_steps_per_second": 52.36,
773
+ "epoch": 12.0,
774
+ "step": 21072
775
+ },
776
+ {
777
+ "loss": 0.0006,
778
+ "grad_norm": 0.0022160038352012634,
779
+ "learning_rate": 7.75626423690205e-06,
780
+ "epoch": 12.24373576309795,
781
+ "step": 21500
782
+ },
783
+ {
784
+ "loss": 0.0008,
785
+ "grad_norm": 0.00019432637782301754,
786
+ "learning_rate": 7.471526195899773e-06,
787
+ "epoch": 12.528473804100228,
788
+ "step": 22000
789
+ },
790
+ {
791
+ "loss": 0.0011,
792
+ "grad_norm": 1.6397913694381714,
793
+ "learning_rate": 7.186788154897495e-06,
794
+ "epoch": 12.813211845102506,
795
+ "step": 22500
796
+ },
797
+ {
798
+ "eval_loss": 0.12505799531936646,
799
+ "eval_overall": {
800
+ "precision": 0.9386808087504143,
801
+ "recall": 0.9532144059239314,
802
+ "f1": 0.9458917835671342,
803
+ "accuracy": 0.9860923058809677
804
+ },
805
+ "eval_per_label": {
806
+ "LOC": {
807
+ "precision": 0.9675148890092041,
808
+ "recall": 0.9727817093086554,
809
+ "f1": 0.9701411509229099,
810
+ "number": 1837
811
+ },
812
+ "MISC": {
813
+ "precision": 0.8782791185729276,
814
+ "recall": 0.9078091106290672,
815
+ "f1": 0.8927999999999999,
816
+ "number": 922
817
+ },
818
+ "ORG": {
819
+ "precision": 0.9241741741741741,
820
+ "recall": 0.9179716629381058,
821
+ "f1": 0.9210624766180322,
822
+ "number": 1341
823
+ },
824
+ "PER": {
825
+ "precision": 0.9511041009463722,
826
+ "recall": 0.9820846905537459,
827
+ "f1": 0.9663461538461537,
828
+ "number": 1842
829
+ }
830
+ },
831
+ "eval_runtime": 7.5826,
832
+ "eval_samples_per_second": 428.613,
833
+ "eval_steps_per_second": 53.676,
834
+ "epoch": 13.0,
835
+ "step": 22828
836
+ },
837
+ {
838
+ "loss": 0.0015,
839
+ "grad_norm": 0.00018138765881303698,
840
+ "learning_rate": 6.9020501138952166e-06,
841
+ "epoch": 13.097949886104784,
842
+ "step": 23000
843
+ },
844
+ {
845
+ "loss": 0.0007,
846
+ "grad_norm": 0.00034025911008939147,
847
+ "learning_rate": 6.617312072892939e-06,
848
+ "epoch": 13.382687927107062,
849
+ "step": 23500
850
+ },
851
+ {
852
+ "loss": 0.0006,
853
+ "grad_norm": 0.00041584973223507404,
854
+ "learning_rate": 6.3325740318906616e-06,
855
+ "epoch": 13.66742596810934,
856
+ "step": 24000
857
+ },
858
+ {
859
+ "loss": 0.0008,
860
+ "grad_norm": 0.0006968477973714471,
861
+ "learning_rate": 6.047835990888384e-06,
862
+ "epoch": 13.952164009111618,
863
+ "step": 24500
864
+ },
865
+ {
866
+ "eval_loss": 0.12853111326694489,
867
+ "eval_overall": {
868
+ "precision": 0.9430043203722167,
869
+ "recall": 0.9550656344665096,
870
+ "f1": 0.9489966555183946,
871
+ "accuracy": 0.986342497203744
872
+ },
873
+ "eval_per_label": {
874
+ "LOC": {
875
+ "precision": 0.9736986301369863,
876
+ "recall": 0.9673380511703865,
877
+ "f1": 0.9705079191698526,
878
+ "number": 1837
879
+ },
880
+ "MISC": {
881
+ "precision": 0.8892438764643238,
882
+ "recall": 0.9056399132321041,
883
+ "f1": 0.8973670069854918,
884
+ "number": 922
885
+ },
886
+ "ORG": {
887
+ "precision": 0.9200293470286134,
888
+ "recall": 0.9351230425055929,
889
+ "f1": 0.9275147928994084,
890
+ "number": 1341
891
+ },
892
+ "PER": {
893
+ "precision": 0.9566367001586462,
894
+ "recall": 0.9820846905537459,
895
+ "f1": 0.9691936780069649,
896
+ "number": 1842
897
+ }
898
+ },
899
+ "eval_runtime": 7.5688,
900
+ "eval_samples_per_second": 429.396,
901
+ "eval_steps_per_second": 53.774,
902
+ "epoch": 14.0,
903
+ "step": 24584
904
+ },
905
+ {
906
+ "loss": 0.0006,
907
+ "grad_norm": 0.00020477738871704787,
908
+ "learning_rate": 5.763097949886105e-06,
909
+ "epoch": 14.236902050113896,
910
+ "step": 25000
911
+ },
912
+ {
913
+ "loss": 0.0007,
914
+ "grad_norm": 0.00032146400189958513,
915
+ "learning_rate": 5.478359908883827e-06,
916
+ "epoch": 14.521640091116174,
917
+ "step": 25500
918
+ },
919
+ {
920
+ "loss": 0.0014,
921
+ "grad_norm": 0.000293695367872715,
922
+ "learning_rate": 5.19362186788155e-06,
923
+ "epoch": 14.806378132118452,
924
+ "step": 26000
925
+ },
926
+ {
927
+ "eval_loss": 0.1253676861524582,
928
+ "eval_overall": {
929
+ "precision": 0.938937613767996,
930
+ "recall": 0.9548973409626389,
931
+ "f1": 0.9468502294534834,
932
+ "accuracy": 0.9861364572908695
933
+ },
934
+ "eval_per_label": {
935
+ "LOC": {
936
+ "precision": 0.9607948442534908,
937
+ "recall": 0.9738704409363091,
938
+ "f1": 0.9672884563395511,
939
+ "number": 1837
940
+ },
941
+ "MISC": {
942
+ "precision": 0.8742203742203742,
943
+ "recall": 0.9121475054229935,
944
+ "f1": 0.8927813163481952,
945
+ "number": 922
946
+ },
947
+ "ORG": {
948
+ "precision": 0.9322289156626506,
949
+ "recall": 0.9231916480238628,
950
+ "f1": 0.9276882727613338,
951
+ "number": 1341
952
+ },
953
+ "PER": {
954
+ "precision": 0.9550502379693284,
955
+ "recall": 0.9804560260586319,
956
+ "f1": 0.9675863916421109,
957
+ "number": 1842
958
+ }
959
+ },
960
+ "eval_runtime": 8.6163,
961
+ "eval_samples_per_second": 377.191,
962
+ "eval_steps_per_second": 47.236,
963
+ "epoch": 15.0,
964
+ "step": 26340
965
+ },
966
+ {
967
+ "loss": 0.0008,
968
+ "grad_norm": 0.00019819244334939867,
969
+ "learning_rate": 4.908883826879272e-06,
970
+ "epoch": 15.09111617312073,
971
+ "step": 26500
972
+ },
973
+ {
974
+ "loss": 0.0008,
975
+ "grad_norm": 0.00021079520229250193,
976
+ "learning_rate": 4.624145785876993e-06,
977
+ "epoch": 15.375854214123008,
978
+ "step": 27000
979
+ },
980
+ {
981
+ "loss": 0.0003,
982
+ "grad_norm": 0.00012124140630476177,
983
+ "learning_rate": 4.339407744874715e-06,
984
+ "epoch": 15.660592255125284,
985
+ "step": 27500
986
+ },
987
+ {
988
+ "loss": 0.0008,
989
+ "grad_norm": 0.000182148942258209,
990
+ "learning_rate": 4.054669703872437e-06,
991
+ "epoch": 15.945330296127562,
992
+ "step": 28000
993
+ },
994
+ {
995
+ "eval_loss": 0.12416187673807144,
996
+ "eval_overall": {
997
+ "precision": 0.9447862963578912,
998
+ "recall": 0.9560753954897341,
999
+ "f1": 0.9503973232956922,
1000
+ "accuracy": 0.9866957084829575
1001
+ },
1002
+ "eval_per_label": {
1003
+ "LOC": {
1004
+ "precision": 0.9680043383947939,
1005
+ "recall": 0.9716929776810016,
1006
+ "f1": 0.969845150774246,
1007
+ "number": 1837
1008
+ },
1009
+ "MISC": {
1010
+ "precision": 0.8954108858057631,
1011
+ "recall": 0.9099783080260304,
1012
+ "f1": 0.9026358257127487,
1013
+ "number": 922
1014
+ },
1015
+ "ORG": {
1016
+ "precision": 0.9301634472511144,
1017
+ "recall": 0.9336316181953765,
1018
+ "f1": 0.9318943059173799,
1019
+ "number": 1341
1020
+ },
1021
+ "PER": {
1022
+ "precision": 0.957051961823966,
1023
+ "recall": 0.9799131378935939,
1024
+ "f1": 0.9683476394849786,
1025
+ "number": 1842
1026
+ }
1027
+ },
1028
+ "eval_runtime": 8.6628,
1029
+ "eval_samples_per_second": 375.168,
1030
+ "eval_steps_per_second": 46.983,
1031
+ "epoch": 16.0,
1032
+ "step": 28096
1033
+ },
1034
+ {
1035
+ "loss": 0.001,
1036
+ "grad_norm": 0.019741835072636604,
1037
+ "learning_rate": 3.76993166287016e-06,
1038
+ "epoch": 16.23006833712984,
1039
+ "step": 28500
1040
+ },
1041
+ {
1042
+ "loss": 0.0003,
1043
+ "grad_norm": 0.00012148160021752119,
1044
+ "learning_rate": 3.4851936218678815e-06,
1045
+ "epoch": 16.514806378132118,
1046
+ "step": 29000
1047
+ },
1048
+ {
1049
+ "loss": 0.0005,
1050
+ "grad_norm": 0.00011341737263137475,
1051
+ "learning_rate": 3.200455580865604e-06,
1052
+ "epoch": 16.799544419134396,
1053
+ "step": 29500
1054
+ },
1055
+ {
1056
+ "eval_loss": 0.12274094671010971,
1057
+ "eval_overall": {
1058
+ "precision": 0.9438631456568676,
1059
+ "recall": 0.9564119824974756,
1060
+ "f1": 0.9500961297333445,
1061
+ "accuracy": 0.9868428798492965
1062
+ },
1063
+ "eval_per_label": {
1064
+ "LOC": {
1065
+ "precision": 0.971195652173913,
1066
+ "recall": 0.9727817093086554,
1067
+ "f1": 0.9719880337231438,
1068
+ "number": 1837
1069
+ },
1070
+ "MISC": {
1071
+ "precision": 0.8761609907120743,
1072
+ "recall": 0.920824295010846,
1073
+ "f1": 0.8979375991538868,
1074
+ "number": 922
1075
+ },
1076
+ "ORG": {
1077
+ "precision": 0.9307004470938898,
1078
+ "recall": 0.9313944817300522,
1079
+ "f1": 0.9310473350726798,
1080
+ "number": 1341
1081
+ },
1082
+ "PER": {
1083
+ "precision": 0.9614973262032086,
1084
+ "recall": 0.9761129207383279,
1085
+ "f1": 0.9687500000000001,
1086
+ "number": 1842
1087
+ }
1088
+ },
1089
+ "eval_runtime": 8.9038,
1090
+ "eval_samples_per_second": 365.012,
1091
+ "eval_steps_per_second": 45.711,
1092
+ "epoch": 17.0,
1093
+ "step": 29852
1094
+ },
1095
+ {
1096
+ "loss": 0.0007,
1097
+ "grad_norm": 0.00015434053784701973,
1098
+ "learning_rate": 2.9157175398633257e-06,
1099
+ "epoch": 17.084282460136674,
1100
+ "step": 30000
1101
+ },
1102
+ {
1103
+ "loss": 0.0,
1104
+ "grad_norm": 0.00021848917822353542,
1105
+ "learning_rate": 2.6309794988610482e-06,
1106
+ "epoch": 17.36902050113895,
1107
+ "step": 30500
1108
+ },
1109
+ {
1110
+ "loss": 0.0002,
1111
+ "grad_norm": 0.0001007779865176417,
1112
+ "learning_rate": 2.34624145785877e-06,
1113
+ "epoch": 17.65375854214123,
1114
+ "step": 31000
1115
+ },
1116
+ {
1117
+ "loss": 0.0002,
1118
+ "grad_norm": 0.00010259783448418602,
1119
+ "learning_rate": 2.061503416856492e-06,
1120
+ "epoch": 17.938496583143507,
1121
+ "step": 31500
1122
+ },
1123
+ {
1124
+ "eval_loss": 0.12293359637260437,
1125
+ "eval_overall": {
1126
+ "precision": 0.9440199335548173,
1127
+ "recall": 0.9564119824974756,
1128
+ "f1": 0.9501755559271025,
1129
+ "accuracy": 0.9870636368988049
1130
+ },
1131
+ "eval_per_label": {
1132
+ "LOC": {
1133
+ "precision": 0.9701249321021184,
1134
+ "recall": 0.9722373434948285,
1135
+ "f1": 0.9711799891245242,
1136
+ "number": 1837
1137
+ },
1138
+ "MISC": {
1139
+ "precision": 0.8810020876826722,
1140
+ "recall": 0.9154013015184381,
1141
+ "f1": 0.8978723404255319,
1142
+ "number": 922
1143
+ },
1144
+ "ORG": {
1145
+ "precision": 0.926829268292683,
1146
+ "recall": 0.9351230425055929,
1147
+ "f1": 0.9309576837416481,
1148
+ "number": 1341
1149
+ },
1150
+ "PER": {
1151
+ "precision": 0.9630620985010707,
1152
+ "recall": 0.9766558089033659,
1153
+ "f1": 0.969811320754717,
1154
+ "number": 1842
1155
+ }
1156
+ },
1157
+ "eval_runtime": 8.7313,
1158
+ "eval_samples_per_second": 372.226,
1159
+ "eval_steps_per_second": 46.614,
1160
+ "epoch": 18.0,
1161
+ "step": 31608
1162
+ },
1163
+ {
1164
+ "loss": 0.0001,
1165
+ "grad_norm": 0.0010326108895242214,
1166
+ "learning_rate": 1.7767653758542143e-06,
1167
+ "epoch": 18.223234624145785,
1168
+ "step": 32000
1169
+ },
1170
+ {
1171
+ "loss": 0.0007,
1172
+ "grad_norm": 0.00016551795124541968,
1173
+ "learning_rate": 1.4920273348519363e-06,
1174
+ "epoch": 18.507972665148063,
1175
+ "step": 32500
1176
+ },
1177
+ {
1178
+ "loss": 0.0004,
1179
+ "grad_norm": 0.00024008983746170998,
1180
+ "learning_rate": 1.2072892938496584e-06,
1181
+ "epoch": 18.79271070615034,
1182
+ "step": 33000
1183
+ },
1184
+ {
1185
+ "eval_loss": 0.12236841022968292,
1186
+ "eval_overall": {
1187
+ "precision": 0.944813829787234,
1188
+ "recall": 0.9565802760013463,
1189
+ "f1": 0.9506606455929084,
1190
+ "accuracy": 0.9872255254017779
1191
+ },
1192
+ "eval_per_label": {
1193
+ "LOC": {
1194
+ "precision": 0.971677559912854,
1195
+ "recall": 0.9711486118671747,
1196
+ "f1": 0.9714130138851075,
1197
+ "number": 1837
1198
+ },
1199
+ "MISC": {
1200
+ "precision": 0.8712667353244078,
1201
+ "recall": 0.9175704989154013,
1202
+ "f1": 0.8938193343898574,
1203
+ "number": 922
1204
+ },
1205
+ "ORG": {
1206
+ "precision": 0.9383458646616541,
1207
+ "recall": 0.930648769574944,
1208
+ "f1": 0.9344814676151254,
1209
+ "number": 1341
1210
+ },
1211
+ "PER": {
1212
+ "precision": 0.961149547631719,
1213
+ "recall": 0.9804560260586319,
1214
+ "f1": 0.9707067992475142,
1215
+ "number": 1842
1216
+ }
1217
+ },
1218
+ "eval_runtime": 6.5081,
1219
+ "eval_samples_per_second": 499.381,
1220
+ "eval_steps_per_second": 62.538,
1221
+ "epoch": 19.0,
1222
+ "step": 33364
1223
+ },
1224
+ {
1225
+ "loss": 0.0001,
1226
+ "grad_norm": 0.00016071839490905404,
1227
+ "learning_rate": 9.225512528473805e-07,
1228
+ "epoch": 19.07744874715262,
1229
+ "step": 33500
1230
+ },
1231
+ {
1232
+ "loss": 0.0001,
1233
+ "grad_norm": 9.665234392741695e-05,
1234
+ "learning_rate": 6.378132118451026e-07,
1235
+ "epoch": 19.362186788154897,
1236
+ "step": 34000
1237
+ },
1238
+ {
1239
+ "loss": 0.0001,
1240
+ "grad_norm": 0.004949676804244518,
1241
+ "learning_rate": 3.530751708428246e-07,
1242
+ "epoch": 19.646924829157175,
1243
+ "step": 34500
1244
+ },
1245
+ {
1246
+ "loss": 0.0005,
1247
+ "grad_norm": 0.00014002641546539962,
1248
+ "learning_rate": 6.83371298405467e-08,
1249
+ "epoch": 19.931662870159453,
1250
+ "step": 35000
1251
+ },
1252
+ {
1253
+ "eval_loss": 0.12258101999759674,
1254
+ "eval_overall": {
1255
+ "precision": 0.9443891102257637,
1256
+ "recall": 0.9574217435207001,
1257
+ "f1": 0.9508607721878657,
1258
+ "accuracy": 0.9872549596750456
1259
+ },
1260
+ "eval_per_label": {
1261
+ "LOC": {
1262
+ "precision": 0.9727371864776445,
1263
+ "recall": 0.9711486118671747,
1264
+ "f1": 0.9719422500681013,
1265
+ "number": 1837
1266
+ },
1267
+ "MISC": {
1268
+ "precision": 0.8746113989637305,
1269
+ "recall": 0.9154013015184381,
1270
+ "f1": 0.8945416004239534,
1271
+ "number": 922
1272
+ },
1273
+ "ORG": {
1274
+ "precision": 0.9336810730253353,
1275
+ "recall": 0.9343773303504848,
1276
+ "f1": 0.9340290719344019,
1277
+ "number": 1341
1278
+ },
1279
+ "PER": {
1280
+ "precision": 0.960169941582581,
1281
+ "recall": 0.9815418023887079,
1282
+ "f1": 0.970738255033557,
1283
+ "number": 1842
1284
+ }
1285
+ },
1286
+ "eval_runtime": 6.7405,
1287
+ "eval_samples_per_second": 482.16,
1288
+ "eval_steps_per_second": 60.381,
1289
+ "epoch": 20.0,
1290
+ "step": 35120
1291
+ },
1292
+ {
1293
+ "train_runtime": 3604.2086,
1294
+ "train_samples_per_second": 77.914,
1295
+ "train_steps_per_second": 9.744,
1296
+ "total_flos": 6145780901050062.0,
1297
+ "train_loss": 0.013757273165407275,
1298
+ "epoch": 20.0,
1299
+ "step": 35120
1300
+ }
1301
+ ]