student-shriman committed on
Commit
d7d0bc9
1 Parent(s): 74b1ed5

Upload 12 files

Browse files
config.json ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "vblagoje/bart_lfqa",
3
+ "_num_labels": 3,
4
+ "activation_dropout": 0.0,
5
+ "activation_function": "gelu",
6
+ "add_bias_logits": false,
7
+ "add_final_layer_norm": false,
8
+ "architectures": [
9
+ "BartForConditionalGeneration"
10
+ ],
11
+ "attention_dropout": 0.0,
12
+ "bos_token_id": 0,
13
+ "classif_dropout": 0.0,
14
+ "classifier_dropout": 0.0,
15
+ "d_model": 1024,
16
+ "decoder_attention_heads": 16,
17
+ "decoder_ffn_dim": 4096,
18
+ "decoder_layerdrop": 0.0,
19
+ "decoder_layers": 12,
20
+ "decoder_start_token_id": 2,
21
+ "dropout": 0.1,
22
+ "encoder_attention_heads": 16,
23
+ "encoder_ffn_dim": 4096,
24
+ "encoder_layerdrop": 0.0,
25
+ "encoder_layers": 12,
26
+ "eos_token_id": 2,
27
+ "forced_eos_token_id": 2,
28
+ "id2label": {
29
+ "0": "LABEL_0",
30
+ "1": "LABEL_1",
31
+ "2": "LABEL_2"
32
+ },
33
+ "init_std": 0.02,
34
+ "is_encoder_decoder": true,
35
+ "label2id": {
36
+ "LABEL_0": 0,
37
+ "LABEL_1": 1,
38
+ "LABEL_2": 2
39
+ },
40
+ "max_position_embeddings": 1024,
41
+ "model_type": "bart",
42
+ "normalize_before": false,
43
+ "normalize_embedding": true,
44
+ "num_hidden_layers": 12,
45
+ "output_past": false,
46
+ "pad_token_id": 1,
47
+ "prefix": " ",
48
+ "scale_embedding": false,
49
+ "static_position_embeddings": false,
50
+ "task_specific_params": {
51
+ "summarization": {
52
+ "early_stopping": true,
53
+ "length_penalty": 2.0,
54
+ "max_length": 142,
55
+ "min_length": 56,
56
+ "no_repeat_ngram_size": 3,
57
+ "num_beams": 4
58
+ }
59
+ },
60
+ "torch_dtype": "float32",
61
+ "transformers_version": "4.27.4",
62
+ "use_cache": true,
63
+ "vocab_size": 50265
64
+ }
generation_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 0,
4
+ "decoder_start_token_id": 2,
5
+ "eos_token_id": 2,
6
+ "forced_eos_token_id": 2,
7
+ "pad_token_id": 1,
8
+ "transformers_version": "4.27.4"
9
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0818147dd5e1b9dce966e1abda59e3937b564df27c0a1ab725baba977937f9a
3
+ size 14575
scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8815bdc8bb546076eab0186cf9c9ae5530c29815a375af2005e06db9842685f
3
+ size 557
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4922222fe0a14a9e76febac18a0258955ae6f9d73ddb2fbcfdc139a7103328c
3
+ size 627
special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "cls_token": "<s>",
4
+ "eos_token": "</s>",
5
+ "mask_token": {
6
+ "content": "<mask>",
7
+ "lstrip": true,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "pad_token": "<pad>",
13
+ "sep_token": "</s>",
14
+ "unk_token": "<unk>"
15
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "bos_token": "<s>",
4
+ "cls_token": "<s>",
5
+ "eos_token": "</s>",
6
+ "errors": "replace",
7
+ "mask_token": "<mask>",
8
+ "model_max_length": 1024,
9
+ "pad_token": "<pad>",
10
+ "sep_token": "</s>",
11
+ "special_tokens_map_file": null,
12
+ "tokenizer_class": "BartTokenizer",
13
+ "trim_offsets": true,
14
+ "unk_token": "<unk>"
15
+ }
trainer_state.json ADDED
@@ -0,0 +1,524 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 15.0,
5
+ "global_step": 33000,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.23,
12
+ "learning_rate": 1.969939393939394e-05,
13
+ "loss": 2.4249,
14
+ "step": 500
15
+ },
16
+ {
17
+ "epoch": 0.45,
18
+ "learning_rate": 1.9396363636363637e-05,
19
+ "loss": 2.3742,
20
+ "step": 1000
21
+ },
22
+ {
23
+ "epoch": 0.68,
24
+ "learning_rate": 1.9093939393939395e-05,
25
+ "loss": 2.3454,
26
+ "step": 1500
27
+ },
28
+ {
29
+ "epoch": 0.91,
30
+ "learning_rate": 1.8790909090909093e-05,
31
+ "loss": 2.3292,
32
+ "step": 2000
33
+ },
34
+ {
35
+ "epoch": 1.0,
36
+ "eval_loss": 2.4535229206085205,
37
+ "eval_runtime": 1.8377,
38
+ "eval_samples_per_second": 136.041,
39
+ "eval_steps_per_second": 22.855,
40
+ "step": 2200
41
+ },
42
+ {
43
+ "epoch": 1.14,
44
+ "learning_rate": 1.848787878787879e-05,
45
+ "loss": 2.1317,
46
+ "step": 2500
47
+ },
48
+ {
49
+ "epoch": 1.36,
50
+ "learning_rate": 1.8184848484848487e-05,
51
+ "loss": 2.0183,
52
+ "step": 3000
53
+ },
54
+ {
55
+ "epoch": 1.59,
56
+ "learning_rate": 1.788181818181818e-05,
57
+ "loss": 2.0052,
58
+ "step": 3500
59
+ },
60
+ {
61
+ "epoch": 1.82,
62
+ "learning_rate": 1.757878787878788e-05,
63
+ "loss": 1.9885,
64
+ "step": 4000
65
+ },
66
+ {
67
+ "epoch": 2.0,
68
+ "eval_loss": 2.4263100624084473,
69
+ "eval_runtime": 1.8704,
70
+ "eval_samples_per_second": 133.662,
71
+ "eval_steps_per_second": 22.455,
72
+ "step": 4400
73
+ },
74
+ {
75
+ "epoch": 2.05,
76
+ "learning_rate": 1.727575757575758e-05,
77
+ "loss": 1.9505,
78
+ "step": 4500
79
+ },
80
+ {
81
+ "epoch": 2.27,
82
+ "learning_rate": 1.6972727272727273e-05,
83
+ "loss": 1.7025,
84
+ "step": 5000
85
+ },
86
+ {
87
+ "epoch": 2.5,
88
+ "learning_rate": 1.6669696969696972e-05,
89
+ "loss": 1.717,
90
+ "step": 5500
91
+ },
92
+ {
93
+ "epoch": 2.73,
94
+ "learning_rate": 1.636727272727273e-05,
95
+ "loss": 1.7028,
96
+ "step": 6000
97
+ },
98
+ {
99
+ "epoch": 2.95,
100
+ "learning_rate": 1.6064242424242428e-05,
101
+ "loss": 1.741,
102
+ "step": 6500
103
+ },
104
+ {
105
+ "epoch": 3.0,
106
+ "eval_loss": 2.508474111557007,
107
+ "eval_runtime": 1.8606,
108
+ "eval_samples_per_second": 134.366,
109
+ "eval_steps_per_second": 22.573,
110
+ "step": 6600
111
+ },
112
+ {
113
+ "epoch": 3.18,
114
+ "learning_rate": 1.5761212121212123e-05,
115
+ "loss": 1.4946,
116
+ "step": 7000
117
+ },
118
+ {
119
+ "epoch": 3.41,
120
+ "learning_rate": 1.5458181818181818e-05,
121
+ "loss": 1.4504,
122
+ "step": 7500
123
+ },
124
+ {
125
+ "epoch": 3.64,
126
+ "learning_rate": 1.5155151515151516e-05,
127
+ "loss": 1.4769,
128
+ "step": 8000
129
+ },
130
+ {
131
+ "epoch": 3.86,
132
+ "learning_rate": 1.4852121212121213e-05,
133
+ "loss": 1.4818,
134
+ "step": 8500
135
+ },
136
+ {
137
+ "epoch": 4.0,
138
+ "eval_loss": 2.5952579975128174,
139
+ "eval_runtime": 1.911,
140
+ "eval_samples_per_second": 130.825,
141
+ "eval_steps_per_second": 21.979,
142
+ "step": 8800
143
+ },
144
+ {
145
+ "epoch": 4.09,
146
+ "learning_rate": 1.4549090909090911e-05,
147
+ "loss": 1.3898,
148
+ "step": 9000
149
+ },
150
+ {
151
+ "epoch": 4.32,
152
+ "learning_rate": 1.4246666666666669e-05,
153
+ "loss": 1.218,
154
+ "step": 9500
155
+ },
156
+ {
157
+ "epoch": 4.55,
158
+ "learning_rate": 1.3943636363636365e-05,
159
+ "loss": 1.2481,
160
+ "step": 10000
161
+ },
162
+ {
163
+ "epoch": 4.77,
164
+ "learning_rate": 1.364060606060606e-05,
165
+ "loss": 1.2518,
166
+ "step": 10500
167
+ },
168
+ {
169
+ "epoch": 5.0,
170
+ "learning_rate": 1.3337575757575759e-05,
171
+ "loss": 1.2692,
172
+ "step": 11000
173
+ },
174
+ {
175
+ "epoch": 5.0,
176
+ "eval_loss": 2.763364791870117,
177
+ "eval_runtime": 1.8787,
178
+ "eval_samples_per_second": 133.072,
179
+ "eval_steps_per_second": 22.356,
180
+ "step": 11000
181
+ },
182
+ {
183
+ "epoch": 5.23,
184
+ "learning_rate": 1.3035151515151516e-05,
185
+ "loss": 1.0289,
186
+ "step": 11500
187
+ },
188
+ {
189
+ "epoch": 5.45,
190
+ "learning_rate": 1.2732727272727275e-05,
191
+ "loss": 1.0458,
192
+ "step": 12000
193
+ },
194
+ {
195
+ "epoch": 5.68,
196
+ "learning_rate": 1.2429696969696972e-05,
197
+ "loss": 1.0494,
198
+ "step": 12500
199
+ },
200
+ {
201
+ "epoch": 5.91,
202
+ "learning_rate": 1.2126666666666667e-05,
203
+ "loss": 1.057,
204
+ "step": 13000
205
+ },
206
+ {
207
+ "epoch": 6.0,
208
+ "eval_loss": 2.8617300987243652,
209
+ "eval_runtime": 1.8519,
210
+ "eval_samples_per_second": 135.0,
211
+ "eval_steps_per_second": 22.68,
212
+ "step": 13200
213
+ },
214
+ {
215
+ "epoch": 6.14,
216
+ "learning_rate": 1.1823636363636364e-05,
217
+ "loss": 0.9342,
218
+ "step": 13500
219
+ },
220
+ {
221
+ "epoch": 6.36,
222
+ "learning_rate": 1.152060606060606e-05,
223
+ "loss": 0.8533,
224
+ "step": 14000
225
+ },
226
+ {
227
+ "epoch": 6.59,
228
+ "learning_rate": 1.1217575757575759e-05,
229
+ "loss": 0.8949,
230
+ "step": 14500
231
+ },
232
+ {
233
+ "epoch": 6.82,
234
+ "learning_rate": 1.0914545454545456e-05,
235
+ "loss": 0.8928,
236
+ "step": 15000
237
+ },
238
+ {
239
+ "epoch": 7.0,
240
+ "eval_loss": 3.067075729370117,
241
+ "eval_runtime": 1.8518,
242
+ "eval_samples_per_second": 135.007,
243
+ "eval_steps_per_second": 22.681,
244
+ "step": 15400
245
+ },
246
+ {
247
+ "epoch": 7.05,
248
+ "learning_rate": 1.0611515151515152e-05,
249
+ "loss": 0.8587,
250
+ "step": 15500
251
+ },
252
+ {
253
+ "epoch": 7.27,
254
+ "learning_rate": 1.030848484848485e-05,
255
+ "loss": 0.7187,
256
+ "step": 16000
257
+ },
258
+ {
259
+ "epoch": 7.5,
260
+ "learning_rate": 1.0006060606060606e-05,
261
+ "loss": 0.7212,
262
+ "step": 16500
263
+ },
264
+ {
265
+ "epoch": 7.73,
266
+ "learning_rate": 9.703030303030305e-06,
267
+ "loss": 0.7395,
268
+ "step": 17000
269
+ },
270
+ {
271
+ "epoch": 7.95,
272
+ "learning_rate": 9.4e-06,
273
+ "loss": 0.758,
274
+ "step": 17500
275
+ },
276
+ {
277
+ "epoch": 8.0,
278
+ "eval_loss": 3.21189546585083,
279
+ "eval_runtime": 1.8755,
280
+ "eval_samples_per_second": 133.301,
281
+ "eval_steps_per_second": 22.395,
282
+ "step": 17600
283
+ },
284
+ {
285
+ "epoch": 8.18,
286
+ "learning_rate": 9.097575757575759e-06,
287
+ "loss": 0.6303,
288
+ "step": 18000
289
+ },
290
+ {
291
+ "epoch": 8.41,
292
+ "learning_rate": 8.794545454545456e-06,
293
+ "loss": 0.5999,
294
+ "step": 18500
295
+ },
296
+ {
297
+ "epoch": 8.64,
298
+ "learning_rate": 8.491515151515152e-06,
299
+ "loss": 0.6254,
300
+ "step": 19000
301
+ },
302
+ {
303
+ "epoch": 8.86,
304
+ "learning_rate": 8.188484848484849e-06,
305
+ "loss": 0.6222,
306
+ "step": 19500
307
+ },
308
+ {
309
+ "epoch": 9.0,
310
+ "eval_loss": 3.3879506587982178,
311
+ "eval_runtime": 1.8402,
312
+ "eval_samples_per_second": 135.853,
313
+ "eval_steps_per_second": 22.823,
314
+ "step": 19800
315
+ },
316
+ {
317
+ "epoch": 9.09,
318
+ "learning_rate": 7.885454545454546e-06,
319
+ "loss": 0.5877,
320
+ "step": 20000
321
+ },
322
+ {
323
+ "epoch": 9.32,
324
+ "learning_rate": 7.582424242424243e-06,
325
+ "loss": 0.5085,
326
+ "step": 20500
327
+ },
328
+ {
329
+ "epoch": 9.55,
330
+ "learning_rate": 7.279393939393939e-06,
331
+ "loss": 0.5189,
332
+ "step": 21000
333
+ },
334
+ {
335
+ "epoch": 9.77,
336
+ "learning_rate": 6.976363636363637e-06,
337
+ "loss": 0.5198,
338
+ "step": 21500
339
+ },
340
+ {
341
+ "epoch": 10.0,
342
+ "learning_rate": 6.673939393939395e-06,
343
+ "loss": 0.5228,
344
+ "step": 22000
345
+ },
346
+ {
347
+ "epoch": 10.0,
348
+ "eval_loss": 3.485287666320801,
349
+ "eval_runtime": 1.8718,
350
+ "eval_samples_per_second": 133.559,
351
+ "eval_steps_per_second": 22.438,
352
+ "step": 22000
353
+ },
354
+ {
355
+ "epoch": 10.23,
356
+ "learning_rate": 6.371515151515152e-06,
357
+ "loss": 0.4323,
358
+ "step": 22500
359
+ },
360
+ {
361
+ "epoch": 10.45,
362
+ "learning_rate": 6.068484848484849e-06,
363
+ "loss": 0.4348,
364
+ "step": 23000
365
+ },
366
+ {
367
+ "epoch": 10.68,
368
+ "learning_rate": 5.7654545454545465e-06,
369
+ "loss": 0.4376,
370
+ "step": 23500
371
+ },
372
+ {
373
+ "epoch": 10.91,
374
+ "learning_rate": 5.4624242424242424e-06,
375
+ "loss": 0.4441,
376
+ "step": 24000
377
+ },
378
+ {
379
+ "epoch": 11.0,
380
+ "eval_loss": 3.6241962909698486,
381
+ "eval_runtime": 1.8617,
382
+ "eval_samples_per_second": 134.283,
383
+ "eval_steps_per_second": 22.56,
384
+ "step": 24200
385
+ },
386
+ {
387
+ "epoch": 11.14,
388
+ "learning_rate": 5.15939393939394e-06,
389
+ "loss": 0.3995,
390
+ "step": 24500
391
+ },
392
+ {
393
+ "epoch": 11.36,
394
+ "learning_rate": 4.856363636363637e-06,
395
+ "loss": 0.3728,
396
+ "step": 25000
397
+ },
398
+ {
399
+ "epoch": 11.59,
400
+ "learning_rate": 4.5533333333333335e-06,
401
+ "loss": 0.3743,
402
+ "step": 25500
403
+ },
404
+ {
405
+ "epoch": 11.82,
406
+ "learning_rate": 4.250303030303031e-06,
407
+ "loss": 0.3787,
408
+ "step": 26000
409
+ },
410
+ {
411
+ "epoch": 12.0,
412
+ "eval_loss": 3.684976100921631,
413
+ "eval_runtime": 1.8613,
414
+ "eval_samples_per_second": 134.316,
415
+ "eval_steps_per_second": 22.565,
416
+ "step": 26400
417
+ },
418
+ {
419
+ "epoch": 12.05,
420
+ "learning_rate": 3.947272727272727e-06,
421
+ "loss": 0.374,
422
+ "step": 26500
423
+ },
424
+ {
425
+ "epoch": 12.27,
426
+ "learning_rate": 3.645454545454546e-06,
427
+ "loss": 0.3186,
428
+ "step": 27000
429
+ },
430
+ {
431
+ "epoch": 12.5,
432
+ "learning_rate": 3.3424242424242424e-06,
433
+ "loss": 0.3265,
434
+ "step": 27500
435
+ },
436
+ {
437
+ "epoch": 12.73,
438
+ "learning_rate": 3.03939393939394e-06,
439
+ "loss": 0.3263,
440
+ "step": 28000
441
+ },
442
+ {
443
+ "epoch": 12.95,
444
+ "learning_rate": 2.7363636363636363e-06,
445
+ "loss": 0.3312,
446
+ "step": 28500
447
+ },
448
+ {
449
+ "epoch": 13.0,
450
+ "eval_loss": 3.783233880996704,
451
+ "eval_runtime": 1.8573,
452
+ "eval_samples_per_second": 134.603,
453
+ "eval_steps_per_second": 22.613,
454
+ "step": 28600
455
+ },
456
+ {
457
+ "epoch": 13.18,
458
+ "learning_rate": 2.4333333333333335e-06,
459
+ "loss": 0.2967,
460
+ "step": 29000
461
+ },
462
+ {
463
+ "epoch": 13.41,
464
+ "learning_rate": 2.130909090909091e-06,
465
+ "loss": 0.2904,
466
+ "step": 29500
467
+ },
468
+ {
469
+ "epoch": 13.64,
470
+ "learning_rate": 1.827878787878788e-06,
471
+ "loss": 0.293,
472
+ "step": 30000
473
+ },
474
+ {
475
+ "epoch": 13.86,
476
+ "learning_rate": 1.5248484848484849e-06,
477
+ "loss": 0.2893,
478
+ "step": 30500
479
+ },
480
+ {
481
+ "epoch": 14.0,
482
+ "eval_loss": 3.7963521480560303,
483
+ "eval_runtime": 1.8603,
484
+ "eval_samples_per_second": 134.387,
485
+ "eval_steps_per_second": 22.577,
486
+ "step": 30800
487
+ },
488
+ {
489
+ "epoch": 14.09,
490
+ "learning_rate": 1.221818181818182e-06,
491
+ "loss": 0.2799,
492
+ "step": 31000
493
+ },
494
+ {
495
+ "epoch": 14.32,
496
+ "learning_rate": 9.187878787878789e-07,
497
+ "loss": 0.2723,
498
+ "step": 31500
499
+ },
500
+ {
501
+ "epoch": 14.55,
502
+ "learning_rate": 6.163636363636364e-07,
503
+ "loss": 0.2673,
504
+ "step": 32000
505
+ },
506
+ {
507
+ "epoch": 14.77,
508
+ "learning_rate": 3.1333333333333333e-07,
509
+ "loss": 0.2642,
510
+ "step": 32500
511
+ },
512
+ {
513
+ "epoch": 15.0,
514
+ "learning_rate": 1.0303030303030303e-08,
515
+ "loss": 0.2671,
516
+ "step": 33000
517
+ }
518
+ ],
519
+ "max_steps": 33000,
520
+ "num_train_epochs": 15,
521
+ "total_flos": 1.1231089438777344e+17,
522
+ "trial_name": null,
523
+ "trial_params": null
524
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff1c0d46f00b235a51974ca1a2bd3c9cb7ad71b5858d7b9a061c62837f97e21d
3
+ size 3707
vocab.json ADDED
The diff for this file is too large to render. See raw diff