kevinzyz commited on
Commit
0a3362e
1 Parent(s): d43fd00

Training in progress, epoch 1

Browse files
all_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
- "epoch": 5.0,
3
- "eval_accuracy": 0.2919999957084656,
4
- "eval_loss": 1.569107174873352,
5
- "eval_runtime": 0.7522,
6
  "eval_samples": 500,
7
- "eval_samples_per_second": 664.689,
8
- "eval_steps_per_second": 42.54,
9
- "total_flos": 10492735676640.0,
10
- "train_loss": 1.5705763679504394,
11
- "train_runtime": 78.9966,
12
  "train_samples": 4000,
13
- "train_samples_per_second": 253.176,
14
- "train_steps_per_second": 15.823
15
  }
 
1
  {
2
+ "epoch": 3.0,
3
+ "eval_accuracy": 0.2160000056028366,
4
+ "eval_loss": 1.5779266357421875,
5
+ "eval_runtime": 0.8459,
6
  "eval_samples": 500,
7
+ "eval_samples_per_second": 591.1,
8
+ "eval_steps_per_second": 37.83,
9
+ "total_flos": 16105346090400.0,
10
+ "train_loss": 1.5832408892313639,
11
+ "train_runtime": 76.8946,
12
  "train_samples": 4000,
13
+ "train_samples_per_second": 156.058,
14
+ "train_steps_per_second": 9.754
15
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 5.0,
3
- "eval_accuracy": 0.2919999957084656,
4
- "eval_loss": 1.569107174873352,
5
- "eval_runtime": 0.7522,
6
  "eval_samples": 500,
7
- "eval_samples_per_second": 664.689,
8
- "eval_steps_per_second": 42.54
9
  }
 
1
  {
2
+ "epoch": 3.0,
3
+ "eval_accuracy": 0.2160000056028366,
4
+ "eval_loss": 1.5779266357421875,
5
+ "eval_runtime": 0.8459,
6
  "eval_samples": 500,
7
+ "eval_samples_per_second": 591.1,
8
+ "eval_steps_per_second": 37.83
9
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a9a88d0d3a073a3f519bc18050c71d3fd14e044502a026012f7c1dd7c2cc0c6e
3
  size 12755881
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7dfedd6c25cc6b194b200602880102dd46490c4ffb2738f2ae4339ced585ecd1
3
  size 12755881
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 5.0,
3
- "total_flos": 10492735676640.0,
4
- "train_loss": 1.5705763679504394,
5
- "train_runtime": 78.9966,
6
  "train_samples": 4000,
7
- "train_samples_per_second": 253.176,
8
- "train_steps_per_second": 15.823
9
  }
 
1
  {
2
+ "epoch": 3.0,
3
+ "total_flos": 16105346090400.0,
4
+ "train_loss": 1.5832408892313639,
5
+ "train_runtime": 76.8946,
6
  "train_samples": 4000,
7
+ "train_samples_per_second": 156.058,
8
+ "train_steps_per_second": 9.754
9
  }
trainer_state.json CHANGED
@@ -1,442 +1,274 @@
1
  {
2
- "best_metric": 1.569107174873352,
3
- "best_model_checkpoint": "chinese_roberta_L-2_H-128-finetuned-MC-hyper/checkpoint-1250",
4
- "epoch": 5.0,
5
- "global_step": 1250,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.08,
12
- "learning_rate": 4.92e-05,
13
- "loss": 1.6109,
14
  "step": 20
15
  },
16
  {
17
  "epoch": 0.16,
18
- "learning_rate": 4.8400000000000004e-05,
19
- "loss": 1.6093,
20
  "step": 40
21
  },
22
  {
23
  "epoch": 0.24,
24
- "learning_rate": 4.76e-05,
25
- "loss": 1.6059,
26
  "step": 60
27
  },
28
  {
29
  "epoch": 0.32,
30
- "learning_rate": 4.6800000000000006e-05,
31
- "loss": 1.6083,
32
  "step": 80
33
  },
34
  {
35
  "epoch": 0.4,
36
- "learning_rate": 4.600000000000001e-05,
37
- "loss": 1.6063,
38
  "step": 100
39
  },
40
  {
41
  "epoch": 0.48,
42
- "learning_rate": 4.52e-05,
43
- "loss": 1.606,
44
  "step": 120
45
  },
46
  {
47
  "epoch": 0.56,
48
- "learning_rate": 4.44e-05,
49
- "loss": 1.6085,
50
  "step": 140
51
  },
52
  {
53
  "epoch": 0.64,
54
- "learning_rate": 4.36e-05,
55
- "loss": 1.609,
56
  "step": 160
57
  },
58
  {
59
  "epoch": 0.72,
60
- "learning_rate": 4.2800000000000004e-05,
61
- "loss": 1.6032,
62
  "step": 180
63
  },
64
  {
65
  "epoch": 0.8,
66
- "learning_rate": 4.2e-05,
67
- "loss": 1.609,
68
  "step": 200
69
  },
70
  {
71
  "epoch": 0.88,
72
- "learning_rate": 4.12e-05,
73
- "loss": 1.6022,
74
  "step": 220
75
  },
76
  {
77
  "epoch": 0.96,
78
- "learning_rate": 4.0400000000000006e-05,
79
- "loss": 1.5993,
80
  "step": 240
81
  },
82
  {
83
  "epoch": 1.0,
84
- "eval_accuracy": 0.27799999713897705,
85
- "eval_loss": 1.596663236618042,
86
- "eval_runtime": 0.7533,
87
- "eval_samples_per_second": 663.789,
88
- "eval_steps_per_second": 42.483,
89
  "step": 250
90
  },
91
  {
92
  "epoch": 1.04,
93
- "learning_rate": 3.960000000000001e-05,
94
- "loss": 1.6047,
95
  "step": 260
96
  },
97
  {
98
  "epoch": 1.12,
99
- "learning_rate": 3.88e-05,
100
- "loss": 1.6005,
101
  "step": 280
102
  },
103
  {
104
  "epoch": 1.2,
105
- "learning_rate": 3.8e-05,
106
- "loss": 1.5939,
107
  "step": 300
108
  },
109
  {
110
  "epoch": 1.28,
111
- "learning_rate": 3.72e-05,
112
- "loss": 1.5916,
113
  "step": 320
114
  },
115
  {
116
  "epoch": 1.36,
117
- "learning_rate": 3.6400000000000004e-05,
118
- "loss": 1.5908,
119
  "step": 340
120
  },
121
  {
122
  "epoch": 1.44,
123
- "learning_rate": 3.56e-05,
124
- "loss": 1.59,
125
  "step": 360
126
  },
127
  {
128
  "epoch": 1.52,
129
- "learning_rate": 3.48e-05,
130
- "loss": 1.5996,
131
  "step": 380
132
  },
133
  {
134
  "epoch": 1.6,
135
- "learning_rate": 3.4000000000000007e-05,
136
- "loss": 1.586,
137
  "step": 400
138
  },
139
  {
140
  "epoch": 1.68,
141
- "learning_rate": 3.32e-05,
142
- "loss": 1.5712,
143
  "step": 420
144
  },
145
  {
146
  "epoch": 1.76,
147
- "learning_rate": 3.24e-05,
148
- "loss": 1.5726,
149
  "step": 440
150
  },
151
  {
152
  "epoch": 1.84,
153
- "learning_rate": 3.16e-05,
154
- "loss": 1.5727,
155
  "step": 460
156
  },
157
  {
158
  "epoch": 1.92,
159
- "learning_rate": 3.08e-05,
160
- "loss": 1.5822,
161
  "step": 480
162
  },
163
  {
164
  "epoch": 2.0,
165
- "learning_rate": 3e-05,
166
- "loss": 1.5731,
167
  "step": 500
168
  },
169
  {
170
  "epoch": 2.0,
171
- "eval_accuracy": 0.3019999861717224,
172
- "eval_loss": 1.5877653360366821,
173
- "eval_runtime": 0.647,
174
- "eval_samples_per_second": 772.82,
175
- "eval_steps_per_second": 49.46,
176
  "step": 500
177
  },
178
  {
179
  "epoch": 2.08,
180
- "learning_rate": 2.9199999999999998e-05,
181
- "loss": 1.5914,
182
  "step": 520
183
  },
184
  {
185
  "epoch": 2.16,
186
- "learning_rate": 2.84e-05,
187
- "loss": 1.5627,
188
  "step": 540
189
  },
190
  {
191
  "epoch": 2.24,
192
- "learning_rate": 2.7600000000000003e-05,
193
- "loss": 1.5672,
194
  "step": 560
195
  },
196
  {
197
  "epoch": 2.32,
198
- "learning_rate": 2.6800000000000004e-05,
199
- "loss": 1.564,
200
  "step": 580
201
  },
202
  {
203
  "epoch": 2.4,
204
- "learning_rate": 2.6000000000000002e-05,
205
- "loss": 1.577,
206
  "step": 600
207
  },
208
  {
209
  "epoch": 2.48,
210
- "learning_rate": 2.5200000000000003e-05,
211
- "loss": 1.5712,
212
  "step": 620
213
  },
214
  {
215
  "epoch": 2.56,
216
- "learning_rate": 2.44e-05,
217
- "loss": 1.5725,
218
  "step": 640
219
  },
220
  {
221
  "epoch": 2.64,
222
- "learning_rate": 2.36e-05,
223
- "loss": 1.5532,
224
  "step": 660
225
  },
226
  {
227
  "epoch": 2.72,
228
- "learning_rate": 2.2800000000000002e-05,
229
- "loss": 1.5703,
230
  "step": 680
231
  },
232
  {
233
  "epoch": 2.8,
234
- "learning_rate": 2.2000000000000003e-05,
235
- "loss": 1.5803,
236
  "step": 700
237
  },
238
  {
239
  "epoch": 2.88,
240
- "learning_rate": 2.12e-05,
241
- "loss": 1.5728,
242
  "step": 720
243
  },
244
  {
245
  "epoch": 2.96,
246
- "learning_rate": 2.04e-05,
247
- "loss": 1.5434,
248
  "step": 740
249
  },
250
  {
251
  "epoch": 3.0,
252
- "eval_accuracy": 0.29600000381469727,
253
- "eval_loss": 1.5791035890579224,
254
- "eval_runtime": 0.7356,
255
- "eval_samples_per_second": 679.717,
256
- "eval_steps_per_second": 43.502,
257
  "step": 750
258
  },
259
  {
260
- "epoch": 3.04,
261
- "learning_rate": 1.9600000000000002e-05,
262
- "loss": 1.5569,
263
- "step": 760
264
- },
265
- {
266
- "epoch": 3.12,
267
- "learning_rate": 1.88e-05,
268
- "loss": 1.5382,
269
- "step": 780
270
- },
271
- {
272
- "epoch": 3.2,
273
- "learning_rate": 1.8e-05,
274
- "loss": 1.5478,
275
- "step": 800
276
- },
277
- {
278
- "epoch": 3.28,
279
- "learning_rate": 1.7199999999999998e-05,
280
- "loss": 1.5665,
281
- "step": 820
282
- },
283
- {
284
- "epoch": 3.36,
285
- "learning_rate": 1.6400000000000002e-05,
286
- "loss": 1.5494,
287
- "step": 840
288
- },
289
- {
290
- "epoch": 3.44,
291
- "learning_rate": 1.56e-05,
292
- "loss": 1.5489,
293
- "step": 860
294
- },
295
- {
296
- "epoch": 3.52,
297
- "learning_rate": 1.48e-05,
298
- "loss": 1.5483,
299
- "step": 880
300
- },
301
- {
302
- "epoch": 3.6,
303
- "learning_rate": 1.4000000000000001e-05,
304
- "loss": 1.5388,
305
- "step": 900
306
- },
307
- {
308
- "epoch": 3.68,
309
- "learning_rate": 1.32e-05,
310
- "loss": 1.5563,
311
- "step": 920
312
- },
313
- {
314
- "epoch": 3.76,
315
- "learning_rate": 1.24e-05,
316
- "loss": 1.5616,
317
- "step": 940
318
- },
319
- {
320
- "epoch": 3.84,
321
- "learning_rate": 1.16e-05,
322
- "loss": 1.5512,
323
- "step": 960
324
- },
325
- {
326
- "epoch": 3.92,
327
- "learning_rate": 1.08e-05,
328
- "loss": 1.5288,
329
- "step": 980
330
- },
331
- {
332
- "epoch": 4.0,
333
- "learning_rate": 1e-05,
334
- "loss": 1.5473,
335
- "step": 1000
336
- },
337
- {
338
- "epoch": 4.0,
339
- "eval_accuracy": 0.2939999997615814,
340
- "eval_loss": 1.5701889991760254,
341
- "eval_runtime": 0.7589,
342
- "eval_samples_per_second": 658.833,
343
- "eval_steps_per_second": 42.165,
344
- "step": 1000
345
- },
346
- {
347
- "epoch": 4.08,
348
- "learning_rate": 9.2e-06,
349
- "loss": 1.5642,
350
- "step": 1020
351
- },
352
- {
353
- "epoch": 4.16,
354
- "learning_rate": 8.400000000000001e-06,
355
- "loss": 1.5334,
356
- "step": 1040
357
- },
358
- {
359
- "epoch": 4.24,
360
- "learning_rate": 7.6e-06,
361
- "loss": 1.526,
362
- "step": 1060
363
- },
364
- {
365
- "epoch": 4.32,
366
- "learning_rate": 6.800000000000001e-06,
367
- "loss": 1.5361,
368
- "step": 1080
369
- },
370
- {
371
- "epoch": 4.4,
372
- "learning_rate": 6e-06,
373
- "loss": 1.573,
374
- "step": 1100
375
- },
376
- {
377
- "epoch": 4.48,
378
- "learning_rate": 5.2e-06,
379
- "loss": 1.5543,
380
- "step": 1120
381
- },
382
- {
383
- "epoch": 4.56,
384
- "learning_rate": 4.4e-06,
385
- "loss": 1.5193,
386
- "step": 1140
387
- },
388
- {
389
- "epoch": 4.64,
390
- "learning_rate": 3.6e-06,
391
- "loss": 1.5401,
392
- "step": 1160
393
- },
394
- {
395
- "epoch": 4.72,
396
- "learning_rate": 2.8000000000000003e-06,
397
- "loss": 1.5373,
398
- "step": 1180
399
- },
400
- {
401
- "epoch": 4.8,
402
- "learning_rate": 2.0000000000000003e-06,
403
- "loss": 1.5279,
404
- "step": 1200
405
- },
406
- {
407
- "epoch": 4.88,
408
- "learning_rate": 1.2000000000000002e-06,
409
- "loss": 1.5503,
410
- "step": 1220
411
- },
412
- {
413
- "epoch": 4.96,
414
- "learning_rate": 4.0000000000000003e-07,
415
- "loss": 1.5422,
416
- "step": 1240
417
- },
418
- {
419
- "epoch": 5.0,
420
- "eval_accuracy": 0.2919999957084656,
421
- "eval_loss": 1.569107174873352,
422
- "eval_runtime": 0.7454,
423
- "eval_samples_per_second": 670.819,
424
- "eval_steps_per_second": 42.932,
425
- "step": 1250
426
- },
427
- {
428
- "epoch": 5.0,
429
- "step": 1250,
430
- "total_flos": 10492735676640.0,
431
- "train_loss": 1.5705763679504394,
432
- "train_runtime": 78.9966,
433
- "train_samples_per_second": 253.176,
434
- "train_steps_per_second": 15.823
435
  }
436
  ],
437
- "max_steps": 1250,
438
- "num_train_epochs": 5,
439
- "total_flos": 10492735676640.0,
440
  "trial_name": null,
441
  "trial_params": null
442
  }
 
1
  {
2
+ "best_metric": 1.5779266357421875,
3
+ "best_model_checkpoint": "chinese_roberta_L-2_H-128-finetuned-MC-hyper/checkpoint-750",
4
+ "epoch": 3.0,
5
+ "global_step": 750,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.08,
12
+ "learning_rate": 4.866666666666667e-05,
13
+ "loss": 1.6118,
14
  "step": 20
15
  },
16
  {
17
  "epoch": 0.16,
18
+ "learning_rate": 4.7333333333333336e-05,
19
+ "loss": 1.6098,
20
  "step": 40
21
  },
22
  {
23
  "epoch": 0.24,
24
+ "learning_rate": 4.600000000000001e-05,
25
+ "loss": 1.606,
26
  "step": 60
27
  },
28
  {
29
  "epoch": 0.32,
30
+ "learning_rate": 4.466666666666667e-05,
31
+ "loss": 1.6078,
32
  "step": 80
33
  },
34
  {
35
  "epoch": 0.4,
36
+ "learning_rate": 4.3333333333333334e-05,
37
+ "loss": 1.6084,
38
  "step": 100
39
  },
40
  {
41
  "epoch": 0.48,
42
+ "learning_rate": 4.2e-05,
43
+ "loss": 1.6069,
44
  "step": 120
45
  },
46
  {
47
  "epoch": 0.56,
48
+ "learning_rate": 4.066666666666667e-05,
49
+ "loss": 1.6077,
50
  "step": 140
51
  },
52
  {
53
  "epoch": 0.64,
54
+ "learning_rate": 3.933333333333333e-05,
55
+ "loss": 1.6025,
56
  "step": 160
57
  },
58
  {
59
  "epoch": 0.72,
60
+ "learning_rate": 3.8e-05,
61
+ "loss": 1.6015,
62
  "step": 180
63
  },
64
  {
65
  "epoch": 0.8,
66
+ "learning_rate": 3.6666666666666666e-05,
67
+ "loss": 1.5974,
68
  "step": 200
69
  },
70
  {
71
  "epoch": 0.88,
72
+ "learning_rate": 3.5333333333333336e-05,
73
+ "loss": 1.6043,
74
  "step": 220
75
  },
76
  {
77
  "epoch": 0.96,
78
+ "learning_rate": 3.4000000000000007e-05,
79
+ "loss": 1.5896,
80
  "step": 240
81
  },
82
  {
83
  "epoch": 1.0,
84
+ "eval_accuracy": 0.22200000286102295,
85
+ "eval_loss": 1.5897876024246216,
86
+ "eval_runtime": 0.8298,
87
+ "eval_samples_per_second": 602.546,
88
+ "eval_steps_per_second": 38.563,
89
  "step": 250
90
  },
91
  {
92
  "epoch": 1.04,
93
+ "learning_rate": 3.266666666666667e-05,
94
+ "loss": 1.5971,
95
  "step": 260
96
  },
97
  {
98
  "epoch": 1.12,
99
+ "learning_rate": 3.1333333333333334e-05,
100
+ "loss": 1.5927,
101
  "step": 280
102
  },
103
  {
104
  "epoch": 1.2,
105
+ "learning_rate": 3e-05,
106
+ "loss": 1.5924,
107
  "step": 300
108
  },
109
  {
110
  "epoch": 1.28,
111
+ "learning_rate": 2.8666666666666668e-05,
112
+ "loss": 1.5821,
113
  "step": 320
114
  },
115
  {
116
  "epoch": 1.36,
117
+ "learning_rate": 2.733333333333333e-05,
118
+ "loss": 1.5791,
119
  "step": 340
120
  },
121
  {
122
  "epoch": 1.44,
123
+ "learning_rate": 2.6000000000000002e-05,
124
+ "loss": 1.5902,
125
  "step": 360
126
  },
127
  {
128
  "epoch": 1.52,
129
+ "learning_rate": 2.466666666666667e-05,
130
+ "loss": 1.5781,
131
  "step": 380
132
  },
133
  {
134
  "epoch": 1.6,
135
+ "learning_rate": 2.3333333333333336e-05,
136
+ "loss": 1.5712,
137
  "step": 400
138
  },
139
  {
140
  "epoch": 1.68,
141
+ "learning_rate": 2.2000000000000003e-05,
142
+ "loss": 1.5792,
143
  "step": 420
144
  },
145
  {
146
  "epoch": 1.76,
147
+ "learning_rate": 2.0666666666666666e-05,
148
+ "loss": 1.5764,
149
  "step": 440
150
  },
151
  {
152
  "epoch": 1.84,
153
+ "learning_rate": 1.9333333333333333e-05,
154
+ "loss": 1.5908,
155
  "step": 460
156
  },
157
  {
158
  "epoch": 1.92,
159
+ "learning_rate": 1.8e-05,
160
+ "loss": 1.558,
161
  "step": 480
162
  },
163
  {
164
  "epoch": 2.0,
165
+ "learning_rate": 1.6666666666666667e-05,
166
+ "loss": 1.5831,
167
  "step": 500
168
  },
169
  {
170
  "epoch": 2.0,
171
+ "eval_accuracy": 0.21199999749660492,
172
+ "eval_loss": 1.5786162614822388,
173
+ "eval_runtime": 0.8638,
174
+ "eval_samples_per_second": 578.811,
175
+ "eval_steps_per_second": 37.044,
176
  "step": 500
177
  },
178
  {
179
  "epoch": 2.08,
180
+ "learning_rate": 1.5333333333333334e-05,
181
+ "loss": 1.5675,
182
  "step": 520
183
  },
184
  {
185
  "epoch": 2.16,
186
+ "learning_rate": 1.4000000000000001e-05,
187
+ "loss": 1.5696,
188
  "step": 540
189
  },
190
  {
191
  "epoch": 2.24,
192
+ "learning_rate": 1.2666666666666668e-05,
193
+ "loss": 1.5679,
194
  "step": 560
195
  },
196
  {
197
  "epoch": 2.32,
198
+ "learning_rate": 1.1333333333333334e-05,
199
+ "loss": 1.5625,
200
  "step": 580
201
  },
202
  {
203
  "epoch": 2.4,
204
+ "learning_rate": 1e-05,
205
+ "loss": 1.5726,
206
  "step": 600
207
  },
208
  {
209
  "epoch": 2.48,
210
+ "learning_rate": 8.666666666666668e-06,
211
+ "loss": 1.5627,
212
  "step": 620
213
  },
214
  {
215
  "epoch": 2.56,
216
+ "learning_rate": 7.333333333333334e-06,
217
+ "loss": 1.5596,
218
  "step": 640
219
  },
220
  {
221
  "epoch": 2.64,
222
+ "learning_rate": 6e-06,
223
+ "loss": 1.5648,
224
  "step": 660
225
  },
226
  {
227
  "epoch": 2.72,
228
+ "learning_rate": 4.666666666666667e-06,
229
+ "loss": 1.5623,
230
  "step": 680
231
  },
232
  {
233
  "epoch": 2.8,
234
+ "learning_rate": 3.3333333333333333e-06,
235
+ "loss": 1.5766,
236
  "step": 700
237
  },
238
  {
239
  "epoch": 2.88,
240
+ "learning_rate": 2.0000000000000003e-06,
241
+ "loss": 1.5596,
242
  "step": 720
243
  },
244
  {
245
  "epoch": 2.96,
246
+ "learning_rate": 6.666666666666667e-07,
247
+ "loss": 1.54,
248
  "step": 740
249
  },
250
  {
251
  "epoch": 3.0,
252
+ "eval_accuracy": 0.2160000056028366,
253
+ "eval_loss": 1.5779266357421875,
254
+ "eval_runtime": 0.9285,
255
+ "eval_samples_per_second": 538.513,
256
+ "eval_steps_per_second": 34.465,
257
  "step": 750
258
  },
259
  {
260
+ "epoch": 3.0,
261
+ "step": 750,
262
+ "total_flos": 16105346090400.0,
263
+ "train_loss": 1.5832408892313639,
264
+ "train_runtime": 76.8946,
265
+ "train_samples_per_second": 156.058,
266
+ "train_steps_per_second": 9.754
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
267
  }
268
  ],
269
+ "max_steps": 750,
270
+ "num_train_epochs": 3,
271
+ "total_flos": 16105346090400.0,
272
  "trial_name": null,
273
  "trial_params": null
274
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f1eb0324ba0a73ee29fb2559c05fad4e6c64a97f34e7bb4de884b1c0c9cc415f
3
  size 2799
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cdb89f3c4237c498a27550264d267734eb7e511c05c2d3aa52333e0f2f6ca78c
3
  size 2799