Namronaldo2004 committed on
Commit
d3c8535
·
1 Parent(s): 14ed909

Update fine-tuned model

Browse files
adapter_config.json CHANGED
@@ -20,13 +20,13 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "o_proj",
24
  "k_proj",
25
- "q_proj",
26
- "v_proj",
27
- "down_proj",
28
  "up_proj",
29
- "gate_proj"
 
 
 
30
  ],
31
  "task_type": " CAUSAL_LM",
32
  "use_dora": false,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
 
23
  "k_proj",
24
+ "gate_proj",
 
 
25
  "up_proj",
26
+ "o_proj",
27
+ "down_proj",
28
+ "q_proj",
29
+ "v_proj"
30
  ],
31
  "task_type": " CAUSAL_LM",
32
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:430a11259d17a49541e9f31c838ea163b82b162b916c4215762ffab0b4917a07
3
  size 159967880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a83e9a0016f9da67a243e4a1e37823c5fd485eab0afee3026b8ef7de3841085
3
  size 159967880
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2e45b6af16f366e2479a1fda5784a26168e5d866ba82ed55f4eb013e34c3ceee
3
  size 852876198
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ecccb0d08fadf6a1ad6f7ab16c4ac9074e62eccf977a1c221a8fb9b0f83894a
3
  size 852876198
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b0654b1e5962e1d264ec17c40dcea6c3de9060e283aa95a5fea4d3b6d181505f
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c09ae3d43feaff4e0394c4352b4f7f3a126522099688de7cea344ac7d55250d9
3
  size 14244
trainer_state.json CHANGED
@@ -10,352 +10,352 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.1,
13
- "grad_norm": 0.886165976524353,
14
  "learning_rate": 6.666666666666667e-05,
15
- "loss": 0.8569,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 0.2,
20
- "grad_norm": 0.9352289438247681,
21
  "learning_rate": 0.00013333333333333334,
22
- "loss": 0.9157,
23
  "step": 2
24
  },
25
  {
26
  "epoch": 0.3,
27
- "grad_norm": 0.6887379288673401,
28
  "learning_rate": 0.0002,
29
- "loss": 0.8368,
30
  "step": 3
31
  },
32
  {
33
  "epoch": 0.4,
34
- "grad_norm": 0.5438244938850403,
35
  "learning_rate": 0.00019977668786231534,
36
- "loss": 0.7759,
37
  "step": 4
38
  },
39
  {
40
  "epoch": 0.5,
41
- "grad_norm": 0.4049173593521118,
42
  "learning_rate": 0.000199107748815478,
43
- "loss": 0.7108,
44
  "step": 5
45
  },
46
  {
47
  "epoch": 0.6,
48
- "grad_norm": 0.3374454379081726,
49
  "learning_rate": 0.0001979961705036587,
50
- "loss": 0.6949,
51
  "step": 6
52
  },
53
  {
54
  "epoch": 0.7,
55
- "grad_norm": 0.31860846281051636,
56
  "learning_rate": 0.00019644691750543767,
57
- "loss": 0.6725,
58
  "step": 7
59
  },
60
  {
61
  "epoch": 0.8,
62
- "grad_norm": 0.3116016089916229,
63
  "learning_rate": 0.0001944669091607919,
64
- "loss": 0.6814,
65
  "step": 8
66
  },
67
  {
68
  "epoch": 0.9,
69
- "grad_norm": 0.3020254969596863,
70
  "learning_rate": 0.00019206498866764288,
71
- "loss": 0.6628,
72
  "step": 9
73
  },
74
  {
75
  "epoch": 1.0,
76
- "grad_norm": 0.3751624524593353,
77
  "learning_rate": 0.00018925188358598813,
78
- "loss": 0.6491,
79
  "step": 10
80
  },
81
  {
82
  "epoch": 1.1,
83
- "grad_norm": 0.33873409032821655,
84
  "learning_rate": 0.00018604015792601396,
85
- "loss": 0.5887,
86
  "step": 11
87
  },
88
  {
89
  "epoch": 1.2,
90
- "grad_norm": 0.3233942985534668,
91
  "learning_rate": 0.00018244415603417603,
92
- "loss": 0.5414,
93
  "step": 12
94
  },
95
  {
96
  "epoch": 1.3,
97
- "grad_norm": 0.33184298872947693,
98
  "learning_rate": 0.0001784799385278661,
99
- "loss": 0.5534,
100
  "step": 13
101
  },
102
  {
103
  "epoch": 1.4,
104
- "grad_norm": 0.3251623511314392,
105
  "learning_rate": 0.00017416521056479577,
106
- "loss": 0.5303,
107
  "step": 14
108
  },
109
  {
110
  "epoch": 1.5,
111
- "grad_norm": 0.30667614936828613,
112
  "learning_rate": 0.00016951924276746425,
113
- "loss": 0.5261,
114
  "step": 15
115
  },
116
  {
117
  "epoch": 1.6,
118
- "grad_norm": 0.30262428522109985,
119
  "learning_rate": 0.00016456278515588024,
120
- "loss": 0.5101,
121
  "step": 16
122
  },
123
  {
124
  "epoch": 1.7,
125
- "grad_norm": 0.30721279978752136,
126
  "learning_rate": 0.00015931797447293552,
127
- "loss": 0.5059,
128
  "step": 17
129
  },
130
  {
131
  "epoch": 1.8,
132
- "grad_norm": 0.2939946949481964,
133
  "learning_rate": 0.00015380823531633729,
134
- "loss": 0.4949,
135
  "step": 18
136
  },
137
  {
138
  "epoch": 1.9,
139
- "grad_norm": 0.28288447856903076,
140
  "learning_rate": 0.00014805817551866838,
141
- "loss": 0.4869,
142
  "step": 19
143
  },
144
  {
145
  "epoch": 2.0,
146
- "grad_norm": 0.2906552255153656,
147
  "learning_rate": 0.0001420934762428335,
148
- "loss": 0.4605,
149
  "step": 20
150
  },
151
  {
152
  "epoch": 2.1,
153
- "grad_norm": 0.2544306218624115,
154
  "learning_rate": 0.00013594077728375128,
155
- "loss": 0.4436,
156
  "step": 21
157
  },
158
  {
159
  "epoch": 2.2,
160
- "grad_norm": 0.2552658021450043,
161
  "learning_rate": 0.00012962755808856342,
162
- "loss": 0.4192,
163
  "step": 22
164
  },
165
  {
166
  "epoch": 2.3,
167
- "grad_norm": 0.2546628713607788,
168
  "learning_rate": 0.00012318201502675285,
169
- "loss": 0.4106,
170
  "step": 23
171
  },
172
  {
173
  "epoch": 2.4,
174
- "grad_norm": 0.23741765320301056,
175
  "learning_rate": 0.00011663293545831302,
176
- "loss": 0.3938,
177
  "step": 24
178
  },
179
  {
180
  "epoch": 2.5,
181
- "grad_norm": 0.2525660991668701,
182
  "learning_rate": 0.00011000956916240985,
183
- "loss": 0.4017,
184
  "step": 25
185
  },
186
  {
187
  "epoch": 2.6,
188
- "grad_norm": 0.2451285570859909,
189
  "learning_rate": 0.00010334149770076747,
190
- "loss": 0.3794,
191
  "step": 26
192
  },
193
  {
194
  "epoch": 2.7,
195
- "grad_norm": 0.2524901032447815,
196
  "learning_rate": 9.665850229923258e-05,
197
- "loss": 0.3855,
198
  "step": 27
199
  },
200
  {
201
  "epoch": 2.8,
202
- "grad_norm": 0.24502427875995636,
203
  "learning_rate": 8.999043083759017e-05,
204
- "loss": 0.3616,
205
  "step": 28
206
  },
207
  {
208
  "epoch": 2.9,
209
- "grad_norm": 0.2539559602737427,
210
  "learning_rate": 8.336706454168701e-05,
211
- "loss": 0.3763,
212
  "step": 29
213
  },
214
  {
215
  "epoch": 3.0,
216
- "grad_norm": 0.24636436998844147,
217
  "learning_rate": 7.681798497324716e-05,
218
- "loss": 0.3517,
219
  "step": 30
220
  },
221
  {
222
  "epoch": 3.1,
223
- "grad_norm": 0.23345255851745605,
224
  "learning_rate": 7.037244191143661e-05,
225
- "loss": 0.3284,
226
  "step": 31
227
  },
228
  {
229
  "epoch": 3.2,
230
- "grad_norm": 0.2510325014591217,
231
  "learning_rate": 6.405922271624874e-05,
232
- "loss": 0.3301,
233
  "step": 32
234
  },
235
  {
236
  "epoch": 3.3,
237
- "grad_norm": 0.24276795983314514,
238
  "learning_rate": 5.790652375716652e-05,
239
- "loss": 0.3339,
240
  "step": 33
241
  },
242
  {
243
  "epoch": 3.4,
244
- "grad_norm": 0.23698894679546356,
245
  "learning_rate": 5.1941824481331626e-05,
246
- "loss": 0.3273,
247
  "step": 34
248
  },
249
  {
250
  "epoch": 3.5,
251
- "grad_norm": 0.24454469978809357,
252
  "learning_rate": 4.6191764683662744e-05,
253
- "loss": 0.3073,
254
  "step": 35
255
  },
256
  {
257
  "epoch": 3.6,
258
- "grad_norm": 0.25007879734039307,
259
  "learning_rate": 4.0682025527064486e-05,
260
- "loss": 0.3149,
261
  "step": 36
262
  },
263
  {
264
  "epoch": 3.7,
265
- "grad_norm": 0.25587430596351624,
266
  "learning_rate": 3.543721484411976e-05,
267
- "loss": 0.3081,
268
  "step": 37
269
  },
270
  {
271
  "epoch": 3.8,
272
- "grad_norm": 0.2517322599887848,
273
  "learning_rate": 3.0480757232535772e-05,
274
- "loss": 0.3131,
275
  "step": 38
276
  },
277
  {
278
  "epoch": 3.9,
279
- "grad_norm": 0.23721425235271454,
280
  "learning_rate": 2.5834789435204243e-05,
281
- "loss": 0.2987,
282
  "step": 39
283
  },
284
  {
285
  "epoch": 4.0,
286
- "grad_norm": 0.23976309597492218,
287
  "learning_rate": 2.1520061472133902e-05,
288
- "loss": 0.3148,
289
  "step": 40
290
  },
291
  {
292
  "epoch": 4.1,
293
- "grad_norm": 0.22691340744495392,
294
  "learning_rate": 1.7555843965823992e-05,
295
- "loss": 0.2777,
296
  "step": 41
297
  },
298
  {
299
  "epoch": 4.2,
300
- "grad_norm": 0.23945631086826324,
301
  "learning_rate": 1.3959842073986085e-05,
302
- "loss": 0.2954,
303
  "step": 42
304
  },
305
  {
306
  "epoch": 4.3,
307
- "grad_norm": 0.23303616046905518,
308
  "learning_rate": 1.0748116414011888e-05,
309
- "loss": 0.2882,
310
  "step": 43
311
  },
312
  {
313
  "epoch": 4.4,
314
- "grad_norm": 0.24104540050029755,
315
  "learning_rate": 7.935011332357112e-06,
316
- "loss": 0.2939,
317
  "step": 44
318
  },
319
  {
320
  "epoch": 4.5,
321
- "grad_norm": 0.23101571202278137,
322
  "learning_rate": 5.533090839208133e-06,
323
- "loss": 0.2789,
324
  "step": 45
325
  },
326
  {
327
  "epoch": 4.6,
328
- "grad_norm": 0.2310316413640976,
329
  "learning_rate": 3.5530824945623542e-06,
330
- "loss": 0.2925,
331
  "step": 46
332
  },
333
  {
334
  "epoch": 4.7,
335
- "grad_norm": 0.23140206933021545,
336
  "learning_rate": 2.003829496341325e-06,
337
- "loss": 0.2824,
338
  "step": 47
339
  },
340
  {
341
  "epoch": 4.8,
342
- "grad_norm": 0.22320057451725006,
343
  "learning_rate": 8.922511845219971e-07,
344
- "loss": 0.2975,
345
  "step": 48
346
  },
347
  {
348
  "epoch": 4.9,
349
- "grad_norm": 0.22732405364513397,
350
  "learning_rate": 2.2331213768468363e-07,
351
- "loss": 0.2891,
352
  "step": 49
353
  },
354
  {
355
  "epoch": 5.0,
356
- "grad_norm": 0.2486155927181244,
357
  "learning_rate": 0.0,
358
- "loss": 0.2815,
359
  "step": 50
360
  }
361
  ],
@@ -376,7 +376,7 @@
376
  "attributes": {}
377
  }
378
  },
379
- "total_flos": 7.457968683122688e+16,
380
  "train_batch_size": 1,
381
  "trial_name": null,
382
  "trial_params": null
 
10
  "log_history": [
11
  {
12
  "epoch": 0.1,
13
+ "grad_norm": 0.7374217510223389,
14
  "learning_rate": 6.666666666666667e-05,
15
+ "loss": 0.828,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 0.2,
20
+ "grad_norm": 0.7326979041099548,
21
  "learning_rate": 0.00013333333333333334,
22
+ "loss": 0.8692,
23
  "step": 2
24
  },
25
  {
26
  "epoch": 0.3,
27
+ "grad_norm": 0.5996476411819458,
28
  "learning_rate": 0.0002,
29
+ "loss": 0.7978,
30
  "step": 3
31
  },
32
  {
33
  "epoch": 0.4,
34
+ "grad_norm": 0.4960692524909973,
35
  "learning_rate": 0.00019977668786231534,
36
+ "loss": 0.7581,
37
  "step": 4
38
  },
39
  {
40
  "epoch": 0.5,
41
+ "grad_norm": 0.38662832975387573,
42
  "learning_rate": 0.000199107748815478,
43
+ "loss": 0.7099,
44
  "step": 5
45
  },
46
  {
47
  "epoch": 0.6,
48
+ "grad_norm": 0.47017937898635864,
49
  "learning_rate": 0.0001979961705036587,
50
+ "loss": 0.6783,
51
  "step": 6
52
  },
53
  {
54
  "epoch": 0.7,
55
+ "grad_norm": 0.39107415080070496,
56
  "learning_rate": 0.00019644691750543767,
57
+ "loss": 0.6699,
58
  "step": 7
59
  },
60
  {
61
  "epoch": 0.8,
62
+ "grad_norm": 0.31766048073768616,
63
  "learning_rate": 0.0001944669091607919,
64
+ "loss": 0.6986,
65
  "step": 8
66
  },
67
  {
68
  "epoch": 0.9,
69
+ "grad_norm": 0.28557154536247253,
70
  "learning_rate": 0.00019206498866764288,
71
+ "loss": 0.6539,
72
  "step": 9
73
  },
74
  {
75
  "epoch": 1.0,
76
+ "grad_norm": 0.2970712184906006,
77
  "learning_rate": 0.00018925188358598813,
78
+ "loss": 0.6672,
79
  "step": 10
80
  },
81
  {
82
  "epoch": 1.1,
83
+ "grad_norm": 0.33635565638542175,
84
  "learning_rate": 0.00018604015792601396,
85
+ "loss": 0.5643,
86
  "step": 11
87
  },
88
  {
89
  "epoch": 1.2,
90
+ "grad_norm": 0.34821704030036926,
91
  "learning_rate": 0.00018244415603417603,
92
+ "loss": 0.5525,
93
  "step": 12
94
  },
95
  {
96
  "epoch": 1.3,
97
+ "grad_norm": 0.3633900582790375,
98
  "learning_rate": 0.0001784799385278661,
99
+ "loss": 0.5256,
100
  "step": 13
101
  },
102
  {
103
  "epoch": 1.4,
104
+ "grad_norm": 0.3432920575141907,
105
  "learning_rate": 0.00017416521056479577,
106
+ "loss": 0.5193,
107
  "step": 14
108
  },
109
  {
110
  "epoch": 1.5,
111
+ "grad_norm": 0.3452214002609253,
112
  "learning_rate": 0.00016951924276746425,
113
+ "loss": 0.5046,
114
  "step": 15
115
  },
116
  {
117
  "epoch": 1.6,
118
+ "grad_norm": 0.29285934567451477,
119
  "learning_rate": 0.00016456278515588024,
120
+ "loss": 0.4897,
121
  "step": 16
122
  },
123
  {
124
  "epoch": 1.7,
125
+ "grad_norm": 0.30729544162750244,
126
  "learning_rate": 0.00015931797447293552,
127
+ "loss": 0.501,
128
  "step": 17
129
  },
130
  {
131
  "epoch": 1.8,
132
+ "grad_norm": 0.2720145583152771,
133
  "learning_rate": 0.00015380823531633729,
134
+ "loss": 0.4759,
135
  "step": 18
136
  },
137
  {
138
  "epoch": 1.9,
139
+ "grad_norm": 0.2651268541812897,
140
  "learning_rate": 0.00014805817551866838,
141
+ "loss": 0.4527,
142
  "step": 19
143
  },
144
  {
145
  "epoch": 2.0,
146
+ "grad_norm": 0.2597902715206146,
147
  "learning_rate": 0.0001420934762428335,
148
+ "loss": 0.4479,
149
  "step": 20
150
  },
151
  {
152
  "epoch": 2.1,
153
+ "grad_norm": 0.2531144917011261,
154
  "learning_rate": 0.00013594077728375128,
155
+ "loss": 0.4056,
156
  "step": 21
157
  },
158
  {
159
  "epoch": 2.2,
160
+ "grad_norm": 0.2363481968641281,
161
  "learning_rate": 0.00012962755808856342,
162
+ "loss": 0.3951,
163
  "step": 22
164
  },
165
  {
166
  "epoch": 2.3,
167
+ "grad_norm": 0.23172461986541748,
168
  "learning_rate": 0.00012318201502675285,
169
+ "loss": 0.3863,
170
  "step": 23
171
  },
172
  {
173
  "epoch": 2.4,
174
+ "grad_norm": 0.24559740722179413,
175
  "learning_rate": 0.00011663293545831302,
176
+ "loss": 0.3809,
177
  "step": 24
178
  },
179
  {
180
  "epoch": 2.5,
181
+ "grad_norm": 0.24797773361206055,
182
  "learning_rate": 0.00011000956916240985,
183
+ "loss": 0.3768,
184
  "step": 25
185
  },
186
  {
187
  "epoch": 2.6,
188
+ "grad_norm": 0.25474536418914795,
189
  "learning_rate": 0.00010334149770076747,
190
+ "loss": 0.3628,
191
  "step": 26
192
  },
193
  {
194
  "epoch": 2.7,
195
+ "grad_norm": 0.25486743450164795,
196
  "learning_rate": 9.665850229923258e-05,
197
+ "loss": 0.3603,
198
  "step": 27
199
  },
200
  {
201
  "epoch": 2.8,
202
+ "grad_norm": 0.26718005537986755,
203
  "learning_rate": 8.999043083759017e-05,
204
+ "loss": 0.3584,
205
  "step": 28
206
  },
207
  {
208
  "epoch": 2.9,
209
+ "grad_norm": 0.2610970139503479,
210
  "learning_rate": 8.336706454168701e-05,
211
+ "loss": 0.3546,
212
  "step": 29
213
  },
214
  {
215
  "epoch": 3.0,
216
+ "grad_norm": 0.2513510584831238,
217
  "learning_rate": 7.681798497324716e-05,
218
+ "loss": 0.3507,
219
  "step": 30
220
  },
221
  {
222
  "epoch": 3.1,
223
+ "grad_norm": 0.23601773381233215,
224
  "learning_rate": 7.037244191143661e-05,
225
+ "loss": 0.3212,
226
  "step": 31
227
  },
228
  {
229
  "epoch": 3.2,
230
+ "grad_norm": 0.2521430253982544,
231
  "learning_rate": 6.405922271624874e-05,
232
+ "loss": 0.3399,
233
  "step": 32
234
  },
235
  {
236
  "epoch": 3.3,
237
+ "grad_norm": 0.24177978932857513,
238
  "learning_rate": 5.790652375716652e-05,
239
+ "loss": 0.2891,
240
  "step": 33
241
  },
242
  {
243
  "epoch": 3.4,
244
+ "grad_norm": 0.25264158844947815,
245
  "learning_rate": 5.1941824481331626e-05,
246
+ "loss": 0.3047,
247
  "step": 34
248
  },
249
  {
250
  "epoch": 3.5,
251
+ "grad_norm": 0.23934145271778107,
252
  "learning_rate": 4.6191764683662744e-05,
253
+ "loss": 0.2896,
254
  "step": 35
255
  },
256
  {
257
  "epoch": 3.6,
258
+ "grad_norm": 0.25319987535476685,
259
  "learning_rate": 4.0682025527064486e-05,
260
+ "loss": 0.2977,
261
  "step": 36
262
  },
263
  {
264
  "epoch": 3.7,
265
+ "grad_norm": 0.23155006766319275,
266
  "learning_rate": 3.543721484411976e-05,
267
+ "loss": 0.2793,
268
  "step": 37
269
  },
270
  {
271
  "epoch": 3.8,
272
+ "grad_norm": 0.2498036026954651,
273
  "learning_rate": 3.0480757232535772e-05,
274
+ "loss": 0.3165,
275
  "step": 38
276
  },
277
  {
278
  "epoch": 3.9,
279
+ "grad_norm": 0.23940016329288483,
280
  "learning_rate": 2.5834789435204243e-05,
281
+ "loss": 0.2938,
282
  "step": 39
283
  },
284
  {
285
  "epoch": 4.0,
286
+ "grad_norm": 0.23618489503860474,
287
  "learning_rate": 2.1520061472133902e-05,
288
+ "loss": 0.2713,
289
  "step": 40
290
  },
291
  {
292
  "epoch": 4.1,
293
+ "grad_norm": 0.22089381515979767,
294
  "learning_rate": 1.7555843965823992e-05,
295
+ "loss": 0.2618,
296
  "step": 41
297
  },
298
  {
299
  "epoch": 4.2,
300
+ "grad_norm": 0.2343159019947052,
301
  "learning_rate": 1.3959842073986085e-05,
302
+ "loss": 0.2668,
303
  "step": 42
304
  },
305
  {
306
  "epoch": 4.3,
307
+ "grad_norm": 0.2323874831199646,
308
  "learning_rate": 1.0748116414011888e-05,
309
+ "loss": 0.2645,
310
  "step": 43
311
  },
312
  {
313
  "epoch": 4.4,
314
+ "grad_norm": 0.24557726085186005,
315
  "learning_rate": 7.935011332357112e-06,
316
+ "loss": 0.2728,
317
  "step": 44
318
  },
319
  {
320
  "epoch": 4.5,
321
+ "grad_norm": 0.24076133966445923,
322
  "learning_rate": 5.533090839208133e-06,
323
+ "loss": 0.2729,
324
  "step": 45
325
  },
326
  {
327
  "epoch": 4.6,
328
+ "grad_norm": 0.22625155746936798,
329
  "learning_rate": 3.5530824945623542e-06,
330
+ "loss": 0.2929,
331
  "step": 46
332
  },
333
  {
334
  "epoch": 4.7,
335
+ "grad_norm": 0.23357822000980377,
336
  "learning_rate": 2.003829496341325e-06,
337
+ "loss": 0.2691,
338
  "step": 47
339
  },
340
  {
341
  "epoch": 4.8,
342
+ "grad_norm": 0.22757555544376373,
343
  "learning_rate": 8.922511845219971e-07,
344
+ "loss": 0.2713,
345
  "step": 48
346
  },
347
  {
348
  "epoch": 4.9,
349
+ "grad_norm": 0.23513054847717285,
350
  "learning_rate": 2.2331213768468363e-07,
351
+ "loss": 0.2777,
352
  "step": 49
353
  },
354
  {
355
  "epoch": 5.0,
356
+ "grad_norm": 0.23401321470737457,
357
  "learning_rate": 0.0,
358
+ "loss": 0.2611,
359
  "step": 50
360
  }
361
  ],
 
376
  "attributes": {}
377
  }
378
  },
379
+ "total_flos": 7.430306788466688e+16,
380
  "train_batch_size": 1,
381
  "trial_name": null,
382
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:379e9b75008263e92f24f632d566b21e4ea482a4e680b4f98fef9cb95afe9d88
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e78e03cdc8dd492bf3e2caa50f5fa2767b7f8b075cdb59edee3e79f289bb3410
3
  size 5240