KakashiH committed on
Commit
a2fe482
verified
1 Parent(s): 4f6b9bf

Upload 11 files

Browse files
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:00033315c3978e7f5b3c187ff43c0c1b0c6f4076183b0997e37ad39f12c73c83
3
  size 75057744
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a60515d25ea6ee36af964b7e294db7d612120dd76d31a7b64c38600f34e828b8
3
  size 75057744
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:872d7b9701d8d7b2a9df16eb5c7940013ae4c0ab32d96039b3651ba65e7c70ba
3
  size 150346986
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3eb16eead2468ef311aa16fe09a0edc0b103f055c974c4733fb01a1d8915c96e
3
  size 150346986
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:97e92e08cd1c210745787fb53c3aec8b820b463c1f5d67263a0c4fecb69f69ca
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5fd92ce8f35a99f049aa43c1341a3b5999aa560c2036a65e964a08167a6e649
3
  size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2b2e7f09e07aa3c7be669ba27bfff15efcc0eca70c4fe2d063cc724177dea8a7
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3121eaecc736a439a53f42f9220e178e67e74b8074ca77b6592ae4b8178e018c
3
  size 1064
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.986425339366516,
5
  "eval_steps": 500,
6
- "global_step": 330,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -161,83 +161,6 @@
161
  "learning_rate": 0.0002,
162
  "loss": 0.4872,
163
  "step": 220
164
- },
165
- {
166
- "epoch": 2.08,
167
- "grad_norm": 1.0310947895050049,
168
- "learning_rate": 0.0002,
169
- "loss": 0.3785,
170
- "step": 230
171
- },
172
- {
173
- "epoch": 2.17,
174
- "grad_norm": 0.635901153087616,
175
- "learning_rate": 0.0002,
176
- "loss": 0.3905,
177
- "step": 240
178
- },
179
- {
180
- "epoch": 2.26,
181
- "grad_norm": 0.38871026039123535,
182
- "learning_rate": 0.0002,
183
- "loss": 0.3745,
184
- "step": 250
185
- },
186
- {
187
- "epoch": 2.35,
188
- "grad_norm": 0.5501377582550049,
189
- "learning_rate": 0.0002,
190
- "loss": 0.3759,
191
- "step": 260
192
- },
193
- {
194
- "epoch": 2.44,
195
- "grad_norm": 0.6457089781761169,
196
- "learning_rate": 0.0002,
197
- "loss": 0.394,
198
- "step": 270
199
- },
200
- {
201
- "epoch": 2.53,
202
- "grad_norm": 0.8598196506500244,
203
- "learning_rate": 0.0002,
204
- "loss": 0.3909,
205
- "step": 280
206
- },
207
- {
208
- "epoch": 2.62,
209
- "grad_norm": 0.5458590984344482,
210
- "learning_rate": 0.0002,
211
- "loss": 0.3725,
212
- "step": 290
213
- },
214
- {
215
- "epoch": 2.71,
216
- "grad_norm": 0.6310967803001404,
217
- "learning_rate": 0.0002,
218
- "loss": 0.3884,
219
- "step": 300
220
- },
221
- {
222
- "epoch": 2.81,
223
- "grad_norm": 1.0128086805343628,
224
- "learning_rate": 0.0002,
225
- "loss": 0.3829,
226
- "step": 310
227
- },
228
- {
229
- "epoch": 2.9,
230
- "grad_norm": 0.5322939157485962,
231
- "learning_rate": 0.0002,
232
- "loss": 0.3866,
233
- "step": 320
234
- },
235
- {
236
- "epoch": 2.99,
237
- "grad_norm": 0.5951926708221436,
238
- "learning_rate": 0.0002,
239
- "loss": 0.3494,
240
- "step": 330
241
  }
242
  ],
243
  "logging_steps": 10,
@@ -245,7 +168,7 @@
245
  "num_input_tokens_seen": 0,
246
  "num_train_epochs": 3,
247
  "save_steps": 500,
248
- "total_flos": 9.290508651144806e+16,
249
  "train_batch_size": 2,
250
  "trial_name": null,
251
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
  "eval_steps": 500,
6
+ "global_step": 221,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
161
  "learning_rate": 0.0002,
162
  "loss": 0.4872,
163
  "step": 220
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
  }
165
  ],
166
  "logging_steps": 10,
 
168
  "num_input_tokens_seen": 0,
169
  "num_train_epochs": 3,
170
  "save_steps": 500,
171
+ "total_flos": 6.217168915257754e+16,
172
  "train_batch_size": 2,
173
  "trial_name": null,
174
  "trial_params": null