Rakhman16 commited on
Commit
67c1223
·
verified ·
1 Parent(s): 3993f84

Training in progress, step 1500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:23c0612ad5dddfadcdb2b879901305d100ce5db6de83b8784684c964517cf79f
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d25040acf885f7e2920c47759e972a9c964c43aa59fe6576736ecb99705b9e7f
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c61f1cdd59dbbfa29aaa648b120dc6b7a5660b6947962206ed3d139f3045b83e
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc841690d9edeb441c534cb39ba0e0b76571370a7dc70d911503af4a861ef3c3
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:686e4f08928bc0c19b99582b6c5fe9ffd12480ee988aecee97f880477c357d0e
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee7003d2d3db8b5d062c3280168e1b356926dfcb2c85d0b9bea95ac9bb64d84f
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9537cb1bb802ef9061fb4a55063dce9ed288df8d5f1ce5dca106b05ee393ba0e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d51f93e5e4e2970e1d4467bbc53489257074e326323db890e39b7a999de6e4d
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.11499012261629105,
3
- "best_model_checkpoint": "./fine-tuned/checkpoint-1000",
4
- "epoch": 0.08782329952136302,
5
  "eval_steps": 100,
6
- "global_step": 1000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -227,6 +227,116 @@
227
  "eval_samples_per_second": 54.079,
228
  "eval_steps_per_second": 13.52,
229
  "step": 1000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
230
  }
231
  ],
232
  "logging_steps": 50,
@@ -246,7 +356,7 @@
246
  "attributes": {}
247
  }
248
  },
249
- "total_flos": 2435831562240000.0,
250
  "train_batch_size": 4,
251
  "trial_name": null,
252
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.11325465887784958,
3
+ "best_model_checkpoint": "./fine-tuned/checkpoint-1500",
4
+ "epoch": 0.13173494928204452,
5
  "eval_steps": 100,
6
+ "global_step": 1500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
227
  "eval_samples_per_second": 54.079,
228
  "eval_steps_per_second": 13.52,
229
  "step": 1000
230
+ },
231
+ {
232
+ "epoch": 0.09221446449743116,
233
+ "grad_norm": 0.3713476359844208,
234
+ "learning_rate": 4.884726857544353e-05,
235
+ "loss": 0.1251,
236
+ "step": 1050
237
+ },
238
+ {
239
+ "epoch": 0.09660562947349932,
240
+ "grad_norm": 0.37777256965637207,
241
+ "learning_rate": 4.8792376602845605e-05,
242
+ "loss": 0.1153,
243
+ "step": 1100
244
+ },
245
+ {
246
+ "epoch": 0.09660562947349932,
247
+ "eval_loss": 0.11525405198335648,
248
+ "eval_runtime": 85.5526,
249
+ "eval_samples_per_second": 52.132,
250
+ "eval_steps_per_second": 13.033,
251
+ "step": 1100
252
+ },
253
+ {
254
+ "epoch": 0.10099679444956747,
255
+ "grad_norm": 0.48114562034606934,
256
+ "learning_rate": 4.873748463024767e-05,
257
+ "loss": 0.1242,
258
+ "step": 1150
259
+ },
260
+ {
261
+ "epoch": 0.10538795942563563,
262
+ "grad_norm": 0.7776908278465271,
263
+ "learning_rate": 4.868259265764974e-05,
264
+ "loss": 0.1237,
265
+ "step": 1200
266
+ },
267
+ {
268
+ "epoch": 0.10538795942563563,
269
+ "eval_loss": 0.11445864289999008,
270
+ "eval_runtime": 82.4683,
271
+ "eval_samples_per_second": 54.081,
272
+ "eval_steps_per_second": 13.52,
273
+ "step": 1200
274
+ },
275
+ {
276
+ "epoch": 0.10977912440170377,
277
+ "grad_norm": 0.34126266837120056,
278
+ "learning_rate": 4.8627700685051817e-05,
279
+ "loss": 0.1331,
280
+ "step": 1250
281
+ },
282
+ {
283
+ "epoch": 0.11417028937777192,
284
+ "grad_norm": 0.32315969467163086,
285
+ "learning_rate": 4.857280871245389e-05,
286
+ "loss": 0.1167,
287
+ "step": 1300
288
+ },
289
+ {
290
+ "epoch": 0.11417028937777192,
291
+ "eval_loss": 0.11452117562294006,
292
+ "eval_runtime": 82.4936,
293
+ "eval_samples_per_second": 54.065,
294
+ "eval_steps_per_second": 13.516,
295
+ "step": 1300
296
+ },
297
+ {
298
+ "epoch": 0.11856145435384008,
299
+ "grad_norm": 0.7266770005226135,
300
+ "learning_rate": 4.8517916739855967e-05,
301
+ "loss": 0.1183,
302
+ "step": 1350
303
+ },
304
+ {
305
+ "epoch": 0.12295261932990822,
306
+ "grad_norm": 0.4979361295700073,
307
+ "learning_rate": 4.8463024767258035e-05,
308
+ "loss": 0.126,
309
+ "step": 1400
310
+ },
311
+ {
312
+ "epoch": 0.12295261932990822,
313
+ "eval_loss": 0.11409644037485123,
314
+ "eval_runtime": 82.4146,
315
+ "eval_samples_per_second": 54.117,
316
+ "eval_steps_per_second": 13.529,
317
+ "step": 1400
318
+ },
319
+ {
320
+ "epoch": 0.12734378430597637,
321
+ "grad_norm": 0.45280951261520386,
322
+ "learning_rate": 4.840813279466011e-05,
323
+ "loss": 0.1152,
324
+ "step": 1450
325
+ },
326
+ {
327
+ "epoch": 0.13173494928204452,
328
+ "grad_norm": 0.9963550567626953,
329
+ "learning_rate": 4.8353240822062185e-05,
330
+ "loss": 0.1214,
331
+ "step": 1500
332
+ },
333
+ {
334
+ "epoch": 0.13173494928204452,
335
+ "eval_loss": 0.11325465887784958,
336
+ "eval_runtime": 82.4565,
337
+ "eval_samples_per_second": 54.089,
338
+ "eval_steps_per_second": 13.522,
339
+ "step": 1500
340
  }
341
  ],
342
  "logging_steps": 50,
 
356
  "attributes": {}
357
  }
358
  },
359
+ "total_flos": 3653747343360000.0,
360
  "train_batch_size": 4,
361
  "trial_name": null,
362
  "trial_params": null