DuongTrongChi commited on
Commit
ee7230d
·
verified ·
1 Parent(s): b09155b

Training in progress, step 59, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:72ff1f1fc595ad8133f8b71ea00894b44d45ba33fe7f73a1870f3098a549c854
3
  size 100198584
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e986d849d397f1dfe71a0947a97c8f30985df37fcc73fade493d2d8d46b1cacb
3
  size 100198584
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:88f874e97b84685154a02975aaf603049cb990103be4e0b0c51158a10fe11b6e
3
  size 50675156
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9beab69120b8470ba5993d628bb2552a1d7a60b206e9b9165ea3374b579b5c86
3
  size 50675156
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a1353723be4c6fe076940e8befb2fc2a9d06d490d30020d9bfe5043cd2525797
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8da99b8ae5f5d39bfda5424154d9614466b3a6382198e7aa65c1c031599a0314
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.06720847411195324,
5
  "eval_steps": 500,
6
- "global_step": 46,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -329,6 +329,97 @@
329
  "learning_rate": 9.200000000000002e-06,
330
  "loss": 1.9289,
331
  "step": 46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
332
  }
333
  ],
334
  "logging_steps": 1,
@@ -348,7 +439,7 @@
348
  "attributes": {}
349
  }
350
  },
351
- "total_flos": 5.114263583383142e+16,
352
  "train_batch_size": 4,
353
  "trial_name": null,
354
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.08620217331750525,
5
  "eval_steps": 500,
6
+ "global_step": 59,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
329
  "learning_rate": 9.200000000000002e-06,
330
  "loss": 1.9289,
331
  "step": 46
332
+ },
333
+ {
334
+ "epoch": 0.06866952789699571,
335
+ "grad_norm": 0.24804741144180298,
336
+ "learning_rate": 9.4e-06,
337
+ "loss": 1.9128,
338
+ "step": 47
339
+ },
340
+ {
341
+ "epoch": 0.07013058168203817,
342
+ "grad_norm": 0.2629912197589874,
343
+ "learning_rate": 9.600000000000001e-06,
344
+ "loss": 1.8998,
345
+ "step": 48
346
+ },
347
+ {
348
+ "epoch": 0.07159163546708064,
349
+ "grad_norm": 0.22671160101890564,
350
+ "learning_rate": 9.800000000000001e-06,
351
+ "loss": 1.8634,
352
+ "step": 49
353
+ },
354
+ {
355
+ "epoch": 0.07305268925212309,
356
+ "grad_norm": 0.22378858923912048,
357
+ "learning_rate": 1e-05,
358
+ "loss": 1.9038,
359
+ "step": 50
360
+ },
361
+ {
362
+ "epoch": 0.07451374303716556,
363
+ "grad_norm": 0.25769534707069397,
364
+ "learning_rate": 1.02e-05,
365
+ "loss": 1.8324,
366
+ "step": 51
367
+ },
368
+ {
369
+ "epoch": 0.07597479682220802,
370
+ "grad_norm": 0.22694693505764008,
371
+ "learning_rate": 1.04e-05,
372
+ "loss": 1.8374,
373
+ "step": 52
374
+ },
375
+ {
376
+ "epoch": 0.07743585060725047,
377
+ "grad_norm": 0.23865583539009094,
378
+ "learning_rate": 1.0600000000000002e-05,
379
+ "loss": 1.9129,
380
+ "step": 53
381
+ },
382
+ {
383
+ "epoch": 0.07889690439229294,
384
+ "grad_norm": 0.23314256966114044,
385
+ "learning_rate": 1.0800000000000002e-05,
386
+ "loss": 1.8512,
387
+ "step": 54
388
+ },
389
+ {
390
+ "epoch": 0.0803579581773354,
391
+ "grad_norm": 0.2111833393573761,
392
+ "learning_rate": 1.1000000000000001e-05,
393
+ "loss": 1.8608,
394
+ "step": 55
395
+ },
396
+ {
397
+ "epoch": 0.08181901196237787,
398
+ "grad_norm": 0.24742205440998077,
399
+ "learning_rate": 1.1200000000000001e-05,
400
+ "loss": 1.9129,
401
+ "step": 56
402
+ },
403
+ {
404
+ "epoch": 0.08328006574742032,
405
+ "grad_norm": 0.2268109768629074,
406
+ "learning_rate": 1.14e-05,
407
+ "loss": 1.7996,
408
+ "step": 57
409
+ },
410
+ {
411
+ "epoch": 0.08474111953246279,
412
+ "grad_norm": 0.24065515398979187,
413
+ "learning_rate": 1.16e-05,
414
+ "loss": 1.9203,
415
+ "step": 58
416
+ },
417
+ {
418
+ "epoch": 0.08620217331750525,
419
+ "grad_norm": 0.23937389254570007,
420
+ "learning_rate": 1.18e-05,
421
+ "loss": 1.8845,
422
+ "step": 59
423
  }
424
  ],
425
  "logging_steps": 1,
 
439
  "attributes": {}
440
  }
441
  },
442
+ "total_flos": 6.568072173368525e+16,
443
  "train_batch_size": 4,
444
  "trial_name": null,
445
  "trial_params": null