DuongTrongChi commited on
Commit
ffbb7ce
·
verified ·
1 Parent(s): c23a197

Training in progress, step 775, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7d9a20236727441eb091076ba19df5e6a7d77f0b3c472ff2f8063013a8d51572
3
  size 100198584
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0943a41a553462c4d372d11f0db7e7ac74b82191589741e8cc65f7c9bf91ac59
3
  size 100198584
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b69be20e32e80b2656e56c5d7c6b43e62c713fe0c325ae422961c510d0003eb1
3
  size 50675604
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee780c3dfac7ae5d183bedef0b48a628ef423302dc70107b1ca832ddb1dcaca5
3
  size 50675604
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c630f82ab3254c93d60737e499149d30223810c5c2c5784792c66db1cac3db4
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7deeaf0e67bca4eb52e599acdab6e6d26eff4b93874f78b39bfb4318b5785931
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.8241426611796983,
5
  "eval_steps": 500,
6
- "global_step": 751,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -5264,6 +5264,174 @@
5264
  "learning_rate": 3.9457459926017264e-06,
5265
  "loss": 1.2137,
5266
  "step": 751
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5267
  }
5268
  ],
5269
  "logging_steps": 1,
@@ -5283,7 +5451,7 @@
5283
  "attributes": {}
5284
  }
5285
  },
5286
- "total_flos": 7.788006116401766e+17,
5287
  "train_batch_size": 4,
5288
  "trial_name": null,
5289
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.850480109739369,
5
  "eval_steps": 500,
6
+ "global_step": 775,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
5264
  "learning_rate": 3.9457459926017264e-06,
5265
  "loss": 1.2137,
5266
  "step": 751
5267
+ },
5268
+ {
5269
+ "epoch": 0.8252400548696845,
5270
+ "grad_norm": 0.14159835875034332,
5271
+ "learning_rate": 3.921085080147966e-06,
5272
+ "loss": 1.1786,
5273
+ "step": 752
5274
+ },
5275
+ {
5276
+ "epoch": 0.8263374485596707,
5277
+ "grad_norm": 0.13204412162303925,
5278
+ "learning_rate": 3.896424167694205e-06,
5279
+ "loss": 1.0827,
5280
+ "step": 753
5281
+ },
5282
+ {
5283
+ "epoch": 0.827434842249657,
5284
+ "grad_norm": 0.14544348418712616,
5285
+ "learning_rate": 3.871763255240444e-06,
5286
+ "loss": 1.2168,
5287
+ "step": 754
5288
+ },
5289
+ {
5290
+ "epoch": 0.8285322359396433,
5291
+ "grad_norm": 0.13837821781635284,
5292
+ "learning_rate": 3.847102342786683e-06,
5293
+ "loss": 1.1306,
5294
+ "step": 755
5295
+ },
5296
+ {
5297
+ "epoch": 0.8296296296296296,
5298
+ "grad_norm": 0.13542281091213226,
5299
+ "learning_rate": 3.822441430332923e-06,
5300
+ "loss": 1.065,
5301
+ "step": 756
5302
+ },
5303
+ {
5304
+ "epoch": 0.8307270233196159,
5305
+ "grad_norm": 0.1563270390033722,
5306
+ "learning_rate": 3.7977805178791616e-06,
5307
+ "loss": 1.0928,
5308
+ "step": 757
5309
+ },
5310
+ {
5311
+ "epoch": 0.8318244170096022,
5312
+ "grad_norm": 0.1355254054069519,
5313
+ "learning_rate": 3.773119605425401e-06,
5314
+ "loss": 1.1382,
5315
+ "step": 758
5316
+ },
5317
+ {
5318
+ "epoch": 0.8329218106995885,
5319
+ "grad_norm": 0.14081105589866638,
5320
+ "learning_rate": 3.7484586929716402e-06,
5321
+ "loss": 1.0408,
5322
+ "step": 759
5323
+ },
5324
+ {
5325
+ "epoch": 0.8340192043895748,
5326
+ "grad_norm": 0.14367350935935974,
5327
+ "learning_rate": 3.723797780517879e-06,
5328
+ "loss": 1.12,
5329
+ "step": 760
5330
+ },
5331
+ {
5332
+ "epoch": 0.8351165980795611,
5333
+ "grad_norm": 0.13955897092819214,
5334
+ "learning_rate": 3.699136868064119e-06,
5335
+ "loss": 1.1122,
5336
+ "step": 761
5337
+ },
5338
+ {
5339
+ "epoch": 0.8362139917695474,
5340
+ "grad_norm": 0.13528084754943848,
5341
+ "learning_rate": 3.674475955610358e-06,
5342
+ "loss": 1.1463,
5343
+ "step": 762
5344
+ },
5345
+ {
5346
+ "epoch": 0.8373113854595337,
5347
+ "grad_norm": 0.13060660660266876,
5348
+ "learning_rate": 3.649815043156597e-06,
5349
+ "loss": 1.088,
5350
+ "step": 763
5351
+ },
5352
+ {
5353
+ "epoch": 0.83840877914952,
5354
+ "grad_norm": 0.14304772019386292,
5355
+ "learning_rate": 3.6251541307028365e-06,
5356
+ "loss": 1.13,
5357
+ "step": 764
5358
+ },
5359
+ {
5360
+ "epoch": 0.8395061728395061,
5361
+ "grad_norm": 0.129106804728508,
5362
+ "learning_rate": 3.6004932182490754e-06,
5363
+ "loss": 1.0758,
5364
+ "step": 765
5365
+ },
5366
+ {
5367
+ "epoch": 0.8406035665294924,
5368
+ "grad_norm": 0.14966481924057007,
5369
+ "learning_rate": 3.5758323057953147e-06,
5370
+ "loss": 1.051,
5371
+ "step": 766
5372
+ },
5373
+ {
5374
+ "epoch": 0.8417009602194787,
5375
+ "grad_norm": 0.13731549680233002,
5376
+ "learning_rate": 3.5511713933415536e-06,
5377
+ "loss": 1.1467,
5378
+ "step": 767
5379
+ },
5380
+ {
5381
+ "epoch": 0.842798353909465,
5382
+ "grad_norm": 0.16249963641166687,
5383
+ "learning_rate": 3.526510480887793e-06,
5384
+ "loss": 1.1939,
5385
+ "step": 768
5386
+ },
5387
+ {
5388
+ "epoch": 0.8438957475994513,
5389
+ "grad_norm": 0.1546361893415451,
5390
+ "learning_rate": 3.5018495684340327e-06,
5391
+ "loss": 1.1762,
5392
+ "step": 769
5393
+ },
5394
+ {
5395
+ "epoch": 0.8449931412894376,
5396
+ "grad_norm": 0.1352168768644333,
5397
+ "learning_rate": 3.4771886559802716e-06,
5398
+ "loss": 1.1351,
5399
+ "step": 770
5400
+ },
5401
+ {
5402
+ "epoch": 0.8460905349794239,
5403
+ "grad_norm": 0.13795001804828644,
5404
+ "learning_rate": 3.452527743526511e-06,
5405
+ "loss": 1.0621,
5406
+ "step": 771
5407
+ },
5408
+ {
5409
+ "epoch": 0.8471879286694102,
5410
+ "grad_norm": 0.13399291038513184,
5411
+ "learning_rate": 3.42786683107275e-06,
5412
+ "loss": 1.1674,
5413
+ "step": 772
5414
+ },
5415
+ {
5416
+ "epoch": 0.8482853223593965,
5417
+ "grad_norm": 0.1293582171201706,
5418
+ "learning_rate": 3.403205918618989e-06,
5419
+ "loss": 1.1216,
5420
+ "step": 773
5421
+ },
5422
+ {
5423
+ "epoch": 0.8493827160493828,
5424
+ "grad_norm": 0.13657528162002563,
5425
+ "learning_rate": 3.3785450061652285e-06,
5426
+ "loss": 1.1037,
5427
+ "step": 774
5428
+ },
5429
+ {
5430
+ "epoch": 0.850480109739369,
5431
+ "grad_norm": 0.14344428479671478,
5432
+ "learning_rate": 3.3538840937114674e-06,
5433
+ "loss": 1.1299,
5434
+ "step": 775
5435
  }
5436
  ],
5437
  "logging_steps": 1,
 
5451
  "attributes": {}
5452
  }
5453
  },
5454
+ "total_flos": 8.037403550740316e+17,
5455
  "train_batch_size": 4,
5456
  "trial_name": null,
5457
  "trial_params": null