DuongTrongChi committed on
Commit
849cfb9
·
verified ·
1 Parent(s): 133aee0

Training in progress, step 210, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9653531852ec6119db0beb46f48bfa17c88968f05b0e674dc503a553f84fe4e3
3
  size 100198584
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:473f4b3ff0b219aa3d4708c39fab335413427789124d1cc01e586e727315cff4
3
  size 100198584
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:80dafb883e7ca8362cc0eba677e7edbf34ec8bc9e8592483d28a1fed39eb72cd
3
  size 50675156
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ec22a1dfeb40ec71635f49ced12c4198b8107b4b216dad54278bd643cdb721c
3
  size 50675156
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fcbc2526bc157d1d1697d4e94eb6c17525855f6b21b0575b373b92dfaeff6f39
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7650503afdff505462325f6f430e903bb44a778f9119236884da4fcf462c9e5
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.30097707971874715,
5
  "eval_steps": 500,
6
- "global_step": 206,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1449,6 +1449,34 @@
1449
  "learning_rate": 1.6369863013698633e-05,
1450
  "loss": 1.2073,
1451
  "step": 206
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1452
  }
1453
  ],
1454
  "logging_steps": 1,
@@ -1468,7 +1496,7 @@
1468
  "attributes": {}
1469
  }
1470
  },
1471
- "total_flos": 2.3075689748657357e+17,
1472
  "train_batch_size": 4,
1473
  "trial_name": null,
1474
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.306821294858917,
5
  "eval_steps": 500,
6
+ "global_step": 210,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1449
  "learning_rate": 1.6369863013698633e-05,
1450
  "loss": 1.2073,
1451
  "step": 206
1452
+ },
1453
+ {
1454
+ "epoch": 0.3024381335037896,
1455
+ "grad_norm": 0.11093982309103012,
1456
+ "learning_rate": 1.6335616438356166e-05,
1457
+ "loss": 1.3644,
1458
+ "step": 207
1459
+ },
1460
+ {
1461
+ "epoch": 0.3038991872888321,
1462
+ "grad_norm": 0.11352576315402985,
1463
+ "learning_rate": 1.6301369863013702e-05,
1464
+ "loss": 1.2034,
1465
+ "step": 208
1466
+ },
1467
+ {
1468
+ "epoch": 0.30536024107387455,
1469
+ "grad_norm": 0.1127958819270134,
1470
+ "learning_rate": 1.6267123287671232e-05,
1471
+ "loss": 1.241,
1472
+ "step": 209
1473
+ },
1474
+ {
1475
+ "epoch": 0.306821294858917,
1476
+ "grad_norm": 0.10320553183555603,
1477
+ "learning_rate": 1.623287671232877e-05,
1478
+ "loss": 1.2169,
1479
+ "step": 210
1480
  }
1481
  ],
1482
  "logging_steps": 1,
 
1496
  "attributes": {}
1497
  }
1498
  },
1499
+ "total_flos": 2.3556959221775155e+17,
1500
  "train_batch_size": 4,
1501
  "trial_name": null,
1502
  "trial_params": null