mgh6 commited on
Commit
87c0f4b
1 Parent(s): af7b620

Training in progress, step 9400, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d3f3a1db5589596622438f3305091bec4d8eac0af6ce2a5fb67fdb2fabd6a46c
3
  size 136000488
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:182ee968b6fdeec8216ae2242608aac4cf00a82309a22f2bc546f245f6a30f5b
3
  size 136000488
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4d2b73126a971289ab203697cf1e1236e8169aa0b895581f652f1cc563a281b4
3
  size 268176506
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dfc3984044cbf3ce86e00b28e3e8d7a9ea91edc27a0b44779f188f12efd55185
3
  size 268176506
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bda55b185dc98f3bbacd230aaacff604bd97a164ddf9fc1b48c814e202c2b868
3
  size 15006
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56f87b775049fb0adab4e0d540aff9b9f075c23a8d207a780cdfad0536093ab3
3
  size 15006
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b7ad8d31b03afdc3955f524b574e208e67f3c465ce1d021ebb4d8b9cc6f6fd1a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a16798b06a013ad4b7ec3ca11219408d900e5c425fe7c3d917c437397043544f
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 1.0438764095306396,
3
  "best_model_checkpoint": "mgh6/TCS_MLM_50/checkpoint-8900",
4
- "epoch": 2.699564586357039,
5
  "eval_steps": 100,
6
- "global_step": 9300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1402,6 +1402,21 @@
1402
  "eval_samples_per_second": 214.017,
1403
  "eval_steps_per_second": 3.344,
1404
  "step": 9300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1405
  }
1406
  ],
1407
  "logging_steps": 100,
@@ -1416,7 +1431,7 @@
1416
  "early_stopping_threshold": 0.0
1417
  },
1418
  "attributes": {
1419
- "early_stopping_patience_counter": 4
1420
  }
1421
  },
1422
  "TrainerControl": {
@@ -1425,12 +1440,12 @@
1425
  "should_evaluate": false,
1426
  "should_log": false,
1427
  "should_save": true,
1428
- "should_training_stop": false
1429
  },
1430
  "attributes": {}
1431
  }
1432
  },
1433
- "total_flos": 9.30337028749394e+16,
1434
  "train_batch_size": 64,
1435
  "trial_name": null,
1436
  "trial_params": null
 
1
  {
2
  "best_metric": 1.0438764095306396,
3
  "best_model_checkpoint": "mgh6/TCS_MLM_50/checkpoint-8900",
4
+ "epoch": 2.7285921625544267,
5
  "eval_steps": 100,
6
+ "global_step": 9400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1402
  "eval_samples_per_second": 214.017,
1403
  "eval_steps_per_second": 3.344,
1404
  "step": 9300
1405
+ },
1406
+ {
1407
+ "epoch": 2.7285921625544267,
1408
+ "grad_norm": 1.2580476999282837,
1409
+ "learning_rate": 7.271407837445574e-05,
1410
+ "loss": 1.9873,
1411
+ "step": 9400
1412
+ },
1413
+ {
1414
+ "epoch": 2.7285921625544267,
1415
+ "eval_loss": 1.0441796779632568,
1416
+ "eval_runtime": 213.1744,
1417
+ "eval_samples_per_second": 213.436,
1418
+ "eval_steps_per_second": 3.335,
1419
+ "step": 9400
1420
  }
1421
  ],
1422
  "logging_steps": 100,
 
1431
  "early_stopping_threshold": 0.0
1432
  },
1433
  "attributes": {
1434
+ "early_stopping_patience_counter": 5
1435
  }
1436
  },
1437
  "TrainerControl": {
 
1440
  "should_evaluate": false,
1441
  "should_log": false,
1442
  "should_save": true,
1443
+ "should_training_stop": true
1444
  },
1445
  "attributes": {}
1446
  }
1447
  },
1448
+ "total_flos": 9.403409048272896e+16,
1449
  "train_batch_size": 64,
1450
  "trial_name": null,
1451
  "trial_params": null