4500 checkpoint

Files changed (6) hide show

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0c9232f79a0a4129ebc9cc6ee0f21bb7a4d715eae675915015fd43d75811ea58
 size 21210715097

 version https://git-lfs.github.com/spec/v1
+oid sha256:199c3170fd3a41772a9babb0927700ffddd6d1c8ac5b7aaec62f8e6d0110efb0
 size 21210715097

pytorch_model-00001-of-00002.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4b3977a25a38d65c8381c63b8d549e59b819ec5bec1ed892867ce834555f874b
 size 9996970517

 version https://git-lfs.github.com/spec/v1
+oid sha256:3e22aada20aea4b7a32f71bdf574f6a942a6fd2f64107e76eee64bc0c51124e1
 size 9996970517

pytorch_model-00002-of-00002.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:547fc177de3ba4ba268864c8128f9b4e05b1196e8a8bdaa44b45d6450e33611f
 size 742637631

 version https://git-lfs.github.com/spec/v1
+oid sha256:4b835b661b4b00d3aca1275c295567e4a1beca1a2f2d5b628e35eb278d35ace6
 size 742637631

rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9fe4f7564f56a1ffa25a5c606d62664ceee4525fae7d67da25c48a37122eeeaf
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:f6430e5ad1d877b2cd1749d223eec11a131930ad358c4a7dfa9f4e6830aa494e
 size 14503

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a8c8325f2941fb440a70e076145fff814af3bb9cb14c7d849f105ea262a5d6cf
 size 623

 version https://git-lfs.github.com/spec/v1
+oid sha256:ed56d7b3709a7b50b7e2ab3a6a14bdf4c03d66f131c7f3dfccdf9e4a98638acb
 size 623

trainer_state.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.7115339655820856,
- "global_step": 3500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -48,11 +48,23 @@
  "learning_rate": 4.110410736071574e-06,
  "loss": 2.977,
  "step": 3500
  }
  ],
  "max_steps": 19672,
  "num_train_epochs": 4,
- "total_flos": 2.1653725642752e+18,
  "trial_name": null,
  "trial_params": null
 }

 {
  "best_metric": null,
  "best_model_checkpoint": null,
+ "epoch": 0.9148293843198243,
+ "global_step": 4500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "learning_rate": 4.110410736071574e-06,
  "loss": 2.977,
  "step": 3500
+ },
+ {
+ "epoch": 0.81,
+ "learning_rate": 3.983326555510371e-06,
+ "loss": 2.9616,
+ "step": 4000
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": 3.856242374949167e-06,
+ "loss": 2.9615,
+ "step": 4500
  }
  ],
  "max_steps": 19672,
  "num_train_epochs": 4,
+ "total_flos": 2.7840504397824e+18,
  "trial_name": null,
  "trial_params": null
 }