Training in progress, step 200000

Browse files

Files changed (14) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/rng_state_4.pth +1 -1
last-checkpoint/rng_state_5.pth +1 -1
last-checkpoint/rng_state_6.pth +1 -1
last-checkpoint/rng_state_7.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +311 -3
pytorch_model.bin +1 -1
runs/Feb27_19-46-56_t1v-n-9f780742-w-0/events.out.tfevents.1677527238.t1v-n-9f780742-w-0.2687388.0 +2 -2

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c485e39025b832d2bd3bd464214f232ea6b3f0a5283d0f592f5b95f7afffe93d
 size 885325017

 version https://git-lfs.github.com/spec/v1
+oid sha256:e773abecfb13a8f9a0852369f65ccd735098a6dfc36d74534ad4bb757a9d292f
 size 885325017

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6bc0d6bce669ef3b9a37ef6ee5196e838e490074197c122478da0bc35a3e8702
 size 442675755

 version https://git-lfs.github.com/spec/v1
+oid sha256:25258238e9fa265ee3a03976d6131e2541f88483f4afcd664bf14d1bbf13aa9f
 size 442675755

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:abe2c64113e644318b19b5274e931fbc3240ba8d862c1c7d94b717c6d9403577
 size 13611

 version https://git-lfs.github.com/spec/v1
+oid sha256:9383fb5b7e069e5b15763d58074797a01cbe2f9f7dff29b6d4b4871a86204fc4
 size 13611

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:abe2c64113e644318b19b5274e931fbc3240ba8d862c1c7d94b717c6d9403577
 size 13611

 version https://git-lfs.github.com/spec/v1
+oid sha256:9383fb5b7e069e5b15763d58074797a01cbe2f9f7dff29b6d4b4871a86204fc4
 size 13611

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:abe2c64113e644318b19b5274e931fbc3240ba8d862c1c7d94b717c6d9403577
 size 13611

 version https://git-lfs.github.com/spec/v1
+oid sha256:9383fb5b7e069e5b15763d58074797a01cbe2f9f7dff29b6d4b4871a86204fc4
 size 13611

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:abe2c64113e644318b19b5274e931fbc3240ba8d862c1c7d94b717c6d9403577
 size 13611

 version https://git-lfs.github.com/spec/v1
+oid sha256:9383fb5b7e069e5b15763d58074797a01cbe2f9f7dff29b6d4b4871a86204fc4
 size 13611

last-checkpoint/rng_state_4.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:abe2c64113e644318b19b5274e931fbc3240ba8d862c1c7d94b717c6d9403577
 size 13611

 version https://git-lfs.github.com/spec/v1
+oid sha256:9383fb5b7e069e5b15763d58074797a01cbe2f9f7dff29b6d4b4871a86204fc4
 size 13611

last-checkpoint/rng_state_5.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:abe2c64113e644318b19b5274e931fbc3240ba8d862c1c7d94b717c6d9403577
 size 13611

 version https://git-lfs.github.com/spec/v1
+oid sha256:9383fb5b7e069e5b15763d58074797a01cbe2f9f7dff29b6d4b4871a86204fc4
 size 13611

last-checkpoint/rng_state_6.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:abe2c64113e644318b19b5274e931fbc3240ba8d862c1c7d94b717c6d9403577
 size 13611

 version https://git-lfs.github.com/spec/v1
+oid sha256:9383fb5b7e069e5b15763d58074797a01cbe2f9f7dff29b6d4b4871a86204fc4
 size 13611

last-checkpoint/rng_state_7.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:abe2c64113e644318b19b5274e931fbc3240ba8d862c1c7d94b717c6d9403577
 size 13611

 version https://git-lfs.github.com/spec/v1
+oid sha256:9383fb5b7e069e5b15763d58074797a01cbe2f9f7dff29b6d4b4871a86204fc4
 size 13611

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:04ad030a150fa0d8eb5e5920300951e9645dc85319159f9dd4f177aff4c5b722
 size 623

 version https://git-lfs.github.com/spec/v1
+oid sha256:be1ccf49f4804619cd7d22b74b595a694a368e629a10492b4089d6536d07bdf2
 size 623

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.15,
- "global_step": 150000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -930,11 +930,319 @@
  "eval_samples_per_second": 179.307,
  "eval_steps_per_second": 1.434,
  "step": 150000
  }
  ],
  "max_steps": 1000000,
  "num_train_epochs": 9223372036854775807,
- "total_flos": 2.5268097908736e+18,
  "trial_name": null,
  "trial_params": null
 }

 {
  "best_metric": null,
  "best_model_checkpoint": null,
+ "epoch": 0.2,
+ "global_step": 200000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "eval_samples_per_second": 179.307,
  "eval_steps_per_second": 1.434,
  "step": 150000
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": 9.723691552302562e-05,
+ "loss": 1.0169,
+ "step": 151000
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": 9.718245238567939e-05,
+ "loss": 1.0745,
+ "step": 152000
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": 9.712747326859315e-05,
+ "loss": 1.0231,
+ "step": 153000
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": 9.707197877300974e-05,
+ "loss": 0.9675,
+ "step": 154000
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": 9.701596950580806e-05,
+ "loss": 1.0511,
+ "step": 155000
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": 9.695944607949649e-05,
+ "loss": 0.9275,
+ "step": 156000
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": 9.690240911220618e-05,
+ "loss": 0.9136,
+ "step": 157000
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": 9.684485922768422e-05,
+ "loss": 1.0036,
+ "step": 158000
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": 9.6786797055287e-05,
+ "loss": 0.8431,
+ "step": 159000
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": 9.672822322997305e-05,
+ "loss": 0.9113,
+ "step": 160000
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": 9.66691383922964e-05,
+ "loss": 0.8725,
+ "step": 161000
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": 9.660954318839933e-05,
+ "loss": 0.7145,
+ "step": 162000
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": 9.654943827000548e-05,
+ "loss": 0.8835,
+ "step": 163000
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": 9.648882429441257e-05,
+ "loss": 0.8595,
+ "step": 164000
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 9.642770192448536e-05,
+ "loss": 0.9792,
+ "step": 165000
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 9.636607182864827e-05,
+ "loss": 0.9287,
+ "step": 166000
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 9.630393468087818e-05,
+ "loss": 0.974,
+ "step": 167000
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 9.624129116069694e-05,
+ "loss": 1.0263,
+ "step": 168000
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 9.617814195316411e-05,
+ "loss": 0.9184,
+ "step": 169000
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 9.611448774886924e-05,
+ "loss": 0.9548,
+ "step": 170000
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 9.605032924392457e-05,
+ "loss": 0.9853,
+ "step": 171000
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 9.598566713995718e-05,
+ "loss": 1.0607,
+ "step": 172000
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 9.59205021441015e-05,
+ "loss": 0.925,
+ "step": 173000
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 9.58548349689915e-05,
+ "loss": 0.9668,
+ "step": 174000
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 9.578866633275288e-05,
+ "loss": 0.9848,
+ "step": 175000
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 9.572199695899522e-05,
+ "loss": 0.8986,
+ "step": 176000
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 9.565482757680415e-05,
+ "loss": 0.9121,
+ "step": 177000
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 9.558715892073323e-05,
+ "loss": 0.9679,
+ "step": 178000
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 9.551899173079607e-05,
+ "loss": 0.985,
+ "step": 179000
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 9.545032675245813e-05,
+ "loss": 1.0038,
+ "step": 180000
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 9.538116473662861e-05,
+ "loss": 0.9433,
+ "step": 181000
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 9.531150643965223e-05,
+ "loss": 0.8326,
+ "step": 182000
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 9.524135262330098e-05,
+ "loss": 0.9301,
+ "step": 183000
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 9.517070405476575e-05,
+ "loss": 0.8511,
+ "step": 184000
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 9.509956150664796e-05,
+ "loss": 0.9257,
+ "step": 185000
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": 9.502792575695112e-05,
+ "loss": 0.9687,
+ "step": 186000
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": 9.49557975890723e-05,
+ "loss": 0.9373,
+ "step": 187000
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": 9.488317779179361e-05,
+ "loss": 0.9251,
+ "step": 188000
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": 9.481006715927351e-05,
+ "loss": 0.9134,
+ "step": 189000
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": 9.473646649103818e-05,
+ "loss": 0.9581,
+ "step": 190000
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": 9.46623765919727e-05,
+ "loss": 0.7549,
+ "step": 191000
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": 9.458779827231237e-05,
+ "loss": 0.7225,
+ "step": 192000
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": 9.451273234763371e-05,
+ "loss": 0.839,
+ "step": 193000
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": 9.443717963884569e-05,
+ "loss": 1.1109,
+ "step": 194000
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": 9.43611409721806e-05,
+ "loss": 0.916,
+ "step": 195000
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": 9.428461717918511e-05,
+ "loss": 0.8708,
+ "step": 196000
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": 9.420760909671118e-05,
+ "loss": 1.0268,
+ "step": 197000
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": 9.413011756690685e-05,
+ "loss": 0.922,
+ "step": 198000
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": 9.405214343720707e-05,
+ "loss": 0.8987,
+ "step": 199000
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": 9.397368756032445e-05,
+ "loss": 0.896,
+ "step": 200000
+ },
+ {
+ "epoch": 0.2,
+ "eval_loss": 0.7616699934005737,
+ "eval_runtime": 18.6652,
+ "eval_samples_per_second": 267.878,
+ "eval_steps_per_second": 2.143,
+ "step": 200000
  }
  ],
  "max_steps": 1000000,
  "num_train_epochs": 9223372036854775807,
+ "total_flos": 3.3690797211648e+18,
  "trial_name": null,
  "trial_params": null
 }

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6bc0d6bce669ef3b9a37ef6ee5196e838e490074197c122478da0bc35a3e8702
 size 442675755

 version https://git-lfs.github.com/spec/v1
+oid sha256:25258238e9fa265ee3a03976d6131e2541f88483f4afcd664bf14d1bbf13aa9f
 size 442675755

runs/Feb27_19-46-56_t1v-n-9f780742-w-0/events.out.tfevents.1677527238.t1v-n-9f780742-w-0.2687388.0 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:53cdd5d73ce64e8fdcfd9d8faae04f0a4434a994fe919e51e7a8311600e68ab6
-size 12107

 version https://git-lfs.github.com/spec/v1
+oid sha256:a1ffc25676510b638a1505e248374c3d96445c6dc7d721b988f33fdb4e0680e8
+size 20383