Training in progress, step 250000

Browse files

Files changed (14) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/rng_state_4.pth +1 -1
last-checkpoint/rng_state_5.pth +1 -1
last-checkpoint/rng_state_6.pth +1 -1
last-checkpoint/rng_state_7.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +311 -3
pytorch_model.bin +1 -1
runs/Feb27_19-46-56_t1v-n-9f780742-w-0/events.out.tfevents.1677527238.t1v-n-9f780742-w-0.2687388.0 +2 -2

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e773abecfb13a8f9a0852369f65ccd735098a6dfc36d74534ad4bb757a9d292f
 size 885325017

 version https://git-lfs.github.com/spec/v1
+oid sha256:b0cd0db7833e5203216a204bffbff029d0c4d21d0704618435315741d953a97a
 size 885325017

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:25258238e9fa265ee3a03976d6131e2541f88483f4afcd664bf14d1bbf13aa9f
 size 442675755

 version https://git-lfs.github.com/spec/v1
+oid sha256:88d0fe187e6c4f9ad7949efb59a45e3b2900be07352fc0f3f3375418a489b0d4
 size 442675755

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9383fb5b7e069e5b15763d58074797a01cbe2f9f7dff29b6d4b4871a86204fc4
 size 13611

 version https://git-lfs.github.com/spec/v1
+oid sha256:f023c409037d9e35d3d0dd03facc810666d4e4d6430bfbe5558ec5fb736a5059
 size 13611

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9383fb5b7e069e5b15763d58074797a01cbe2f9f7dff29b6d4b4871a86204fc4
 size 13611

 version https://git-lfs.github.com/spec/v1
+oid sha256:f023c409037d9e35d3d0dd03facc810666d4e4d6430bfbe5558ec5fb736a5059
 size 13611

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9383fb5b7e069e5b15763d58074797a01cbe2f9f7dff29b6d4b4871a86204fc4
 size 13611

 version https://git-lfs.github.com/spec/v1
+oid sha256:f023c409037d9e35d3d0dd03facc810666d4e4d6430bfbe5558ec5fb736a5059
 size 13611

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9383fb5b7e069e5b15763d58074797a01cbe2f9f7dff29b6d4b4871a86204fc4
 size 13611

 version https://git-lfs.github.com/spec/v1
+oid sha256:f023c409037d9e35d3d0dd03facc810666d4e4d6430bfbe5558ec5fb736a5059
 size 13611

last-checkpoint/rng_state_4.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9383fb5b7e069e5b15763d58074797a01cbe2f9f7dff29b6d4b4871a86204fc4
 size 13611

 version https://git-lfs.github.com/spec/v1
+oid sha256:f023c409037d9e35d3d0dd03facc810666d4e4d6430bfbe5558ec5fb736a5059
 size 13611

last-checkpoint/rng_state_5.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9383fb5b7e069e5b15763d58074797a01cbe2f9f7dff29b6d4b4871a86204fc4
 size 13611

 version https://git-lfs.github.com/spec/v1
+oid sha256:f023c409037d9e35d3d0dd03facc810666d4e4d6430bfbe5558ec5fb736a5059
 size 13611

last-checkpoint/rng_state_6.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9383fb5b7e069e5b15763d58074797a01cbe2f9f7dff29b6d4b4871a86204fc4
 size 13611

 version https://git-lfs.github.com/spec/v1
+oid sha256:f023c409037d9e35d3d0dd03facc810666d4e4d6430bfbe5558ec5fb736a5059
 size 13611

last-checkpoint/rng_state_7.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9383fb5b7e069e5b15763d58074797a01cbe2f9f7dff29b6d4b4871a86204fc4
 size 13611

 version https://git-lfs.github.com/spec/v1
+oid sha256:f023c409037d9e35d3d0dd03facc810666d4e4d6430bfbe5558ec5fb736a5059
 size 13611

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:be1ccf49f4804619cd7d22b74b595a694a368e629a10492b4089d6536d07bdf2
 size 623

 version https://git-lfs.github.com/spec/v1
+oid sha256:7924e9d3f9ed054868d3ddaa60025f26707d231e7eacc5684e8550acfee9e9c0
 size 623

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.2,
- "global_step": 200000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -1238,11 +1238,319 @@
  "eval_samples_per_second": 267.878,
  "eval_steps_per_second": 2.143,
  "step": 200000
  }
  ],
  "max_steps": 1000000,
  "num_train_epochs": 9223372036854775807,
- "total_flos": 3.3690797211648e+18,
  "trial_name": null,
  "trial_params": null
 }

 {
  "best_metric": null,
  "best_model_checkpoint": null,
+ "epoch": 0.25,
+ "global_step": 250000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "eval_samples_per_second": 267.878,
  "eval_steps_per_second": 2.143,
  "step": 200000
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": 9.389475079423988e-05,
+ "loss": 0.8859,
+ "step": 201000
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": 9.381533400219318e-05,
+ "loss": 0.9032,
+ "step": 202000
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": 9.373543805267368e-05,
+ "loss": 0.8627,
+ "step": 203000
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": 9.365506381941066e-05,
+ "loss": 0.8528,
+ "step": 204000
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": 9.357421218136386e-05,
+ "loss": 0.9027,
+ "step": 205000
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": 9.349288402271388e-05,
+ "loss": 0.7006,
+ "step": 206000
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": 9.341108023285238e-05,
+ "loss": 0.8853,
+ "step": 207000
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": 9.332880170637252e-05,
+ "loss": 0.885,
+ "step": 208000
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": 9.32460493430591e-05,
+ "loss": 0.9231,
+ "step": 209000
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": 9.316282404787871e-05,
+ "loss": 1.0182,
+ "step": 210000
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": 9.30791267309698e-05,
+ "loss": 1.0948,
+ "step": 211000
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": 9.299495830763286e-05,
+ "loss": 1.0267,
+ "step": 212000
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": 9.291031969832026e-05,
+ "loss": 0.9211,
+ "step": 213000
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": 9.282521182862629e-05,
+ "loss": 1.0041,
+ "step": 214000
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": 9.273963562927695e-05,
+ "loss": 0.8605,
+ "step": 215000
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": 9.265359203611987e-05,
+ "loss": 0.9107,
+ "step": 216000
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": 9.256708199011401e-05,
+ "loss": 0.9696,
+ "step": 217000
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": 9.248010643731935e-05,
+ "loss": 0.8412,
+ "step": 218000
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": 9.239266632888659e-05,
+ "loss": 0.896,
+ "step": 219000
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": 9.230476262104677e-05,
+ "loss": 0.866,
+ "step": 220000
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": 9.221639627510076e-05,
+ "loss": 0.8752,
+ "step": 221000
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": 9.212756825740873e-05,
+ "loss": 0.8538,
+ "step": 222000
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": 9.20382795393797e-05,
+ "loss": 0.9473,
+ "step": 223000
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": 9.194853109746074e-05,
+ "loss": 0.9972,
+ "step": 224000
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": 9.185832391312644e-05,
+ "loss": 0.9866,
+ "step": 225000
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": 9.176765897286813e-05,
+ "loss": 0.8537,
+ "step": 226000
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": 9.167653726818305e-05,
+ "loss": 0.908,
+ "step": 227000
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": 9.158495979556358e-05,
+ "loss": 0.8834,
+ "step": 228000
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": 9.14929275564863e-05,
+ "loss": 0.9218,
+ "step": 229000
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": 9.140044155740101e-05,
+ "loss": 1.0485,
+ "step": 230000
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": 9.130750280971978e-05,
+ "loss": 1.0703,
+ "step": 231000
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": 9.121411232980588e-05,
+ "loss": 0.924,
+ "step": 232000
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": 9.112027113896262e-05,
+ "loss": 0.8991,
+ "step": 233000
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": 9.102598026342222e-05,
+ "loss": 1.0302,
+ "step": 234000
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": 9.093124073433463e-05,
+ "loss": 0.6593,
+ "step": 235000
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": 9.083605358775612e-05,
+ "loss": 0.6215,
+ "step": 236000
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": 9.074041986463808e-05,
+ "loss": 0.7131,
+ "step": 237000
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": 9.064434061081562e-05,
+ "loss": 0.9729,
+ "step": 238000
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": 9.0547816876996e-05,
+ "loss": 0.8938,
+ "step": 239000
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": 9.045084971874738e-05,
+ "loss": 0.8624,
+ "step": 240000
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": 9.035344019648702e-05,
+ "loss": 0.9094,
+ "step": 241000
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": 9.025558937546988e-05,
+ "loss": 0.9128,
+ "step": 242000
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": 9.015729832577681e-05,
+ "loss": 0.8819,
+ "step": 243000
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": 9.005856812230304e-05,
+ "loss": 0.8803,
+ "step": 244000
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": 8.995939984474624e-05,
+ "loss": 0.8916,
+ "step": 245000
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": 8.98597945775948e-05,
+ "loss": 0.8937,
+ "step": 246000
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": 8.975975341011596e-05,
+ "loss": 0.9264,
+ "step": 247000
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": 8.965927743634391e-05,
+ "loss": 0.8509,
+ "step": 248000
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": 8.955836775506776e-05,
+ "loss": 0.9534,
+ "step": 249000
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": 8.945702546981969e-05,
+ "loss": 0.8742,
+ "step": 250000
+ },
+ {
+ "epoch": 0.25,
+ "eval_loss": 0.7655419111251831,
+ "eval_runtime": 18.5521,
+ "eval_samples_per_second": 269.511,
+ "eval_steps_per_second": 2.156,
+ "step": 250000
  }
  ],
  "max_steps": 1000000,
  "num_train_epochs": 9223372036854775807,
+ "total_flos": 4.211349651456e+18,
  "trial_name": null,
  "trial_params": null
 }

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:25258238e9fa265ee3a03976d6131e2541f88483f4afcd664bf14d1bbf13aa9f
 size 442675755

 version https://git-lfs.github.com/spec/v1
+oid sha256:88d0fe187e6c4f9ad7949efb59a45e3b2900be07352fc0f3f3375418a489b0d4
 size 442675755

runs/Feb27_19-46-56_t1v-n-9f780742-w-0/events.out.tfevents.1677527238.t1v-n-9f780742-w-0.2687388.0 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a1ffc25676510b638a1505e248374c3d96445c6dc7d721b988f33fdb4e0680e8
-size 20383

 version https://git-lfs.github.com/spec/v1
+oid sha256:b7c7e38822eddecbccb4cfb0f0637650381b1fc8571e45842529478399d4be84
+size 28659