Training in progress, step 50, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +189 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:15b86de1960b35914ca18fa1ae9c94d562980a3482c8a802053fd4b48a372ec9
 size 1204200

 version https://git-lfs.github.com/spec/v1
+oid sha256:57f065af914537e9562b3c7dcb9e0c4d2d2b2ff224b1d7dc30670fa23719580e
 size 1204200

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e1aad720182bef7d25fe0760a80f3dcc46f61dc979ea02ee0d4cbe1bba30c285
 size 2558458

 version https://git-lfs.github.com/spec/v1
+oid sha256:95f463a0f3c647f2b908d6cc34da03a6d22e6b58450f335b0a26216fb9211465
 size 2558458

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:400afcb90d0d487674bdf5339bf3c1cac4e66ca8f5dfe68cbf313a7e7b36bd5b
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:d80e9758ea74f6fb68f3f767cb4bb31b7b2aef75ef4b7687a17843caf12b4c9c
 size 15024

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:46954f978aa170700961f608f684dfbf8c897f8768cbf9c1abf4d48be4010be3
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:721e7b927664323c5307042e6caaca6d7b681cdf1c77d3b81408fcce21f043af
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7acb2235905b36a8fee9ea18c471f56a8f9853e4dc6820bdb495db703cf3a6e2
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:b538d69cd0d196c242fcbe4e03f801b0018555f7e7f116e9b12b76fc0146864e
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:333242732726263de48b8985648db03fe5385750fc500a5bce4c4d8d569e683e
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:eaf6e58fd4c52fe0ab5a0530570e39b66ab9c018711e8b80b6e7b7d9418e460e
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d271cdb95f63cd655315f063ca2e25c78dc5ae4275523c5d4f80f367586b3351
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:5607f6de446164d9d9adb8b91c44cec55b14aa391e24ba5637c08b834eedda2a
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 11.89288330078125,
-  "best_model_checkpoint": "miner_id_24/checkpoint-25",
-  "epoch": 0.25094102885821834,
   "eval_steps": 25,
-  "global_step": 25,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -198,6 +198,189 @@
       "eval_samples_per_second": 132.004,
       "eval_steps_per_second": 34.321,
       "step": 25
     }
   ],
   "logging_steps": 1,
@@ -221,12 +404,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 216569521111040.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 11.878259658813477,
+  "best_model_checkpoint": "miner_id_24/checkpoint-50",
+  "epoch": 0.5018820577164367,
   "eval_steps": 25,
+  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 132.004,
       "eval_steps_per_second": 34.321,
       "step": 25
+    },
+    {
+      "epoch": 0.26097867001254704,
+      "grad_norm": 0.3656708598136902,
+      "learning_rate": 5.500000000000001e-05,
+      "loss": 11.8925,
+      "step": 26
+    },
+    {
+      "epoch": 0.2710163111668758,
+      "grad_norm": 0.34611430764198303,
+      "learning_rate": 5.205685918464356e-05,
+      "loss": 11.8927,
+      "step": 27
+    },
+    {
+      "epoch": 0.2810539523212045,
+      "grad_norm": 0.3536524176597595,
+      "learning_rate": 4.912632135009769e-05,
+      "loss": 11.8896,
+      "step": 28
+    },
+    {
+      "epoch": 0.29109159347553326,
+      "grad_norm": 0.3487650454044342,
+      "learning_rate": 4.6220935509274235e-05,
+      "loss": 11.8888,
+      "step": 29
+    },
+    {
+      "epoch": 0.30112923462986196,
+      "grad_norm": 0.34203076362609863,
+      "learning_rate": 4.3353142970386564e-05,
+      "loss": 11.8902,
+      "step": 30
+    },
+    {
+      "epoch": 0.3111668757841907,
+      "grad_norm": 0.33263441920280457,
+      "learning_rate": 4.053522406135775e-05,
+      "loss": 11.8897,
+      "step": 31
+    },
+    {
+      "epoch": 0.3212045169385194,
+      "grad_norm": 0.32644975185394287,
+      "learning_rate": 3.777924554357096e-05,
+      "loss": 11.8864,
+      "step": 32
+    },
+    {
+      "epoch": 0.3312421580928482,
+      "grad_norm": 0.3280694782733917,
+      "learning_rate": 3.509700894014496e-05,
+      "loss": 11.8858,
+      "step": 33
+    },
+    {
+      "epoch": 0.34127979924717694,
+      "grad_norm": 0.32731539011001587,
+      "learning_rate": 3.250000000000001e-05,
+      "loss": 11.8855,
+      "step": 34
+    },
+    {
+      "epoch": 0.35131744040150564,
+      "grad_norm": 0.3242013454437256,
+      "learning_rate": 2.9999339514117912e-05,
+      "loss": 11.8837,
+      "step": 35
+    },
+    {
+      "epoch": 0.3613550815558344,
+      "grad_norm": 0.3112275004386902,
+      "learning_rate": 2.760573569460757e-05,
+      "loss": 11.8844,
+      "step": 36
+    },
+    {
+      "epoch": 0.3713927227101631,
+      "grad_norm": 0.3139267861843109,
+      "learning_rate": 2.53294383204969e-05,
+      "loss": 11.8825,
+      "step": 37
+    },
+    {
+      "epoch": 0.38143036386449186,
+      "grad_norm": 0.28634682297706604,
+      "learning_rate": 2.3180194846605367e-05,
+      "loss": 11.8817,
+      "step": 38
+    },
+    {
+      "epoch": 0.39146800501882056,
+      "grad_norm": 0.2885371446609497,
+      "learning_rate": 2.1167208663446025e-05,
+      "loss": 11.8817,
+      "step": 39
+    },
+    {
+      "epoch": 0.4015056461731493,
+      "grad_norm": 0.2969585955142975,
+      "learning_rate": 1.9299099686894423e-05,
+      "loss": 11.8804,
+      "step": 40
+    },
+    {
+      "epoch": 0.411543287327478,
+      "grad_norm": 0.2891557216644287,
+      "learning_rate": 1.758386744638546e-05,
+      "loss": 11.8812,
+      "step": 41
+    },
+    {
+      "epoch": 0.4215809284818068,
+      "grad_norm": 0.2795007824897766,
+      "learning_rate": 1.602885682970026e-05,
+      "loss": 11.8811,
+      "step": 42
+    },
+    {
+      "epoch": 0.4316185696361355,
+      "grad_norm": 0.28135186433792114,
+      "learning_rate": 1.464072663102903e-05,
+      "loss": 11.88,
+      "step": 43
+    },
+    {
+      "epoch": 0.44165621079046424,
+      "grad_norm": 0.28325244784355164,
+      "learning_rate": 1.3425421036992098e-05,
+      "loss": 11.8806,
+      "step": 44
+    },
+    {
+      "epoch": 0.451693851944793,
+      "grad_norm": 0.2817550599575043,
+      "learning_rate": 1.2388144172720251e-05,
+      "loss": 11.8779,
+      "step": 45
+    },
+    {
+      "epoch": 0.4617314930991217,
+      "grad_norm": 0.291200190782547,
+      "learning_rate": 1.1533337816991932e-05,
+      "loss": 11.878,
+      "step": 46
+    },
+    {
+      "epoch": 0.47176913425345046,
+      "grad_norm": 0.2938042879104614,
+      "learning_rate": 1.0864662381854632e-05,
+      "loss": 11.8799,
+      "step": 47
+    },
+    {
+      "epoch": 0.48180677540777916,
+      "grad_norm": 0.28831613063812256,
+      "learning_rate": 1.0384981238178534e-05,
+      "loss": 11.879,
+      "step": 48
+    },
+    {
+      "epoch": 0.4918444165621079,
+      "grad_norm": 0.28514596819877625,
+      "learning_rate": 1.0096348454262845e-05,
+      "loss": 11.8786,
+      "step": 49
+    },
+    {
+      "epoch": 0.5018820577164367,
+      "grad_norm": 0.29764559864997864,
+      "learning_rate": 1e-05,
+      "loss": 11.8804,
+      "step": 50
+    },
+    {
+      "epoch": 0.5018820577164367,
+      "eval_loss": 11.878259658813477,
+      "eval_runtime": 0.3807,
+      "eval_samples_per_second": 131.322,
+      "eval_steps_per_second": 34.144,
+      "step": 50
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 435488802471936.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null