Training in progress, step 50, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +189 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:712a27ef0886dab7e30a27f9a3573ddf12ed952c3ea8354071a05e6291bde1af
 size 319876032

 version https://git-lfs.github.com/spec/v1
+oid sha256:3aa5a7b463d31524b398e21d8aaebdf8b2f5fb66f32d93ec9e23c7aa53426895
 size 319876032

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5bb0e0e067fac7685bd22ba0a2e0c7317edd04225a36eb960db32393e257add7
 size 640009682

 version https://git-lfs.github.com/spec/v1
+oid sha256:96485db7f146183c7751b8f1157565fbc8cf4f12999db8f61bc617471da1af83
 size 640009682

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8200ba761bac4a25093214bf3792da9a24c4e2368c20c7d1c83f0e70b471763b
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:6b099478e46c5343111b06d941d38cb0828a7286fb6b663dafa93bbe65b81edf
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7a354033565b099bfcd55db28215eb15cc52aeb7d40f33bc69db76697e549eab
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:975e42cc04b6138b335dbc8302805eebffe60964025e596cc9f8cd7becf69cd9
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b694f019d56f0f5631151baaef223f013c5cd97ef7fa27656ea4731ee53f6b93
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:2ffbc966e8e1107c2f900bef92859f59b3c62c0d839c7d6960088d16f2eaf3a7
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4bb30a25ce35b38f6ff6463832122236334e2e6b77a579fa3acfa4ddae492764
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:05bf9c7f6aec675a9175ccfc53a986dbd847803b5abc769dd30f0f65c9ef8b03
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f37b2aa490ccb1598b01e14cda36e9081f7ce646deab4d3c2d03de0d2169a755
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:b1df0528620c07325b8faa7567e59b0c1e86a1f1ee6af1245a69c6c0463fe4e2
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 2.6144490242004395,
-  "best_model_checkpoint": "miner_id_24/checkpoint-25",
-  "epoch": 0.8213552361396304,
   "eval_steps": 25,
-  "global_step": 25,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -198,6 +198,189 @@
       "eval_samples_per_second": 34.516,
       "eval_steps_per_second": 4.378,
       "step": 25
     }
   ],
   "logging_steps": 1,
@@ -221,12 +404,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.822877455843328e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 2.5790822505950928,
+  "best_model_checkpoint": "miner_id_24/checkpoint-50",
+  "epoch": 1.6509240246406571,
   "eval_steps": 25,
+  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 34.516,
       "eval_steps_per_second": 4.378,
       "step": 25
+    },
+    {
+      "epoch": 0.8542094455852156,
+      "grad_norm": 0.4761195182800293,
+      "learning_rate": 5e-05,
+      "loss": 2.6846,
+      "step": 26
+    },
+    {
+      "epoch": 0.8870636550308009,
+      "grad_norm": 0.5094211101531982,
+      "learning_rate": 4.6729843538492847e-05,
+      "loss": 2.7143,
+      "step": 27
+    },
+    {
+      "epoch": 0.919917864476386,
+      "grad_norm": 0.5358983874320984,
+      "learning_rate": 4.347369038899744e-05,
+      "loss": 2.6664,
+      "step": 28
+    },
+    {
+      "epoch": 0.9527720739219713,
+      "grad_norm": 0.6261024475097656,
+      "learning_rate": 4.0245483899193595e-05,
+      "loss": 2.6405,
+      "step": 29
+    },
+    {
+      "epoch": 0.9856262833675564,
+      "grad_norm": 0.8257274627685547,
+      "learning_rate": 3.705904774487396e-05,
+      "loss": 2.6263,
+      "step": 30
+    },
+    {
+      "epoch": 1.0266940451745379,
+      "grad_norm": 1.1825813055038452,
+      "learning_rate": 3.392802673484193e-05,
+      "loss": 4.2278,
+      "step": 31
+    },
+    {
+      "epoch": 1.0595482546201231,
+      "grad_norm": 0.40372803807258606,
+      "learning_rate": 3.086582838174551e-05,
+      "loss": 2.0355,
+      "step": 32
+    },
+    {
+      "epoch": 1.0924024640657084,
+      "grad_norm": 0.4721590280532837,
+      "learning_rate": 2.7885565489049946e-05,
+      "loss": 2.5572,
+      "step": 33
+    },
+    {
+      "epoch": 1.1252566735112937,
+      "grad_norm": 0.475892037153244,
+      "learning_rate": 2.500000000000001e-05,
+      "loss": 2.3979,
+      "step": 34
+    },
+    {
+      "epoch": 1.158110882956879,
+      "grad_norm": 0.5070028901100159,
+      "learning_rate": 2.2221488349019903e-05,
+      "loss": 2.556,
+      "step": 35
+    },
+    {
+      "epoch": 1.1909650924024642,
+      "grad_norm": 0.5608769059181213,
+      "learning_rate": 1.9561928549563968e-05,
+      "loss": 2.4592,
+      "step": 36
+    },
+    {
+      "epoch": 1.2238193018480492,
+      "grad_norm": 0.6679421663284302,
+      "learning_rate": 1.703270924499656e-05,
+      "loss": 2.5307,
+      "step": 37
+    },
+    {
+      "epoch": 1.2566735112936345,
+      "grad_norm": 0.42008689045906067,
+      "learning_rate": 1.4644660940672627e-05,
+      "loss": 1.8624,
+      "step": 38
+    },
+    {
+      "epoch": 1.2895277207392197,
+      "grad_norm": 0.4553283751010895,
+      "learning_rate": 1.2408009626051137e-05,
+      "loss": 2.1853,
+      "step": 39
+    },
+    {
+      "epoch": 1.322381930184805,
+      "grad_norm": 0.46935948729515076,
+      "learning_rate": 1.0332332985438248e-05,
+      "loss": 2.2706,
+      "step": 40
+    },
+    {
+      "epoch": 1.35523613963039,
+      "grad_norm": 0.4959513247013092,
+      "learning_rate": 8.426519384872733e-06,
+      "loss": 2.4294,
+      "step": 41
+    },
+    {
+      "epoch": 1.3880903490759753,
+      "grad_norm": 0.5358001589775085,
+      "learning_rate": 6.698729810778065e-06,
+      "loss": 2.5825,
+      "step": 42
+    },
+    {
+      "epoch": 1.4209445585215605,
+      "grad_norm": 0.5627439022064209,
+      "learning_rate": 5.156362923365588e-06,
+      "loss": 2.499,
+      "step": 43
+    },
+    {
+      "epoch": 1.4537987679671458,
+      "grad_norm": 0.6085307598114014,
+      "learning_rate": 3.8060233744356633e-06,
+      "loss": 2.4359,
+      "step": 44
+    },
+    {
+      "epoch": 1.486652977412731,
+      "grad_norm": 0.8430867791175842,
+      "learning_rate": 2.653493525244721e-06,
+      "loss": 2.6336,
+      "step": 45
+    },
+    {
+      "epoch": 1.5195071868583163,
+      "grad_norm": 0.4500504732131958,
+      "learning_rate": 1.70370868554659e-06,
+      "loss": 2.1174,
+      "step": 46
+    },
+    {
+      "epoch": 1.5523613963039016,
+      "grad_norm": 0.4381200969219208,
+      "learning_rate": 9.607359798384785e-07,
+      "loss": 2.016,
+      "step": 47
+    },
+    {
+      "epoch": 1.5852156057494866,
+      "grad_norm": 0.515243649482727,
+      "learning_rate": 4.277569313094809e-07,
+      "loss": 2.5711,
+      "step": 48
+    },
+    {
+      "epoch": 1.6180698151950719,
+      "grad_norm": 0.5294801592826843,
+      "learning_rate": 1.0705383806982606e-07,
+      "loss": 2.4628,
+      "step": 49
+    },
+    {
+      "epoch": 1.6509240246406571,
+      "grad_norm": 0.5349022746086121,
+      "learning_rate": 0.0,
+      "loss": 2.4572,
+      "step": 50
+    },
+    {
+      "epoch": 1.6509240246406571,
+      "eval_loss": 2.5790822505950928,
+      "eval_runtime": 5.9316,
+      "eval_samples_per_second": 34.561,
+      "eval_steps_per_second": 4.383,
+      "step": 50
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 5.645754911686656e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null