Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:17c91c9532837237ea4062f1f0b6f2de1d5dc113e31312ff2ca5c19542aeac14
 size 628216

 version https://git-lfs.github.com/spec/v1
+oid sha256:c93368f0c9f8b881cd65130eee22f684530e3aaf36bef1aa7ed26f00067fdc8d
 size 628216

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c479332b4c52e0ac61588aae3132a40ade7f4f9b712a49e15b426e253f6a70c5
 size 352122

 version https://git-lfs.github.com/spec/v1
+oid sha256:1c2b701936911f2e005f464c36463b1c3087d57702b237cb6f248d53a0a782b9
 size 352122

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:eedfc8f02bb8d7d8cd34189a19c0d8b5ce8c0f6ca78a177ffef92beb953688d7
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:2d7bbf2c6a262584ee3cf7d118b7f0ae18043db13669286f517bb185ea18aa3a
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:970068ebf9c0dc6a40c93653c563bb0b2ba5296a6c46496b504a3f1343bf3a62
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:7df65c8f4b426598b0abc4173b1983dcf7411aee63ea4061d980eae7a1af2363
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 10.495588302612305,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.13204225352112675,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 71.913,
       "eval_steps_per_second": 18.006,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 102465168998400.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 10.490081787109375,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.176056338028169,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 71.913,
       "eval_steps_per_second": 18.006,
       "step": 150
+    },
+    {
+      "epoch": 0.1329225352112676,
+      "grad_norm": 0.655456006526947,
+      "learning_rate": 1.7085562498478883e-05,
+      "loss": 10.4826,
+      "step": 151
+    },
+    {
+      "epoch": 0.13380281690140844,
+      "grad_norm": 0.4750436246395111,
+      "learning_rate": 1.6431976865013128e-05,
+      "loss": 10.5067,
+      "step": 152
+    },
+    {
+      "epoch": 0.13468309859154928,
+      "grad_norm": 0.5088787078857422,
+      "learning_rate": 1.5788935343973164e-05,
+      "loss": 10.4799,
+      "step": 153
+    },
+    {
+      "epoch": 0.13556338028169015,
+      "grad_norm": 0.4471653401851654,
+      "learning_rate": 1.5156613736490108e-05,
+      "loss": 10.5016,
+      "step": 154
+    },
+    {
+      "epoch": 0.136443661971831,
+      "grad_norm": 0.4595582187175751,
+      "learning_rate": 1.4535184912977763e-05,
+      "loss": 10.4885,
+      "step": 155
+    },
+    {
+      "epoch": 0.13732394366197184,
+      "grad_norm": 0.5350455641746521,
+      "learning_rate": 1.3924818765871553e-05,
+      "loss": 10.4808,
+      "step": 156
+    },
+    {
+      "epoch": 0.1382042253521127,
+      "grad_norm": 0.5852507948875427,
+      "learning_rate": 1.3325682163181601e-05,
+      "loss": 10.4695,
+      "step": 157
+    },
+    {
+      "epoch": 0.13908450704225353,
+      "grad_norm": 0.5205310583114624,
+      "learning_rate": 1.2737938902872767e-05,
+      "loss": 10.4588,
+      "step": 158
+    },
+    {
+      "epoch": 0.13996478873239437,
+      "grad_norm": 0.5743169784545898,
+      "learning_rate": 1.2161749668083823e-05,
+      "loss": 10.4531,
+      "step": 159
+    },
+    {
+      "epoch": 0.14084507042253522,
+      "grad_norm": 0.4958171844482422,
+      "learning_rate": 1.159727198319836e-05,
+      "loss": 10.4593,
+      "step": 160
+    },
+    {
+      "epoch": 0.14172535211267606,
+      "grad_norm": 0.43265965580940247,
+      "learning_rate": 1.1044660170779142e-05,
+      "loss": 10.4656,
+      "step": 161
+    },
+    {
+      "epoch": 0.1426056338028169,
+      "grad_norm": 0.5412198901176453,
+      "learning_rate": 1.0504065309377897e-05,
+      "loss": 10.4804,
+      "step": 162
+    },
+    {
+      "epoch": 0.14348591549295775,
+      "grad_norm": 0.4394540786743164,
+      "learning_rate": 9.97563519223192e-06,
+      "loss": 10.4911,
+      "step": 163
+    },
+    {
+      "epoch": 0.1443661971830986,
+      "grad_norm": 0.4882407784461975,
+      "learning_rate": 9.459514286858898e-06,
+      "loss": 10.5067,
+      "step": 164
+    },
+    {
+      "epoch": 0.14524647887323944,
+      "grad_norm": 0.3944062888622284,
+      "learning_rate": 8.95584369556093e-06,
+      "loss": 10.4899,
+      "step": 165
+    },
+    {
+      "epoch": 0.14612676056338028,
+      "grad_norm": 0.3780195415019989,
+      "learning_rate": 8.464761116848546e-06,
+      "loss": 10.4926,
+      "step": 166
+    },
+    {
+      "epoch": 0.14700704225352113,
+      "grad_norm": 0.31584465503692627,
+      "learning_rate": 7.986400807795349e-06,
+      "loss": 10.4902,
+      "step": 167
+    },
+    {
+      "epoch": 0.14788732394366197,
+      "grad_norm": 0.2965972125530243,
+      "learning_rate": 7.520893547333436e-06,
+      "loss": 10.4713,
+      "step": 168
+    },
+    {
+      "epoch": 0.1487676056338028,
+      "grad_norm": 0.3396422266960144,
+      "learning_rate": 7.068366600499744e-06,
+      "loss": 10.5113,
+      "step": 169
+    },
+    {
+      "epoch": 0.14964788732394366,
+      "grad_norm": 0.27547672390937805,
+      "learning_rate": 6.6289436836431076e-06,
+      "loss": 10.4863,
+      "step": 170
+    },
+    {
+      "epoch": 0.1505281690140845,
+      "grad_norm": 0.21695564687252045,
+      "learning_rate": 6.20274493060135e-06,
+      "loss": 10.5045,
+      "step": 171
+    },
+    {
+      "epoch": 0.15140845070422534,
+      "grad_norm": 0.2346537858247757,
+      "learning_rate": 5.789886859858009e-06,
+      "loss": 10.5259,
+      "step": 172
+    },
+    {
+      "epoch": 0.1522887323943662,
+      "grad_norm": 0.23305346071720123,
+      "learning_rate": 5.3904823426872605e-06,
+      "loss": 10.5179,
+      "step": 173
+    },
+    {
+      "epoch": 0.15316901408450703,
+      "grad_norm": 0.3447682559490204,
+      "learning_rate": 5.004640572296062e-06,
+      "loss": 10.5116,
+      "step": 174
+    },
+    {
+      "epoch": 0.15404929577464788,
+      "grad_norm": 0.2727600038051605,
+      "learning_rate": 4.632467033971838e-06,
+      "loss": 10.5402,
+      "step": 175
+    },
+    {
+      "epoch": 0.15492957746478872,
+      "grad_norm": 0.3000151813030243,
+      "learning_rate": 4.274063476243839e-06,
+      "loss": 10.5252,
+      "step": 176
+    },
+    {
+      "epoch": 0.15580985915492956,
+      "grad_norm": 0.24301907420158386,
+      "learning_rate": 3.929527883066117e-06,
+      "loss": 10.5167,
+      "step": 177
+    },
+    {
+      "epoch": 0.15669014084507044,
+      "grad_norm": 0.29026785492897034,
+      "learning_rate": 3.5989544470296595e-06,
+      "loss": 10.5109,
+      "step": 178
+    },
+    {
+      "epoch": 0.15757042253521128,
+      "grad_norm": 0.30544570088386536,
+      "learning_rate": 3.282433543611136e-06,
+      "loss": 10.4965,
+      "step": 179
+    },
+    {
+      "epoch": 0.15845070422535212,
+      "grad_norm": 0.3145160973072052,
+      "learning_rate": 2.980051706465095e-06,
+      "loss": 10.4923,
+      "step": 180
+    },
+    {
+      "epoch": 0.15933098591549297,
+      "grad_norm": 0.29084667563438416,
+      "learning_rate": 2.691891603766556e-06,
+      "loss": 10.5384,
+      "step": 181
+    },
+    {
+      "epoch": 0.1602112676056338,
+      "grad_norm": 0.2924867570400238,
+      "learning_rate": 2.4180320156103298e-06,
+      "loss": 10.5349,
+      "step": 182
+    },
+    {
+      "epoch": 0.16109154929577466,
+      "grad_norm": 0.31382471323013306,
+      "learning_rate": 2.158547812473352e-06,
+      "loss": 10.5328,
+      "step": 183
+    },
+    {
+      "epoch": 0.1619718309859155,
+      "grad_norm": 0.2887389361858368,
+      "learning_rate": 1.9135099347458293e-06,
+      "loss": 10.5209,
+      "step": 184
+    },
+    {
+      "epoch": 0.16285211267605634,
+      "grad_norm": 0.35892584919929504,
+      "learning_rate": 1.6829853733368294e-06,
+      "loss": 10.5239,
+      "step": 185
+    },
+    {
+      "epoch": 0.1637323943661972,
+      "grad_norm": 0.24830038845539093,
+      "learning_rate": 1.4670371513596842e-06,
+      "loss": 10.5013,
+      "step": 186
+    },
+    {
+      "epoch": 0.16461267605633803,
+      "grad_norm": 0.33427461981773376,
+      "learning_rate": 1.2657243069020402e-06,
+      "loss": 10.4891,
+      "step": 187
+    },
+    {
+      "epoch": 0.16549295774647887,
+      "grad_norm": 0.3475538492202759,
+      "learning_rate": 1.0791018768854855e-06,
+      "loss": 10.4896,
+      "step": 188
+    },
+    {
+      "epoch": 0.16637323943661972,
+      "grad_norm": 0.3785645365715027,
+      "learning_rate": 9.072208820189698e-07,
+      "loss": 10.5247,
+      "step": 189
+    },
+    {
+      "epoch": 0.16725352112676056,
+      "grad_norm": 0.3462846279144287,
+      "learning_rate": 7.501283128502722e-07,
+      "loss": 10.5091,
+      "step": 190
+    },
+    {
+      "epoch": 0.1681338028169014,
+      "grad_norm": 0.3662095367908478,
+      "learning_rate": 6.07867116919233e-07,
+      "loss": 10.465,
+      "step": 191
+    },
+    {
+      "epoch": 0.16901408450704225,
+      "grad_norm": 0.46672433614730835,
+      "learning_rate": 4.804761870163643e-07,
+      "loss": 10.4353,
+      "step": 192
+    },
+    {
+      "epoch": 0.1698943661971831,
+      "grad_norm": 0.3511195778846741,
+      "learning_rate": 3.6799035054990215e-07,
+      "loss": 10.4936,
+      "step": 193
+    },
+    {
+      "epoch": 0.17077464788732394,
+      "grad_norm": 0.3752671778202057,
+      "learning_rate": 2.704403600243721e-07,
+      "loss": 10.4624,
+      "step": 194
+    },
+    {
+      "epoch": 0.17165492957746478,
+      "grad_norm": 0.3640790283679962,
+      "learning_rate": 1.878528846331584e-07,
+      "loss": 10.495,
+      "step": 195
+    },
+    {
+      "epoch": 0.17253521126760563,
+      "grad_norm": 0.37574926018714905,
+      "learning_rate": 1.202505029674006e-07,
+      "loss": 10.4919,
+      "step": 196
+    },
+    {
+      "epoch": 0.17341549295774647,
+      "grad_norm": 0.32661178708076477,
+      "learning_rate": 6.765169684323947e-08,
+      "loss": 10.4605,
+      "step": 197
+    },
+    {
+      "epoch": 0.1742957746478873,
+      "grad_norm": 0.415720671415329,
+      "learning_rate": 3.007084624906731e-08,
+      "loss": 10.4822,
+      "step": 198
+    },
+    {
+      "epoch": 0.17517605633802816,
+      "grad_norm": 0.4906235337257385,
+      "learning_rate": 7.518225414204771e-09,
+      "loss": 10.5168,
+      "step": 199
+    },
+    {
+      "epoch": 0.176056338028169,
+      "grad_norm": 0.49893417954444885,
+      "learning_rate": 0.0,
+      "loss": 10.4814,
+      "step": 200
+    },
+    {
+      "epoch": 0.176056338028169,
+      "eval_loss": 10.490081787109375,
+      "eval_runtime": 30.7966,
+      "eval_samples_per_second": 62.117,
+      "eval_steps_per_second": 15.554,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 136620225331200.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null