Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +363 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c8e98d3a212e20155d31dead7831cfb230b34ef256ed1853a47c2cf197dd8d33
 size 478211024

 version https://git-lfs.github.com/spec/v1
+oid sha256:79b563855b16aa77d2e6c8128f7c63e833e35e9705ba98c1888df9c144219b73
 size 478211024

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7fec3d53af7dc82300fdc76bb9df76c4bf544654ea25a533e7684b4891234b2e
 size 243337876

 version https://git-lfs.github.com/spec/v1
+oid sha256:d775538180ccc186920315b0cc97e95af74fab89686e5676ba58065d596cb541
 size 243337876

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4e3e7ef45304592be33a49dd62f55d5ce2e0f3a4d68b86b9f7f43af04b4f895d
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:d185266e81dd71349721c2f1859039b8cc716172235f260f89ef069c6af4d645
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d8ce05761f46e7cf72fb17a02e3a0ca15c9d25ce3babf590eeb40568923b8bac
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 4.072061061859131,
   "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 2.2388059701492535,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 20.632,
       "eval_steps_per_second": 5.295,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1103,7 +1461,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 2
       }
     },
     "TrainerControl": {
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.1307256966545408e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 4.072061061859131,
   "best_model_checkpoint": "miner_id_24/checkpoint-50",
+  "epoch": 2.9850746268656714,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 20.632,
       "eval_steps_per_second": 5.295,
       "step": 150
+    },
+    {
+      "epoch": 2.253731343283582,
+      "grad_norm": 7454171292762112.0,
+      "learning_rate": 1.553232954407171e-05,
+      "loss": 4.1971,
+      "step": 151
+    },
+    {
+      "epoch": 2.2686567164179103,
+      "grad_norm": 255465359409152.0,
+      "learning_rate": 1.4938160786375572e-05,
+      "loss": 4.0941,
+      "step": 152
+    },
+    {
+      "epoch": 2.283582089552239,
+      "grad_norm": 1.7983809752399872e+16,
+      "learning_rate": 1.435357758543015e-05,
+      "loss": 4.3582,
+      "step": 153
+    },
+    {
+      "epoch": 2.298507462686567,
+      "grad_norm": 55151649882112.0,
+      "learning_rate": 1.3778739760445552e-05,
+      "loss": 4.1829,
+      "step": 154
+    },
+    {
+      "epoch": 2.3134328358208958,
+      "grad_norm": 6312732131328.0,
+      "learning_rate": 1.3213804466343421e-05,
+      "loss": 4.343,
+      "step": 155
+    },
+    {
+      "epoch": 2.328358208955224,
+      "grad_norm": 4388992581632.0,
+      "learning_rate": 1.2658926150792322e-05,
+      "loss": 4.3359,
+      "step": 156
+    },
+    {
+      "epoch": 2.343283582089552,
+      "grad_norm": 423517519872.0,
+      "learning_rate": 1.2114256511983274e-05,
+      "loss": 4.4192,
+      "step": 157
+    },
+    {
+      "epoch": 2.3582089552238807,
+      "grad_norm": 1554829672448.0,
+      "learning_rate": 1.157994445715706e-05,
+      "loss": 4.3752,
+      "step": 158
+    },
+    {
+      "epoch": 2.373134328358209,
+      "grad_norm": 202584866816.0,
+      "learning_rate": 1.1056136061894384e-05,
+      "loss": 4.2812,
+      "step": 159
+    },
+    {
+      "epoch": 2.388059701492537,
+      "grad_norm": 2543958521544704.0,
+      "learning_rate": 1.0542974530180327e-05,
+      "loss": 4.248,
+      "step": 160
+    },
+    {
+      "epoch": 2.4029850746268657,
+      "grad_norm": 1844698284032.0,
+      "learning_rate": 1.0040600155253765e-05,
+      "loss": 4.499,
+      "step": 161
+    },
+    {
+      "epoch": 2.417910447761194,
+      "grad_norm": 28601806225408.0,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 4.3483,
+      "step": 162
+    },
+    {
+      "epoch": 2.4328358208955225,
+      "grad_norm": 1640432926720.0,
+      "learning_rate": 9.068759265665384e-06,
+      "loss": 4.37,
+      "step": 163
+    },
+    {
+      "epoch": 2.4477611940298507,
+      "grad_norm": 263266451456.0,
+      "learning_rate": 8.599558442598998e-06,
+      "loss": 4.3392,
+      "step": 164
+    },
+    {
+      "epoch": 2.4626865671641793,
+      "grad_norm": 8539780677632.0,
+      "learning_rate": 8.141676086873572e-06,
+      "loss": 4.2709,
+      "step": 165
+    },
+    {
+      "epoch": 2.4776119402985075,
+      "grad_norm": 3607562027008.0,
+      "learning_rate": 7.695237378953223e-06,
+      "loss": 4.3874,
+      "step": 166
+    },
+    {
+      "epoch": 2.4925373134328357,
+      "grad_norm": 252375264657408.0,
+      "learning_rate": 7.260364370723044e-06,
+      "loss": 4.3339,
+      "step": 167
+    },
+    {
+      "epoch": 2.5074626865671643,
+      "grad_norm": 52871781416960.0,
+      "learning_rate": 6.837175952121306e-06,
+      "loss": 4.219,
+      "step": 168
+    },
+    {
+      "epoch": 2.5223880597014925,
+      "grad_norm": 594725664980992.0,
+      "learning_rate": 6.425787818636131e-06,
+      "loss": 4.4257,
+      "step": 169
+    },
+    {
+      "epoch": 2.5373134328358207,
+      "grad_norm": 8318226907791360.0,
+      "learning_rate": 6.026312439675552e-06,
+      "loss": 4.2786,
+      "step": 170
+    },
+    {
+      "epoch": 2.5522388059701493,
+      "grad_norm": 1183159471308800.0,
+      "learning_rate": 5.6388590278194096e-06,
+      "loss": 4.354,
+      "step": 171
+    },
+    {
+      "epoch": 2.5671641791044775,
+      "grad_norm": 51707077722112.0,
+      "learning_rate": 5.263533508961827e-06,
+      "loss": 4.3583,
+      "step": 172
+    },
+    {
+      "epoch": 2.582089552238806,
+      "grad_norm": 39419243397120.0,
+      "learning_rate": 4.900438493352055e-06,
+      "loss": 4.3092,
+      "step": 173
+    },
+    {
+      "epoch": 2.5970149253731343,
+      "grad_norm": 11675283488768.0,
+      "learning_rate": 4.549673247541875e-06,
+      "loss": 4.4283,
+      "step": 174
+    },
+    {
+      "epoch": 2.611940298507463,
+      "grad_norm": 3264060588032.0,
+      "learning_rate": 4.2113336672471245e-06,
+      "loss": 4.3267,
+      "step": 175
+    },
+    {
+      "epoch": 2.626865671641791,
+      "grad_norm": 471635525632.0,
+      "learning_rate": 3.885512251130763e-06,
+      "loss": 4.1417,
+      "step": 176
+    },
+    {
+      "epoch": 2.6417910447761193,
+      "grad_norm": 12826679705600.0,
+      "learning_rate": 3.5722980755146517e-06,
+      "loss": 4.5587,
+      "step": 177
+    },
+    {
+      "epoch": 2.656716417910448,
+      "grad_norm": 44699993767936.0,
+      "learning_rate": 3.271776770026963e-06,
+      "loss": 4.2962,
+      "step": 178
+    },
+    {
+      "epoch": 2.671641791044776,
+      "grad_norm": 4595890782208.0,
+      "learning_rate": 2.9840304941919415e-06,
+      "loss": 4.4922,
+      "step": 179
+    },
+    {
+      "epoch": 2.6865671641791042,
+      "grad_norm": 19500900548608.0,
+      "learning_rate": 2.7091379149682685e-06,
+      "loss": 4.3235,
+      "step": 180
+    },
+    {
+      "epoch": 2.701492537313433,
+      "grad_norm": 43601534976.0,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 4.0723,
+      "step": 181
+    },
+    {
+      "epoch": 2.716417910447761,
+      "grad_norm": 5608959901696.0,
+      "learning_rate": 2.1982109232821178e-06,
+      "loss": 4.1898,
+      "step": 182
+    },
+    {
+      "epoch": 2.7313432835820897,
+      "grad_norm": 37156949065728.0,
+      "learning_rate": 1.962316193157593e-06,
+      "loss": 4.3088,
+      "step": 183
+    },
+    {
+      "epoch": 2.746268656716418,
+      "grad_norm": 267105727414272.0,
+      "learning_rate": 1.7395544861325718e-06,
+      "loss": 4.1701,
+      "step": 184
+    },
+    {
+      "epoch": 2.7611940298507465,
+      "grad_norm": 69628537602048.0,
+      "learning_rate": 1.5299867030334814e-06,
+      "loss": 4.3005,
+      "step": 185
+    },
+    {
+      "epoch": 2.7761194029850746,
+      "grad_norm": 30767564455936.0,
+      "learning_rate": 1.333670137599713e-06,
+      "loss": 4.3322,
+      "step": 186
+    },
+    {
+      "epoch": 2.791044776119403,
+      "grad_norm": 83588359913472.0,
+      "learning_rate": 1.1506584608200367e-06,
+      "loss": 4.2605,
+      "step": 187
+    },
+    {
+      "epoch": 2.8059701492537314,
+      "grad_norm": 8753218322432.0,
+      "learning_rate": 9.810017062595322e-07,
+      "loss": 4.2451,
+      "step": 188
+    },
+    {
+      "epoch": 2.8208955223880596,
+      "grad_norm": 2100625539072.0,
+      "learning_rate": 8.247462563808817e-07,
+      "loss": 4.0398,
+      "step": 189
+    },
+    {
+      "epoch": 2.835820895522388,
+      "grad_norm": 24320883556352.0,
+      "learning_rate": 6.819348298638839e-07,
+      "loss": 4.2575,
+      "step": 190
+    },
+    {
+      "epoch": 2.8507462686567164,
+      "grad_norm": 435874365440.0,
+      "learning_rate": 5.526064699265753e-07,
+      "loss": 4.3941,
+      "step": 191
+    },
+    {
+      "epoch": 2.8656716417910446,
+      "grad_norm": 3687415545856.0,
+      "learning_rate": 4.367965336512403e-07,
+      "loss": 4.2098,
+      "step": 192
+    },
+    {
+      "epoch": 2.8805970149253732,
+      "grad_norm": 5340785541120.0,
+      "learning_rate": 3.3453668231809286e-07,
+      "loss": 4.1522,
+      "step": 193
+    },
+    {
+      "epoch": 2.8955223880597014,
+      "grad_norm": 1328664150016.0,
+      "learning_rate": 2.458548727494292e-07,
+      "loss": 4.2646,
+      "step": 194
+    },
+    {
+      "epoch": 2.91044776119403,
+      "grad_norm": 38467081863168.0,
+      "learning_rate": 1.7077534966650766e-07,
+      "loss": 4.2095,
+      "step": 195
+    },
+    {
+      "epoch": 2.925373134328358,
+      "grad_norm": 1559660068864.0,
+      "learning_rate": 1.0931863906127327e-07,
+      "loss": 4.3461,
+      "step": 196
+    },
+    {
+      "epoch": 2.9402985074626864,
+      "grad_norm": 9872280125440.0,
+      "learning_rate": 6.150154258476315e-08,
+      "loss": 4.2482,
+      "step": 197
+    },
+    {
+      "epoch": 2.955223880597015,
+      "grad_norm": 140452429824.0,
+      "learning_rate": 2.7337132953697554e-08,
+      "loss": 4.2479,
+      "step": 198
+    },
+    {
+      "epoch": 2.970149253731343,
+      "grad_norm": 42921524985856.0,
+      "learning_rate": 6.834750376549792e-09,
+      "loss": 4.1213,
+      "step": 199
+    },
+    {
+      "epoch": 2.9850746268656714,
+      "grad_norm": 3093685338112.0,
+      "learning_rate": 0.0,
+      "loss": 4.1932,
+      "step": 200
+    },
+    {
+      "epoch": 2.9850746268656714,
+      "eval_loss": 4.244617938995361,
+      "eval_runtime": 5.4746,
+      "eval_samples_per_second": 20.641,
+      "eval_steps_per_second": 5.297,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 3
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.5084211276873728e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null