Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +365 -7

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ed8e2f732057690d210168e1df1953d803054d4d990eb313a51988d66518031e
 size 671149168

 version https://git-lfs.github.com/spec/v1
+oid sha256:9849fd563f48e4e5585c4326a229a2a65fdff72fa77b97947c4d87cf03b1f0d0
 size 671149168

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5c7fc3c8c74e1a0f5514b348ef97e57050e9849c8761ccaeea08364212a200f5
 size 1342555602

 version https://git-lfs.github.com/spec/v1
+oid sha256:0460e884206a7324bd848edd4fa67174ccfc0fe0703d276f2edbb71dc647cafd
 size 1342555602

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:eedd2298fd16372c6fac7072766f3f115f4a30f98acc788a9a1d7929c2bc3710
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:2ea89e86c083de57ebc7dd4e5015853033482cec08f22425deef3d523cff9853
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dfd59dca009004df561617f8f6994512d029a952a68609cac24b36df5a0757ce
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.07757702469825745,
-  "best_model_checkpoint": "miner_id_24/checkpoint-100",
-  "epoch": 0.012836695834492201,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 12.255,
       "eval_steps_per_second": 6.128,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1103,7 +1461,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.262770368118784e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.04161956161260605,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.0171155944459896,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 12.255,
       "eval_steps_per_second": 6.128,
       "step": 150
+    },
+    {
+      "epoch": 0.012922273806722149,
+      "grad_norm": 0.8338009715080261,
+      "learning_rate": 1.9136088935510362e-05,
+      "loss": 0.0953,
+      "step": 151
+    },
+    {
+      "epoch": 0.013007851778952098,
+      "grad_norm": 1.0214147567749023,
+      "learning_rate": 1.8414449687337464e-05,
+      "loss": 0.092,
+      "step": 152
+    },
+    {
+      "epoch": 0.013093429751182046,
+      "grad_norm": 1.2112953662872314,
+      "learning_rate": 1.7703596875660645e-05,
+      "loss": 0.141,
+      "step": 153
+    },
+    {
+      "epoch": 0.013179007723411993,
+      "grad_norm": 1.1335638761520386,
+      "learning_rate": 1.700377325606388e-05,
+      "loss": 0.1125,
+      "step": 154
+    },
+    {
+      "epoch": 0.013264585695641942,
+      "grad_norm": 0.7966949939727783,
+      "learning_rate": 1.631521781767214e-05,
+      "loss": 0.0562,
+      "step": 155
+    },
+    {
+      "epoch": 0.01335016366787189,
+      "grad_norm": 0.9197629690170288,
+      "learning_rate": 1.5638165701536868e-05,
+      "loss": 0.0866,
+      "step": 156
+    },
+    {
+      "epoch": 0.013435741640101837,
+      "grad_norm": 0.6064220666885376,
+      "learning_rate": 1.4972848120335453e-05,
+      "loss": 0.0479,
+      "step": 157
+    },
+    {
+      "epoch": 0.013521319612331786,
+      "grad_norm": 0.2772460877895355,
+      "learning_rate": 1.4319492279412388e-05,
+      "loss": 0.0219,
+      "step": 158
+    },
+    {
+      "epoch": 0.013606897584561734,
+      "grad_norm": 0.36105939745903015,
+      "learning_rate": 1.3678321299188801e-05,
+      "loss": 0.0405,
+      "step": 159
+    },
+    {
+      "epoch": 0.013692475556791681,
+      "grad_norm": 0.28316134214401245,
+      "learning_rate": 1.3049554138967051e-05,
+      "loss": 0.0354,
+      "step": 160
+    },
+    {
+      "epoch": 0.01377805352902163,
+      "grad_norm": 0.4974602460861206,
+      "learning_rate": 1.2433405522156332e-05,
+      "loss": 0.0393,
+      "step": 161
+    },
+    {
+      "epoch": 0.013863631501251578,
+      "grad_norm": 0.8241268992424011,
+      "learning_rate": 1.183008586294485e-05,
+      "loss": 0.0663,
+      "step": 162
+    },
+    {
+      "epoch": 0.013949209473481525,
+      "grad_norm": 0.7367086410522461,
+      "learning_rate": 1.1239801194443506e-05,
+      "loss": 0.0417,
+      "step": 163
+    },
+    {
+      "epoch": 0.014034787445711475,
+      "grad_norm": 0.6524820923805237,
+      "learning_rate": 1.066275309832584e-05,
+      "loss": 0.059,
+      "step": 164
+    },
+    {
+      "epoch": 0.014120365417941422,
+      "grad_norm": 0.48198258876800537,
+      "learning_rate": 1.0099138635988026e-05,
+      "loss": 0.0432,
+      "step": 165
+    },
+    {
+      "epoch": 0.01420594339017137,
+      "grad_norm": 0.6708958148956299,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 0.056,
+      "step": 166
+    },
+    {
+      "epoch": 0.014291521362401319,
+      "grad_norm": 0.6288459897041321,
+      "learning_rate": 9.012975854638949e-06,
+      "loss": 0.0465,
+      "step": 167
+    },
+    {
+      "epoch": 0.014377099334631266,
+      "grad_norm": 0.47757601737976074,
+      "learning_rate": 8.490798459222476e-06,
+      "loss": 0.0461,
+      "step": 168
+    },
+    {
+      "epoch": 0.014462677306861214,
+      "grad_norm": 0.47537004947662354,
+      "learning_rate": 7.982796418105371e-06,
+      "loss": 0.0444,
+      "step": 169
+    },
+    {
+      "epoch": 0.014548255279091161,
+      "grad_norm": 0.27161651849746704,
+      "learning_rate": 7.489143213519301e-06,
+      "loss": 0.0266,
+      "step": 170
+    },
+    {
+      "epoch": 0.01463383325132111,
+      "grad_norm": 0.3608476221561432,
+      "learning_rate": 7.010007427581378e-06,
+      "loss": 0.0257,
+      "step": 171
+    },
+    {
+      "epoch": 0.014719411223551058,
+      "grad_norm": 0.34825852513313293,
+      "learning_rate": 6.5455526847235825e-06,
+      "loss": 0.0376,
+      "step": 172
+    },
+    {
+      "epoch": 0.014804989195781005,
+      "grad_norm": 0.38921675086021423,
+      "learning_rate": 6.0959375958151045e-06,
+      "loss": 0.0284,
+      "step": 173
+    },
+    {
+      "epoch": 0.014890567168010954,
+      "grad_norm": 0.3063510060310364,
+      "learning_rate": 5.6613157039969055e-06,
+      "loss": 0.0246,
+      "step": 174
+    },
+    {
+      "epoch": 0.014976145140240902,
+      "grad_norm": 0.3037574291229248,
+      "learning_rate": 5.241835432246889e-06,
+      "loss": 0.0357,
+      "step": 175
+    },
+    {
+      "epoch": 0.01506172311247085,
+      "grad_norm": 0.36166590452194214,
+      "learning_rate": 4.837640032693558e-06,
+      "loss": 0.0404,
+      "step": 176
+    },
+    {
+      "epoch": 0.015147301084700799,
+      "grad_norm": 0.4001633822917938,
+      "learning_rate": 4.448867537695578e-06,
+      "loss": 0.0462,
+      "step": 177
+    },
+    {
+      "epoch": 0.015232879056930746,
+      "grad_norm": 0.5758128762245178,
+      "learning_rate": 4.075650712703849e-06,
+      "loss": 0.0447,
+      "step": 178
+    },
+    {
+      "epoch": 0.015318457029160694,
+      "grad_norm": 0.4326450228691101,
+      "learning_rate": 3.71811701092219e-06,
+      "loss": 0.0437,
+      "step": 179
+    },
+    {
+      "epoch": 0.015404035001390643,
+      "grad_norm": 0.3488145172595978,
+      "learning_rate": 3.376388529782215e-06,
+      "loss": 0.0392,
+      "step": 180
+    },
+    {
+      "epoch": 0.01548961297362059,
+      "grad_norm": 0.4991649091243744,
+      "learning_rate": 3.0505819692471792e-06,
+      "loss": 0.04,
+      "step": 181
+    },
+    {
+      "epoch": 0.015575190945850538,
+      "grad_norm": 0.3567849397659302,
+      "learning_rate": 2.7408085919590264e-06,
+      "loss": 0.0447,
+      "step": 182
+    },
+    {
+      "epoch": 0.015660768918080485,
+      "grad_norm": 0.5800079107284546,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 0.0476,
+      "step": 183
+    },
+    {
+      "epoch": 0.015746346890310434,
+      "grad_norm": 0.29148969054222107,
+      "learning_rate": 2.1697790249779636e-06,
+      "loss": 0.031,
+      "step": 184
+    },
+    {
+      "epoch": 0.015831924862540384,
+      "grad_norm": 0.3116988241672516,
+      "learning_rate": 1.908717841359048e-06,
+      "loss": 0.0313,
+      "step": 185
+    },
+    {
+      "epoch": 0.01591750283477033,
+      "grad_norm": 0.46554750204086304,
+      "learning_rate": 1.6640797865406288e-06,
+      "loss": 0.0401,
+      "step": 186
+    },
+    {
+      "epoch": 0.01600308080700028,
+      "grad_norm": 0.3885302245616913,
+      "learning_rate": 1.4359484041943038e-06,
+      "loss": 0.0311,
+      "step": 187
+    },
+    {
+      "epoch": 0.016088658779230228,
+      "grad_norm": 0.38669371604919434,
+      "learning_rate": 1.2244016009781701e-06,
+      "loss": 0.042,
+      "step": 188
+    },
+    {
+      "epoch": 0.016174236751460173,
+      "grad_norm": 0.3348408639431,
+      "learning_rate": 1.0295116199317057e-06,
+      "loss": 0.0239,
+      "step": 189
+    },
+    {
+      "epoch": 0.016259814723690123,
+      "grad_norm": 0.666803777217865,
+      "learning_rate": 8.513450158049108e-07,
+      "loss": 0.0413,
+      "step": 190
+    },
+    {
+      "epoch": 0.016345392695920072,
+      "grad_norm": 0.5500365495681763,
+      "learning_rate": 6.899626323298713e-07,
+      "loss": 0.0516,
+      "step": 191
+    },
+    {
+      "epoch": 0.016430970668150018,
+      "grad_norm": 0.5613580346107483,
+      "learning_rate": 5.454195814427021e-07,
+      "loss": 0.0627,
+      "step": 192
+    },
+    {
+      "epoch": 0.016516548640379967,
+      "grad_norm": 0.2784683406352997,
+      "learning_rate": 4.177652244628627e-07,
+      "loss": 0.0199,
+      "step": 193
+    },
+    {
+      "epoch": 0.016602126612609912,
+      "grad_norm": 0.6985279321670532,
+      "learning_rate": 3.0704315523631953e-07,
+      "loss": 0.054,
+      "step": 194
+    },
+    {
+      "epoch": 0.01668770458483986,
+      "grad_norm": 0.7367770671844482,
+      "learning_rate": 2.1329118524827662e-07,
+      "loss": 0.0643,
+      "step": 195
+    },
+    {
+      "epoch": 0.01677328255706981,
+      "grad_norm": 0.35487690567970276,
+      "learning_rate": 1.3654133071059893e-07,
+      "loss": 0.0291,
+      "step": 196
+    },
+    {
+      "epoch": 0.016858860529299757,
+      "grad_norm": 0.5007591843605042,
+      "learning_rate": 7.681980162830282e-08,
+      "loss": 0.03,
+      "step": 197
+    },
+    {
+      "epoch": 0.016944438501529706,
+      "grad_norm": 0.3655904233455658,
+      "learning_rate": 3.4146992848854695e-08,
+      "loss": 0.0475,
+      "step": 198
+    },
+    {
+      "epoch": 0.017030016473759655,
+      "grad_norm": 0.7564741373062134,
+      "learning_rate": 8.537477097364522e-09,
+      "loss": 0.04,
+      "step": 199
+    },
+    {
+      "epoch": 0.0171155944459896,
+      "grad_norm": 1.8403810262680054,
+      "learning_rate": 0.0,
+      "loss": 0.1042,
+      "step": 200
+    },
+    {
+      "epoch": 0.0171155944459896,
+      "eval_loss": 0.04161956161260605,
+      "eval_runtime": 1606.9293,
+      "eval_samples_per_second": 12.248,
+      "eval_steps_per_second": 6.124,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 0
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 3.017027157491712e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null