Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7b661fada8ed8dbc86abd40b273f3503c85f691a575d8b6c410c952fa5bb4556
 size 226532664

 version https://git-lfs.github.com/spec/v1
+oid sha256:1992c17b1e74d4ea673228221a634d67f00ff94b6f4ffa7b1603e1698243d186
 size 226532664

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:752a1ea276bbdc14793c98c2bdb556acdc7539030af6a1eadbd376555c70a811
 size 115354708

 version https://git-lfs.github.com/spec/v1
+oid sha256:03f5d0abeacb4d52cec804619a34b7ff90e90e5700bae3085511282f56b13176
 size 115354708

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c4258a94fbfdebca89db10bab16f663662efdb059cec3aeedbea2ce2ff10f909
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:c10b2e4dab5dfb1969bc8e4d938562671e73a15675f9a960a2144eb891fe323a
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d8ce05761f46e7cf72fb17a02e3a0ca15c9d25ce3babf590eeb40568923b8bac
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.48092612624168396,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.05157297576070139,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 51.199,
       "eval_steps_per_second": 12.802,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.737521336457626e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.47032108902931213,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.06876396768093519,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 51.199,
       "eval_steps_per_second": 12.802,
       "step": 150
+    },
+    {
+      "epoch": 0.051916795599106065,
+      "grad_norm": 7.596273422241211,
+      "learning_rate": 1.553232954407171e-05,
+      "loss": 2.3922,
+      "step": 151
+    },
+    {
+      "epoch": 0.052260615437510746,
+      "grad_norm": 4.700192451477051,
+      "learning_rate": 1.4938160786375572e-05,
+      "loss": 2.15,
+      "step": 152
+    },
+    {
+      "epoch": 0.05260443527591542,
+      "grad_norm": 4.358617305755615,
+      "learning_rate": 1.435357758543015e-05,
+      "loss": 2.0072,
+      "step": 153
+    },
+    {
+      "epoch": 0.052948255114320095,
+      "grad_norm": 4.667351722717285,
+      "learning_rate": 1.3778739760445552e-05,
+      "loss": 2.2452,
+      "step": 154
+    },
+    {
+      "epoch": 0.05329207495272477,
+      "grad_norm": 3.7926652431488037,
+      "learning_rate": 1.3213804466343421e-05,
+      "loss": 2.0282,
+      "step": 155
+    },
+    {
+      "epoch": 0.05363589479112945,
+      "grad_norm": 4.377016544342041,
+      "learning_rate": 1.2658926150792322e-05,
+      "loss": 2.049,
+      "step": 156
+    },
+    {
+      "epoch": 0.053979714629534124,
+      "grad_norm": 4.181820869445801,
+      "learning_rate": 1.2114256511983274e-05,
+      "loss": 2.113,
+      "step": 157
+    },
+    {
+      "epoch": 0.0543235344679388,
+      "grad_norm": 3.4626941680908203,
+      "learning_rate": 1.157994445715706e-05,
+      "loss": 1.9766,
+      "step": 158
+    },
+    {
+      "epoch": 0.05466735430634347,
+      "grad_norm": 4.791767597198486,
+      "learning_rate": 1.1056136061894384e-05,
+      "loss": 2.2093,
+      "step": 159
+    },
+    {
+      "epoch": 0.055011174144748154,
+      "grad_norm": 3.9656119346618652,
+      "learning_rate": 1.0542974530180327e-05,
+      "loss": 2.0348,
+      "step": 160
+    },
+    {
+      "epoch": 0.05535499398315283,
+      "grad_norm": 4.498620986938477,
+      "learning_rate": 1.0040600155253765e-05,
+      "loss": 1.9769,
+      "step": 161
+    },
+    {
+      "epoch": 0.0556988138215575,
+      "grad_norm": 3.2577667236328125,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 1.8342,
+      "step": 162
+    },
+    {
+      "epoch": 0.05604263365996218,
+      "grad_norm": 4.179971694946289,
+      "learning_rate": 9.068759265665384e-06,
+      "loss": 1.9596,
+      "step": 163
+    },
+    {
+      "epoch": 0.05638645349836686,
+      "grad_norm": 3.1900031566619873,
+      "learning_rate": 8.599558442598998e-06,
+      "loss": 1.934,
+      "step": 164
+    },
+    {
+      "epoch": 0.05673027333677153,
+      "grad_norm": 3.710221529006958,
+      "learning_rate": 8.141676086873572e-06,
+      "loss": 2.0005,
+      "step": 165
+    },
+    {
+      "epoch": 0.057074093175176206,
+      "grad_norm": 3.3741302490234375,
+      "learning_rate": 7.695237378953223e-06,
+      "loss": 1.8286,
+      "step": 166
+    },
+    {
+      "epoch": 0.05741791301358088,
+      "grad_norm": 2.9912824630737305,
+      "learning_rate": 7.260364370723044e-06,
+      "loss": 1.8552,
+      "step": 167
+    },
+    {
+      "epoch": 0.05776173285198556,
+      "grad_norm": 3.979104518890381,
+      "learning_rate": 6.837175952121306e-06,
+      "loss": 2.0712,
+      "step": 168
+    },
+    {
+      "epoch": 0.058105552690390236,
+      "grad_norm": 4.897646427154541,
+      "learning_rate": 6.425787818636131e-06,
+      "loss": 1.9269,
+      "step": 169
+    },
+    {
+      "epoch": 0.05844937252879491,
+      "grad_norm": 4.642251491546631,
+      "learning_rate": 6.026312439675552e-06,
+      "loss": 1.9916,
+      "step": 170
+    },
+    {
+      "epoch": 0.058793192367199584,
+      "grad_norm": 4.222075462341309,
+      "learning_rate": 5.6388590278194096e-06,
+      "loss": 2.0948,
+      "step": 171
+    },
+    {
+      "epoch": 0.059137012205604265,
+      "grad_norm": 3.229485034942627,
+      "learning_rate": 5.263533508961827e-06,
+      "loss": 1.908,
+      "step": 172
+    },
+    {
+      "epoch": 0.05948083204400894,
+      "grad_norm": 3.805974245071411,
+      "learning_rate": 4.900438493352055e-06,
+      "loss": 1.9059,
+      "step": 173
+    },
+    {
+      "epoch": 0.059824651882413614,
+      "grad_norm": 3.0714707374572754,
+      "learning_rate": 4.549673247541875e-06,
+      "loss": 1.8018,
+      "step": 174
+    },
+    {
+      "epoch": 0.06016847172081829,
+      "grad_norm": 3.829221248626709,
+      "learning_rate": 4.2113336672471245e-06,
+      "loss": 2.0628,
+      "step": 175
+    },
+    {
+      "epoch": 0.06051229155922297,
+      "grad_norm": 4.390942096710205,
+      "learning_rate": 3.885512251130763e-06,
+      "loss": 1.9008,
+      "step": 176
+    },
+    {
+      "epoch": 0.060856111397627644,
+      "grad_norm": 4.036614894866943,
+      "learning_rate": 3.5722980755146517e-06,
+      "loss": 2.0637,
+      "step": 177
+    },
+    {
+      "epoch": 0.06119993123603232,
+      "grad_norm": 3.732416868209839,
+      "learning_rate": 3.271776770026963e-06,
+      "loss": 1.9309,
+      "step": 178
+    },
+    {
+      "epoch": 0.06154375107443699,
+      "grad_norm": 3.323101043701172,
+      "learning_rate": 2.9840304941919415e-06,
+      "loss": 1.7777,
+      "step": 179
+    },
+    {
+      "epoch": 0.06188757091284167,
+      "grad_norm": 4.518346786499023,
+      "learning_rate": 2.7091379149682685e-06,
+      "loss": 2.0205,
+      "step": 180
+    },
+    {
+      "epoch": 0.06223139075124635,
+      "grad_norm": 2.749868154525757,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 1.7724,
+      "step": 181
+    },
+    {
+      "epoch": 0.06257521058965103,
+      "grad_norm": 3.940286874771118,
+      "learning_rate": 2.1982109232821178e-06,
+      "loss": 1.9041,
+      "step": 182
+    },
+    {
+      "epoch": 0.0629190304280557,
+      "grad_norm": 3.184329032897949,
+      "learning_rate": 1.962316193157593e-06,
+      "loss": 1.7954,
+      "step": 183
+    },
+    {
+      "epoch": 0.06326285026646038,
+      "grad_norm": 2.703235626220703,
+      "learning_rate": 1.7395544861325718e-06,
+      "loss": 1.6788,
+      "step": 184
+    },
+    {
+      "epoch": 0.06360667010486505,
+      "grad_norm": 4.29472541809082,
+      "learning_rate": 1.5299867030334814e-06,
+      "loss": 1.9971,
+      "step": 185
+    },
+    {
+      "epoch": 0.06395048994326973,
+      "grad_norm": 3.7988569736480713,
+      "learning_rate": 1.333670137599713e-06,
+      "loss": 1.8016,
+      "step": 186
+    },
+    {
+      "epoch": 0.0642943097816744,
+      "grad_norm": 2.6883325576782227,
+      "learning_rate": 1.1506584608200367e-06,
+      "loss": 1.6985,
+      "step": 187
+    },
+    {
+      "epoch": 0.06463812962007907,
+      "grad_norm": 2.6447510719299316,
+      "learning_rate": 9.810017062595322e-07,
+      "loss": 1.7059,
+      "step": 188
+    },
+    {
+      "epoch": 0.06498194945848375,
+      "grad_norm": 3.892427444458008,
+      "learning_rate": 8.247462563808817e-07,
+      "loss": 1.9132,
+      "step": 189
+    },
+    {
+      "epoch": 0.06532576929688844,
+      "grad_norm": 3.658687114715576,
+      "learning_rate": 6.819348298638839e-07,
+      "loss": 1.7631,
+      "step": 190
+    },
+    {
+      "epoch": 0.06566958913529311,
+      "grad_norm": 3.4642155170440674,
+      "learning_rate": 5.526064699265753e-07,
+      "loss": 1.7524,
+      "step": 191
+    },
+    {
+      "epoch": 0.06601340897369778,
+      "grad_norm": 4.059595584869385,
+      "learning_rate": 4.367965336512403e-07,
+      "loss": 1.9242,
+      "step": 192
+    },
+    {
+      "epoch": 0.06635722881210246,
+      "grad_norm": 3.424908399581909,
+      "learning_rate": 3.3453668231809286e-07,
+      "loss": 1.8341,
+      "step": 193
+    },
+    {
+      "epoch": 0.06670104865050713,
+      "grad_norm": 4.229599475860596,
+      "learning_rate": 2.458548727494292e-07,
+      "loss": 1.9844,
+      "step": 194
+    },
+    {
+      "epoch": 0.06704486848891181,
+      "grad_norm": 4.625471115112305,
+      "learning_rate": 1.7077534966650766e-07,
+      "loss": 1.825,
+      "step": 195
+    },
+    {
+      "epoch": 0.06738868832731648,
+      "grad_norm": 3.651977062225342,
+      "learning_rate": 1.0931863906127327e-07,
+      "loss": 1.7806,
+      "step": 196
+    },
+    {
+      "epoch": 0.06773250816572116,
+      "grad_norm": 3.7606873512268066,
+      "learning_rate": 6.150154258476315e-08,
+      "loss": 1.7365,
+      "step": 197
+    },
+    {
+      "epoch": 0.06807632800412584,
+      "grad_norm": 3.5898656845092773,
+      "learning_rate": 2.7337132953697554e-08,
+      "loss": 1.8591,
+      "step": 198
+    },
+    {
+      "epoch": 0.06842014784253052,
+      "grad_norm": 5.7996954917907715,
+      "learning_rate": 6.834750376549792e-09,
+      "loss": 1.917,
+      "step": 199
+    },
+    {
+      "epoch": 0.06876396768093519,
+      "grad_norm": 3.627258539199829,
+      "learning_rate": 0.0,
+      "loss": 1.7555,
+      "step": 200
+    },
+    {
+      "epoch": 0.06876396768093519,
+      "eval_loss": 0.47032108902931213,
+      "eval_runtime": 95.6637,
+      "eval_samples_per_second": 51.211,
+      "eval_steps_per_second": 12.805,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 4.981288836110746e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null