Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0e74c94188dab04fe0891147b5c7b86945accb744b2d01ebcad3e10dd317cc4f
 size 639691872

 version https://git-lfs.github.com/spec/v1
+oid sha256:fc5b9b6a0f19df74dcefd049a18b933117fbf823d050514bf6ec5385912c309c
 size 639691872

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2feb7600691fb7d5b10a404d1321997a2245b9361349af877618161263c71d67
 size 325339796

 version https://git-lfs.github.com/spec/v1
+oid sha256:757402a6e03716617b9617eaf612867628590dd7f1e3c29b891e6e6228581a35
 size 325339796

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ea32a3deb21f59550ff545058e6c052cfd41665e6eb5122140e0848d1e1981f0
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:e1bc1eda3a83805b6948a665f2d0d43d38f9319042ad9a85e1c971894802b677
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7431a5f5e7b667e1ad74dfbaf504ae1d5a622ff6eb54e4f385c161883d2319ba
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:e1748c065d82adf475a51dba0ff56fd123a17810f07662718ca6d5704be8f9bd
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 2.3092238903045654,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.009624639076034648,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 9.558,
       "eval_steps_per_second": 2.39,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 5.116529964220416e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 2.2858030796051025,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.012832852101379532,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 9.558,
       "eval_steps_per_second": 2.39,
       "step": 150
+    },
+    {
+      "epoch": 0.009688803336541546,
+      "grad_norm": 0.5700095891952515,
+      "learning_rate": 2.6124736842105265e-05,
+      "loss": 2.2716,
+      "step": 151
+    },
+    {
+      "epoch": 0.009752967597048443,
+      "grad_norm": 0.6680998802185059,
+      "learning_rate": 2.5591578947368422e-05,
+      "loss": 2.5131,
+      "step": 152
+    },
+    {
+      "epoch": 0.009817131857555342,
+      "grad_norm": 0.6374183297157288,
+      "learning_rate": 2.5058421052631576e-05,
+      "loss": 2.571,
+      "step": 153
+    },
+    {
+      "epoch": 0.00988129611806224,
+      "grad_norm": 0.6198439002037048,
+      "learning_rate": 2.4525263157894737e-05,
+      "loss": 2.4856,
+      "step": 154
+    },
+    {
+      "epoch": 0.009945460378569138,
+      "grad_norm": 0.6242525577545166,
+      "learning_rate": 2.3992105263157894e-05,
+      "loss": 2.4885,
+      "step": 155
+    },
+    {
+      "epoch": 0.010009624639076035,
+      "grad_norm": 0.5134913921356201,
+      "learning_rate": 2.345894736842105e-05,
+      "loss": 2.1423,
+      "step": 156
+    },
+    {
+      "epoch": 0.010073788899582933,
+      "grad_norm": 0.49311476945877075,
+      "learning_rate": 2.292578947368421e-05,
+      "loss": 2.2922,
+      "step": 157
+    },
+    {
+      "epoch": 0.01013795316008983,
+      "grad_norm": 0.5304579734802246,
+      "learning_rate": 2.2392631578947366e-05,
+      "loss": 2.3629,
+      "step": 158
+    },
+    {
+      "epoch": 0.010202117420596728,
+      "grad_norm": 0.5376676321029663,
+      "learning_rate": 2.1859473684210527e-05,
+      "loss": 2.4822,
+      "step": 159
+    },
+    {
+      "epoch": 0.010266281681103625,
+      "grad_norm": 0.5748205780982971,
+      "learning_rate": 2.132631578947368e-05,
+      "loss": 2.2693,
+      "step": 160
+    },
+    {
+      "epoch": 0.010330445941610523,
+      "grad_norm": 0.6052488684654236,
+      "learning_rate": 2.0793157894736842e-05,
+      "loss": 2.2503,
+      "step": 161
+    },
+    {
+      "epoch": 0.01039461020211742,
+      "grad_norm": 0.6454349160194397,
+      "learning_rate": 2.026e-05,
+      "loss": 2.4645,
+      "step": 162
+    },
+    {
+      "epoch": 0.010458774462624318,
+      "grad_norm": 0.49100834131240845,
+      "learning_rate": 1.9726842105263157e-05,
+      "loss": 2.2656,
+      "step": 163
+    },
+    {
+      "epoch": 0.010522938723131215,
+      "grad_norm": 0.5916420221328735,
+      "learning_rate": 1.9193684210526314e-05,
+      "loss": 2.2182,
+      "step": 164
+    },
+    {
+      "epoch": 0.010587102983638113,
+      "grad_norm": 0.644229531288147,
+      "learning_rate": 1.866052631578947e-05,
+      "loss": 2.3002,
+      "step": 165
+    },
+    {
+      "epoch": 0.01065126724414501,
+      "grad_norm": 0.5885387659072876,
+      "learning_rate": 1.8127368421052632e-05,
+      "loss": 2.311,
+      "step": 166
+    },
+    {
+      "epoch": 0.010715431504651908,
+      "grad_norm": 0.7083272933959961,
+      "learning_rate": 1.759421052631579e-05,
+      "loss": 2.345,
+      "step": 167
+    },
+    {
+      "epoch": 0.010779595765158807,
+      "grad_norm": 0.6855031251907349,
+      "learning_rate": 1.7061052631578947e-05,
+      "loss": 2.2538,
+      "step": 168
+    },
+    {
+      "epoch": 0.010843760025665705,
+      "grad_norm": 0.6175474524497986,
+      "learning_rate": 1.6527894736842104e-05,
+      "loss": 2.4649,
+      "step": 169
+    },
+    {
+      "epoch": 0.010907924286172602,
+      "grad_norm": 0.6336336135864258,
+      "learning_rate": 1.599473684210526e-05,
+      "loss": 2.1047,
+      "step": 170
+    },
+    {
+      "epoch": 0.0109720885466795,
+      "grad_norm": 0.709040641784668,
+      "learning_rate": 1.546157894736842e-05,
+      "loss": 2.3666,
+      "step": 171
+    },
+    {
+      "epoch": 0.011036252807186397,
+      "grad_norm": 0.7283042073249817,
+      "learning_rate": 1.4928421052631576e-05,
+      "loss": 2.1672,
+      "step": 172
+    },
+    {
+      "epoch": 0.011100417067693295,
+      "grad_norm": 0.7090990543365479,
+      "learning_rate": 1.4395263157894735e-05,
+      "loss": 2.3524,
+      "step": 173
+    },
+    {
+      "epoch": 0.011164581328200193,
+      "grad_norm": 0.7621415853500366,
+      "learning_rate": 1.3862105263157895e-05,
+      "loss": 2.3557,
+      "step": 174
+    },
+    {
+      "epoch": 0.01122874558870709,
+      "grad_norm": 0.8043811321258545,
+      "learning_rate": 1.3328947368421052e-05,
+      "loss": 2.5607,
+      "step": 175
+    },
+    {
+      "epoch": 0.011292909849213988,
+      "grad_norm": 0.7839705348014832,
+      "learning_rate": 1.2795789473684211e-05,
+      "loss": 2.3813,
+      "step": 176
+    },
+    {
+      "epoch": 0.011357074109720885,
+      "grad_norm": 0.7333666086196899,
+      "learning_rate": 1.2262631578947368e-05,
+      "loss": 2.4085,
+      "step": 177
+    },
+    {
+      "epoch": 0.011421238370227783,
+      "grad_norm": 0.7560015916824341,
+      "learning_rate": 1.1729473684210526e-05,
+      "loss": 2.4049,
+      "step": 178
+    },
+    {
+      "epoch": 0.01148540263073468,
+      "grad_norm": 0.8456573486328125,
+      "learning_rate": 1.1196315789473683e-05,
+      "loss": 2.4974,
+      "step": 179
+    },
+    {
+      "epoch": 0.011549566891241578,
+      "grad_norm": 0.8126941919326782,
+      "learning_rate": 1.066315789473684e-05,
+      "loss": 2.0764,
+      "step": 180
+    },
+    {
+      "epoch": 0.011613731151748475,
+      "grad_norm": 0.871433675289154,
+      "learning_rate": 1.013e-05,
+      "loss": 2.2313,
+      "step": 181
+    },
+    {
+      "epoch": 0.011677895412255375,
+      "grad_norm": 0.8391616344451904,
+      "learning_rate": 9.596842105263157e-06,
+      "loss": 2.3128,
+      "step": 182
+    },
+    {
+      "epoch": 0.011742059672762272,
+      "grad_norm": 0.859274685382843,
+      "learning_rate": 9.063684210526316e-06,
+      "loss": 2.2995,
+      "step": 183
+    },
+    {
+      "epoch": 0.01180622393326917,
+      "grad_norm": 0.8498912453651428,
+      "learning_rate": 8.530526315789473e-06,
+      "loss": 2.1925,
+      "step": 184
+    },
+    {
+      "epoch": 0.011870388193776067,
+      "grad_norm": 0.9519957304000854,
+      "learning_rate": 7.99736842105263e-06,
+      "loss": 2.5695,
+      "step": 185
+    },
+    {
+      "epoch": 0.011934552454282965,
+      "grad_norm": 0.8947569131851196,
+      "learning_rate": 7.464210526315788e-06,
+      "loss": 2.3567,
+      "step": 186
+    },
+    {
+      "epoch": 0.011998716714789862,
+      "grad_norm": 0.9288820624351501,
+      "learning_rate": 6.931052631578947e-06,
+      "loss": 2.2551,
+      "step": 187
+    },
+    {
+      "epoch": 0.01206288097529676,
+      "grad_norm": 0.8670544624328613,
+      "learning_rate": 6.3978947368421055e-06,
+      "loss": 2.3142,
+      "step": 188
+    },
+    {
+      "epoch": 0.012127045235803657,
+      "grad_norm": 0.8455491065979004,
+      "learning_rate": 5.864736842105263e-06,
+      "loss": 2.1871,
+      "step": 189
+    },
+    {
+      "epoch": 0.012191209496310555,
+      "grad_norm": 0.9841280579566956,
+      "learning_rate": 5.33157894736842e-06,
+      "loss": 2.1852,
+      "step": 190
+    },
+    {
+      "epoch": 0.012255373756817452,
+      "grad_norm": 1.0499768257141113,
+      "learning_rate": 4.7984210526315785e-06,
+      "loss": 2.4203,
+      "step": 191
+    },
+    {
+      "epoch": 0.01231953801732435,
+      "grad_norm": 0.9910187721252441,
+      "learning_rate": 4.265263157894737e-06,
+      "loss": 2.188,
+      "step": 192
+    },
+    {
+      "epoch": 0.012383702277831247,
+      "grad_norm": 0.9471508860588074,
+      "learning_rate": 3.732105263157894e-06,
+      "loss": 2.157,
+      "step": 193
+    },
+    {
+      "epoch": 0.012447866538338145,
+      "grad_norm": 1.0245410203933716,
+      "learning_rate": 3.1989473684210527e-06,
+      "loss": 2.3334,
+      "step": 194
+    },
+    {
+      "epoch": 0.012512030798845043,
+      "grad_norm": 1.0170687437057495,
+      "learning_rate": 2.66578947368421e-06,
+      "loss": 2.3021,
+      "step": 195
+    },
+    {
+      "epoch": 0.01257619505935194,
+      "grad_norm": 1.0298289060592651,
+      "learning_rate": 2.1326315789473684e-06,
+      "loss": 2.0349,
+      "step": 196
+    },
+    {
+      "epoch": 0.01264035931985884,
+      "grad_norm": 1.140571117401123,
+      "learning_rate": 1.5994736842105264e-06,
+      "loss": 1.9795,
+      "step": 197
+    },
+    {
+      "epoch": 0.012704523580365737,
+      "grad_norm": 1.2025471925735474,
+      "learning_rate": 1.0663157894736842e-06,
+      "loss": 2.3114,
+      "step": 198
+    },
+    {
+      "epoch": 0.012768687840872634,
+      "grad_norm": 1.1247823238372803,
+      "learning_rate": 5.331578947368421e-07,
+      "loss": 2.3796,
+      "step": 199
+    },
+    {
+      "epoch": 0.012832852101379532,
+      "grad_norm": 1.2822043895721436,
+      "learning_rate": 0.0,
+      "loss": 1.9981,
+      "step": 200
+    },
+    {
+      "epoch": 0.012832852101379532,
+      "eval_loss": 2.2858030796051025,
+      "eval_runtime": 686.3255,
+      "eval_samples_per_second": 9.563,
+      "eval_steps_per_second": 2.391,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 6.827631788320358e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null