Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:91e93374e34d8cd402fc43eefc9d8315d3e326a1e6359ff881ed4e8a2f56679f
 size 985240

 version https://git-lfs.github.com/spec/v1
+oid sha256:3dc6c59c18e5a47cc20ec2ea85928bf284768efb619cb36c39f4cb0a868e127e
 size 985240

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:372642c09921d74911530af9e8482b2c3b94681771e265f2c6c4d4458a9b6e15
 size 1980078

 version https://git-lfs.github.com/spec/v1
+oid sha256:406bba763add6afad7d556436aa96c23f1ec6356f36b2fe1cc81b8babf607483
 size 1980078

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dd398caf88c081dda7d535a3978705439b653e4cceb23d0f66a9e47ebbe50f8c
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:1d288bf14a6d0b4283aca957c6bd056390978747ba8dee8f69df2d2d377c117e
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1900bd8021f13c38b942ed30aea6e2cea1b47664e4ce28d0276b142334732307
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:f2c4a11c3ec7ace2e963dc6e2b0b5b6372cc0250cefb36d5f7289475908638cb
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 11.050891876220703,
-  "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.04906771344455348,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -373,6 +373,364 @@
       "eval_samples_per_second": 337.292,
       "eval_steps_per_second": 42.26,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -396,12 +754,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 7703686545408.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 11.038614273071289,
+  "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 0.09813542688910697,
   "eval_steps": 50,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 337.292,
       "eval_steps_per_second": 42.26,
       "step": 50
+    },
+    {
+      "epoch": 0.050049067713444556,
+      "grad_norm": 4.3872294425964355,
+      "learning_rate": 2.847932752400164e-06,
+      "loss": 44.1583,
+      "step": 51
+    },
+    {
+      "epoch": 0.05103042198233562,
+      "grad_norm": 4.958473205566406,
+      "learning_rate": 2.761321158169134e-06,
+      "loss": 44.1556,
+      "step": 52
+    },
+    {
+      "epoch": 0.052011776251226695,
+      "grad_norm": 4.641199588775635,
+      "learning_rate": 2.6743911843603134e-06,
+      "loss": 44.1518,
+      "step": 53
+    },
+    {
+      "epoch": 0.05299313052011776,
+      "grad_norm": 4.831917762756348,
+      "learning_rate": 2.587248741756253e-06,
+      "loss": 44.1574,
+      "step": 54
+    },
+    {
+      "epoch": 0.053974484789008834,
+      "grad_norm": 4.641740322113037,
+      "learning_rate": 2.5e-06,
+      "loss": 44.177,
+      "step": 55
+    },
+    {
+      "epoch": 0.0549558390578999,
+      "grad_norm": 5.027724742889404,
+      "learning_rate": 2.4127512582437486e-06,
+      "loss": 44.1403,
+      "step": 56
+    },
+    {
+      "epoch": 0.05593719332679097,
+      "grad_norm": 4.93941068649292,
+      "learning_rate": 2.325608815639687e-06,
+      "loss": 44.1304,
+      "step": 57
+    },
+    {
+      "epoch": 0.05691854759568204,
+      "grad_norm": 4.8098063468933105,
+      "learning_rate": 2.238678841830867e-06,
+      "loss": 44.1245,
+      "step": 58
+    },
+    {
+      "epoch": 0.05789990186457311,
+      "grad_norm": 4.973179340362549,
+      "learning_rate": 2.1520672475998374e-06,
+      "loss": 44.141,
+      "step": 59
+    },
+    {
+      "epoch": 0.058881256133464184,
+      "grad_norm": 4.688238620758057,
+      "learning_rate": 2.0658795558326745e-06,
+      "loss": 44.154,
+      "step": 60
+    },
+    {
+      "epoch": 0.05986261040235525,
+      "grad_norm": 4.850657939910889,
+      "learning_rate": 1.9802207729556023e-06,
+      "loss": 44.1483,
+      "step": 61
+    },
+    {
+      "epoch": 0.06084396467124632,
+      "grad_norm": 4.537477016448975,
+      "learning_rate": 1.895195261000831e-06,
+      "loss": 44.1491,
+      "step": 62
+    },
+    {
+      "epoch": 0.06182531894013739,
+      "grad_norm": 4.47567081451416,
+      "learning_rate": 1.8109066104575023e-06,
+      "loss": 44.1562,
+      "step": 63
+    },
+    {
+      "epoch": 0.06280667320902845,
+      "grad_norm": 4.615531921386719,
+      "learning_rate": 1.7274575140626318e-06,
+      "loss": 44.1694,
+      "step": 64
+    },
+    {
+      "epoch": 0.06378802747791953,
+      "grad_norm": 4.739807605743408,
+      "learning_rate": 1.6449496416858285e-06,
+      "loss": 44.1339,
+      "step": 65
+    },
+    {
+      "epoch": 0.0647693817468106,
+      "grad_norm": 4.978174686431885,
+      "learning_rate": 1.56348351646022e-06,
+      "loss": 44.1016,
+      "step": 66
+    },
+    {
+      "epoch": 0.06575073601570167,
+      "grad_norm": 4.427630424499512,
+      "learning_rate": 1.4831583923105e-06,
+      "loss": 44.1453,
+      "step": 67
+    },
+    {
+      "epoch": 0.06673209028459273,
+      "grad_norm": 4.460508823394775,
+      "learning_rate": 1.4040721330273063e-06,
+      "loss": 44.1541,
+      "step": 68
+    },
+    {
+      "epoch": 0.06771344455348381,
+      "grad_norm": 4.592861175537109,
+      "learning_rate": 1.3263210930352737e-06,
+      "loss": 44.1361,
+      "step": 69
+    },
+    {
+      "epoch": 0.06869479882237488,
+      "grad_norm": 4.343865394592285,
+      "learning_rate": 1.2500000000000007e-06,
+      "loss": 44.1732,
+      "step": 70
+    },
+    {
+      "epoch": 0.06967615309126594,
+      "grad_norm": 4.420068264007568,
+      "learning_rate": 1.1752018394169882e-06,
+      "loss": 44.1383,
+      "step": 71
+    },
+    {
+      "epoch": 0.07065750736015702,
+      "grad_norm": 4.614556789398193,
+      "learning_rate": 1.1020177413231334e-06,
+      "loss": 44.142,
+      "step": 72
+    },
+    {
+      "epoch": 0.07163886162904809,
+      "grad_norm": 4.291627883911133,
+      "learning_rate": 1.0305368692688175e-06,
+      "loss": 44.1535,
+      "step": 73
+    },
+    {
+      "epoch": 0.07262021589793916,
+      "grad_norm": 4.367705345153809,
+      "learning_rate": 9.608463116858544e-07,
+      "loss": 44.1653,
+      "step": 74
+    },
+    {
+      "epoch": 0.07360157016683022,
+      "grad_norm": 4.751893997192383,
+      "learning_rate": 8.930309757836517e-07,
+      "loss": 44.1325,
+      "step": 75
+    },
+    {
+      "epoch": 0.0745829244357213,
+      "grad_norm": 4.403656959533691,
+      "learning_rate": 8.271734841028553e-07,
+      "loss": 44.1413,
+      "step": 76
+    },
+    {
+      "epoch": 0.07556427870461237,
+      "grad_norm": 4.389298915863037,
+      "learning_rate": 7.633540738525066e-07,
+      "loss": 44.1718,
+      "step": 77
+    },
+    {
+      "epoch": 0.07654563297350343,
+      "grad_norm": 4.401954174041748,
+      "learning_rate": 7.016504991533727e-07,
+      "loss": 44.1616,
+      "step": 78
+    },
+    {
+      "epoch": 0.0775269872423945,
+      "grad_norm": 4.580694198608398,
+      "learning_rate": 6.421379363065142e-07,
+      "loss": 44.1418,
+      "step": 79
+    },
+    {
+      "epoch": 0.07850834151128558,
+      "grad_norm": 4.462921619415283,
+      "learning_rate": 5.848888922025553e-07,
+      "loss": 44.1092,
+      "step": 80
+    },
+    {
+      "epoch": 0.07948969578017664,
+      "grad_norm": 4.829708099365234,
+      "learning_rate": 5.299731159831953e-07,
+      "loss": 44.1295,
+      "step": 81
+    },
+    {
+      "epoch": 0.08047105004906771,
+      "grad_norm": 4.394340991973877,
+      "learning_rate": 4.774575140626317e-07,
+      "loss": 44.1685,
+      "step": 82
+    },
+    {
+      "epoch": 0.08145240431795878,
+      "grad_norm": 4.454510688781738,
+      "learning_rate": 4.27406068612396e-07,
+      "loss": 44.1542,
+      "step": 83
+    },
+    {
+      "epoch": 0.08243375858684986,
+      "grad_norm": 4.497045040130615,
+      "learning_rate": 3.798797596089351e-07,
+      "loss": 44.1375,
+      "step": 84
+    },
+    {
+      "epoch": 0.08341511285574092,
+      "grad_norm": 4.434609889984131,
+      "learning_rate": 3.3493649053890325e-07,
+      "loss": 44.1345,
+      "step": 85
+    },
+    {
+      "epoch": 0.08439646712463199,
+      "grad_norm": 4.5244035720825195,
+      "learning_rate": 2.9263101785268253e-07,
+      "loss": 44.1467,
+      "step": 86
+    },
+    {
+      "epoch": 0.08537782139352307,
+      "grad_norm": 4.269883632659912,
+      "learning_rate": 2.53014884252083e-07,
+      "loss": 44.1526,
+      "step": 87
+    },
+    {
+      "epoch": 0.08635917566241413,
+      "grad_norm": 4.184451103210449,
+      "learning_rate": 2.1613635589349756e-07,
+      "loss": 44.1822,
+      "step": 88
+    },
+    {
+      "epoch": 0.0873405299313052,
+      "grad_norm": 4.249412536621094,
+      "learning_rate": 1.8204036358303173e-07,
+      "loss": 44.1716,
+      "step": 89
+    },
+    {
+      "epoch": 0.08832188420019627,
+      "grad_norm": 3.827402114868164,
+      "learning_rate": 1.507684480352292e-07,
+      "loss": 44.1924,
+      "step": 90
+    },
+    {
+      "epoch": 0.08930323846908735,
+      "grad_norm": 4.114834785461426,
+      "learning_rate": 1.223587092621162e-07,
+      "loss": 44.2127,
+      "step": 91
+    },
+    {
+      "epoch": 0.09028459273797841,
+      "grad_norm": 3.819640636444092,
+      "learning_rate": 9.684576015420277e-08,
+      "loss": 44.2051,
+      "step": 92
+    },
+    {
+      "epoch": 0.09126594700686948,
+      "grad_norm": 4.212294578552246,
+      "learning_rate": 7.426068431000883e-08,
+      "loss": 44.1976,
+      "step": 93
+    },
+    {
+      "epoch": 0.09224730127576054,
+      "grad_norm": 3.624086618423462,
+      "learning_rate": 5.463099816548578e-08,
+      "loss": 44.2229,
+      "step": 94
+    },
+    {
+      "epoch": 0.09322865554465162,
+      "grad_norm": 3.4577605724334717,
+      "learning_rate": 3.798061746947995e-08,
+      "loss": 44.2466,
+      "step": 95
+    },
+    {
+      "epoch": 0.09421000981354269,
+      "grad_norm": 3.643979072570801,
+      "learning_rate": 2.4329828146074096e-08,
+      "loss": 44.3109,
+      "step": 96
+    },
+    {
+      "epoch": 0.09519136408243375,
+      "grad_norm": 4.863288879394531,
+      "learning_rate": 1.3695261579316776e-08,
+      "loss": 44.1834,
+      "step": 97
+    },
+    {
+      "epoch": 0.09617271835132483,
+      "grad_norm": 3.8488268852233887,
+      "learning_rate": 6.089874350439507e-09,
+      "loss": 44.2992,
+      "step": 98
+    },
+    {
+      "epoch": 0.0971540726202159,
+      "grad_norm": 4.9372148513793945,
+      "learning_rate": 1.5229324522605949e-09,
+      "loss": 44.3093,
+      "step": 99
+    },
+    {
+      "epoch": 0.09813542688910697,
+      "grad_norm": 5.151540756225586,
+      "learning_rate": 0.0,
+      "loss": 44.4666,
+      "step": 100
+    },
+    {
+      "epoch": 0.09813542688910697,
+      "eval_loss": 11.038614273071289,
+      "eval_runtime": 5.0993,
+      "eval_samples_per_second": 336.516,
+      "eval_steps_per_second": 42.163,
+      "step": 100
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 15407373090816.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null