Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +372 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e62fce865e44bbdded6a9a71d7550de8d2837ed27024d2dedaafa66398d739b6
 size 101752088

 version https://git-lfs.github.com/spec/v1
+oid sha256:66c60c98657ed502fa2c1016b9ce5deef402bbf258bdfa6dd9b8020496dc75a0
 size 101752088

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ea7996c182a97172c9636fc17b47813bdda98fad5530b3554dfd8400a321adbf
 size 203713238

 version https://git-lfs.github.com/spec/v1
+oid sha256:41c579475daf74857706e0dae085b8f2a787058662f7eb347521ab8cc25f5785
 size 203713238

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ac77add03c1b5656d4533577461928b941bae425e10b0bf77538471921ea4266
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:e6e59430d4b4b811794d683d76e428dbc806a60f2c322382193b90622a497076
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e70710c409284f74d525f8db5cfaccc22a8afd29416f19c595da9242ec92d936
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:fe157715eb8e05b3bab2a7f2fafac33705dc4a1a9dd7f6d860c3a7f9597d78bb
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 2.442666530609131,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.021500752526338422,
   "eval_steps": 25,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1113,6 +1113,372 @@
       "eval_samples_per_second": 10.95,
       "eval_steps_per_second": 1.533,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1136,12 +1502,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.04954557759488e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 2.4329843521118164,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.028667670035117897,
   "eval_steps": 25,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 10.95,
       "eval_steps_per_second": 1.533,
       "step": 150
+    },
+    {
+      "epoch": 0.021644090876514012,
+      "grad_norm": 0.42068323493003845,
+      "learning_rate": 4.659698863221513e-05,
+      "loss": 2.343,
+      "step": 151
+    },
+    {
+      "epoch": 0.0217874292266896,
+      "grad_norm": 0.436985045671463,
+      "learning_rate": 4.481448235912671e-05,
+      "loss": 2.3775,
+      "step": 152
+    },
+    {
+      "epoch": 0.02193076757686519,
+      "grad_norm": 0.43744659423828125,
+      "learning_rate": 4.306073275629044e-05,
+      "loss": 2.366,
+      "step": 153
+    },
+    {
+      "epoch": 0.02207410592704078,
+      "grad_norm": 0.42853182554244995,
+      "learning_rate": 4.133621928133665e-05,
+      "loss": 2.3583,
+      "step": 154
+    },
+    {
+      "epoch": 0.02221744427721637,
+      "grad_norm": 0.43902191519737244,
+      "learning_rate": 3.964141339903026e-05,
+      "loss": 2.3905,
+      "step": 155
+    },
+    {
+      "epoch": 0.022360782627391957,
+      "grad_norm": 0.46048280596733093,
+      "learning_rate": 3.797677845237696e-05,
+      "loss": 2.4043,
+      "step": 156
+    },
+    {
+      "epoch": 0.022504120977567547,
+      "grad_norm": 0.3899269998073578,
+      "learning_rate": 3.634276953594982e-05,
+      "loss": 2.3547,
+      "step": 157
+    },
+    {
+      "epoch": 0.022647459327743136,
+      "grad_norm": 0.37371909618377686,
+      "learning_rate": 3.473983337147118e-05,
+      "loss": 2.3287,
+      "step": 158
+    },
+    {
+      "epoch": 0.022790797677918726,
+      "grad_norm": 0.404742956161499,
+      "learning_rate": 3.316840818568315e-05,
+      "loss": 2.3772,
+      "step": 159
+    },
+    {
+      "epoch": 0.022934136028094316,
+      "grad_norm": 0.38302767276763916,
+      "learning_rate": 3.162892359054098e-05,
+      "loss": 2.3533,
+      "step": 160
+    },
+    {
+      "epoch": 0.023077474378269906,
+      "grad_norm": 0.3811044991016388,
+      "learning_rate": 3.0121800465761293e-05,
+      "loss": 2.3194,
+      "step": 161
+    },
+    {
+      "epoch": 0.023220812728445495,
+      "grad_norm": 0.5596585869789124,
+      "learning_rate": 2.8647450843757897e-05,
+      "loss": 2.4266,
+      "step": 162
+    },
+    {
+      "epoch": 0.023364151078621085,
+      "grad_norm": 0.5400771498680115,
+      "learning_rate": 2.7206277796996144e-05,
+      "loss": 2.3642,
+      "step": 163
+    },
+    {
+      "epoch": 0.023507489428796675,
+      "grad_norm": 0.4592722952365875,
+      "learning_rate": 2.5798675327796993e-05,
+      "loss": 2.327,
+      "step": 164
+    },
+    {
+      "epoch": 0.023650827778972264,
+      "grad_norm": 0.47848427295684814,
+      "learning_rate": 2.4425028260620715e-05,
+      "loss": 2.4503,
+      "step": 165
+    },
+    {
+      "epoch": 0.023794166129147854,
+      "grad_norm": 0.45060548186302185,
+      "learning_rate": 2.3085712136859668e-05,
+      "loss": 2.3749,
+      "step": 166
+    },
+    {
+      "epoch": 0.023937504479323444,
+      "grad_norm": 0.4228126108646393,
+      "learning_rate": 2.178109311216913e-05,
+      "loss": 2.4633,
+      "step": 167
+    },
+    {
+      "epoch": 0.024080842829499034,
+      "grad_norm": 0.5396104454994202,
+      "learning_rate": 2.0511527856363912e-05,
+      "loss": 2.4974,
+      "step": 168
+    },
+    {
+      "epoch": 0.024224181179674623,
+      "grad_norm": 0.47498688101768494,
+      "learning_rate": 1.927736345590839e-05,
+      "loss": 2.5275,
+      "step": 169
+    },
+    {
+      "epoch": 0.024367519529850213,
+      "grad_norm": 0.5508636832237244,
+      "learning_rate": 1.8078937319026654e-05,
+      "loss": 2.4751,
+      "step": 170
+    },
+    {
+      "epoch": 0.0245108578800258,
+      "grad_norm": 0.4817218780517578,
+      "learning_rate": 1.6916577083458228e-05,
+      "loss": 2.5245,
+      "step": 171
+    },
+    {
+      "epoch": 0.02465419623020139,
+      "grad_norm": 0.4276881217956543,
+      "learning_rate": 1.579060052688548e-05,
+      "loss": 2.403,
+      "step": 172
+    },
+    {
+      "epoch": 0.02479753458037698,
+      "grad_norm": 0.4423224627971649,
+      "learning_rate": 1.4701315480056164e-05,
+      "loss": 2.4871,
+      "step": 173
+    },
+    {
+      "epoch": 0.02494087293055257,
+      "grad_norm": 0.4730720818042755,
+      "learning_rate": 1.3649019742625623e-05,
+      "loss": 2.4419,
+      "step": 174
+    },
+    {
+      "epoch": 0.025084211280728158,
+      "grad_norm": 0.4102948009967804,
+      "learning_rate": 1.2634001001741373e-05,
+      "loss": 2.3216,
+      "step": 175
+    },
+    {
+      "epoch": 0.025084211280728158,
+      "eval_loss": 2.4315061569213867,
+      "eval_runtime": 4.5667,
+      "eval_samples_per_second": 10.949,
+      "eval_steps_per_second": 1.533,
+      "step": 175
+    },
+    {
+      "epoch": 0.025227549630903748,
+      "grad_norm": 0.43688416481018066,
+      "learning_rate": 1.1656536753392287e-05,
+      "loss": 2.3618,
+      "step": 176
+    },
+    {
+      "epoch": 0.025370887981079338,
+      "grad_norm": 0.4183753728866577,
+      "learning_rate": 1.0716894226543953e-05,
+      "loss": 2.4004,
+      "step": 177
+    },
+    {
+      "epoch": 0.025514226331254927,
+      "grad_norm": 0.3798510432243347,
+      "learning_rate": 9.815330310080887e-06,
+      "loss": 2.3351,
+      "step": 178
+    },
+    {
+      "epoch": 0.025657564681430517,
+      "grad_norm": 0.4367026090621948,
+      "learning_rate": 8.952091482575824e-06,
+      "loss": 2.339,
+      "step": 179
+    },
+    {
+      "epoch": 0.025800903031606107,
+      "grad_norm": 0.43516334891319275,
+      "learning_rate": 8.127413744904804e-06,
+      "loss": 2.3865,
+      "step": 180
+    },
+    {
+      "epoch": 0.025944241381781696,
+      "grad_norm": 0.39649835228919983,
+      "learning_rate": 7.34152255572697e-06,
+      "loss": 2.4152,
+      "step": 181
+    },
+    {
+      "epoch": 0.026087579731957286,
+      "grad_norm": 0.4496510326862335,
+      "learning_rate": 6.594632769846353e-06,
+      "loss": 2.4076,
+      "step": 182
+    },
+    {
+      "epoch": 0.026230918082132876,
+      "grad_norm": 0.36271652579307556,
+      "learning_rate": 5.886948579472778e-06,
+      "loss": 2.349,
+      "step": 183
+    },
+    {
+      "epoch": 0.026374256432308466,
+      "grad_norm": 0.43088510632514954,
+      "learning_rate": 5.218663458397715e-06,
+      "loss": 2.412,
+      "step": 184
+    },
+    {
+      "epoch": 0.026517594782484055,
+      "grad_norm": 0.4110885262489319,
+      "learning_rate": 4.589960109100444e-06,
+      "loss": 2.3669,
+      "step": 185
+    },
+    {
+      "epoch": 0.02666093313265964,
+      "grad_norm": 0.44089290499687195,
+      "learning_rate": 4.001010412799138e-06,
+      "loss": 2.3644,
+      "step": 186
+    },
+    {
+      "epoch": 0.02680427148283523,
+      "grad_norm": 0.46191123127937317,
+      "learning_rate": 3.451975382460109e-06,
+      "loss": 2.4576,
+      "step": 187
+    },
+    {
+      "epoch": 0.02694760983301082,
+      "grad_norm": 0.43181681632995605,
+      "learning_rate": 2.9430051187785962e-06,
+      "loss": 2.3365,
+      "step": 188
+    },
+    {
+      "epoch": 0.02709094818318641,
+      "grad_norm": 0.5321618914604187,
+      "learning_rate": 2.4742387691426445e-06,
+      "loss": 2.3837,
+      "step": 189
+    },
+    {
+      "epoch": 0.027234286533362,
+      "grad_norm": 0.4526923894882202,
+      "learning_rate": 2.0458044895916513e-06,
+      "loss": 2.3506,
+      "step": 190
+    },
+    {
+      "epoch": 0.02737762488353759,
+      "grad_norm": 0.42679014801979065,
+      "learning_rate": 1.6578194097797258e-06,
+      "loss": 2.4344,
+      "step": 191
+    },
+    {
+      "epoch": 0.02752096323371318,
+      "grad_norm": 0.5031974911689758,
+      "learning_rate": 1.3103896009537207e-06,
+      "loss": 2.3758,
+      "step": 192
+    },
+    {
+      "epoch": 0.02766430158388877,
+      "grad_norm": 0.4715871214866638,
+      "learning_rate": 1.0036100469542786e-06,
+      "loss": 2.4062,
+      "step": 193
+    },
+    {
+      "epoch": 0.02780763993406436,
+      "grad_norm": 0.5107702612876892,
+      "learning_rate": 7.375646182482875e-07,
+      "loss": 2.4455,
+      "step": 194
+    },
+    {
+      "epoch": 0.02795097828423995,
+      "grad_norm": 0.44611087441444397,
+      "learning_rate": 5.123260489995229e-07,
+      "loss": 2.456,
+      "step": 195
+    },
+    {
+      "epoch": 0.02809431663441554,
+      "grad_norm": 0.4629242718219757,
+      "learning_rate": 3.2795591718381975e-07,
+      "loss": 2.3595,
+      "step": 196
+    },
+    {
+      "epoch": 0.02823765498459113,
+      "grad_norm": 0.4822857975959778,
+      "learning_rate": 1.8450462775428942e-07,
+      "loss": 2.4289,
+      "step": 197
+    },
+    {
+      "epoch": 0.028380993334766718,
+      "grad_norm": 0.4529360234737396,
+      "learning_rate": 8.201139886109264e-08,
+      "loss": 2.3392,
+      "step": 198
+    },
+    {
+      "epoch": 0.028524331684942308,
+      "grad_norm": 0.43792641162872314,
+      "learning_rate": 2.0504251129649374e-08,
+      "loss": 2.4626,
+      "step": 199
+    },
+    {
+      "epoch": 0.028667670035117897,
+      "grad_norm": 0.4136735796928406,
+      "learning_rate": 0.0,
+      "loss": 2.35,
+      "step": 200
+    },
+    {
+      "epoch": 0.028667670035117897,
+      "eval_loss": 2.4329843521118164,
+      "eval_runtime": 4.566,
+      "eval_samples_per_second": 10.95,
+      "eval_steps_per_second": 1.533,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.39939410345984e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null