Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +372 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:85fca7563933c3e18b8617e3f002bbe02e8f791207486aa98af646e097dcbda7
 size 50624

 version https://git-lfs.github.com/spec/v1
+oid sha256:c2b1791d2cf4554bbe7f837e27fa757b435ea8dc634c1bf3c720bdcb3e2f4444
 size 50624

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:af5624d6ca53d1eaa55883aa010611be99360bd545dff58eb71be4a4ab9a05aa
 size 118090

 version https://git-lfs.github.com/spec/v1
+oid sha256:01a0cdb405899bda4a4cfced14d79035c028fb6f09916557d70f3858be02cf30
 size 118090

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3e765160d11b81e65ffe3d78d9c3e2c32c3dd5233c33521084cd844020498388
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:2b1edbffa8e0aab2d3f30726404ff5cc271d4172a0b22705acee5006ea9e1875
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e70710c409284f74d525f8db5cfaccc22a8afd29416f19c595da9242ec92d936
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:fe157715eb8e05b3bab2a7f2fafac33705dc4a1a9dd7f6d860c3a7f9597d78bb
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 10.295632362365723,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.023816493914389628,
   "eval_steps": 25,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1113,6 +1113,372 @@
       "eval_samples_per_second": 537.649,
       "eval_steps_per_second": 75.271,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1136,12 +1502,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 16671404851200.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 10.293888092041016,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.03175532521918617,
   "eval_steps": 25,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 537.649,
       "eval_steps_per_second": 75.271,
       "step": 150
+    },
+    {
+      "epoch": 0.02397527054048556,
+      "grad_norm": 0.23319634795188904,
+      "learning_rate": 4.659698863221513e-05,
+      "loss": 10.2971,
+      "step": 151
+    },
+    {
+      "epoch": 0.02413404716658149,
+      "grad_norm": 0.17366944253444672,
+      "learning_rate": 4.481448235912671e-05,
+      "loss": 10.2898,
+      "step": 152
+    },
+    {
+      "epoch": 0.024292823792677422,
+      "grad_norm": 0.1751234531402588,
+      "learning_rate": 4.306073275629044e-05,
+      "loss": 10.2979,
+      "step": 153
+    },
+    {
+      "epoch": 0.02445160041877335,
+      "grad_norm": 0.20233570039272308,
+      "learning_rate": 4.133621928133665e-05,
+      "loss": 10.2901,
+      "step": 154
+    },
+    {
+      "epoch": 0.02461037704486928,
+      "grad_norm": 0.2154175490140915,
+      "learning_rate": 3.964141339903026e-05,
+      "loss": 10.2894,
+      "step": 155
+    },
+    {
+      "epoch": 0.024769153670965213,
+      "grad_norm": 0.15442685782909393,
+      "learning_rate": 3.797677845237696e-05,
+      "loss": 10.297,
+      "step": 156
+    },
+    {
+      "epoch": 0.024927930297061144,
+      "grad_norm": 0.12210218608379364,
+      "learning_rate": 3.634276953594982e-05,
+      "loss": 10.2925,
+      "step": 157
+    },
+    {
+      "epoch": 0.025086706923157075,
+      "grad_norm": 0.10777822881937027,
+      "learning_rate": 3.473983337147118e-05,
+      "loss": 10.2849,
+      "step": 158
+    },
+    {
+      "epoch": 0.025245483549253007,
+      "grad_norm": 0.07246610522270203,
+      "learning_rate": 3.316840818568315e-05,
+      "loss": 10.2933,
+      "step": 159
+    },
+    {
+      "epoch": 0.025404260175348935,
+      "grad_norm": 0.08766282349824905,
+      "learning_rate": 3.162892359054098e-05,
+      "loss": 10.2998,
+      "step": 160
+    },
+    {
+      "epoch": 0.025563036801444866,
+      "grad_norm": 0.06367377191781998,
+      "learning_rate": 3.0121800465761293e-05,
+      "loss": 10.3024,
+      "step": 161
+    },
+    {
+      "epoch": 0.025721813427540798,
+      "grad_norm": 0.10457596927881241,
+      "learning_rate": 2.8647450843757897e-05,
+      "loss": 10.289,
+      "step": 162
+    },
+    {
+      "epoch": 0.02588059005363673,
+      "grad_norm": 0.0702897310256958,
+      "learning_rate": 2.7206277796996144e-05,
+      "loss": 10.3078,
+      "step": 163
+    },
+    {
+      "epoch": 0.02603936667973266,
+      "grad_norm": 0.09472493827342987,
+      "learning_rate": 2.5798675327796993e-05,
+      "loss": 10.2987,
+      "step": 164
+    },
+    {
+      "epoch": 0.026198143305828592,
+      "grad_norm": 0.08356080204248428,
+      "learning_rate": 2.4425028260620715e-05,
+      "loss": 10.3073,
+      "step": 165
+    },
+    {
+      "epoch": 0.026356919931924523,
+      "grad_norm": 0.06730841100215912,
+      "learning_rate": 2.3085712136859668e-05,
+      "loss": 10.3081,
+      "step": 166
+    },
+    {
+      "epoch": 0.02651569655802045,
+      "grad_norm": 0.07436297088861465,
+      "learning_rate": 2.178109311216913e-05,
+      "loss": 10.3148,
+      "step": 167
+    },
+    {
+      "epoch": 0.026674473184116382,
+      "grad_norm": 0.09109427034854889,
+      "learning_rate": 2.0511527856363912e-05,
+      "loss": 10.2942,
+      "step": 168
+    },
+    {
+      "epoch": 0.026833249810212314,
+      "grad_norm": 0.07138719409704208,
+      "learning_rate": 1.927736345590839e-05,
+      "loss": 10.2939,
+      "step": 169
+    },
+    {
+      "epoch": 0.026992026436308245,
+      "grad_norm": 0.0833205133676529,
+      "learning_rate": 1.8078937319026654e-05,
+      "loss": 10.3001,
+      "step": 170
+    },
+    {
+      "epoch": 0.027150803062404177,
+      "grad_norm": 0.08817581087350845,
+      "learning_rate": 1.6916577083458228e-05,
+      "loss": 10.3022,
+      "step": 171
+    },
+    {
+      "epoch": 0.027309579688500108,
+      "grad_norm": 0.08368958532810211,
+      "learning_rate": 1.579060052688548e-05,
+      "loss": 10.3065,
+      "step": 172
+    },
+    {
+      "epoch": 0.027468356314596036,
+      "grad_norm": 0.11247318983078003,
+      "learning_rate": 1.4701315480056164e-05,
+      "loss": 10.3019,
+      "step": 173
+    },
+    {
+      "epoch": 0.027627132940691967,
+      "grad_norm": 0.10502248257398605,
+      "learning_rate": 1.3649019742625623e-05,
+      "loss": 10.3019,
+      "step": 174
+    },
+    {
+      "epoch": 0.0277859095667879,
+      "grad_norm": 0.0866779237985611,
+      "learning_rate": 1.2634001001741373e-05,
+      "loss": 10.3097,
+      "step": 175
+    },
+    {
+      "epoch": 0.0277859095667879,
+      "eval_loss": 10.29531478881836,
+      "eval_runtime": 0.0934,
+      "eval_samples_per_second": 535.372,
+      "eval_steps_per_second": 74.952,
+      "step": 175
+    },
+    {
+      "epoch": 0.02794468619288383,
+      "grad_norm": 0.0854175016283989,
+      "learning_rate": 1.1656536753392287e-05,
+      "loss": 10.3084,
+      "step": 176
+    },
+    {
+      "epoch": 0.02810346281897976,
+      "grad_norm": 0.07053595036268234,
+      "learning_rate": 1.0716894226543953e-05,
+      "loss": 10.3021,
+      "step": 177
+    },
+    {
+      "epoch": 0.028262239445075693,
+      "grad_norm": 0.07512841373682022,
+      "learning_rate": 9.815330310080887e-06,
+      "loss": 10.3039,
+      "step": 178
+    },
+    {
+      "epoch": 0.02842101607117162,
+      "grad_norm": 0.06513608247041702,
+      "learning_rate": 8.952091482575824e-06,
+      "loss": 10.2931,
+      "step": 179
+    },
+    {
+      "epoch": 0.028579792697267552,
+      "grad_norm": 0.0893177017569542,
+      "learning_rate": 8.127413744904804e-06,
+      "loss": 10.2949,
+      "step": 180
+    },
+    {
+      "epoch": 0.028738569323363484,
+      "grad_norm": 0.0912618339061737,
+      "learning_rate": 7.34152255572697e-06,
+      "loss": 10.2961,
+      "step": 181
+    },
+    {
+      "epoch": 0.028897345949459415,
+      "grad_norm": 0.07610266655683517,
+      "learning_rate": 6.594632769846353e-06,
+      "loss": 10.3046,
+      "step": 182
+    },
+    {
+      "epoch": 0.029056122575555347,
+      "grad_norm": 0.07157953828573227,
+      "learning_rate": 5.886948579472778e-06,
+      "loss": 10.3041,
+      "step": 183
+    },
+    {
+      "epoch": 0.029214899201651278,
+      "grad_norm": 0.07713142782449722,
+      "learning_rate": 5.218663458397715e-06,
+      "loss": 10.2997,
+      "step": 184
+    },
+    {
+      "epoch": 0.02937367582774721,
+      "grad_norm": 0.07938244193792343,
+      "learning_rate": 4.589960109100444e-06,
+      "loss": 10.3044,
+      "step": 185
+    },
+    {
+      "epoch": 0.029532452453843137,
+      "grad_norm": 0.11273758113384247,
+      "learning_rate": 4.001010412799138e-06,
+      "loss": 10.306,
+      "step": 186
+    },
+    {
+      "epoch": 0.02969122907993907,
+      "grad_norm": 0.08427654206752777,
+      "learning_rate": 3.451975382460109e-06,
+      "loss": 10.3039,
+      "step": 187
+    },
+    {
+      "epoch": 0.029850005706035,
+      "grad_norm": 0.09040991216897964,
+      "learning_rate": 2.9430051187785962e-06,
+      "loss": 10.2942,
+      "step": 188
+    },
+    {
+      "epoch": 0.03000878233213093,
+      "grad_norm": 0.0981961265206337,
+      "learning_rate": 2.4742387691426445e-06,
+      "loss": 10.296,
+      "step": 189
+    },
+    {
+      "epoch": 0.030167558958226863,
+      "grad_norm": 0.06700126826763153,
+      "learning_rate": 2.0458044895916513e-06,
+      "loss": 10.3013,
+      "step": 190
+    },
+    {
+      "epoch": 0.030326335584322794,
+      "grad_norm": 0.07272301614284515,
+      "learning_rate": 1.6578194097797258e-06,
+      "loss": 10.2967,
+      "step": 191
+    },
+    {
+      "epoch": 0.030485112210418722,
+      "grad_norm": 0.10850653797388077,
+      "learning_rate": 1.3103896009537207e-06,
+      "loss": 10.2957,
+      "step": 192
+    },
+    {
+      "epoch": 0.030643888836514654,
+      "grad_norm": 0.08605331182479858,
+      "learning_rate": 1.0036100469542786e-06,
+      "loss": 10.2992,
+      "step": 193
+    },
+    {
+      "epoch": 0.030802665462610585,
+      "grad_norm": 0.08742884546518326,
+      "learning_rate": 7.375646182482875e-07,
+      "loss": 10.2881,
+      "step": 194
+    },
+    {
+      "epoch": 0.030961442088706517,
+      "grad_norm": 0.12425903230905533,
+      "learning_rate": 5.123260489995229e-07,
+      "loss": 10.3004,
+      "step": 195
+    },
+    {
+      "epoch": 0.031120218714802448,
+      "grad_norm": 0.14287036657333374,
+      "learning_rate": 3.2795591718381975e-07,
+      "loss": 10.2931,
+      "step": 196
+    },
+    {
+      "epoch": 0.031278995340898376,
+      "grad_norm": 0.10366120934486389,
+      "learning_rate": 1.8450462775428942e-07,
+      "loss": 10.2972,
+      "step": 197
+    },
+    {
+      "epoch": 0.03143777196699431,
+      "grad_norm": 0.14705786108970642,
+      "learning_rate": 8.201139886109264e-08,
+      "loss": 10.2905,
+      "step": 198
+    },
+    {
+      "epoch": 0.03159654859309024,
+      "grad_norm": 0.10673689842224121,
+      "learning_rate": 2.0504251129649374e-08,
+      "loss": 10.288,
+      "step": 199
+    },
+    {
+      "epoch": 0.03175532521918617,
+      "grad_norm": 0.14931601285934448,
+      "learning_rate": 0.0,
+      "loss": 10.2792,
+      "step": 200
+    },
+    {
+      "epoch": 0.03175532521918617,
+      "eval_loss": 10.293888092041016,
+      "eval_runtime": 0.0926,
+      "eval_samples_per_second": 539.938,
+      "eval_steps_per_second": 75.591,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 22228539801600.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null