Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e33ea43f9beb2291bf71df807ff47d3a72df49f4c4c7a14c248564945689fbab
 size 30322120

 version https://git-lfs.github.com/spec/v1
+oid sha256:5cfbe704ce4276472f1f20979a6ea0043f95207eac2138f2d66cab213105345f
 size 30322120

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6f356c448a2f7dbaa0068d81e6808e9d0f1590d6837289593aed4c0c642f3a9e
 size 60837186

 version https://git-lfs.github.com/spec/v1
+oid sha256:2876946d36e7a06fa92ac4d7fe33adc6e244c5e621bbe68dcdbde5d691b7cb4d
 size 60837186

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4341bcfa5b25340682a457b6fcbb20cc8c5b2e7d8edaf7fad67bc2dc77fd767d
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:af6a4b820f4231a631cf38cad86ec74ec053a4da9eb416b7574c925fb8aa6ddc
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1900bd8021f13c38b942ed30aea6e2cea1b47664e4ce28d0276b142334732307
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:f2c4a11c3ec7ace2e963dc6e2b0b5b6372cc0250cefb36d5f7289475908638cb
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 4.675076484680176,
-  "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.01897083234526915,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -373,6 +373,364 @@
       "eval_samples_per_second": 63.413,
       "eval_steps_per_second": 15.857,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -396,12 +754,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 3106157258342400.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 4.415204048156738,
+  "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 0.0379416646905383,
   "eval_steps": 50,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 63.413,
       "eval_steps_per_second": 15.857,
       "step": 50
+    },
+    {
+      "epoch": 0.01935024899217453,
+      "grad_norm": 3.7669692039489746,
+      "learning_rate": 2.847932752400164e-06,
+      "loss": 4.0896,
+      "step": 51
+    },
+    {
+      "epoch": 0.019729665639079914,
+      "grad_norm": 4.779418468475342,
+      "learning_rate": 2.761321158169134e-06,
+      "loss": 3.8922,
+      "step": 52
+    },
+    {
+      "epoch": 0.0201090822859853,
+      "grad_norm": 5.031223297119141,
+      "learning_rate": 2.6743911843603134e-06,
+      "loss": 4.1811,
+      "step": 53
+    },
+    {
+      "epoch": 0.020488498932890682,
+      "grad_norm": 4.953243255615234,
+      "learning_rate": 2.587248741756253e-06,
+      "loss": 3.7881,
+      "step": 54
+    },
+    {
+      "epoch": 0.020867915579796063,
+      "grad_norm": 5.7064208984375,
+      "learning_rate": 2.5e-06,
+      "loss": 4.0777,
+      "step": 55
+    },
+    {
+      "epoch": 0.021247332226701447,
+      "grad_norm": 5.718916893005371,
+      "learning_rate": 2.4127512582437486e-06,
+      "loss": 4.1119,
+      "step": 56
+    },
+    {
+      "epoch": 0.02162674887360683,
+      "grad_norm": 6.402388572692871,
+      "learning_rate": 2.325608815639687e-06,
+      "loss": 4.0965,
+      "step": 57
+    },
+    {
+      "epoch": 0.02200616552051221,
+      "grad_norm": 6.114238739013672,
+      "learning_rate": 2.238678841830867e-06,
+      "loss": 4.1586,
+      "step": 58
+    },
+    {
+      "epoch": 0.022385582167417595,
+      "grad_norm": 5.461117267608643,
+      "learning_rate": 2.1520672475998374e-06,
+      "loss": 3.6969,
+      "step": 59
+    },
+    {
+      "epoch": 0.02276499881432298,
+      "grad_norm": 6.01952600479126,
+      "learning_rate": 2.0658795558326745e-06,
+      "loss": 4.0374,
+      "step": 60
+    },
+    {
+      "epoch": 0.023144415461228363,
+      "grad_norm": 6.108837604522705,
+      "learning_rate": 1.9802207729556023e-06,
+      "loss": 4.3124,
+      "step": 61
+    },
+    {
+      "epoch": 0.023523832108133743,
+      "grad_norm": 6.704663276672363,
+      "learning_rate": 1.895195261000831e-06,
+      "loss": 3.8151,
+      "step": 62
+    },
+    {
+      "epoch": 0.023903248755039127,
+      "grad_norm": 6.462284088134766,
+      "learning_rate": 1.8109066104575023e-06,
+      "loss": 4.3228,
+      "step": 63
+    },
+    {
+      "epoch": 0.02428266540194451,
+      "grad_norm": 7.066595554351807,
+      "learning_rate": 1.7274575140626318e-06,
+      "loss": 4.1603,
+      "step": 64
+    },
+    {
+      "epoch": 0.024662082048849895,
+      "grad_norm": 6.231473922729492,
+      "learning_rate": 1.6449496416858285e-06,
+      "loss": 4.3381,
+      "step": 65
+    },
+    {
+      "epoch": 0.025041498695755275,
+      "grad_norm": 6.865777492523193,
+      "learning_rate": 1.56348351646022e-06,
+      "loss": 3.7183,
+      "step": 66
+    },
+    {
+      "epoch": 0.02542091534266066,
+      "grad_norm": 6.609790802001953,
+      "learning_rate": 1.4831583923105e-06,
+      "loss": 4.5205,
+      "step": 67
+    },
+    {
+      "epoch": 0.025800331989566043,
+      "grad_norm": 6.334497928619385,
+      "learning_rate": 1.4040721330273063e-06,
+      "loss": 4.3837,
+      "step": 68
+    },
+    {
+      "epoch": 0.026179748636471427,
+      "grad_norm": 7.466740608215332,
+      "learning_rate": 1.3263210930352737e-06,
+      "loss": 4.3184,
+      "step": 69
+    },
+    {
+      "epoch": 0.026559165283376807,
+      "grad_norm": 7.18544340133667,
+      "learning_rate": 1.2500000000000007e-06,
+      "loss": 4.3352,
+      "step": 70
+    },
+    {
+      "epoch": 0.02693858193028219,
+      "grad_norm": 6.762673377990723,
+      "learning_rate": 1.1752018394169882e-06,
+      "loss": 4.504,
+      "step": 71
+    },
+    {
+      "epoch": 0.027317998577187575,
+      "grad_norm": 5.527563095092773,
+      "learning_rate": 1.1020177413231334e-06,
+      "loss": 4.1255,
+      "step": 72
+    },
+    {
+      "epoch": 0.027697415224092956,
+      "grad_norm": 6.735006332397461,
+      "learning_rate": 1.0305368692688175e-06,
+      "loss": 4.3378,
+      "step": 73
+    },
+    {
+      "epoch": 0.02807683187099834,
+      "grad_norm": 6.313520431518555,
+      "learning_rate": 9.608463116858544e-07,
+      "loss": 3.8856,
+      "step": 74
+    },
+    {
+      "epoch": 0.028456248517903723,
+      "grad_norm": 6.748076915740967,
+      "learning_rate": 8.930309757836517e-07,
+      "loss": 4.1941,
+      "step": 75
+    },
+    {
+      "epoch": 0.028835665164809107,
+      "grad_norm": 7.661444187164307,
+      "learning_rate": 8.271734841028553e-07,
+      "loss": 4.2481,
+      "step": 76
+    },
+    {
+      "epoch": 0.029215081811714488,
+      "grad_norm": 7.217004299163818,
+      "learning_rate": 7.633540738525066e-07,
+      "loss": 4.3925,
+      "step": 77
+    },
+    {
+      "epoch": 0.02959449845861987,
+      "grad_norm": 7.152622222900391,
+      "learning_rate": 7.016504991533727e-07,
+      "loss": 3.8941,
+      "step": 78
+    },
+    {
+      "epoch": 0.029973915105525255,
+      "grad_norm": 6.765809059143066,
+      "learning_rate": 6.421379363065142e-07,
+      "loss": 4.5282,
+      "step": 79
+    },
+    {
+      "epoch": 0.03035333175243064,
+      "grad_norm": 6.723090171813965,
+      "learning_rate": 5.848888922025553e-07,
+      "loss": 4.3739,
+      "step": 80
+    },
+    {
+      "epoch": 0.03073274839933602,
+      "grad_norm": 7.00840950012207,
+      "learning_rate": 5.299731159831953e-07,
+      "loss": 4.2958,
+      "step": 81
+    },
+    {
+      "epoch": 0.031112165046241404,
+      "grad_norm": 6.291935443878174,
+      "learning_rate": 4.774575140626317e-07,
+      "loss": 3.9951,
+      "step": 82
+    },
+    {
+      "epoch": 0.03149158169314679,
+      "grad_norm": 6.735934734344482,
+      "learning_rate": 4.27406068612396e-07,
+      "loss": 4.4475,
+      "step": 83
+    },
+    {
+      "epoch": 0.03187099834005217,
+      "grad_norm": 7.057698726654053,
+      "learning_rate": 3.798797596089351e-07,
+      "loss": 4.248,
+      "step": 84
+    },
+    {
+      "epoch": 0.032250414986957555,
+      "grad_norm": 7.530543804168701,
+      "learning_rate": 3.3493649053890325e-07,
+      "loss": 4.8054,
+      "step": 85
+    },
+    {
+      "epoch": 0.03262983163386293,
+      "grad_norm": 11.086931228637695,
+      "learning_rate": 2.9263101785268253e-07,
+      "loss": 4.5513,
+      "step": 86
+    },
+    {
+      "epoch": 0.033009248280768316,
+      "grad_norm": 6.664738655090332,
+      "learning_rate": 2.53014884252083e-07,
+      "loss": 4.3811,
+      "step": 87
+    },
+    {
+      "epoch": 0.0333886649276737,
+      "grad_norm": 7.505542278289795,
+      "learning_rate": 2.1613635589349756e-07,
+      "loss": 4.5977,
+      "step": 88
+    },
+    {
+      "epoch": 0.033768081574579084,
+      "grad_norm": 7.716992378234863,
+      "learning_rate": 1.8204036358303173e-07,
+      "loss": 4.8648,
+      "step": 89
+    },
+    {
+      "epoch": 0.03414749822148447,
+      "grad_norm": 9.030020713806152,
+      "learning_rate": 1.507684480352292e-07,
+      "loss": 4.5738,
+      "step": 90
+    },
+    {
+      "epoch": 0.03452691486838985,
+      "grad_norm": 7.875784873962402,
+      "learning_rate": 1.223587092621162e-07,
+      "loss": 4.5702,
+      "step": 91
+    },
+    {
+      "epoch": 0.034906331515295236,
+      "grad_norm": 6.821437358856201,
+      "learning_rate": 9.684576015420277e-08,
+      "loss": 4.7577,
+      "step": 92
+    },
+    {
+      "epoch": 0.03528574816220062,
+      "grad_norm": 7.495672225952148,
+      "learning_rate": 7.426068431000883e-08,
+      "loss": 4.5927,
+      "step": 93
+    },
+    {
+      "epoch": 0.035665164809106,
+      "grad_norm": 8.056374549865723,
+      "learning_rate": 5.463099816548578e-08,
+      "loss": 4.8237,
+      "step": 94
+    },
+    {
+      "epoch": 0.03604458145601138,
+      "grad_norm": 8.059727668762207,
+      "learning_rate": 3.798061746947995e-08,
+      "loss": 5.0902,
+      "step": 95
+    },
+    {
+      "epoch": 0.036423998102916764,
+      "grad_norm": 8.202387809753418,
+      "learning_rate": 2.4329828146074096e-08,
+      "loss": 4.8697,
+      "step": 96
+    },
+    {
+      "epoch": 0.03680341474982215,
+      "grad_norm": 8.158020973205566,
+      "learning_rate": 1.3695261579316776e-08,
+      "loss": 4.7059,
+      "step": 97
+    },
+    {
+      "epoch": 0.03718283139672753,
+      "grad_norm": 8.748244285583496,
+      "learning_rate": 6.089874350439507e-09,
+      "loss": 4.7764,
+      "step": 98
+    },
+    {
+      "epoch": 0.037562248043632916,
+      "grad_norm": 8.805473327636719,
+      "learning_rate": 1.5229324522605949e-09,
+      "loss": 5.3301,
+      "step": 99
+    },
+    {
+      "epoch": 0.0379416646905383,
+      "grad_norm": 8.97905158996582,
+      "learning_rate": 0.0,
+      "loss": 5.5488,
+      "step": 100
+    },
+    {
+      "epoch": 0.0379416646905383,
+      "eval_loss": 4.415204048156738,
+      "eval_runtime": 70.0282,
+      "eval_samples_per_second": 63.389,
+      "eval_steps_per_second": 15.851,
+      "step": 100
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 6212314516684800.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null