Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +362 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cb024c15a7553c9000020231a7cec1cd8bffb5ddfdf61368baa8efa85f70b5c8
 size 81576

 version https://git-lfs.github.com/spec/v1
+oid sha256:3a583b8747eb17334382f8aaa2f03ee21bb91655438cbe70bad9246f230fba32
 size 81576

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9f11ffbb6b1be04e1d893c75bb30bad865cd6cb3ea9e1851a5b4654d086d6a4c
 size 173094

 version https://git-lfs.github.com/spec/v1
+oid sha256:675a597eb1945bf59497ad766e676f886be84d7c1135ecdbbf982d265a485f1d
 size 173094

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a2fb8dc2a85d0cc09b3d729bf71a8da974bf095341dbe298d83978634de38d32
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:982eb43f0b685032f5d229850fc4c6116b1560071673bb981b13063a46a6b3e1
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fd5d42bb0afda20ec4c83d38c6af1131541c335ecab229c74e7f418894f3c13b
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:ca9a25c72339c898b564e0c464a3f6fc75bbeec408008928b7ed05533156b98c
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.10131712259371833,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 276.357,
       "eval_steps_per_second": 138.179,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1103,12 +1461,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 24608872857600.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.13508949679162446,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 276.357,
       "eval_steps_per_second": 138.179,
       "step": 150
+    },
+    {
+      "epoch": 0.10199257007767647,
+      "grad_norm": 0.1630678027868271,
+      "learning_rate": 3.106465908814342e-05,
+      "loss": 10.6025,
+      "step": 151
+    },
+    {
+      "epoch": 0.10266801756163459,
+      "grad_norm": 0.15854433178901672,
+      "learning_rate": 2.9876321572751144e-05,
+      "loss": 10.589,
+      "step": 152
+    },
+    {
+      "epoch": 0.1033434650455927,
+      "grad_norm": 0.15652941167354584,
+      "learning_rate": 2.87071551708603e-05,
+      "loss": 10.6022,
+      "step": 153
+    },
+    {
+      "epoch": 0.10401891252955082,
+      "grad_norm": 0.14386983215808868,
+      "learning_rate": 2.7557479520891104e-05,
+      "loss": 10.5974,
+      "step": 154
+    },
+    {
+      "epoch": 0.10469436001350894,
+      "grad_norm": 0.17749740183353424,
+      "learning_rate": 2.6427608932686843e-05,
+      "loss": 10.6112,
+      "step": 155
+    },
+    {
+      "epoch": 0.10536980749746708,
+      "grad_norm": 0.1529720574617386,
+      "learning_rate": 2.5317852301584643e-05,
+      "loss": 10.6075,
+      "step": 156
+    },
+    {
+      "epoch": 0.1060452549814252,
+      "grad_norm": 0.16465173661708832,
+      "learning_rate": 2.422851302396655e-05,
+      "loss": 10.5917,
+      "step": 157
+    },
+    {
+      "epoch": 0.10672070246538332,
+      "grad_norm": 0.148993581533432,
+      "learning_rate": 2.315988891431412e-05,
+      "loss": 10.5951,
+      "step": 158
+    },
+    {
+      "epoch": 0.10739614994934144,
+      "grad_norm": 0.17219752073287964,
+      "learning_rate": 2.2112272123788768e-05,
+      "loss": 10.5877,
+      "step": 159
+    },
+    {
+      "epoch": 0.10807159743329955,
+      "grad_norm": 0.17853744328022003,
+      "learning_rate": 2.1085949060360654e-05,
+      "loss": 10.5945,
+      "step": 160
+    },
+    {
+      "epoch": 0.10874704491725769,
+      "grad_norm": 0.16907915472984314,
+      "learning_rate": 2.008120031050753e-05,
+      "loss": 10.5967,
+      "step": 161
+    },
+    {
+      "epoch": 0.1094224924012158,
+      "grad_norm": 0.18723782896995544,
+      "learning_rate": 1.9098300562505266e-05,
+      "loss": 10.5955,
+      "step": 162
+    },
+    {
+      "epoch": 0.11009793988517393,
+      "grad_norm": 0.18115279078483582,
+      "learning_rate": 1.8137518531330767e-05,
+      "loss": 10.6003,
+      "step": 163
+    },
+    {
+      "epoch": 0.11077338736913205,
+      "grad_norm": 0.14582610130310059,
+      "learning_rate": 1.7199116885197995e-05,
+      "loss": 10.6084,
+      "step": 164
+    },
+    {
+      "epoch": 0.11144883485309018,
+      "grad_norm": 0.16853328049182892,
+      "learning_rate": 1.6283352173747145e-05,
+      "loss": 10.6109,
+      "step": 165
+    },
+    {
+      "epoch": 0.1121242823370483,
+      "grad_norm": 0.16580811142921448,
+      "learning_rate": 1.5390474757906446e-05,
+      "loss": 10.6031,
+      "step": 166
+    },
+    {
+      "epoch": 0.11279972982100642,
+      "grad_norm": 0.1782258301973343,
+      "learning_rate": 1.4520728741446089e-05,
+      "loss": 10.5978,
+      "step": 167
+    },
+    {
+      "epoch": 0.11347517730496454,
+      "grad_norm": 0.16206084191799164,
+      "learning_rate": 1.3674351904242611e-05,
+      "loss": 10.5997,
+      "step": 168
+    },
+    {
+      "epoch": 0.11415062478892266,
+      "grad_norm": 0.1567702740430832,
+      "learning_rate": 1.2851575637272262e-05,
+      "loss": 10.6037,
+      "step": 169
+    },
+    {
+      "epoch": 0.11482607227288079,
+      "grad_norm": 0.174870565533638,
+      "learning_rate": 1.2052624879351104e-05,
+      "loss": 10.5948,
+      "step": 170
+    },
+    {
+      "epoch": 0.11550151975683891,
+      "grad_norm": 0.15422426164150238,
+      "learning_rate": 1.1277718055638819e-05,
+      "loss": 10.5984,
+      "step": 171
+    },
+    {
+      "epoch": 0.11617696724079703,
+      "grad_norm": 0.17559370398521423,
+      "learning_rate": 1.0527067017923654e-05,
+      "loss": 10.5945,
+      "step": 172
+    },
+    {
+      "epoch": 0.11685241472475515,
+      "grad_norm": 0.1844927966594696,
+      "learning_rate": 9.80087698670411e-06,
+      "loss": 10.5987,
+      "step": 173
+    },
+    {
+      "epoch": 0.11752786220871327,
+      "grad_norm": 0.16079100966453552,
+      "learning_rate": 9.09934649508375e-06,
+      "loss": 10.6051,
+      "step": 174
+    },
+    {
+      "epoch": 0.1182033096926714,
+      "grad_norm": 0.16014216840267181,
+      "learning_rate": 8.422667334494249e-06,
+      "loss": 10.596,
+      "step": 175
+    },
+    {
+      "epoch": 0.11887875717662952,
+      "grad_norm": 0.16521821916103363,
+      "learning_rate": 7.771024502261526e-06,
+      "loss": 10.6092,
+      "step": 176
+    },
+    {
+      "epoch": 0.11955420466058764,
+      "grad_norm": 0.1629599630832672,
+      "learning_rate": 7.144596151029303e-06,
+      "loss": 10.5984,
+      "step": 177
+    },
+    {
+      "epoch": 0.12022965214454576,
+      "grad_norm": 0.16382640600204468,
+      "learning_rate": 6.543553540053926e-06,
+      "loss": 10.5851,
+      "step": 178
+    },
+    {
+      "epoch": 0.12090509962850389,
+      "grad_norm": 0.15883676707744598,
+      "learning_rate": 5.968060988383883e-06,
+      "loss": 10.5956,
+      "step": 179
+    },
+    {
+      "epoch": 0.12158054711246201,
+      "grad_norm": 0.184475377202034,
+      "learning_rate": 5.418275829936537e-06,
+      "loss": 10.589,
+      "step": 180
+    },
+    {
+      "epoch": 0.12225599459642013,
+      "grad_norm": 0.19880411028862,
+      "learning_rate": 4.8943483704846475e-06,
+      "loss": 10.581,
+      "step": 181
+    },
+    {
+      "epoch": 0.12293144208037825,
+      "grad_norm": 0.16972365975379944,
+      "learning_rate": 4.3964218465642355e-06,
+      "loss": 10.5909,
+      "step": 182
+    },
+    {
+      "epoch": 0.12360688956433637,
+      "grad_norm": 0.17554166913032532,
+      "learning_rate": 3.924632386315186e-06,
+      "loss": 10.5855,
+      "step": 183
+    },
+    {
+      "epoch": 0.1242823370482945,
+      "grad_norm": 0.15895400941371918,
+      "learning_rate": 3.4791089722651436e-06,
+      "loss": 10.6027,
+      "step": 184
+    },
+    {
+      "epoch": 0.12495778453225262,
+      "grad_norm": 0.15381865203380585,
+      "learning_rate": 3.059973406066963e-06,
+      "loss": 10.6043,
+      "step": 185
+    },
+    {
+      "epoch": 0.12563323201621074,
+      "grad_norm": 0.1588447093963623,
+      "learning_rate": 2.667340275199426e-06,
+      "loss": 10.5858,
+      "step": 186
+    },
+    {
+      "epoch": 0.12630867950016886,
+      "grad_norm": 0.170160710811615,
+      "learning_rate": 2.3013169216400733e-06,
+      "loss": 10.5963,
+      "step": 187
+    },
+    {
+      "epoch": 0.12698412698412698,
+      "grad_norm": 0.19386546313762665,
+      "learning_rate": 1.9620034125190644e-06,
+      "loss": 10.5906,
+      "step": 188
+    },
+    {
+      "epoch": 0.1276595744680851,
+      "grad_norm": 0.15733763575553894,
+      "learning_rate": 1.6494925127617634e-06,
+      "loss": 10.5984,
+      "step": 189
+    },
+    {
+      "epoch": 0.12833502195204322,
+      "grad_norm": 0.1530665159225464,
+      "learning_rate": 1.3638696597277679e-06,
+      "loss": 10.5911,
+      "step": 190
+    },
+    {
+      "epoch": 0.12901046943600136,
+      "grad_norm": 0.1876905858516693,
+      "learning_rate": 1.1052129398531507e-06,
+      "loss": 10.6023,
+      "step": 191
+    },
+    {
+      "epoch": 0.12968591691995948,
+      "grad_norm": 0.1700376272201538,
+      "learning_rate": 8.735930673024806e-07,
+      "loss": 10.5979,
+      "step": 192
+    },
+    {
+      "epoch": 0.1303613644039176,
+      "grad_norm": 0.18517427146434784,
+      "learning_rate": 6.690733646361857e-07,
+      "loss": 10.5894,
+      "step": 193
+    },
+    {
+      "epoch": 0.13103681188787572,
+      "grad_norm": 0.16964897513389587,
+      "learning_rate": 4.917097454988584e-07,
+      "loss": 10.5996,
+      "step": 194
+    },
+    {
+      "epoch": 0.13171225937183384,
+      "grad_norm": 0.1931840479373932,
+      "learning_rate": 3.415506993330153e-07,
+      "loss": 10.5854,
+      "step": 195
+    },
+    {
+      "epoch": 0.13238770685579196,
+      "grad_norm": 0.15033087134361267,
+      "learning_rate": 2.1863727812254653e-07,
+      "loss": 10.6238,
+      "step": 196
+    },
+    {
+      "epoch": 0.13306315433975008,
+      "grad_norm": 0.1763419657945633,
+      "learning_rate": 1.230030851695263e-07,
+      "loss": 10.5943,
+      "step": 197
+    },
+    {
+      "epoch": 0.1337386018237082,
+      "grad_norm": 0.17355754971504211,
+      "learning_rate": 5.467426590739511e-08,
+      "loss": 10.5912,
+      "step": 198
+    },
+    {
+      "epoch": 0.13441404930766632,
+      "grad_norm": 0.1702238917350769,
+      "learning_rate": 1.3669500753099585e-08,
+      "loss": 10.6278,
+      "step": 199
+    },
+    {
+      "epoch": 0.13508949679162446,
+      "grad_norm": 0.15678980946540833,
+      "learning_rate": 0.0,
+      "loss": 10.5993,
+      "step": 200
+    },
+    {
+      "epoch": 0.13508949679162446,
+      "eval_loss": 10.595317840576172,
+      "eval_runtime": 2.2488,
+      "eval_samples_per_second": 277.475,
+      "eval_steps_per_second": 138.738,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 32811830476800.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null