Training in progress, step 361, checkpoint

Browse files

Files changed (4) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +500 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:042b5a0f639dd973e6c738774c646addbaf503d17918c491b47ad403e0b2a39c
 size 100198584

 version https://git-lfs.github.com/spec/v1
+oid sha256:8095535d287a5363ea56d6e1a990b793651e0842f91f0e62b2cde7ad796e3c97
 size 100198584

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:50d1debf1340029cef2e33428b5e5847c84e592717f751c91397f761ad1a0810
 size 50675604

 version https://git-lfs.github.com/spec/v1
+oid sha256:165bc7411b625ca298d13e3c88f8745dc09872293291b602919d21265ad0ba3b
 size 50675604

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a4f4a65b0aac0f626b42dcb31648d8e84043a592009a3ff87291b198bc8baeca
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:84cd725a97db95f8de6db3eaffde1002a056efc831c1635a52e2a74854a937e4
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.42370559766231397,
   "eval_steps": 500,
-  "global_step": 290,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2037,6 +2037,503 @@
       "learning_rate": 1.3493150684931508e-05,
       "loss": 1.1981,
       "step": 290
     }
   ],
   "logging_steps": 1,
@@ -2056,7 +2553,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.2650544243205734e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.5274404164003287,
   "eval_steps": 500,
+  "global_step": 361,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.3493150684931508e-05,
       "loss": 1.1981,
       "step": 290
+    },
+    {
+      "epoch": 0.4251666514473564,
+      "grad_norm": 0.09126268327236176,
+      "learning_rate": 1.3458904109589042e-05,
+      "loss": 1.2296,
+      "step": 291
+    },
+    {
+      "epoch": 0.42662770523239885,
+      "grad_norm": 0.09404008835554123,
+      "learning_rate": 1.3424657534246576e-05,
+      "loss": 1.3013,
+      "step": 292
+    },
+    {
+      "epoch": 0.4280887590174413,
+      "grad_norm": 0.09837393462657928,
+      "learning_rate": 1.339041095890411e-05,
+      "loss": 1.2102,
+      "step": 293
+    },
+    {
+      "epoch": 0.4295498128024838,
+      "grad_norm": 0.0978068932890892,
+      "learning_rate": 1.3356164383561646e-05,
+      "loss": 1.1995,
+      "step": 294
+    },
+    {
+      "epoch": 0.43101086658752624,
+      "grad_norm": 0.09333440661430359,
+      "learning_rate": 1.332191780821918e-05,
+      "loss": 1.2284,
+      "step": 295
+    },
+    {
+      "epoch": 0.4324719203725687,
+      "grad_norm": 0.10575850307941437,
+      "learning_rate": 1.3287671232876714e-05,
+      "loss": 1.1442,
+      "step": 296
+    },
+    {
+      "epoch": 0.4339329741576112,
+      "grad_norm": 0.10079637169837952,
+      "learning_rate": 1.3253424657534248e-05,
+      "loss": 1.2114,
+      "step": 297
+    },
+    {
+      "epoch": 0.43539402794265364,
+      "grad_norm": 0.10534002631902695,
+      "learning_rate": 1.3219178082191781e-05,
+      "loss": 1.1894,
+      "step": 298
+    },
+    {
+      "epoch": 0.4368550817276961,
+      "grad_norm": 0.09364209324121475,
+      "learning_rate": 1.3184931506849316e-05,
+      "loss": 1.1565,
+      "step": 299
+    },
+    {
+      "epoch": 0.43831613551273857,
+      "grad_norm": 0.09970992058515549,
+      "learning_rate": 1.3150684931506849e-05,
+      "loss": 1.234,
+      "step": 300
+    },
+    {
+      "epoch": 0.43977718929778103,
+      "grad_norm": 0.10115770250558853,
+      "learning_rate": 1.3116438356164385e-05,
+      "loss": 1.2113,
+      "step": 301
+    },
+    {
+      "epoch": 0.4412382430828235,
+      "grad_norm": 0.10583057254552841,
+      "learning_rate": 1.3082191780821919e-05,
+      "loss": 1.1469,
+      "step": 302
+    },
+    {
+      "epoch": 0.44269929686786597,
+      "grad_norm": 0.10508885979652405,
+      "learning_rate": 1.3047945205479453e-05,
+      "loss": 1.1648,
+      "step": 303
+    },
+    {
+      "epoch": 0.44416035065290843,
+      "grad_norm": 0.09814278036355972,
+      "learning_rate": 1.3013698630136988e-05,
+      "loss": 1.1365,
+      "step": 304
+    },
+    {
+      "epoch": 0.4456214044379509,
+      "grad_norm": 0.0985088050365448,
+      "learning_rate": 1.2979452054794521e-05,
+      "loss": 1.2589,
+      "step": 305
+    },
+    {
+      "epoch": 0.4470824582229933,
+      "grad_norm": 0.10917991399765015,
+      "learning_rate": 1.2945205479452056e-05,
+      "loss": 1.2256,
+      "step": 306
+    },
+    {
+      "epoch": 0.44854351200803577,
+      "grad_norm": 0.10406248271465302,
+      "learning_rate": 1.2910958904109589e-05,
+      "loss": 1.2597,
+      "step": 307
+    },
+    {
+      "epoch": 0.45000456579307824,
+      "grad_norm": 0.09909600764513016,
+      "learning_rate": 1.2876712328767125e-05,
+      "loss": 1.2125,
+      "step": 308
+    },
+    {
+      "epoch": 0.4514656195781207,
+      "grad_norm": 0.10518831014633179,
+      "learning_rate": 1.2842465753424658e-05,
+      "loss": 1.1912,
+      "step": 309
+    },
+    {
+      "epoch": 0.45292667336316317,
+      "grad_norm": 0.09970947355031967,
+      "learning_rate": 1.2808219178082193e-05,
+      "loss": 1.207,
+      "step": 310
+    },
+    {
+      "epoch": 0.45438772714820563,
+      "grad_norm": 0.10252334922552109,
+      "learning_rate": 1.2773972602739728e-05,
+      "loss": 1.2133,
+      "step": 311
+    },
+    {
+      "epoch": 0.4558487809332481,
+      "grad_norm": 0.10719390958547592,
+      "learning_rate": 1.273972602739726e-05,
+      "loss": 1.146,
+      "step": 312
+    },
+    {
+      "epoch": 0.45730983471829056,
+      "grad_norm": 0.11196637153625488,
+      "learning_rate": 1.2705479452054796e-05,
+      "loss": 1.2922,
+      "step": 313
+    },
+    {
+      "epoch": 0.45877088850333303,
+      "grad_norm": 0.11517077684402466,
+      "learning_rate": 1.2671232876712329e-05,
+      "loss": 1.2577,
+      "step": 314
+    },
+    {
+      "epoch": 0.4602319422883755,
+      "grad_norm": 0.09453711658716202,
+      "learning_rate": 1.2636986301369865e-05,
+      "loss": 1.3212,
+      "step": 315
+    },
+    {
+      "epoch": 0.46169299607341796,
+      "grad_norm": 0.1068459153175354,
+      "learning_rate": 1.2602739726027398e-05,
+      "loss": 1.1981,
+      "step": 316
+    },
+    {
+      "epoch": 0.4631540498584604,
+      "grad_norm": 0.0985800251364708,
+      "learning_rate": 1.2568493150684933e-05,
+      "loss": 1.189,
+      "step": 317
+    },
+    {
+      "epoch": 0.4646151036435029,
+      "grad_norm": 0.10522795468568802,
+      "learning_rate": 1.2534246575342466e-05,
+      "loss": 1.2341,
+      "step": 318
+    },
+    {
+      "epoch": 0.46607615742854536,
+      "grad_norm": 0.10663071274757385,
+      "learning_rate": 1.25e-05,
+      "loss": 1.1654,
+      "step": 319
+    },
+    {
+      "epoch": 0.4675372112135878,
+      "grad_norm": 0.10541412234306335,
+      "learning_rate": 1.2465753424657537e-05,
+      "loss": 1.3409,
+      "step": 320
+    },
+    {
+      "epoch": 0.4689982649986303,
+      "grad_norm": 0.10836822539567947,
+      "learning_rate": 1.243150684931507e-05,
+      "loss": 1.2551,
+      "step": 321
+    },
+    {
+      "epoch": 0.4704593187836727,
+      "grad_norm": 0.09797906875610352,
+      "learning_rate": 1.2397260273972605e-05,
+      "loss": 1.1954,
+      "step": 322
+    },
+    {
+      "epoch": 0.47192037256871516,
+      "grad_norm": 0.10791884362697601,
+      "learning_rate": 1.2363013698630138e-05,
+      "loss": 1.2677,
+      "step": 323
+    },
+    {
+      "epoch": 0.47338142635375763,
+      "grad_norm": 0.10603371262550354,
+      "learning_rate": 1.2328767123287673e-05,
+      "loss": 1.1747,
+      "step": 324
+    },
+    {
+      "epoch": 0.4748424801388001,
+      "grad_norm": 0.09947334975004196,
+      "learning_rate": 1.2294520547945206e-05,
+      "loss": 1.24,
+      "step": 325
+    },
+    {
+      "epoch": 0.47630353392384256,
+      "grad_norm": 0.0948692336678505,
+      "learning_rate": 1.226027397260274e-05,
+      "loss": 1.3252,
+      "step": 326
+    },
+    {
+      "epoch": 0.477764587708885,
+      "grad_norm": 0.11293943971395493,
+      "learning_rate": 1.2226027397260273e-05,
+      "loss": 1.2193,
+      "step": 327
+    },
+    {
+      "epoch": 0.4792256414939275,
+      "grad_norm": 0.10320023447275162,
+      "learning_rate": 1.219178082191781e-05,
+      "loss": 1.2418,
+      "step": 328
+    },
+    {
+      "epoch": 0.48068669527896996,
+      "grad_norm": 0.1106739267706871,
+      "learning_rate": 1.2157534246575345e-05,
+      "loss": 1.1499,
+      "step": 329
+    },
+    {
+      "epoch": 0.4821477490640124,
+      "grad_norm": 0.1147918552160263,
+      "learning_rate": 1.2123287671232878e-05,
+      "loss": 1.1246,
+      "step": 330
+    },
+    {
+      "epoch": 0.4836088028490549,
+      "grad_norm": 0.10941941291093826,
+      "learning_rate": 1.2089041095890412e-05,
+      "loss": 1.185,
+      "step": 331
+    },
+    {
+      "epoch": 0.48506985663409735,
+      "grad_norm": 0.10806426405906677,
+      "learning_rate": 1.2054794520547945e-05,
+      "loss": 1.2699,
+      "step": 332
+    },
+    {
+      "epoch": 0.4865309104191398,
+      "grad_norm": 0.09904070943593979,
+      "learning_rate": 1.202054794520548e-05,
+      "loss": 1.1772,
+      "step": 333
+    },
+    {
+      "epoch": 0.4879919642041823,
+      "grad_norm": 0.097396120429039,
+      "learning_rate": 1.1986301369863013e-05,
+      "loss": 1.1753,
+      "step": 334
+    },
+    {
+      "epoch": 0.48945301798922475,
+      "grad_norm": 0.10030350089073181,
+      "learning_rate": 1.195205479452055e-05,
+      "loss": 1.22,
+      "step": 335
+    },
+    {
+      "epoch": 0.4909140717742672,
+      "grad_norm": 0.1039762943983078,
+      "learning_rate": 1.1917808219178084e-05,
+      "loss": 1.2396,
+      "step": 336
+    },
+    {
+      "epoch": 0.4923751255593097,
+      "grad_norm": 0.10109396278858185,
+      "learning_rate": 1.1883561643835617e-05,
+      "loss": 1.185,
+      "step": 337
+    },
+    {
+      "epoch": 0.4938361793443521,
+      "grad_norm": 0.1067412868142128,
+      "learning_rate": 1.1849315068493152e-05,
+      "loss": 1.1804,
+      "step": 338
+    },
+    {
+      "epoch": 0.49529723312939455,
+      "grad_norm": 0.1046527549624443,
+      "learning_rate": 1.1815068493150685e-05,
+      "loss": 1.1996,
+      "step": 339
+    },
+    {
+      "epoch": 0.496758286914437,
+      "grad_norm": 0.10973203927278519,
+      "learning_rate": 1.178082191780822e-05,
+      "loss": 1.1812,
+      "step": 340
+    },
+    {
+      "epoch": 0.4982193406994795,
+      "grad_norm": 0.11020953208208084,
+      "learning_rate": 1.1746575342465753e-05,
+      "loss": 1.2603,
+      "step": 341
+    },
+    {
+      "epoch": 0.49968039448452195,
+      "grad_norm": 0.10667795687913895,
+      "learning_rate": 1.171232876712329e-05,
+      "loss": 1.2421,
+      "step": 342
+    },
+    {
+      "epoch": 0.5011414482695644,
+      "grad_norm": 0.0992070883512497,
+      "learning_rate": 1.1678082191780822e-05,
+      "loss": 1.1995,
+      "step": 343
+    },
+    {
+      "epoch": 0.5026025020546069,
+      "grad_norm": 0.11278413236141205,
+      "learning_rate": 1.1643835616438357e-05,
+      "loss": 1.2166,
+      "step": 344
+    },
+    {
+      "epoch": 0.5040635558396493,
+      "grad_norm": 0.11119436472654343,
+      "learning_rate": 1.1609589041095892e-05,
+      "loss": 1.2551,
+      "step": 345
+    },
+    {
+      "epoch": 0.5055246096246918,
+      "grad_norm": 0.11275441944599152,
+      "learning_rate": 1.1575342465753425e-05,
+      "loss": 1.2082,
+      "step": 346
+    },
+    {
+      "epoch": 0.5069856634097343,
+      "grad_norm": 0.10059484094381332,
+      "learning_rate": 1.154109589041096e-05,
+      "loss": 1.172,
+      "step": 347
+    },
+    {
+      "epoch": 0.5084467171947767,
+      "grad_norm": 0.11156380921602249,
+      "learning_rate": 1.1506849315068493e-05,
+      "loss": 1.1818,
+      "step": 348
+    },
+    {
+      "epoch": 0.5099077709798192,
+      "grad_norm": 0.09762994199991226,
+      "learning_rate": 1.147260273972603e-05,
+      "loss": 1.1674,
+      "step": 349
+    },
+    {
+      "epoch": 0.5113688247648617,
+      "grad_norm": 0.10659226775169373,
+      "learning_rate": 1.1438356164383562e-05,
+      "loss": 1.1513,
+      "step": 350
+    },
+    {
+      "epoch": 0.5128298785499041,
+      "grad_norm": 0.11233004927635193,
+      "learning_rate": 1.1404109589041097e-05,
+      "loss": 1.1637,
+      "step": 351
+    },
+    {
+      "epoch": 0.5142909323349466,
+      "grad_norm": 0.10431814193725586,
+      "learning_rate": 1.1369863013698632e-05,
+      "loss": 1.2148,
+      "step": 352
+    },
+    {
+      "epoch": 0.5157519861199891,
+      "grad_norm": 0.09940113872289658,
+      "learning_rate": 1.1335616438356165e-05,
+      "loss": 1.1836,
+      "step": 353
+    },
+    {
+      "epoch": 0.5172130399050315,
+      "grad_norm": 0.09862768650054932,
+      "learning_rate": 1.1301369863013701e-05,
+      "loss": 1.2356,
+      "step": 354
+    },
+    {
+      "epoch": 0.518674093690074,
+      "grad_norm": 0.10437644273042679,
+      "learning_rate": 1.1267123287671232e-05,
+      "loss": 1.1193,
+      "step": 355
+    },
+    {
+      "epoch": 0.5201351474751165,
+      "grad_norm": 0.09749601781368256,
+      "learning_rate": 1.1232876712328769e-05,
+      "loss": 1.2704,
+      "step": 356
+    },
+    {
+      "epoch": 0.5215962012601589,
+      "grad_norm": 0.1042131707072258,
+      "learning_rate": 1.1198630136986302e-05,
+      "loss": 1.156,
+      "step": 357
+    },
+    {
+      "epoch": 0.5230572550452014,
+      "grad_norm": 0.10424741357564926,
+      "learning_rate": 1.1164383561643837e-05,
+      "loss": 1.2334,
+      "step": 358
+    },
+    {
+      "epoch": 0.5245183088302439,
+      "grad_norm": 0.09913278371095657,
+      "learning_rate": 1.113013698630137e-05,
+      "loss": 1.1721,
+      "step": 359
+    },
+    {
+      "epoch": 0.5259793626152863,
+      "grad_norm": 0.11436072736978531,
+      "learning_rate": 1.1095890410958904e-05,
+      "loss": 1.1675,
+      "step": 360
+    },
+    {
+      "epoch": 0.5274404164003287,
+      "grad_norm": 0.1049785315990448,
+      "learning_rate": 1.1061643835616441e-05,
+      "loss": 1.1788,
+      "step": 361
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 4.0655166024812544e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null