Elfsong committed (verified)
Commit: c7ee26b
Parent: 09650b6

Training in progress, step 400, checkpoint

last-checkpoint/adapter_config.json CHANGED
@@ -20,12 +20,12 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "down_proj",
     "q_proj",
+    "o_proj",
+    "up_proj",
     "gate_proj",
-    "down_proj",
     "k_proj",
-    "up_proj",
-    "o_proj",
     "v_proj"
   ],
   "task_type": "CAUSAL_LM",
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:26bb5c693d335a109ecd3fe60b82014ff8dfa61913d7fee21edd21791598ad29
+oid sha256:43ef35dda0f3a5a508eab117460ff6a331211e06b198dc6e1a315d6d8897b434
 size 83945296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dce9ee9dd4e83e04d18e5f46015db2023307957f40df415905d533b778c1ce59
-size 168149074
+oid sha256:784499b9c57080a9aa835529570368af18b60b12be5d1a140d26af708d454530
+size 168155346
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ece9be991d1c749eb41eeb3d0b4d2b0f0e42672da5226547e851b5cc6a20a704
+oid sha256:35297f49e243ed1d027a26f9d8cc60d7b1b3d88f3cde5bada6803ecb49d4e54c
 size 1064
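
Note: the adapter_model.safetensors, optimizer.pt, and scheduler.pt hunks (and training_args.bin below) only update Git LFS pointer files; the repository tracks each artifact's sha256 oid and byte size, while the binary itself lives in LFS storage. If needed, a downloaded object can be checked against its pointer with a small script like the one below (the helper name is made up for illustration; the oid and size are taken from the optimizer.pt hunk above):

# Illustrative helper: verify a downloaded LFS object against its pointer.
import hashlib
import os

def matches_lfs_pointer(path: str, expected_oid: str, expected_size: int) -> bool:
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_oid and os.path.getsize(path) == expected_size

print(matches_lfs_pointer(
    "last-checkpoint/optimizer.pt",
    "784499b9c57080a9aa835529570368af18b60b12be5d1a140d26af708d454530",
    168155346,
))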
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.04523181304183943,
+  "epoch": 0.09046362608367886,
   "eval_steps": 500,
-  "global_step": 200,
+  "global_step": 400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -287,6 +287,286 @@
       "learning_rate": 4.993395348466544e-05,
       "loss": 0.7012,
       "step": 200
+    },
+    {
+      "epoch": 0.046362608367885416,
+      "grad_norm": 0.28732138872146606,
+      "learning_rate": 4.992718700485085e-05,
+      "loss": 0.7247,
+      "step": 205
+    },
+    {
+      "epoch": 0.047493403693931395,
+      "grad_norm": 0.2657299339771271,
+      "learning_rate": 4.99200911095478e-05,
+      "loss": 0.7247,
+      "step": 210
+    },
+    {
+      "epoch": 0.04862419901997738,
+      "grad_norm": 0.30124104022979736,
+      "learning_rate": 4.991266589252933e-05,
+      "loss": 0.7001,
+      "step": 215
+    },
+    {
+      "epoch": 0.049754994346023367,
+      "grad_norm": 0.3533799946308136,
+      "learning_rate": 4.990491145192049e-05,
+      "loss": 0.7714,
+      "step": 220
+    },
+    {
+      "epoch": 0.05088578967206935,
+      "grad_norm": 0.29441332817077637,
+      "learning_rate": 4.989682789019706e-05,
+      "loss": 0.7338,
+      "step": 225
+    },
+    {
+      "epoch": 0.05201658499811534,
+      "grad_norm": 0.2670339345932007,
+      "learning_rate": 4.988841531418418e-05,
+      "loss": 0.719,
+      "step": 230
+    },
+    {
+      "epoch": 0.053147380324161324,
+      "grad_norm": 0.44572877883911133,
+      "learning_rate": 4.9879673835054955e-05,
+      "loss": 0.7315,
+      "step": 235
+    },
+    {
+      "epoch": 0.05427817565020731,
+      "grad_norm": 0.29553067684173584,
+      "learning_rate": 4.9870603568328985e-05,
+      "loss": 0.7495,
+      "step": 240
+    },
+    {
+      "epoch": 0.055408970976253295,
+      "grad_norm": 0.26393231749534607,
+      "learning_rate": 4.986120463387084e-05,
+      "loss": 0.6637,
+      "step": 245
+    },
+    {
+      "epoch": 0.05653976630229928,
+      "grad_norm": 0.35982418060302734,
+      "learning_rate": 4.985147715588845e-05,
+      "loss": 0.7571,
+      "step": 250
+    },
+    {
+      "epoch": 0.05767056162834527,
+      "grad_norm": 0.38977113366127014,
+      "learning_rate": 4.9841421262931506e-05,
+      "loss": 0.7551,
+      "step": 255
+    },
+    {
+      "epoch": 0.05880135695439125,
+      "grad_norm": 0.28935956954956055,
+      "learning_rate": 4.983103708788972e-05,
+      "loss": 0.7863,
+      "step": 260
+    },
+    {
+      "epoch": 0.05993215228043724,
+      "grad_norm": 0.34443530440330505,
+      "learning_rate": 4.98203247679911e-05,
+      "loss": 0.8106,
+      "step": 265
+    },
+    {
+      "epoch": 0.061062947606483224,
+      "grad_norm": 0.4763427674770355,
+      "learning_rate": 4.980928444480011e-05,
+      "loss": 0.7729,
+      "step": 270
+    },
+    {
+      "epoch": 0.06219374293252921,
+      "grad_norm": 0.2860422730445862,
+      "learning_rate": 4.9797916264215824e-05,
+      "loss": 0.7593,
+      "step": 275
+    },
+    {
+      "epoch": 0.0633245382585752,
+      "grad_norm": 0.28870680928230286,
+      "learning_rate": 4.978622037647e-05,
+      "loss": 0.7574,
+      "step": 280
+    },
+    {
+      "epoch": 0.06445533358462119,
+      "grad_norm": 0.40277180075645447,
+      "learning_rate": 4.9774196936125056e-05,
+      "loss": 0.799,
+      "step": 285
+    },
+    {
+      "epoch": 0.06558612891066717,
+      "grad_norm": 0.3290288746356964,
+      "learning_rate": 4.9761846102072065e-05,
+      "loss": 0.7519,
+      "step": 290
+    },
+    {
+      "epoch": 0.06671692423671316,
+      "grad_norm": 0.3139791190624237,
+      "learning_rate": 4.9749168037528635e-05,
+      "loss": 0.6837,
+      "step": 295
+    },
+    {
+      "epoch": 0.06784771956275915,
+      "grad_norm": 0.30802035331726074,
+      "learning_rate": 4.9736162910036785e-05,
+      "loss": 0.7662,
+      "step": 300
+    },
+    {
+      "epoch": 0.06897851488880513,
+      "grad_norm": 0.34561124444007874,
+      "learning_rate": 4.972283089146067e-05,
+      "loss": 0.6897,
+      "step": 305
+    },
+    {
+      "epoch": 0.07010931021485112,
+      "grad_norm": 0.3372039198875427,
+      "learning_rate": 4.970917215798438e-05,
+      "loss": 0.7344,
+      "step": 310
+    },
+    {
+      "epoch": 0.0712401055408971,
+      "grad_norm": 0.41160914301872253,
+      "learning_rate": 4.9695186890109567e-05,
+      "loss": 0.832,
+      "step": 315
+    },
+    {
+      "epoch": 0.07237090086694309,
+      "grad_norm": 0.2914057672023773,
+      "learning_rate": 4.968087527265306e-05,
+      "loss": 0.7113,
+      "step": 320
+    },
+    {
+      "epoch": 0.07350169619298907,
+      "grad_norm": 0.3247675597667694,
+      "learning_rate": 4.966623749474445e-05,
+      "loss": 0.6996,
+      "step": 325
+    },
+    {
+      "epoch": 0.07463249151903506,
+      "grad_norm": 0.435735285282135,
+      "learning_rate": 4.9651273749823546e-05,
+      "loss": 0.8236,
+      "step": 330
+    },
+    {
+      "epoch": 0.07576328684508105,
+      "grad_norm": 0.3213053047657013,
+      "learning_rate": 4.963598423563788e-05,
+      "loss": 0.7012,
+      "step": 335
+    },
+    {
+      "epoch": 0.07689408217112703,
+      "grad_norm": 0.3745056390762329,
+      "learning_rate": 4.962036915424004e-05,
+      "loss": 0.7018,
+      "step": 340
+    },
+    {
+      "epoch": 0.07802487749717302,
+      "grad_norm": 0.28368842601776123,
+      "learning_rate": 4.960442871198503e-05,
+      "loss": 0.7084,
+      "step": 345
+    },
+    {
+      "epoch": 0.079155672823219,
+      "grad_norm": 0.2621799409389496,
+      "learning_rate": 4.958816311952752e-05,
+      "loss": 0.7217,
+      "step": 350
+    },
+    {
+      "epoch": 0.08028646814926499,
+      "grad_norm": 0.25561287999153137,
+      "learning_rate": 4.95715725918191e-05,
+      "loss": 0.7616,
+      "step": 355
+    },
+    {
+      "epoch": 0.08141726347531097,
+      "grad_norm": 0.3495071828365326,
+      "learning_rate": 4.9554657348105385e-05,
+      "loss": 0.7061,
+      "step": 360
+    },
+    {
+      "epoch": 0.08254805880135696,
+      "grad_norm": 0.3490068018436432,
+      "learning_rate": 4.953741761192317e-05,
+      "loss": 0.7809,
+      "step": 365
+    },
+    {
+      "epoch": 0.08367885412740295,
+      "grad_norm": 0.39416739344596863,
+      "learning_rate": 4.9519853611097434e-05,
+      "loss": 0.7282,
+      "step": 370
+    },
+    {
+      "epoch": 0.08480964945344893,
+      "grad_norm": 0.2763444185256958,
+      "learning_rate": 4.950196557773837e-05,
+      "loss": 0.7262,
+      "step": 375
+    },
+    {
+      "epoch": 0.08594044477949492,
+      "grad_norm": 0.29107871651649475,
+      "learning_rate": 4.948375374823828e-05,
+      "loss": 0.7346,
+      "step": 380
+    },
+    {
+      "epoch": 0.0870712401055409,
+      "grad_norm": 0.28965339064598083,
+      "learning_rate": 4.946521836326847e-05,
+      "loss": 0.6768,
+      "step": 385
+    },
+    {
+      "epoch": 0.08820203543158689,
+      "grad_norm": 0.31072792410850525,
+      "learning_rate": 4.9446359667776065e-05,
+      "loss": 0.7277,
+      "step": 390
+    },
+    {
+      "epoch": 0.08933283075763288,
+      "grad_norm": 0.2789427936077118,
+      "learning_rate": 4.9427177910980794e-05,
+      "loss": 0.7481,
+      "step": 395
+    },
+    {
+      "epoch": 0.09046362608367886,
+      "grad_norm": 0.2573710083961487,
+      "learning_rate": 4.9407673346371644e-05,
+      "loss": 0.7077,
+      "step": 400
     }
   ],
   "logging_steps": 5,
@@ -306,7 +586,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.1310339876808294e+17,
+  "total_flos": 4.2708606055664845e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9319d9c079514e379e4c3967b718f85a19bc1f8b61112bad04a43a46d5d6afe2
-size 5752
+oid sha256:c29b6114cee3a1eb0c6657320d373e2561ec03a011bc688ec4cc2b0b164a6831
+size 5816