Training in progress, step 676, checkpoint

Browse files

Files changed (4) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +73 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0cc5125f76d6dbaaf8f53b7058f9db944f682d3b54268cdcab102643bbb5c715
 size 100198584

 version https://git-lfs.github.com/spec/v1
+oid sha256:47c1e90dbecf9635856d092c6cddea8202536da475af28c5df57f9a15b232128
 size 100198584

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2353d647b00d14aca06f778f9d43a65a8201b5792af6fb89150d357af16ee31c
 size 50675604

 version https://git-lfs.github.com/spec/v1
+oid sha256:99bb6870709dc61c780604e0ba8b8967af8b3b68a8fe57d1de47cc64ae9e2f69
 size 50675604

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e4dfc30b5618d8f9126fd758d49456abcb3bac7a76ca1747eea78894ae958013
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:9967ef256882f56c127a1407616df2fb585de0b861d9905ab72b987597cec7ec
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.7308641975308642,
   "eval_steps": 500,
-  "global_step": 666,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -4669,6 +4669,76 @@
       "learning_rate": 6.041923551171394e-06,
       "loss": 1.2012,
       "step": 666
     }
   ],
   "logging_steps": 1,
@@ -4688,7 +4758,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 6.904814083900785e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.741838134430727,
   "eval_steps": 500,
+  "global_step": 676,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 6.041923551171394e-06,
       "loss": 1.2012,
       "step": 666
+    },
+    {
+      "epoch": 0.7319615912208505,
+      "grad_norm": 0.14305201172828674,
+      "learning_rate": 6.017262638717633e-06,
+      "loss": 1.2075,
+      "step": 667
+    },
+    {
+      "epoch": 0.7330589849108368,
+      "grad_norm": 0.1388700008392334,
+      "learning_rate": 5.9926017262638725e-06,
+      "loss": 1.1049,
+      "step": 668
+    },
+    {
+      "epoch": 0.7341563786008231,
+      "grad_norm": 0.13110363483428955,
+      "learning_rate": 5.967940813810111e-06,
+      "loss": 1.1915,
+      "step": 669
+    },
+    {
+      "epoch": 0.7352537722908093,
+      "grad_norm": 0.1336205154657364,
+      "learning_rate": 5.94327990135635e-06,
+      "loss": 1.1189,
+      "step": 670
+    },
+    {
+      "epoch": 0.7363511659807956,
+      "grad_norm": 0.15483205020427704,
+      "learning_rate": 5.91861898890259e-06,
+      "loss": 1.0508,
+      "step": 671
+    },
+    {
+      "epoch": 0.7374485596707819,
+      "grad_norm": 0.1405985802412033,
+      "learning_rate": 5.893958076448829e-06,
+      "loss": 1.1348,
+      "step": 672
+    },
+    {
+      "epoch": 0.7385459533607682,
+      "grad_norm": 0.13037075102329254,
+      "learning_rate": 5.869297163995068e-06,
+      "loss": 1.1437,
+      "step": 673
+    },
+    {
+      "epoch": 0.7396433470507544,
+      "grad_norm": 0.12945199012756348,
+      "learning_rate": 5.844636251541308e-06,
+      "loss": 1.1265,
+      "step": 674
+    },
+    {
+      "epoch": 0.7407407407407407,
+      "grad_norm": 0.1295364648103714,
+      "learning_rate": 5.8199753390875466e-06,
+      "loss": 1.1266,
+      "step": 675
+    },
+    {
+      "epoch": 0.741838134430727,
+      "grad_norm": 0.12387209385633469,
+      "learning_rate": 5.7953144266337855e-06,
+      "loss": 1.184,
+      "step": 676
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 7.010987710203372e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null