Training in progress, step 136, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +122 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1b16313c79bd3f248bf4d438764be663bf73293a82f845bfc82bbdc1c96faa18
 size 97728

 version https://git-lfs.github.com/spec/v1
+oid sha256:b5fcff8219bc1ba7c5267761e9d8e58b26e76bd77b4498ffe3eb4be61d378e5a
 size 97728

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8820d9b006cab186b61503299fdb9b87f1c8bf0451bfe4bbedc210c4bc63254a
 size 212298

 version https://git-lfs.github.com/spec/v1
+oid sha256:1f9cc90d01583459809b77feb1f6a0bc3c1c8d2e63c24f720751cb3d88d193fe
 size 212298

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0126cf7d989a7263b97f1fe2ca3d6bc2827ac39dc2b4674586229158dba72ea3
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:a9288f3ba37e7c624f2b8517ced4aae32ee804313f3587dcfb02d5f13209458f
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d4a2e0d38e4aad78961b54bb99f0a18c11d847ea1d3d3bc12cf223a3f862cf9e
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:6aeb1fbb5e964bbc83fa43b049054867ad1faca8f43271132d241ae074069d5d
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.40317628374801484,
   "eval_steps": 50,
-  "global_step": 119,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -864,6 +864,125 @@
       "learning_rate": 6.326741512198266e-05,
       "loss": 10.346,
       "step": 119
     }
   ],
   "logging_steps": 1,
@@ -883,7 +1002,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 50987546050560.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.46077289571201696,
   "eval_steps": 50,
+  "global_step": 136,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 6.326741512198266e-05,
       "loss": 10.346,
       "step": 119
+    },
+    {
+      "epoch": 0.4065643197458973,
+      "grad_norm": 0.020479971542954445,
+      "learning_rate": 6.197638667498022e-05,
+      "loss": 10.3456,
+      "step": 120
+    },
+    {
+      "epoch": 0.40995235574377975,
+      "grad_norm": 0.023177366703748703,
+      "learning_rate": 6.068932534675913e-05,
+      "loss": 10.3449,
+      "step": 121
+    },
+    {
+      "epoch": 0.41334039174166226,
+      "grad_norm": 0.01870677061378956,
+      "learning_rate": 5.9406623188668055e-05,
+      "loss": 10.346,
+      "step": 122
+    },
+    {
+      "epoch": 0.4167284277395447,
+      "grad_norm": 0.0195186547935009,
+      "learning_rate": 5.812867092421013e-05,
+      "loss": 10.3459,
+      "step": 123
+    },
+    {
+      "epoch": 0.4201164637374272,
+      "grad_norm": 0.02066197618842125,
+      "learning_rate": 5.685585783002493e-05,
+      "loss": 10.3451,
+      "step": 124
+    },
+    {
+      "epoch": 0.4235044997353097,
+      "grad_norm": 0.018614448606967926,
+      "learning_rate": 5.558857161731093e-05,
+      "loss": 10.3456,
+      "step": 125
+    },
+    {
+      "epoch": 0.4268925357331922,
+      "grad_norm": 0.02485392615199089,
+      "learning_rate": 5.4327198313725064e-05,
+      "loss": 10.3456,
+      "step": 126
+    },
+    {
+      "epoch": 0.43028057173107465,
+      "grad_norm": 0.019080353900790215,
+      "learning_rate": 5.307212214579474e-05,
+      "loss": 10.3463,
+      "step": 127
+    },
+    {
+      "epoch": 0.4336686077289571,
+      "grad_norm": 0.02017894946038723,
+      "learning_rate": 5.182372542187895e-05,
+      "loss": 10.3456,
+      "step": 128
+    },
+    {
+      "epoch": 0.4370566437268396,
+      "grad_norm": 0.02289474382996559,
+      "learning_rate": 5.058238841571326e-05,
+      "loss": 10.3443,
+      "step": 129
+    },
+    {
+      "epoch": 0.44044467972472207,
+      "grad_norm": 0.02067600190639496,
+      "learning_rate": 4.934848925057484e-05,
+      "loss": 10.3459,
+      "step": 130
+    },
+    {
+      "epoch": 0.4438327157226046,
+      "grad_norm": 0.02071257308125496,
+      "learning_rate": 4.812240378410248e-05,
+      "loss": 10.3452,
+      "step": 131
+    },
+    {
+      "epoch": 0.44722075172048703,
+      "grad_norm": 0.02436411753296852,
+      "learning_rate": 4.690450549380659e-05,
+      "loss": 10.3449,
+      "step": 132
+    },
+    {
+      "epoch": 0.4506087877183695,
+      "grad_norm": 0.0237566027790308,
+      "learning_rate": 4.569516536330447e-05,
+      "loss": 10.3462,
+      "step": 133
+    },
+    {
+      "epoch": 0.453996823716252,
+      "grad_norm": 0.029039116576313972,
+      "learning_rate": 4.449475176931499e-05,
+      "loss": 10.3455,
+      "step": 134
+    },
+    {
+      "epoch": 0.45738485971413445,
+      "grad_norm": 0.021290864795446396,
+      "learning_rate": 4.3303630369447554e-05,
+      "loss": 10.3446,
+      "step": 135
+    },
+    {
+      "epoch": 0.46077289571201696,
+      "grad_norm": 0.01688864268362522,
+      "learning_rate": 4.212216399081918e-05,
+      "loss": 10.3439,
+      "step": 136
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 58258116182016.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null