Training in progress, step 391, checkpoint

Files changed (4) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7d467248f80e50b7299be4a044d3c34650a4bb3d921ee97e1d2cbac5af5b7fc6
 size 100198584

 version https://git-lfs.github.com/spec/v1
+oid sha256:9efa6add42e5fefba9d56a1afcc7355eca82c2424267276f7b90c9528c7123ba
 size 100198584

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ea91a98de0b3ad51bc037e37c58325d581d5d9a687104d1c0db3b0ba2b46168e
 size 50675604

 version https://git-lfs.github.com/spec/v1
+oid sha256:1fe1f005838acb336ca4fe0ab46eb7f60f522df631c4bef7cf7e92b1a0d3032f
 size 50675604

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:43819302638e139ec073cc0bd87e7c492c70c5966a60959bb8d025a1d3e72519
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:c5a8867b92ac804cb31a1e033b1e487ddd7366d0bc3c5cae4e0f95d48f50a374
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.4235939643347051,
   "eval_steps": 500,
-  "global_step": 386,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2709,6 +2709,41 @@
       "learning_rate": 1.2946979038224414e-05,
       "loss": 1.1242,
       "step": 386
     }
   ],
   "logging_steps": 1,
@@ -2728,7 +2763,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.0076346624661094e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.4290809327846365,
   "eval_steps": 500,
+  "global_step": 391,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.2946979038224414e-05,
       "loss": 1.1242,
       "step": 386
+    },
+    {
+      "epoch": 0.4246913580246914,
+      "grad_norm": 0.16041865944862366,
+      "learning_rate": 1.2922318125770655e-05,
+      "loss": 1.1821,
+      "step": 387
+    },
+    {
+      "epoch": 0.4257887517146776,
+      "grad_norm": 0.13168294727802277,
+      "learning_rate": 1.2897657213316894e-05,
+      "loss": 1.1585,
+      "step": 388
+    },
+    {
+      "epoch": 0.4268861454046639,
+      "grad_norm": 0.11569740623235703,
+      "learning_rate": 1.2872996300863132e-05,
+      "loss": 1.1275,
+      "step": 389
+    },
+    {
+      "epoch": 0.4279835390946502,
+      "grad_norm": 0.14467458426952362,
+      "learning_rate": 1.2848335388409371e-05,
+      "loss": 1.1436,
+      "step": 390
+    },
+    {
+      "epoch": 0.4290809327846365,
+      "grad_norm": 0.11647368967533112,
+      "learning_rate": 1.2823674475955612e-05,
+      "loss": 1.1061,
+      "step": 391
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 4.0602758438180045e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null