Training in progress, step 100, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5b747b902cf488c338b42395ca734abd9b9cbad0638c2424bff16e17dbf15acd
 size 83945296

 version https://git-lfs.github.com/spec/v1
+oid sha256:1f2452201b1a1c07a46260c67f6c5cb715cce3500579c5ec73919b798ace8d03
 size 83945296

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2b1207ad6bcf1979dbaa993490890a53c0ebc800589da600b6b1282aaca71968
 size 42545748

 version https://git-lfs.github.com/spec/v1
+oid sha256:4c5827c92eda7d133f5f2e02378b82df19bba09950052d529a9f45087b2691f4
 size 42545748

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:051ef68e76ed7d1f7e722937ec594a046926e550a4052c7ce2c65763a149f537
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:3f4827f3907c884124cf5304ad6de566f4005bfd0b47fc207be524313cec4714
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6ced1fab70f37dfdd161d53ad45356c3e3789a4ff9624d1b399d33b98b7f6e47
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:30b806c9629c135739ae955bdbf613e7c7323c34a5bcf2bbc2116e176d24f227
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.12345679012345678,
   "eval_steps": 1000,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -49,6 +49,41 @@
       "learning_rate": 1e-05,
       "loss": 1.3828,
       "step": 50
     }
   ],
   "logging_steps": 10,
@@ -68,7 +103,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 8759069950771200.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.24691358024691357,
   "eval_steps": 1000,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1e-05,
       "loss": 1.3828,
       "step": 50
+    },
+    {
+      "epoch": 0.14814814814814814,
+      "grad_norm": 111082.546875,
+      "learning_rate": 1.2e-05,
+      "loss": 1.3397,
+      "step": 60
+    },
+    {
+      "epoch": 0.1728395061728395,
+      "grad_norm": 121515.2578125,
+      "learning_rate": 1.4000000000000001e-05,
+      "loss": 1.2831,
+      "step": 70
+    },
+    {
+      "epoch": 0.19753086419753085,
+      "grad_norm": 108325.3984375,
+      "learning_rate": 1.6000000000000003e-05,
+      "loss": 1.3085,
+      "step": 80
+    },
+    {
+      "epoch": 0.2222222222222222,
+      "grad_norm": 175475.78125,
+      "learning_rate": 1.8e-05,
+      "loss": 1.2624,
+      "step": 90
+    },
+    {
+      "epoch": 0.24691358024691357,
+      "grad_norm": 116793.203125,
+      "learning_rate": 2e-05,
+      "loss": 1.2683,
+      "step": 100
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 1.75181399015424e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null