Training in progress, step 18, checkpoint

Browse files

Files changed (6) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +74 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:99a56ebedb10978bd4e193f3784da47fe9af8d1296615f5e231c886a16df738c
 size 2264640

 version https://git-lfs.github.com/spec/v1
+oid sha256:8ace6cb9505b630cc42f296da21a26f3b567183b3a9811fb87fbbb9091993720
 size 2264640

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c6176f1acdd175c0a7768374262fd2fac06322be9937e16bebeb9b14d46d46cc
 size 1183674

 version https://git-lfs.github.com/spec/v1
+oid sha256:d8e094ef751cbd269068b095cde0e43290ede08203abd0c895a15a9461015938
 size 1183674

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:007df89bec6daff126d2a1d69c9d4018343e128705fb1d5b2af992fa6385b40b
 size 14512

 version https://git-lfs.github.com/spec/v1
+oid sha256:20a90b6a2c70e12c09fb32cf9d04cacc11c2523c58a742e731a65ec3e279e352
 size 14512

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d4b1b142e127cfd32806d5105fed9a95a44b87b67f715dd9a5647e9d18a139c3
 size 14512

 version https://git-lfs.github.com/spec/v1
+oid sha256:f72b88be9468665c95a0d3c5676292c6feab6bcdb9e687c2b969b2a3bbd6c3c0
 size 14512

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:35fabaf6f0b238c38746c1776744ea0f8639b1f15587fe23529faa4cb355d67f
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:e36117d22c9a63b8dad3fdec6160f4a75b956bf530cd84159b5adbb0baea0ea0
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.2508710801393728,
   "eval_steps": 9,
-  "global_step": 9,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -86,6 +86,77 @@
       "eval_samples_per_second": 192.254,
       "eval_steps_per_second": 48.461,
       "step": 9
     }
   ],
   "logging_steps": 1,
@@ -105,7 +176,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1547005696409600.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.5017421602787456,
   "eval_steps": 9,
+  "global_step": 18,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 192.254,
       "eval_steps_per_second": 48.461,
       "step": 9
+    },
+    {
+      "epoch": 0.2787456445993031,
+      "grad_norm": 0.7740032076835632,
+      "learning_rate": 8.695044586103296e-05,
+      "loss": 4.6227,
+      "step": 10
+    },
+    {
+      "epoch": 0.30662020905923343,
+      "grad_norm": 0.7942211627960205,
+      "learning_rate": 8.368478218232787e-05,
+      "loss": 4.6837,
+      "step": 11
+    },
+    {
+      "epoch": 0.3344947735191638,
+      "grad_norm": 0.8654100894927979,
+      "learning_rate": 8.013173181896283e-05,
+      "loss": 4.6198,
+      "step": 12
+    },
+    {
+      "epoch": 0.3623693379790941,
+      "grad_norm": 0.9776397943496704,
+      "learning_rate": 7.63216081438678e-05,
+      "loss": 4.7598,
+      "step": 13
+    },
+    {
+      "epoch": 0.3902439024390244,
+      "grad_norm": 0.828058123588562,
+      "learning_rate": 7.228691778882693e-05,
+      "loss": 4.6796,
+      "step": 14
+    },
+    {
+      "epoch": 0.4181184668989547,
+      "grad_norm": 0.8717594146728516,
+      "learning_rate": 6.806208330935766e-05,
+      "loss": 4.5456,
+      "step": 15
+    },
+    {
+      "epoch": 0.445993031358885,
+      "grad_norm": 0.9016168117523193,
+      "learning_rate": 6.368314950360415e-05,
+      "loss": 4.7329,
+      "step": 16
+    },
+    {
+      "epoch": 0.4738675958188153,
+      "grad_norm": 0.9021468162536621,
+      "learning_rate": 5.918747589082853e-05,
+      "loss": 4.6807,
+      "step": 17
+    },
+    {
+      "epoch": 0.5017421602787456,
+      "grad_norm": 0.9048068523406982,
+      "learning_rate": 5.4613417973165106e-05,
+      "loss": 4.6001,
+      "step": 18
+    },
+    {
+      "epoch": 0.5017421602787456,
+      "eval_loss": 4.461349010467529,
+      "eval_runtime": 1.2645,
+      "eval_samples_per_second": 191.38,
+      "eval_steps_per_second": 48.24,
+      "step": 18
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 3078865861738496.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null