Training in progress, step 445, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +158 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5f14d35b2279e1bcb7fb4b7b70f56a7bc720d14962682ca3c0bbb2a32c572689
 size 80013120

 version https://git-lfs.github.com/spec/v1
+oid sha256:a0b1d1f985338e732480dad91d3b0c295a5acc699977017e5ced149d3591f017
 size 80013120

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:181e4d5770ede9dfda344e6d3f86cf5fb5b89a6d145168efd1058fc35a3da9fa
 size 41120084

 version https://git-lfs.github.com/spec/v1
+oid sha256:a75b1d231151f0dd236e71618c7ec5e0bb749f4609fe5f72d57da7bff6262c7e
 size 41120084

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d342406ff6e89eb8cdfff5b6dd64949295d3a0a84d5fbc6fc4e72a9f7457db32
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:d719228f4ad72adb2eba882474a882697da2ca692f9ff39a991618eed3a74316
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3d110b385a7dffb4d471adbd45c914f5fc5f8ea9533c165316859b91f2d4cdd1
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:7329c50730a00f35c6eac05c0e64503a6ca7eb6faad2aa5a4d5c0dd51c1d12ee
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.7563308947664603,
   "eval_steps": 112,
-  "global_step": 336,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -508,6 +508,160 @@
       "eval_samples_per_second": 12.752,
       "eval_steps_per_second": 6.376,
       "step": 336
     }
   ],
   "logging_steps": 5,
@@ -522,12 +676,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.1042063598904934e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.0016882386043895,
   "eval_steps": 112,
+  "global_step": 445,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 12.752,
       "eval_steps_per_second": 6.376,
       "step": 336
+    },
+    {
+      "epoch": 0.7653348339898706,
+      "grad_norm": 0.010809916071593761,
+      "learning_rate": 1.3700225403843469e-05,
+      "loss": 0.0008,
+      "step": 340
+    },
+    {
+      "epoch": 0.7765897580191333,
+      "grad_norm": 0.045122772455215454,
+      "learning_rate": 1.2482508892179884e-05,
+      "loss": 0.0011,
+      "step": 345
+    },
+    {
+      "epoch": 0.7878446820483962,
+      "grad_norm": 0.008580501191318035,
+      "learning_rate": 1.1313708031358183e-05,
+      "loss": 0.0017,
+      "step": 350
+    },
+    {
+      "epoch": 0.799099606077659,
+      "grad_norm": 0.002904525026679039,
+      "learning_rate": 1.0195346714717813e-05,
+      "loss": 0.0006,
+      "step": 355
+    },
+    {
+      "epoch": 0.8103545301069218,
+      "grad_norm": 0.015455417335033417,
+      "learning_rate": 9.12888307205541e-06,
+      "loss": 0.0008,
+      "step": 360
+    },
+    {
+      "epoch": 0.8216094541361846,
+      "grad_norm": 0.012445817701518536,
+      "learning_rate": 8.115707568501768e-06,
+      "loss": 0.0009,
+      "step": 365
+    },
+    {
+      "epoch": 0.8328643781654473,
+      "grad_norm": 0.05810451880097389,
+      "learning_rate": 7.157141191620548e-06,
+      "loss": 0.0009,
+      "step": 370
+    },
+    {
+      "epoch": 0.8441193021947102,
+      "grad_norm": 0.006151565816253424,
+      "learning_rate": 6.2544337290925185e-06,
+      "loss": 0.0008,
+      "step": 375
+    },
+    {
+      "epoch": 0.855374226223973,
+      "grad_norm": 0.033290039747953415,
+      "learning_rate": 5.408762139230888e-06,
+      "loss": 0.0008,
+      "step": 380
+    },
+    {
+      "epoch": 0.8666291502532358,
+      "grad_norm": 0.005590901710093021,
+      "learning_rate": 4.621229016452156e-06,
+      "loss": 0.0018,
+      "step": 385
+    },
+    {
+      "epoch": 0.8778840742824986,
+      "grad_norm": 0.08342321962118149,
+      "learning_rate": 3.892861153703342e-06,
+      "loss": 0.001,
+      "step": 390
+    },
+    {
+      "epoch": 0.8891389983117614,
+      "grad_norm": 0.008743366226553917,
+      "learning_rate": 3.2246082037199532e-06,
+      "loss": 0.0007,
+      "step": 395
+    },
+    {
+      "epoch": 0.9003939223410242,
+      "grad_norm": 0.006391066592186689,
+      "learning_rate": 2.6173414408598827e-06,
+      "loss": 0.0008,
+      "step": 400
+    },
+    {
+      "epoch": 0.911648846370287,
+      "grad_norm": 0.016693001613020897,
+      "learning_rate": 2.0718526251279346e-06,
+      "loss": 0.0007,
+      "step": 405
+    },
+    {
+      "epoch": 0.9229037703995498,
+      "grad_norm": 0.00829355325549841,
+      "learning_rate": 1.5888529698718346e-06,
+      "loss": 0.001,
+      "step": 410
+    },
+    {
+      "epoch": 0.9341586944288126,
+      "grad_norm": 0.007485538721084595,
+      "learning_rate": 1.1689722144956671e-06,
+      "loss": 0.0015,
+      "step": 415
+    },
+    {
+      "epoch": 0.9454136184580754,
+      "grad_norm": 0.030309610068798065,
+      "learning_rate": 8.127578033998662e-07,
+      "loss": 0.0009,
+      "step": 420
+    },
+    {
+      "epoch": 0.9566685424873382,
+      "grad_norm": 0.020800478756427765,
+      "learning_rate": 5.206741722181386e-07,
+      "loss": 0.0008,
+      "step": 425
+    },
+    {
+      "epoch": 0.967923466516601,
+      "grad_norm": 0.0036028597969561815,
+      "learning_rate": 2.9310214228202013e-07,
+      "loss": 0.0007,
+      "step": 430
+    },
+    {
+      "epoch": 0.9791783905458639,
+      "grad_norm": 0.0036092002410441637,
+      "learning_rate": 1.3033842410251075e-07,
+      "loss": 0.0008,
+      "step": 435
+    },
+    {
+      "epoch": 0.9904333145751266,
+      "grad_norm": 0.004002279601991177,
+      "learning_rate": 3.259523051615254e-08,
+      "loss": 0.0008,
+      "step": 440
+    },
+    {
+      "epoch": 1.0016882386043895,
+      "grad_norm": 0.08439312875270844,
+      "learning_rate": 0.0,
+      "loss": 0.0013,
+      "step": 445
     }
   ],
   "logging_steps": 5,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.462005370107986e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null