Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +361 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:86e58ba785a86cf20d0213a0115039d516dbc75ede0229ffde234921a2e67ab1
 size 81576

 version https://git-lfs.github.com/spec/v1
+oid sha256:72cabe2e50e47855b731f0b59856370478fdef3d918034c6210d60d4582ad692
 size 81576

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6db8568c1c384515269c6e4dc5788763e38b4593118b3ab609c6754be8cfd750
 size 173094

 version https://git-lfs.github.com/spec/v1
+oid sha256:3408330e006c0fc4057b7efb470b7caf087333ffec2cf54fa773c94478d5eb71
 size 173094

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8c93ef709c3715e4b2f9673389486b3ebaea139971897022bb419b83c0e32166
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:bfe54cf7ac438aee0473d5efd8220205ccbf506fefb5b1843674ee2910182d68
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a9e02dc10b7239989ab9b4418ee704e53fad611ad6b77ad633028bb8eb5238dd
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:3fc7800513a1b4dd006c457152c700dd768bb49ee4ed8e4d9665a4e42095b054
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.033772374197906116,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -373,6 +373,364 @@
       "eval_samples_per_second": 281.442,
       "eval_steps_per_second": 140.721,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -392,7 +750,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 8202957619200.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.06754474839581223,
   "eval_steps": 50,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 281.442,
       "eval_steps_per_second": 140.721,
       "step": 50
+    },
+    {
+      "epoch": 0.034447821681864235,
+      "grad_norm": 0.37587037682533264,
+      "learning_rate": 0.00017788772787621126,
+      "loss": 10.6597,
+      "step": 51
+    },
+    {
+      "epoch": 0.035123269165822354,
+      "grad_norm": 0.35869449377059937,
+      "learning_rate": 0.00017684011108568592,
+      "loss": 10.6479,
+      "step": 52
+    },
+    {
+      "epoch": 0.03579871664978048,
+      "grad_norm": 0.3892292380332947,
+      "learning_rate": 0.0001757714869760335,
+      "loss": 10.651,
+      "step": 53
+    },
+    {
+      "epoch": 0.0364741641337386,
+      "grad_norm": 0.372700572013855,
+      "learning_rate": 0.0001746821476984154,
+      "loss": 10.6507,
+      "step": 54
+    },
+    {
+      "epoch": 0.037149611617696726,
+      "grad_norm": 0.3261224329471588,
+      "learning_rate": 0.00017357239106731317,
+      "loss": 10.6657,
+      "step": 55
+    },
+    {
+      "epoch": 0.037825059101654845,
+      "grad_norm": 0.3149575889110565,
+      "learning_rate": 0.00017244252047910892,
+      "loss": 10.6366,
+      "step": 56
+    },
+    {
+      "epoch": 0.03850050658561297,
+      "grad_norm": 0.3464124798774719,
+      "learning_rate": 0.00017129284482913972,
+      "loss": 10.6405,
+      "step": 57
+    },
+    {
+      "epoch": 0.03917595406957109,
+      "grad_norm": 0.291887491941452,
+      "learning_rate": 0.00017012367842724887,
+      "loss": 10.6374,
+      "step": 58
+    },
+    {
+      "epoch": 0.03985140155352921,
+      "grad_norm": 0.2717147767543793,
+      "learning_rate": 0.0001689353409118566,
+      "loss": 10.6367,
+      "step": 59
+    },
+    {
+      "epoch": 0.040526849037487336,
+      "grad_norm": 0.27471521496772766,
+      "learning_rate": 0.00016772815716257412,
+      "loss": 10.6394,
+      "step": 60
+    },
+    {
+      "epoch": 0.041202296521445456,
+      "grad_norm": 0.26729482412338257,
+      "learning_rate": 0.0001665024572113848,
+      "loss": 10.6301,
+      "step": 61
+    },
+    {
+      "epoch": 0.04187774400540358,
+      "grad_norm": 0.25519606471061707,
+      "learning_rate": 0.00016525857615241687,
+      "loss": 10.6343,
+      "step": 62
+    },
+    {
+      "epoch": 0.0425531914893617,
+      "grad_norm": 0.25052082538604736,
+      "learning_rate": 0.00016399685405033167,
+      "loss": 10.6282,
+      "step": 63
+    },
+    {
+      "epoch": 0.04322863897331983,
+      "grad_norm": 0.24196283519268036,
+      "learning_rate": 0.0001627176358473537,
+      "loss": 10.6253,
+      "step": 64
+    },
+    {
+      "epoch": 0.04390408645727795,
+      "grad_norm": 0.24466153979301453,
+      "learning_rate": 0.0001614212712689668,
+      "loss": 10.6304,
+      "step": 65
+    },
+    {
+      "epoch": 0.044579533941236066,
+      "grad_norm": 0.21384331583976746,
+      "learning_rate": 0.00016010811472830252,
+      "loss": 10.6279,
+      "step": 66
+    },
+    {
+      "epoch": 0.04525498142519419,
+      "grad_norm": 0.23176230490207672,
+      "learning_rate": 0.00015877852522924732,
+      "loss": 10.6254,
+      "step": 67
+    },
+    {
+      "epoch": 0.04593042890915231,
+      "grad_norm": 0.21549372375011444,
+      "learning_rate": 0.00015743286626829437,
+      "loss": 10.6254,
+      "step": 68
+    },
+    {
+      "epoch": 0.04660587639311044,
+      "grad_norm": 0.21212856471538544,
+      "learning_rate": 0.0001560715057351673,
+      "loss": 10.6306,
+      "step": 69
+    },
+    {
+      "epoch": 0.04728132387706856,
+      "grad_norm": 0.2017771154642105,
+      "learning_rate": 0.00015469481581224272,
+      "loss": 10.6216,
+      "step": 70
+    },
+    {
+      "epoch": 0.04795677136102668,
+      "grad_norm": 0.21862851083278656,
+      "learning_rate": 0.0001533031728727994,
+      "loss": 10.6099,
+      "step": 71
+    },
+    {
+      "epoch": 0.0486322188449848,
+      "grad_norm": 0.1980670690536499,
+      "learning_rate": 0.00015189695737812152,
+      "loss": 10.6245,
+      "step": 72
+    },
+    {
+      "epoch": 0.04930766632894292,
+      "grad_norm": 0.2036397010087967,
+      "learning_rate": 0.0001504765537734844,
+      "loss": 10.6232,
+      "step": 73
+    },
+    {
+      "epoch": 0.04998311381290105,
+      "grad_norm": 0.17732380330562592,
+      "learning_rate": 0.00014904235038305083,
+      "loss": 10.6244,
+      "step": 74
+    },
+    {
+      "epoch": 0.05065856129685917,
+      "grad_norm": 0.19181127846240997,
+      "learning_rate": 0.00014759473930370736,
+      "loss": 10.6222,
+      "step": 75
+    },
+    {
+      "epoch": 0.05133400878081729,
+      "grad_norm": 0.1601181924343109,
+      "learning_rate": 0.0001461341162978688,
+      "loss": 10.6218,
+      "step": 76
+    },
+    {
+      "epoch": 0.05200945626477541,
+      "grad_norm": 0.18116536736488342,
+      "learning_rate": 0.00014466088068528068,
+      "loss": 10.625,
+      "step": 77
+    },
+    {
+      "epoch": 0.05268490374873354,
+      "grad_norm": 0.14323946833610535,
+      "learning_rate": 0.00014317543523384928,
+      "loss": 10.6489,
+      "step": 78
+    },
+    {
+      "epoch": 0.05336035123269166,
+      "grad_norm": 0.1809110790491104,
+      "learning_rate": 0.00014167818604952906,
+      "loss": 10.6282,
+      "step": 79
+    },
+    {
+      "epoch": 0.05403579871664978,
+      "grad_norm": 0.13735494017601013,
+      "learning_rate": 0.00014016954246529696,
+      "loss": 10.6169,
+      "step": 80
+    },
+    {
+      "epoch": 0.0547112462006079,
+      "grad_norm": 0.15906408429145813,
+      "learning_rate": 0.00013864991692924523,
+      "loss": 10.6212,
+      "step": 81
+    },
+    {
+      "epoch": 0.05538669368456602,
+      "grad_norm": 0.17548221349716187,
+      "learning_rate": 0.00013711972489182208,
+      "loss": 10.6171,
+      "step": 82
+    },
+    {
+      "epoch": 0.05606214116852415,
+      "grad_norm": 0.12418357282876968,
+      "learning_rate": 0.00013557938469225167,
+      "loss": 10.6143,
+      "step": 83
+    },
+    {
+      "epoch": 0.05673758865248227,
+      "grad_norm": 0.1521899700164795,
+      "learning_rate": 0.00013402931744416433,
+      "loss": 10.6199,
+      "step": 84
+    },
+    {
+      "epoch": 0.057413036136440394,
+      "grad_norm": 0.14142099022865295,
+      "learning_rate": 0.00013246994692046836,
+      "loss": 10.6131,
+      "step": 85
+    },
+    {
+      "epoch": 0.058088483620398514,
+      "grad_norm": 0.13722097873687744,
+      "learning_rate": 0.00013090169943749476,
+      "loss": 10.6153,
+      "step": 86
+    },
+    {
+      "epoch": 0.05876393110435663,
+      "grad_norm": 0.14299722015857697,
+      "learning_rate": 0.0001293250037384465,
+      "loss": 10.6094,
+      "step": 87
+    },
+    {
+      "epoch": 0.05943937858831476,
+      "grad_norm": 0.13671687245368958,
+      "learning_rate": 0.00012774029087618446,
+      "loss": 10.6242,
+      "step": 88
+    },
+    {
+      "epoch": 0.06011482607227288,
+      "grad_norm": 0.12905743718147278,
+      "learning_rate": 0.00012614799409538198,
+      "loss": 10.6116,
+      "step": 89
+    },
+    {
+      "epoch": 0.060790273556231005,
+      "grad_norm": 0.13734190165996552,
+      "learning_rate": 0.00012454854871407994,
+      "loss": 10.6142,
+      "step": 90
+    },
+    {
+      "epoch": 0.061465721040189124,
+      "grad_norm": 0.13106867671012878,
+      "learning_rate": 0.00012294239200467516,
+      "loss": 10.6176,
+      "step": 91
+    },
+    {
+      "epoch": 0.06214116852414725,
+      "grad_norm": 0.1443423628807068,
+      "learning_rate": 0.0001213299630743747,
+      "loss": 10.6369,
+      "step": 92
+    },
+    {
+      "epoch": 0.06281661600810537,
+      "grad_norm": 0.11222351342439651,
+      "learning_rate": 0.00011971170274514802,
+      "loss": 10.6164,
+      "step": 93
+    },
+    {
+      "epoch": 0.06349206349206349,
+      "grad_norm": 0.11618170142173767,
+      "learning_rate": 0.000118088053433211,
+      "loss": 10.6057,
+      "step": 94
+    },
+    {
+      "epoch": 0.06416751097602161,
+      "grad_norm": 0.11936385929584503,
+      "learning_rate": 0.00011645945902807341,
+      "loss": 10.6146,
+      "step": 95
+    },
+    {
+      "epoch": 0.06484295845997974,
+      "grad_norm": 0.12773548066616058,
+      "learning_rate": 0.0001148263647711842,
+      "loss": 10.6161,
+      "step": 96
+    },
+    {
+      "epoch": 0.06551840594393786,
+      "grad_norm": 0.1389545202255249,
+      "learning_rate": 0.00011318921713420691,
+      "loss": 10.6053,
+      "step": 97
+    },
+    {
+      "epoch": 0.06619385342789598,
+      "grad_norm": 0.13642248511314392,
+      "learning_rate": 0.00011154846369695863,
+      "loss": 10.6153,
+      "step": 98
+    },
+    {
+      "epoch": 0.0668693009118541,
+      "grad_norm": 0.11348054558038712,
+      "learning_rate": 0.0001099045530250463,
+      "loss": 10.6132,
+      "step": 99
+    },
+    {
+      "epoch": 0.06754474839581223,
+      "grad_norm": 0.1491929590702057,
+      "learning_rate": 0.00010825793454723325,
+      "loss": 10.6309,
+      "step": 100
+    },
+    {
+      "epoch": 0.06754474839581223,
+      "eval_loss": 10.612899780273438,
+      "eval_runtime": 2.3146,
+      "eval_samples_per_second": 269.593,
+      "eval_steps_per_second": 134.797,
+      "step": 100
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 16405915238400.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null