Training in progress, step 635, checkpoint

Browse files

Files changed (4) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +388 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6589280cb3cff27d0e3f809d9fe18d16f0e9a7c5605ca58835189f572dabff16
 size 100198584

 version https://git-lfs.github.com/spec/v1
+oid sha256:3e6916f7e36c566c02cf5b2c732bbba7342ba9b478d907f6a86bacf7cb2c0150
 size 100198584

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0682c52911bfd561a6f52732b0960db62f88ed27fe66503c2c995a4eaf4cdaed
 size 50675604

 version https://git-lfs.github.com/spec/v1
+oid sha256:a06e698c180e26ad266a872f2b31ce48f7f30f3e994431201bfae0f415a7c1a3
 size 50675604

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c64a34b4af24bd84d792037de6a5cdeb1e9758d386ffdf2a30823c12441032d4
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:673f846db062d0e3d1543ca2e762819fe3771910bf6b5b7e181954499f0e9914
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.8474111953246279,
   "eval_steps": 500,
-  "global_step": 580,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -4067,6 +4067,391 @@
       "learning_rate": 3.5616438356164386e-06,
       "loss": 1.1487,
       "step": 580
     }
   ],
   "logging_steps": 1,
@@ -4086,7 +4471,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 6.557703270576169e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.9277691535019633,
   "eval_steps": 500,
+  "global_step": 635,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 3.5616438356164386e-06,
       "loss": 1.1487,
       "step": 580
+    },
+    {
+      "epoch": 0.8488722491096703,
+      "grad_norm": 0.12069284915924072,
+      "learning_rate": 3.527397260273973e-06,
+      "loss": 1.1901,
+      "step": 581
+    },
+    {
+      "epoch": 0.8503333028947128,
+      "grad_norm": 0.11841464787721634,
+      "learning_rate": 3.4931506849315072e-06,
+      "loss": 1.1714,
+      "step": 582
+    },
+    {
+      "epoch": 0.8517943566797552,
+      "grad_norm": 0.12204127013683319,
+      "learning_rate": 3.458904109589041e-06,
+      "loss": 1.1501,
+      "step": 583
+    },
+    {
+      "epoch": 0.8532554104647977,
+      "grad_norm": 0.13133108615875244,
+      "learning_rate": 3.4246575342465754e-06,
+      "loss": 1.2586,
+      "step": 584
+    },
+    {
+      "epoch": 0.8547164642498402,
+      "grad_norm": 0.11903316527605057,
+      "learning_rate": 3.39041095890411e-06,
+      "loss": 1.2399,
+      "step": 585
+    },
+    {
+      "epoch": 0.8561775180348826,
+      "grad_norm": 0.1073235422372818,
+      "learning_rate": 3.356164383561644e-06,
+      "loss": 1.2036,
+      "step": 586
+    },
+    {
+      "epoch": 0.8576385718199251,
+      "grad_norm": 0.1356821060180664,
+      "learning_rate": 3.3219178082191784e-06,
+      "loss": 1.1168,
+      "step": 587
+    },
+    {
+      "epoch": 0.8590996256049676,
+      "grad_norm": 0.1314031332731247,
+      "learning_rate": 3.2876712328767123e-06,
+      "loss": 1.1407,
+      "step": 588
+    },
+    {
+      "epoch": 0.86056067939001,
+      "grad_norm": 0.1098315566778183,
+      "learning_rate": 3.253424657534247e-06,
+      "loss": 1.1737,
+      "step": 589
+    },
+    {
+      "epoch": 0.8620217331750525,
+      "grad_norm": 0.12484846264123917,
+      "learning_rate": 3.2191780821917813e-06,
+      "loss": 1.29,
+      "step": 590
+    },
+    {
+      "epoch": 0.863482786960095,
+      "grad_norm": 0.12023176997900009,
+      "learning_rate": 3.184931506849315e-06,
+      "loss": 1.2455,
+      "step": 591
+    },
+    {
+      "epoch": 0.8649438407451374,
+      "grad_norm": 0.1124570220708847,
+      "learning_rate": 3.1506849315068495e-06,
+      "loss": 1.1792,
+      "step": 592
+    },
+    {
+      "epoch": 0.8664048945301799,
+      "grad_norm": 0.11319974809885025,
+      "learning_rate": 3.1164383561643843e-06,
+      "loss": 1.1708,
+      "step": 593
+    },
+    {
+      "epoch": 0.8678659483152223,
+      "grad_norm": 0.11844311654567719,
+      "learning_rate": 3.082191780821918e-06,
+      "loss": 1.1962,
+      "step": 594
+    },
+    {
+      "epoch": 0.8693270021002648,
+      "grad_norm": 0.12392130494117737,
+      "learning_rate": 3.0479452054794525e-06,
+      "loss": 1.1748,
+      "step": 595
+    },
+    {
+      "epoch": 0.8707880558853073,
+      "grad_norm": 0.1305224448442459,
+      "learning_rate": 3.0136986301369864e-06,
+      "loss": 1.2388,
+      "step": 596
+    },
+    {
+      "epoch": 0.8722491096703497,
+      "grad_norm": 0.11098136007785797,
+      "learning_rate": 2.979452054794521e-06,
+      "loss": 1.2109,
+      "step": 597
+    },
+    {
+      "epoch": 0.8737101634553922,
+      "grad_norm": 0.11488241702318192,
+      "learning_rate": 2.945205479452055e-06,
+      "loss": 1.2222,
+      "step": 598
+    },
+    {
+      "epoch": 0.8751712172404347,
+      "grad_norm": 0.11296083778142929,
+      "learning_rate": 2.9109589041095893e-06,
+      "loss": 1.2425,
+      "step": 599
+    },
+    {
+      "epoch": 0.8766322710254771,
+      "grad_norm": 0.11494144797325134,
+      "learning_rate": 2.876712328767123e-06,
+      "loss": 1.1589,
+      "step": 600
+    },
+    {
+      "epoch": 0.8780933248105196,
+      "grad_norm": 0.11837083101272583,
+      "learning_rate": 2.842465753424658e-06,
+      "loss": 1.1556,
+      "step": 601
+    },
+    {
+      "epoch": 0.8795543785955621,
+      "grad_norm": 0.12680098414421082,
+      "learning_rate": 2.8082191780821922e-06,
+      "loss": 1.1742,
+      "step": 602
+    },
+    {
+      "epoch": 0.8810154323806045,
+      "grad_norm": 0.12157981842756271,
+      "learning_rate": 2.773972602739726e-06,
+      "loss": 1.1715,
+      "step": 603
+    },
+    {
+      "epoch": 0.882476486165647,
+      "grad_norm": 0.11859599500894547,
+      "learning_rate": 2.7397260273972604e-06,
+      "loss": 1.1988,
+      "step": 604
+    },
+    {
+      "epoch": 0.8839375399506895,
+      "grad_norm": 0.11733684688806534,
+      "learning_rate": 2.705479452054795e-06,
+      "loss": 1.2507,
+      "step": 605
+    },
+    {
+      "epoch": 0.8853985937357319,
+      "grad_norm": 0.11254255473613739,
+      "learning_rate": 2.671232876712329e-06,
+      "loss": 1.1843,
+      "step": 606
+    },
+    {
+      "epoch": 0.8868596475207744,
+      "grad_norm": 0.11433933675289154,
+      "learning_rate": 2.6369863013698634e-06,
+      "loss": 1.1129,
+      "step": 607
+    },
+    {
+      "epoch": 0.8883207013058169,
+      "grad_norm": 0.11361224204301834,
+      "learning_rate": 2.6027397260273973e-06,
+      "loss": 1.1883,
+      "step": 608
+    },
+    {
+      "epoch": 0.8897817550908593,
+      "grad_norm": 0.11042333394289017,
+      "learning_rate": 2.568493150684932e-06,
+      "loss": 1.2322,
+      "step": 609
+    },
+    {
+      "epoch": 0.8912428088759018,
+      "grad_norm": 0.1262194812297821,
+      "learning_rate": 2.534246575342466e-06,
+      "loss": 1.1767,
+      "step": 610
+    },
+    {
+      "epoch": 0.8927038626609443,
+      "grad_norm": 0.10622208565473557,
+      "learning_rate": 2.5e-06,
+      "loss": 1.2355,
+      "step": 611
+    },
+    {
+      "epoch": 0.8941649164459866,
+      "grad_norm": 0.11434955894947052,
+      "learning_rate": 2.4657534246575345e-06,
+      "loss": 1.1571,
+      "step": 612
+    },
+    {
+      "epoch": 0.8956259702310291,
+      "grad_norm": 0.11948630958795547,
+      "learning_rate": 2.431506849315069e-06,
+      "loss": 1.1345,
+      "step": 613
+    },
+    {
+      "epoch": 0.8970870240160715,
+      "grad_norm": 0.119502492249012,
+      "learning_rate": 2.3972602739726027e-06,
+      "loss": 1.1296,
+      "step": 614
+    },
+    {
+      "epoch": 0.898548077801114,
+      "grad_norm": 0.1145474910736084,
+      "learning_rate": 2.363013698630137e-06,
+      "loss": 1.2316,
+      "step": 615
+    },
+    {
+      "epoch": 0.9000091315861565,
+      "grad_norm": 0.11403004080057144,
+      "learning_rate": 2.3287671232876713e-06,
+      "loss": 1.1536,
+      "step": 616
+    },
+    {
+      "epoch": 0.9014701853711989,
+      "grad_norm": 0.11280905455350876,
+      "learning_rate": 2.2945205479452057e-06,
+      "loss": 1.1922,
+      "step": 617
+    },
+    {
+      "epoch": 0.9029312391562414,
+      "grad_norm": 0.10832927376031876,
+      "learning_rate": 2.26027397260274e-06,
+      "loss": 1.2386,
+      "step": 618
+    },
+    {
+      "epoch": 0.9043922929412839,
+      "grad_norm": 0.11291555315256119,
+      "learning_rate": 2.2260273972602743e-06,
+      "loss": 1.1788,
+      "step": 619
+    },
+    {
+      "epoch": 0.9058533467263263,
+      "grad_norm": 0.1250094771385193,
+      "learning_rate": 2.191780821917808e-06,
+      "loss": 1.2142,
+      "step": 620
+    },
+    {
+      "epoch": 0.9073144005113688,
+      "grad_norm": 0.11631559580564499,
+      "learning_rate": 2.1575342465753425e-06,
+      "loss": 1.2121,
+      "step": 621
+    },
+    {
+      "epoch": 0.9087754542964113,
+      "grad_norm": 0.11511734873056412,
+      "learning_rate": 2.123287671232877e-06,
+      "loss": 1.1137,
+      "step": 622
+    },
+    {
+      "epoch": 0.9102365080814537,
+      "grad_norm": 0.12305217236280441,
+      "learning_rate": 2.089041095890411e-06,
+      "loss": 1.1745,
+      "step": 623
+    },
+    {
+      "epoch": 0.9116975618664962,
+      "grad_norm": 0.14875584840774536,
+      "learning_rate": 2.0547945205479454e-06,
+      "loss": 1.1986,
+      "step": 624
+    },
+    {
+      "epoch": 0.9131586156515387,
+      "grad_norm": 0.13255374133586884,
+      "learning_rate": 2.0205479452054797e-06,
+      "loss": 1.2716,
+      "step": 625
+    },
+    {
+      "epoch": 0.9146196694365811,
+      "grad_norm": 0.13747917115688324,
+      "learning_rate": 1.9863013698630136e-06,
+      "loss": 1.1092,
+      "step": 626
+    },
+    {
+      "epoch": 0.9160807232216236,
+      "grad_norm": 0.11340590566396713,
+      "learning_rate": 1.952054794520548e-06,
+      "loss": 1.2085,
+      "step": 627
+    },
+    {
+      "epoch": 0.9175417770066661,
+      "grad_norm": 0.11387283354997635,
+      "learning_rate": 1.9178082191780823e-06,
+      "loss": 1.2549,
+      "step": 628
+    },
+    {
+      "epoch": 0.9190028307917085,
+      "grad_norm": 0.11490115523338318,
+      "learning_rate": 1.8835616438356166e-06,
+      "loss": 1.1344,
+      "step": 629
+    },
+    {
+      "epoch": 0.920463884576751,
+      "grad_norm": 0.10832976549863815,
+      "learning_rate": 1.8493150684931507e-06,
+      "loss": 1.2395,
+      "step": 630
+    },
+    {
+      "epoch": 0.9219249383617935,
+      "grad_norm": 0.12825772166252136,
+      "learning_rate": 1.8150684931506852e-06,
+      "loss": 1.2296,
+      "step": 631
+    },
+    {
+      "epoch": 0.9233859921468359,
+      "grad_norm": 0.1021864116191864,
+      "learning_rate": 1.7808219178082193e-06,
+      "loss": 1.2589,
+      "step": 632
+    },
+    {
+      "epoch": 0.9248470459318784,
+      "grad_norm": 0.12429718673229218,
+      "learning_rate": 1.7465753424657536e-06,
+      "loss": 1.1167,
+      "step": 633
+    },
+    {
+      "epoch": 0.9263080997169209,
+      "grad_norm": 0.10847421735525131,
+      "learning_rate": 1.7123287671232877e-06,
+      "loss": 1.1975,
+      "step": 634
+    },
+    {
+      "epoch": 0.9277691535019633,
+      "grad_norm": 0.11923690140247345,
+      "learning_rate": 1.678082191780822e-06,
+      "loss": 1.1595,
+      "step": 635
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 7.18235035100799e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null