DuongTrongChi committed on
Commit 58ae0bb · verified · 1 Parent(s): 3fbd685

Training in progress, step 666, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:bc1c655855b293e8483dd4fb04c85c74b9d78570e6d69ec02a9a00041397ab4c
+ oid sha256:0cc5125f76d6dbaaf8f53b7058f9db944f682d3b54268cdcab102643bbb5c715
  size 100198584
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:7f6c1de617623b9f3b9b3b92049c151ab4dd84b2058cf77fac1fd8af1afe7765
+ oid sha256:2353d647b00d14aca06f778f9d43a65a8201b5792af6fb89150d357af16ee31c
  size 50675604
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:514895963f5ba684bb297b72010037f5ca38e8ef847d63ce0102594f529ff421
+ oid sha256:e4dfc30b5618d8f9126fd758d49456abcb3bac7a76ca1747eea78894ae958013
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.6496570644718793,
+ "epoch": 0.7308641975308642,
  "eval_steps": 500,
- "global_step": 592,
+ "global_step": 666,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -4151,6 +4151,524 @@
  "learning_rate": 7.866831072749692e-06,
  "loss": 1.0413,
  "step": 592
4154
+ },
4155
+ {
4156
+ "epoch": 0.6507544581618656,
4157
+ "grad_norm": 0.13166101276874542,
4158
+ "learning_rate": 7.842170160295932e-06,
4159
+ "loss": 1.0842,
4160
+ "step": 593
4161
+ },
4162
+ {
4163
+ "epoch": 0.6518518518518519,
4164
+ "grad_norm": 0.13434740900993347,
4165
+ "learning_rate": 7.817509247842171e-06,
4166
+ "loss": 1.1033,
4167
+ "step": 594
4168
+ },
4169
+ {
4170
+ "epoch": 0.6529492455418381,
4171
+ "grad_norm": 0.1200101375579834,
4172
+ "learning_rate": 7.79284833538841e-06,
4173
+ "loss": 1.08,
4174
+ "step": 595
4175
+ },
4176
+ {
4177
+ "epoch": 0.6540466392318244,
4178
+ "grad_norm": 0.13683106005191803,
4179
+ "learning_rate": 7.768187422934649e-06,
4180
+ "loss": 1.128,
4181
+ "step": 596
4182
+ },
4183
+ {
4184
+ "epoch": 0.6551440329218107,
4185
+ "grad_norm": 0.13702082633972168,
4186
+ "learning_rate": 7.743526510480888e-06,
4187
+ "loss": 1.1325,
4188
+ "step": 597
4189
+ },
4190
+ {
4191
+ "epoch": 0.656241426611797,
4192
+ "grad_norm": 0.13655568659305573,
4193
+ "learning_rate": 7.718865598027127e-06,
4194
+ "loss": 1.0282,
4195
+ "step": 598
4196
+ },
4197
+ {
4198
+ "epoch": 0.6573388203017833,
4199
+ "grad_norm": 0.11861226707696915,
4200
+ "learning_rate": 7.694204685573366e-06,
4201
+ "loss": 1.1373,
4202
+ "step": 599
4203
+ },
4204
+ {
4205
+ "epoch": 0.6584362139917695,
4206
+ "grad_norm": 0.13324734568595886,
4207
+ "learning_rate": 7.669543773119606e-06,
4208
+ "loss": 1.1823,
4209
+ "step": 600
4210
+ },
4211
+ {
4212
+ "epoch": 0.6595336076817558,
4213
+ "grad_norm": 0.13969723880290985,
4214
+ "learning_rate": 7.644882860665845e-06,
4215
+ "loss": 1.1193,
4216
+ "step": 601
4217
+ },
4218
+ {
4219
+ "epoch": 0.6606310013717421,
4220
+ "grad_norm": 0.138763889670372,
4221
+ "learning_rate": 7.620221948212084e-06,
4222
+ "loss": 1.1123,
4223
+ "step": 602
4224
+ },
4225
+ {
4226
+ "epoch": 0.6617283950617284,
4227
+ "grad_norm": 0.13153599202632904,
4228
+ "learning_rate": 7.595561035758323e-06,
4229
+ "loss": 1.1058,
4230
+ "step": 603
4231
+ },
4232
+ {
4233
+ "epoch": 0.6628257887517147,
4234
+ "grad_norm": 0.1288379430770874,
4235
+ "learning_rate": 7.570900123304563e-06,
4236
+ "loss": 1.0909,
4237
+ "step": 604
4238
+ },
4239
+ {
4240
+ "epoch": 0.663923182441701,
4241
+ "grad_norm": 0.1367582231760025,
4242
+ "learning_rate": 7.546239210850802e-06,
4243
+ "loss": 1.0334,
4244
+ "step": 605
4245
+ },
4246
+ {
4247
+ "epoch": 0.6650205761316872,
4248
+ "grad_norm": 0.12939676642417908,
4249
+ "learning_rate": 7.521578298397041e-06,
4250
+ "loss": 1.0775,
4251
+ "step": 606
4252
+ },
4253
+ {
4254
+ "epoch": 0.6661179698216735,
4255
+ "grad_norm": 0.13814114034175873,
4256
+ "learning_rate": 7.4969173859432805e-06,
4257
+ "loss": 1.1047,
4258
+ "step": 607
4259
+ },
4260
+ {
4261
+ "epoch": 0.6672153635116598,
4262
+ "grad_norm": 0.1455143690109253,
4263
+ "learning_rate": 7.472256473489519e-06,
4264
+ "loss": 1.163,
4265
+ "step": 608
4266
+ },
4267
+ {
4268
+ "epoch": 0.6683127572016461,
4269
+ "grad_norm": 0.15828116238117218,
4270
+ "learning_rate": 7.447595561035758e-06,
4271
+ "loss": 1.0329,
4272
+ "step": 609
4273
+ },
4274
+ {
4275
+ "epoch": 0.6694101508916324,
4276
+ "grad_norm": 0.14122609794139862,
4277
+ "learning_rate": 7.422934648581999e-06,
4278
+ "loss": 1.1115,
4279
+ "step": 610
4280
+ },
4281
+ {
4282
+ "epoch": 0.6705075445816187,
4283
+ "grad_norm": 0.12997443974018097,
4284
+ "learning_rate": 7.398273736128238e-06,
4285
+ "loss": 1.1093,
4286
+ "step": 611
4287
+ },
4288
+ {
4289
+ "epoch": 0.671604938271605,
4290
+ "grad_norm": 0.13643480837345123,
4291
+ "learning_rate": 7.373612823674477e-06,
4292
+ "loss": 1.13,
4293
+ "step": 612
4294
+ },
4295
+ {
4296
+ "epoch": 0.6727023319615912,
4297
+ "grad_norm": 0.12696883082389832,
4298
+ "learning_rate": 7.348951911220716e-06,
4299
+ "loss": 1.1004,
4300
+ "step": 613
4301
+ },
4302
+ {
4303
+ "epoch": 0.6737997256515775,
4304
+ "grad_norm": 0.14005936682224274,
4305
+ "learning_rate": 7.324290998766955e-06,
4306
+ "loss": 1.0574,
4307
+ "step": 614
4308
+ },
4309
+ {
4310
+ "epoch": 0.6748971193415638,
4311
+ "grad_norm": 0.14040903747081757,
4312
+ "learning_rate": 7.299630086313194e-06,
4313
+ "loss": 1.1028,
4314
+ "step": 615
4315
+ },
4316
+ {
4317
+ "epoch": 0.6759945130315501,
4318
+ "grad_norm": 0.12855766713619232,
4319
+ "learning_rate": 7.274969173859433e-06,
4320
+ "loss": 1.1411,
4321
+ "step": 616
4322
+ },
4323
+ {
4324
+ "epoch": 0.6770919067215363,
4325
+ "grad_norm": 0.14175771176815033,
4326
+ "learning_rate": 7.250308261405673e-06,
4327
+ "loss": 1.077,
4328
+ "step": 617
4329
+ },
4330
+ {
4331
+ "epoch": 0.6781893004115226,
4332
+ "grad_norm": 0.12823879718780518,
4333
+ "learning_rate": 7.225647348951912e-06,
4334
+ "loss": 1.0089,
4335
+ "step": 618
4336
+ },
4337
+ {
4338
+ "epoch": 0.6792866941015089,
4339
+ "grad_norm": 0.13076744973659515,
4340
+ "learning_rate": 7.200986436498151e-06,
4341
+ "loss": 1.1641,
4342
+ "step": 619
4343
+ },
4344
+ {
4345
+ "epoch": 0.6803840877914952,
4346
+ "grad_norm": 0.1256016492843628,
4347
+ "learning_rate": 7.1763255240443905e-06,
4348
+ "loss": 1.1092,
4349
+ "step": 620
4350
+ },
4351
+ {
4352
+ "epoch": 0.6814814814814815,
4353
+ "grad_norm": 0.14268584549427032,
4354
+ "learning_rate": 7.1516646115906294e-06,
4355
+ "loss": 1.0106,
4356
+ "step": 621
4357
+ },
4358
+ {
4359
+ "epoch": 0.6825788751714678,
4360
+ "grad_norm": 0.13120578229427338,
4361
+ "learning_rate": 7.127003699136868e-06,
4362
+ "loss": 1.0598,
4363
+ "step": 622
4364
+ },
4365
+ {
4366
+ "epoch": 0.683676268861454,
4367
+ "grad_norm": 0.13504907488822937,
4368
+ "learning_rate": 7.102342786683107e-06,
4369
+ "loss": 1.0966,
4370
+ "step": 623
4371
+ },
4372
+ {
4373
+ "epoch": 0.6847736625514403,
4374
+ "grad_norm": 0.12563414871692657,
4375
+ "learning_rate": 7.077681874229347e-06,
4376
+ "loss": 1.0724,
4377
+ "step": 624
4378
+ },
4379
+ {
4380
+ "epoch": 0.6858710562414266,
4381
+ "grad_norm": 0.13522499799728394,
4382
+ "learning_rate": 7.053020961775586e-06,
4383
+ "loss": 1.1182,
4384
+ "step": 625
4385
+ },
4386
+ {
4387
+ "epoch": 0.6869684499314129,
4388
+ "grad_norm": 0.12960287928581238,
4389
+ "learning_rate": 7.028360049321825e-06,
4390
+ "loss": 1.1252,
4391
+ "step": 626
4392
+ },
4393
+ {
4394
+ "epoch": 0.6880658436213992,
4395
+ "grad_norm": 0.13346299529075623,
4396
+ "learning_rate": 7.003699136868065e-06,
4397
+ "loss": 1.1317,
4398
+ "step": 627
4399
+ },
4400
+ {
4401
+ "epoch": 0.6891632373113855,
4402
+ "grad_norm": 0.1333625316619873,
4403
+ "learning_rate": 6.979038224414304e-06,
4404
+ "loss": 1.1505,
4405
+ "step": 628
4406
+ },
4407
+ {
4408
+ "epoch": 0.6902606310013717,
4409
+ "grad_norm": 0.1392945796251297,
4410
+ "learning_rate": 6.954377311960543e-06,
4411
+ "loss": 1.1966,
4412
+ "step": 629
4413
+ },
4414
+ {
4415
+ "epoch": 0.691358024691358,
4416
+ "grad_norm": 0.12204419821500778,
4417
+ "learning_rate": 6.929716399506782e-06,
4418
+ "loss": 1.1243,
4419
+ "step": 630
4420
+ },
4421
+ {
4422
+ "epoch": 0.6924554183813443,
4423
+ "grad_norm": 0.1395426094532013,
4424
+ "learning_rate": 6.905055487053022e-06,
4425
+ "loss": 1.0475,
4426
+ "step": 631
4427
+ },
4428
+ {
4429
+ "epoch": 0.6935528120713306,
4430
+ "grad_norm": 0.13325053453445435,
4431
+ "learning_rate": 6.880394574599261e-06,
4432
+ "loss": 1.0344,
4433
+ "step": 632
4434
+ },
4435
+ {
4436
+ "epoch": 0.6946502057613169,
4437
+ "grad_norm": 0.14765462279319763,
4438
+ "learning_rate": 6.8557336621455e-06,
4439
+ "loss": 1.0965,
4440
+ "step": 633
4441
+ },
4442
+ {
4443
+ "epoch": 0.6957475994513032,
4444
+ "grad_norm": 0.12556719779968262,
4445
+ "learning_rate": 6.8310727496917395e-06,
4446
+ "loss": 1.199,
4447
+ "step": 634
4448
+ },
4449
+ {
4450
+ "epoch": 0.6968449931412894,
4451
+ "grad_norm": 0.12908804416656494,
4452
+ "learning_rate": 6.806411837237978e-06,
4453
+ "loss": 1.1594,
4454
+ "step": 635
4455
+ },
4456
+ {
4457
+ "epoch": 0.6979423868312757,
4458
+ "grad_norm": 0.1703738272190094,
4459
+ "learning_rate": 6.781750924784217e-06,
4460
+ "loss": 1.0171,
4461
+ "step": 636
4462
+ },
4463
+ {
4464
+ "epoch": 0.699039780521262,
4465
+ "grad_norm": 0.12791863083839417,
4466
+ "learning_rate": 6.757090012330457e-06,
4467
+ "loss": 1.2105,
4468
+ "step": 637
4469
+ },
4470
+ {
4471
+ "epoch": 0.7001371742112483,
4472
+ "grad_norm": 0.17011161148548126,
4473
+ "learning_rate": 6.732429099876696e-06,
4474
+ "loss": 1.0192,
4475
+ "step": 638
4476
+ },
4477
+ {
4478
+ "epoch": 0.7012345679012346,
4479
+ "grad_norm": 0.14074620604515076,
4480
+ "learning_rate": 6.707768187422935e-06,
4481
+ "loss": 1.1763,
4482
+ "step": 639
4483
+ },
4484
+ {
4485
+ "epoch": 0.7023319615912208,
4486
+ "grad_norm": 0.13788381218910217,
4487
+ "learning_rate": 6.683107274969174e-06,
4488
+ "loss": 1.0638,
4489
+ "step": 640
4490
+ },
4491
+ {
4492
+ "epoch": 0.7034293552812071,
4493
+ "grad_norm": 0.13305304944515228,
4494
+ "learning_rate": 6.6584463625154135e-06,
4495
+ "loss": 1.1449,
4496
+ "step": 641
4497
+ },
4498
+ {
4499
+ "epoch": 0.7045267489711934,
4500
+ "grad_norm": 0.1297188103199005,
4501
+ "learning_rate": 6.633785450061652e-06,
4502
+ "loss": 1.1244,
4503
+ "step": 642
4504
+ },
4505
+ {
4506
+ "epoch": 0.7056241426611797,
4507
+ "grad_norm": 0.12216539680957794,
4508
+ "learning_rate": 6.609124537607891e-06,
4509
+ "loss": 1.099,
4510
+ "step": 643
4511
+ },
4512
+ {
4513
+ "epoch": 0.706721536351166,
4514
+ "grad_norm": 0.12714643776416779,
4515
+ "learning_rate": 6.584463625154132e-06,
4516
+ "loss": 1.1373,
4517
+ "step": 644
4518
+ },
4519
+ {
4520
+ "epoch": 0.7078189300411523,
4521
+ "grad_norm": 0.12196072936058044,
4522
+ "learning_rate": 6.559802712700371e-06,
4523
+ "loss": 1.1225,
4524
+ "step": 645
4525
+ },
4526
+ {
4527
+ "epoch": 0.7089163237311386,
4528
+ "grad_norm": 0.1701362133026123,
4529
+ "learning_rate": 6.53514180024661e-06,
4530
+ "loss": 0.991,
4531
+ "step": 646
4532
+ },
4533
+ {
4534
+ "epoch": 0.7100137174211248,
4535
+ "grad_norm": 0.1309044361114502,
4536
+ "learning_rate": 6.5104808877928495e-06,
4537
+ "loss": 1.1614,
4538
+ "step": 647
4539
+ },
4540
+ {
4541
+ "epoch": 0.7111111111111111,
4542
+ "grad_norm": 0.1310199499130249,
4543
+ "learning_rate": 6.485819975339088e-06,
4544
+ "loss": 1.1724,
4545
+ "step": 648
4546
+ },
4547
+ {
4548
+ "epoch": 0.7122085048010974,
4549
+ "grad_norm": 0.15935364365577698,
4550
+ "learning_rate": 6.461159062885327e-06,
4551
+ "loss": 1.0417,
4552
+ "step": 649
4553
+ },
4554
+ {
4555
+ "epoch": 0.7133058984910837,
4556
+ "grad_norm": 0.13248024880886078,
4557
+ "learning_rate": 6.436498150431566e-06,
4558
+ "loss": 1.2158,
4559
+ "step": 650
4560
+ },
4561
+ {
4562
+ "epoch": 0.7144032921810699,
4563
+ "grad_norm": 0.14017465710639954,
4564
+ "learning_rate": 6.411837237977806e-06,
4565
+ "loss": 1.1212,
4566
+ "step": 651
4567
+ },
4568
+ {
4569
+ "epoch": 0.7155006858710562,
4570
+ "grad_norm": 0.13974924385547638,
4571
+ "learning_rate": 6.387176325524045e-06,
4572
+ "loss": 1.0866,
4573
+ "step": 652
4574
+ },
4575
+ {
4576
+ "epoch": 0.7165980795610425,
4577
+ "grad_norm": 0.13914860785007477,
4578
+ "learning_rate": 6.362515413070284e-06,
4579
+ "loss": 1.046,
4580
+ "step": 653
4581
+ },
4582
+ {
4583
+ "epoch": 0.7176954732510288,
4584
+ "grad_norm": 0.1510930210351944,
4585
+ "learning_rate": 6.3378545006165236e-06,
4586
+ "loss": 0.9835,
4587
+ "step": 654
4588
+ },
4589
+ {
4590
+ "epoch": 0.7187928669410151,
4591
+ "grad_norm": 0.13082289695739746,
4592
+ "learning_rate": 6.3131935881627625e-06,
4593
+ "loss": 1.1769,
4594
+ "step": 655
4595
+ },
4596
+ {
4597
+ "epoch": 0.7198902606310014,
4598
+ "grad_norm": 0.14069297909736633,
4599
+ "learning_rate": 6.288532675709001e-06,
4600
+ "loss": 1.0869,
4601
+ "step": 656
4602
+ },
4603
+ {
4604
+ "epoch": 0.7209876543209877,
4605
+ "grad_norm": 0.1553945541381836,
4606
+ "learning_rate": 6.263871763255241e-06,
4607
+ "loss": 1.0641,
4608
+ "step": 657
4609
+ },
4610
+ {
4611
+ "epoch": 0.722085048010974,
4612
+ "grad_norm": 0.14064814150333405,
4613
+ "learning_rate": 6.23921085080148e-06,
4614
+ "loss": 1.1924,
4615
+ "step": 658
4616
+ },
4617
+ {
4618
+ "epoch": 0.7231824417009602,
4619
+ "grad_norm": 0.1389569491147995,
4620
+ "learning_rate": 6.214549938347719e-06,
4621
+ "loss": 1.0729,
4622
+ "step": 659
4623
+ },
4624
+ {
4625
+ "epoch": 0.7242798353909465,
4626
+ "grad_norm": 0.14110144972801208,
4627
+ "learning_rate": 6.189889025893958e-06,
4628
+ "loss": 1.1349,
4629
+ "step": 660
4630
+ },
4631
+ {
4632
+ "epoch": 0.7253772290809328,
4633
+ "grad_norm": 0.13982906937599182,
4634
+ "learning_rate": 6.1652281134401985e-06,
4635
+ "loss": 1.0304,
4636
+ "step": 661
4637
+ },
4638
+ {
4639
+ "epoch": 0.7264746227709191,
4640
+ "grad_norm": 0.12203299254179001,
4641
+ "learning_rate": 6.140567200986437e-06,
4642
+ "loss": 1.2023,
4643
+ "step": 662
4644
+ },
4645
+ {
4646
+ "epoch": 0.7275720164609053,
4647
+ "grad_norm": 0.1401350200176239,
4648
+ "learning_rate": 6.115906288532676e-06,
4649
+ "loss": 1.0947,
4650
+ "step": 663
4651
+ },
4652
+ {
4653
+ "epoch": 0.7286694101508916,
4654
+ "grad_norm": 0.14056162536144257,
4655
+ "learning_rate": 6.091245376078916e-06,
4656
+ "loss": 1.073,
4657
+ "step": 664
4658
+ },
4659
+ {
4660
+ "epoch": 0.7297668038408779,
4661
+ "grad_norm": 0.13901904225349426,
4662
+ "learning_rate": 6.066584463625155e-06,
4663
+ "loss": 1.134,
4664
+ "step": 665
4665
+ },
4666
+ {
4667
+ "epoch": 0.7308641975308642,
4668
+ "grad_norm": 0.1339583396911621,
4669
+ "learning_rate": 6.041923551171394e-06,
4670
+ "loss": 1.2012,
4671
+ "step": 666
4672
  }
4673
  ],
4674
  "logging_steps": 1,
 
@@ -4170,7 +4688,7 @@
  "attributes": {}
  }
  },
- "total_flos": 6.134242377148416e+17,
+ "total_flos": 6.904814083900785e+17,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null