Training in progress, step 553, checkpoint

Browse files

Files changed (4) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +549 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a76ebf58b46d7d773c0e60f3c3b3202c39175dc82e54c189f3267d0947c4a8ff
 size 100198584

 version https://git-lfs.github.com/spec/v1
+oid sha256:7c7efe866276ae7cbe3d104678cd00bf3527bbd6e30a9a945a5ebecfee27e759
 size 100198584

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:62b4cdbdbd72fca14cbd95d40faf9abb8a8ecb3ba4993c400be6e7b437b4b824
 size 50675604

 version https://git-lfs.github.com/spec/v1
+oid sha256:32b9dc35758135f884494cc0e53f601a5e3e91671cbbf46618714b389006b98e
 size 50675604

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b050456dfa3d81625079076f253a2e5a55a9198ab0c9ed74cbb8cd2fe6a1e442
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:6e166349555ca5e7760bd934ba6aeb783f7b623356bfc480bba3fd1729b099ba
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.6940005478951694,
   "eval_steps": 500,
-  "global_step": 475,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3332,6 +3332,552 @@
       "learning_rate": 7.1575342465753425e-06,
       "loss": 1.2028,
       "step": 475
     }
   ],
   "logging_steps": 1,
@@ -3351,7 +3897,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.357403510693028e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.8079627431284814,
   "eval_steps": 500,
+  "global_step": 553,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 7.1575342465753425e-06,
       "loss": 1.2028,
       "step": 475
+    },
+    {
+      "epoch": 0.6954616016802119,
+      "grad_norm": 0.12074082344770432,
+      "learning_rate": 7.123287671232877e-06,
+      "loss": 1.3148,
+      "step": 476
+    },
+    {
+      "epoch": 0.6969226554652543,
+      "grad_norm": 0.11308849602937698,
+      "learning_rate": 7.089041095890411e-06,
+      "loss": 1.2337,
+      "step": 477
+    },
+    {
+      "epoch": 0.6983837092502968,
+      "grad_norm": 0.11705081909894943,
+      "learning_rate": 7.054794520547946e-06,
+      "loss": 1.1486,
+      "step": 478
+    },
+    {
+      "epoch": 0.6998447630353393,
+      "grad_norm": 0.11467244476079941,
+      "learning_rate": 7.020547945205481e-06,
+      "loss": 1.3139,
+      "step": 479
+    },
+    {
+      "epoch": 0.7013058168203817,
+      "grad_norm": 0.11780110001564026,
+      "learning_rate": 6.9863013698630145e-06,
+      "loss": 1.2417,
+      "step": 480
+    },
+    {
+      "epoch": 0.7027668706054242,
+      "grad_norm": 0.11144915223121643,
+      "learning_rate": 6.952054794520548e-06,
+      "loss": 1.1892,
+      "step": 481
+    },
+    {
+      "epoch": 0.7042279243904667,
+      "grad_norm": 0.13650654256343842,
+      "learning_rate": 6.917808219178082e-06,
+      "loss": 1.0756,
+      "step": 482
+    },
+    {
+      "epoch": 0.7056889781755091,
+      "grad_norm": 0.11797624081373215,
+      "learning_rate": 6.883561643835617e-06,
+      "loss": 1.275,
+      "step": 483
+    },
+    {
+      "epoch": 0.7071500319605516,
+      "grad_norm": 0.1288430392742157,
+      "learning_rate": 6.849315068493151e-06,
+      "loss": 1.1719,
+      "step": 484
+    },
+    {
+      "epoch": 0.7086110857455941,
+      "grad_norm": 0.11288215219974518,
+      "learning_rate": 6.815068493150685e-06,
+      "loss": 1.1841,
+      "step": 485
+    },
+    {
+      "epoch": 0.7100721395306365,
+      "grad_norm": 0.11503782123327255,
+      "learning_rate": 6.78082191780822e-06,
+      "loss": 1.1977,
+      "step": 486
+    },
+    {
+      "epoch": 0.7115331933156789,
+      "grad_norm": 0.11059726774692535,
+      "learning_rate": 6.746575342465754e-06,
+      "loss": 1.1771,
+      "step": 487
+    },
+    {
+      "epoch": 0.7129942471007213,
+      "grad_norm": 0.12105996906757355,
+      "learning_rate": 6.712328767123288e-06,
+      "loss": 1.218,
+      "step": 488
+    },
+    {
+      "epoch": 0.7144553008857638,
+      "grad_norm": 0.1285051554441452,
+      "learning_rate": 6.678082191780823e-06,
+      "loss": 1.1423,
+      "step": 489
+    },
+    {
+      "epoch": 0.7159163546708063,
+      "grad_norm": 0.11841081827878952,
+      "learning_rate": 6.643835616438357e-06,
+      "loss": 1.1433,
+      "step": 490
+    },
+    {
+      "epoch": 0.7173774084558487,
+      "grad_norm": 0.11378856003284454,
+      "learning_rate": 6.609589041095891e-06,
+      "loss": 1.1941,
+      "step": 491
+    },
+    {
+      "epoch": 0.7188384622408912,
+      "grad_norm": 0.10600030422210693,
+      "learning_rate": 6.5753424657534245e-06,
+      "loss": 1.188,
+      "step": 492
+    },
+    {
+      "epoch": 0.7202995160259337,
+      "grad_norm": 0.10563358664512634,
+      "learning_rate": 6.541095890410959e-06,
+      "loss": 1.2165,
+      "step": 493
+    },
+    {
+      "epoch": 0.7217605698109761,
+      "grad_norm": 0.11092449724674225,
+      "learning_rate": 6.506849315068494e-06,
+      "loss": 1.1956,
+      "step": 494
+    },
+    {
+      "epoch": 0.7232216235960186,
+      "grad_norm": 0.11748078465461731,
+      "learning_rate": 6.472602739726028e-06,
+      "loss": 1.1741,
+      "step": 495
+    },
+    {
+      "epoch": 0.7246826773810611,
+      "grad_norm": 0.12683749198913574,
+      "learning_rate": 6.438356164383563e-06,
+      "loss": 1.166,
+      "step": 496
+    },
+    {
+      "epoch": 0.7261437311661035,
+      "grad_norm": 0.10706394910812378,
+      "learning_rate": 6.4041095890410965e-06,
+      "loss": 1.2278,
+      "step": 497
+    },
+    {
+      "epoch": 0.727604784951146,
+      "grad_norm": 0.10801483690738678,
+      "learning_rate": 6.36986301369863e-06,
+      "loss": 1.1693,
+      "step": 498
+    },
+    {
+      "epoch": 0.7290658387361885,
+      "grad_norm": 0.12337271869182587,
+      "learning_rate": 6.335616438356164e-06,
+      "loss": 1.229,
+      "step": 499
+    },
+    {
+      "epoch": 0.7305268925212309,
+      "grad_norm": 0.13341547548770905,
+      "learning_rate": 6.301369863013699e-06,
+      "loss": 1.1274,
+      "step": 500
+    },
+    {
+      "epoch": 0.7319879463062734,
+      "grad_norm": 0.11283931136131287,
+      "learning_rate": 6.267123287671233e-06,
+      "loss": 1.1867,
+      "step": 501
+    },
+    {
+      "epoch": 0.7334490000913159,
+      "grad_norm": 0.1254453957080841,
+      "learning_rate": 6.2328767123287685e-06,
+      "loss": 1.2634,
+      "step": 502
+    },
+    {
+      "epoch": 0.7349100538763583,
+      "grad_norm": 0.125976100564003,
+      "learning_rate": 6.198630136986302e-06,
+      "loss": 1.0729,
+      "step": 503
+    },
+    {
+      "epoch": 0.7363711076614008,
+      "grad_norm": 0.10732964426279068,
+      "learning_rate": 6.164383561643836e-06,
+      "loss": 1.2072,
+      "step": 504
+    },
+    {
+      "epoch": 0.7378321614464433,
+      "grad_norm": 0.12129033356904984,
+      "learning_rate": 6.13013698630137e-06,
+      "loss": 1.1197,
+      "step": 505
+    },
+    {
+      "epoch": 0.7392932152314857,
+      "grad_norm": 0.12821297347545624,
+      "learning_rate": 6.095890410958905e-06,
+      "loss": 1.1515,
+      "step": 506
+    },
+    {
+      "epoch": 0.7407542690165282,
+      "grad_norm": 0.11655119061470032,
+      "learning_rate": 6.061643835616439e-06,
+      "loss": 1.172,
+      "step": 507
+    },
+    {
+      "epoch": 0.7422153228015707,
+      "grad_norm": 0.11455903202295303,
+      "learning_rate": 6.027397260273973e-06,
+      "loss": 1.1762,
+      "step": 508
+    },
+    {
+      "epoch": 0.7436763765866131,
+      "grad_norm": 0.10701651871204376,
+      "learning_rate": 5.993150684931507e-06,
+      "loss": 1.1685,
+      "step": 509
+    },
+    {
+      "epoch": 0.7451374303716556,
+      "grad_norm": 0.11655491590499878,
+      "learning_rate": 5.958904109589042e-06,
+      "loss": 1.1975,
+      "step": 510
+    },
+    {
+      "epoch": 0.746598484156698,
+      "grad_norm": 0.11159254610538483,
+      "learning_rate": 5.924657534246576e-06,
+      "loss": 1.2226,
+      "step": 511
+    },
+    {
+      "epoch": 0.7480595379417405,
+      "grad_norm": 0.11702670156955719,
+      "learning_rate": 5.89041095890411e-06,
+      "loss": 1.1552,
+      "step": 512
+    },
+    {
+      "epoch": 0.749520591726783,
+      "grad_norm": 0.11294779181480408,
+      "learning_rate": 5.856164383561645e-06,
+      "loss": 1.189,
+      "step": 513
+    },
+    {
+      "epoch": 0.7509816455118254,
+      "grad_norm": 0.10862728208303452,
+      "learning_rate": 5.821917808219179e-06,
+      "loss": 1.1997,
+      "step": 514
+    },
+    {
+      "epoch": 0.7524426992968679,
+      "grad_norm": 0.12142271548509598,
+      "learning_rate": 5.7876712328767125e-06,
+      "loss": 1.1985,
+      "step": 515
+    },
+    {
+      "epoch": 0.7539037530819104,
+      "grad_norm": 0.11388342082500458,
+      "learning_rate": 5.753424657534246e-06,
+      "loss": 1.1195,
+      "step": 516
+    },
+    {
+      "epoch": 0.7553648068669528,
+      "grad_norm": 0.11917892098426819,
+      "learning_rate": 5.719178082191781e-06,
+      "loss": 1.1685,
+      "step": 517
+    },
+    {
+      "epoch": 0.7568258606519952,
+      "grad_norm": 0.11466323584318161,
+      "learning_rate": 5.684931506849316e-06,
+      "loss": 1.1837,
+      "step": 518
+    },
+    {
+      "epoch": 0.7582869144370377,
+      "grad_norm": 0.11477669328451157,
+      "learning_rate": 5.6506849315068506e-06,
+      "loss": 1.2336,
+      "step": 519
+    },
+    {
+      "epoch": 0.7597479682220801,
+      "grad_norm": 0.12040074169635773,
+      "learning_rate": 5.6164383561643845e-06,
+      "loss": 1.2553,
+      "step": 520
+    },
+    {
+      "epoch": 0.7612090220071226,
+      "grad_norm": 0.11461540311574936,
+      "learning_rate": 5.582191780821918e-06,
+      "loss": 1.1205,
+      "step": 521
+    },
+    {
+      "epoch": 0.7626700757921651,
+      "grad_norm": 0.10867593437433243,
+      "learning_rate": 5.547945205479452e-06,
+      "loss": 1.1629,
+      "step": 522
+    },
+    {
+      "epoch": 0.7641311295772075,
+      "grad_norm": 0.12366941571235657,
+      "learning_rate": 5.513698630136987e-06,
+      "loss": 1.2059,
+      "step": 523
+    },
+    {
+      "epoch": 0.76559218336225,
+      "grad_norm": 0.11807534843683243,
+      "learning_rate": 5.479452054794521e-06,
+      "loss": 1.2863,
+      "step": 524
+    },
+    {
+      "epoch": 0.7670532371472925,
+      "grad_norm": 0.11750409007072449,
+      "learning_rate": 5.445205479452055e-06,
+      "loss": 1.2293,
+      "step": 525
+    },
+    {
+      "epoch": 0.7685142909323349,
+      "grad_norm": 0.11874507367610931,
+      "learning_rate": 5.41095890410959e-06,
+      "loss": 1.2275,
+      "step": 526
+    },
+    {
+      "epoch": 0.7699753447173774,
+      "grad_norm": 0.12096529453992844,
+      "learning_rate": 5.376712328767124e-06,
+      "loss": 1.1525,
+      "step": 527
+    },
+    {
+      "epoch": 0.7714363985024199,
+      "grad_norm": 0.10740137845277786,
+      "learning_rate": 5.342465753424658e-06,
+      "loss": 1.2267,
+      "step": 528
+    },
+    {
+      "epoch": 0.7728974522874623,
+      "grad_norm": 0.12556499242782593,
+      "learning_rate": 5.308219178082192e-06,
+      "loss": 1.2042,
+      "step": 529
+    },
+    {
+      "epoch": 0.7743585060725048,
+      "grad_norm": 0.1109650582075119,
+      "learning_rate": 5.273972602739727e-06,
+      "loss": 1.1813,
+      "step": 530
+    },
+    {
+      "epoch": 0.7758195598575472,
+      "grad_norm": 0.10629246383905411,
+      "learning_rate": 5.239726027397261e-06,
+      "loss": 1.1959,
+      "step": 531
+    },
+    {
+      "epoch": 0.7772806136425897,
+      "grad_norm": 0.1113128662109375,
+      "learning_rate": 5.2054794520547945e-06,
+      "loss": 1.1441,
+      "step": 532
+    },
+    {
+      "epoch": 0.7787416674276322,
+      "grad_norm": 0.11163881421089172,
+      "learning_rate": 5.171232876712328e-06,
+      "loss": 1.1991,
+      "step": 533
+    },
+    {
+      "epoch": 0.7802027212126746,
+      "grad_norm": 0.12462608516216278,
+      "learning_rate": 5.136986301369864e-06,
+      "loss": 1.1397,
+      "step": 534
+    },
+    {
+      "epoch": 0.7816637749977171,
+      "grad_norm": 0.11631737649440765,
+      "learning_rate": 5.102739726027398e-06,
+      "loss": 1.1253,
+      "step": 535
+    },
+    {
+      "epoch": 0.7831248287827596,
+      "grad_norm": 0.12405448406934738,
+      "learning_rate": 5.068493150684932e-06,
+      "loss": 1.1595,
+      "step": 536
+    },
+    {
+      "epoch": 0.784585882567802,
+      "grad_norm": 0.1106800064444542,
+      "learning_rate": 5.0342465753424665e-06,
+      "loss": 1.2755,
+      "step": 537
+    },
+    {
+      "epoch": 0.7860469363528445,
+      "grad_norm": 0.10687270015478134,
+      "learning_rate": 5e-06,
+      "loss": 1.1443,
+      "step": 538
+    },
+    {
+      "epoch": 0.787507990137887,
+      "grad_norm": 0.10897688567638397,
+      "learning_rate": 4.965753424657534e-06,
+      "loss": 1.1917,
+      "step": 539
+    },
+    {
+      "epoch": 0.7889690439229294,
+      "grad_norm": 0.12325593084096909,
+      "learning_rate": 4.931506849315069e-06,
+      "loss": 1.165,
+      "step": 540
+    },
+    {
+      "epoch": 0.7904300977079719,
+      "grad_norm": 0.12180227786302567,
+      "learning_rate": 4.897260273972603e-06,
+      "loss": 1.15,
+      "step": 541
+    },
+    {
+      "epoch": 0.7918911514930144,
+      "grad_norm": 0.1084585040807724,
+      "learning_rate": 4.863013698630138e-06,
+      "loss": 1.2431,
+      "step": 542
+    },
+    {
+      "epoch": 0.7933522052780568,
+      "grad_norm": 0.11849282681941986,
+      "learning_rate": 4.8287671232876716e-06,
+      "loss": 1.2494,
+      "step": 543
+    },
+    {
+      "epoch": 0.7948132590630993,
+      "grad_norm": 0.1109924465417862,
+      "learning_rate": 4.7945205479452054e-06,
+      "loss": 1.1517,
+      "step": 544
+    },
+    {
+      "epoch": 0.7962743128481418,
+      "grad_norm": 0.1313486099243164,
+      "learning_rate": 4.76027397260274e-06,
+      "loss": 1.2044,
+      "step": 545
+    },
+    {
+      "epoch": 0.7977353666331842,
+      "grad_norm": 0.12946535646915436,
+      "learning_rate": 4.726027397260274e-06,
+      "loss": 1.101,
+      "step": 546
+    },
+    {
+      "epoch": 0.7991964204182267,
+      "grad_norm": 0.1083068773150444,
+      "learning_rate": 4.691780821917809e-06,
+      "loss": 1.1933,
+      "step": 547
+    },
+    {
+      "epoch": 0.8006574742032692,
+      "grad_norm": 0.11590442061424255,
+      "learning_rate": 4.657534246575343e-06,
+      "loss": 1.198,
+      "step": 548
+    },
+    {
+      "epoch": 0.8021185279883116,
+      "grad_norm": 0.11123711615800858,
+      "learning_rate": 4.6232876712328774e-06,
+      "loss": 1.1259,
+      "step": 549
+    },
+    {
+      "epoch": 0.803579581773354,
+      "grad_norm": 0.1170891597867012,
+      "learning_rate": 4.589041095890411e-06,
+      "loss": 1.1977,
+      "step": 550
+    },
+    {
+      "epoch": 0.8050406355583964,
+      "grad_norm": 0.12325557321310043,
+      "learning_rate": 4.554794520547945e-06,
+      "loss": 1.1549,
+      "step": 551
+    },
+    {
+      "epoch": 0.8065016893434389,
+      "grad_norm": 0.10925264656543732,
+      "learning_rate": 4.52054794520548e-06,
+      "loss": 1.1376,
+      "step": 552
+    },
+    {
+      "epoch": 0.8079627431284814,
+      "grad_norm": 0.10057859122753143,
+      "learning_rate": 4.486301369863014e-06,
+      "loss": 1.1237,
+      "step": 553
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 6.248047407796224e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null