Training in progress, step 900, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +713 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ceaf78b1155a682386b3cfb79f11317f0c2457782dbd3657cdaf9291dfdca1e3
 size 645975704

 version https://git-lfs.github.com/spec/v1
+oid sha256:a73ccdef1570147d126299fecf0da458f1ba19bb57d6fdc7db1abe9d83d52c25
 size 645975704

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:96171409d3c5c295e89036253fc60fcce59cc896bc1a7504fe583ec4de968ee7
 size 172499150

 version https://git-lfs.github.com/spec/v1
+oid sha256:f941851548332d5c59fd66e5e5ff0c39cf73f6812af458137852d4a81a14d635
 size 172499150

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:eb0108f5d0f01af02c687c6ab1e72d8d1b85397b6d7f385716091e5744faad05
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:705272c5ad69f3447ec8c3ef520427cc9e3a750c4d06cc9a86c3411b8c96c72f
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:08a704d06eca7b97f105133fe836286c56aa3985170f4aa8b30cfdd074a3d39d
 size 2080

 version https://git-lfs.github.com/spec/v1
+oid sha256:f40671505ac1f1b5a829b35d9dc2110b449a6843e5a694b695ca6aced79d181b
 size 2080

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.22727738320827484,
   "best_model_checkpoint": "miner_id_24/checkpoint-500",
-  "epoch": 0.011804807507857574,
   "eval_steps": 100,
-  "global_step": 800,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -5679,6 +5679,714 @@
       "eval_samples_per_second": 4.772,
       "eval_steps_per_second": 4.772,
       "step": 800
     }
   ],
   "logging_steps": 1,
@@ -5693,7 +6401,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 3
       }
     },
     "TrainerControl": {
@@ -5702,12 +6410,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 6.6928744759296e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.22727738320827484,
   "best_model_checkpoint": "miner_id_24/checkpoint-500",
+  "epoch": 0.013280408446339771,
   "eval_steps": 100,
+  "global_step": 900,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 4.772,
       "eval_steps_per_second": 4.772,
       "step": 800
+    },
+    {
+      "epoch": 0.011819563517242396,
+      "grad_norm": 1.8588392734527588,
+      "learning_rate": 0.0001999928936129436,
+      "loss": 0.3129,
+      "step": 801
+    },
+    {
+      "epoch": 0.011834319526627219,
+      "grad_norm": 2.285543441772461,
+      "learning_rate": 0.00019999287906102836,
+      "loss": 0.3976,
+      "step": 802
+    },
+    {
+      "epoch": 0.011849075536012041,
+      "grad_norm": 0.8554681539535522,
+      "learning_rate": 0.0001999928499571979,
+      "loss": 0.2226,
+      "step": 803
+    },
+    {
+      "epoch": 0.011863831545396863,
+      "grad_norm": 1.9635239839553833,
+      "learning_rate": 0.00019999283540528268,
+      "loss": 0.2287,
+      "step": 804
+    },
+    {
+      "epoch": 0.011878587554781685,
+      "grad_norm": 2.571794033050537,
+      "learning_rate": 0.00019999282085336745,
+      "loss": 0.2562,
+      "step": 805
+    },
+    {
+      "epoch": 0.011893343564166508,
+      "grad_norm": 0.862459659576416,
+      "learning_rate": 0.00019999280630145222,
+      "loss": 0.2025,
+      "step": 806
+    },
+    {
+      "epoch": 0.011908099573551328,
+      "grad_norm": 0.749636709690094,
+      "learning_rate": 0.00019999277719762176,
+      "loss": 0.2318,
+      "step": 807
+    },
+    {
+      "epoch": 0.01192285558293615,
+      "grad_norm": 0.8192420601844788,
+      "learning_rate": 0.00019999277719762176,
+      "loss": 0.2551,
+      "step": 808
+    },
+    {
+      "epoch": 0.011937611592320973,
+      "grad_norm": 0.7866696715354919,
+      "learning_rate": 0.0001999927480937913,
+      "loss": 0.1355,
+      "step": 809
+    },
+    {
+      "epoch": 0.011952367601705795,
+      "grad_norm": 1.7909586429595947,
+      "learning_rate": 0.00019999273354187608,
+      "loss": 0.2249,
+      "step": 810
+    },
+    {
+      "epoch": 0.011967123611090617,
+      "grad_norm": 2.0296058654785156,
+      "learning_rate": 0.00019999270443804562,
+      "loss": 0.5412,
+      "step": 811
+    },
+    {
+      "epoch": 0.01198187962047544,
+      "grad_norm": 1.0121299028396606,
+      "learning_rate": 0.0001999926898861304,
+      "loss": 0.1499,
+      "step": 812
+    },
+    {
+      "epoch": 0.011996635629860261,
+      "grad_norm": 2.553496837615967,
+      "learning_rate": 0.00019999267533421516,
+      "loss": 0.3079,
+      "step": 813
+    },
+    {
+      "epoch": 0.012011391639245082,
+      "grad_norm": 1.5361422300338745,
+      "learning_rate": 0.0001999926462303847,
+      "loss": 0.3867,
+      "step": 814
+    },
+    {
+      "epoch": 0.012026147648629904,
+      "grad_norm": 1.4031721353530884,
+      "learning_rate": 0.0001999926462303847,
+      "loss": 0.1639,
+      "step": 815
+    },
+    {
+      "epoch": 0.012040903658014726,
+      "grad_norm": 3.4092421531677246,
+      "learning_rate": 0.00019999261712655425,
+      "loss": 0.3241,
+      "step": 816
+    },
+    {
+      "epoch": 0.012055659667399549,
+      "grad_norm": 1.655253529548645,
+      "learning_rate": 0.00019999260257463902,
+      "loss": 0.2299,
+      "step": 817
+    },
+    {
+      "epoch": 0.012070415676784371,
+      "grad_norm": 1.2588114738464355,
+      "learning_rate": 0.00019999257347080857,
+      "loss": 0.1894,
+      "step": 818
+    },
+    {
+      "epoch": 0.012085171686169193,
+      "grad_norm": 5.756618976593018,
+      "learning_rate": 0.00019999255891889334,
+      "loss": 0.2173,
+      "step": 819
+    },
+    {
+      "epoch": 0.012099927695554014,
+      "grad_norm": 3.3106698989868164,
+      "learning_rate": 0.0001999925443669781,
+      "loss": 0.5165,
+      "step": 820
+    },
+    {
+      "epoch": 0.012114683704938836,
+      "grad_norm": 2.1290841102600098,
+      "learning_rate": 0.00019999251526314765,
+      "loss": 0.2764,
+      "step": 821
+    },
+    {
+      "epoch": 0.012129439714323658,
+      "grad_norm": 3.6686553955078125,
+      "learning_rate": 0.00019999251526314765,
+      "loss": 0.3461,
+      "step": 822
+    },
+    {
+      "epoch": 0.01214419572370848,
+      "grad_norm": 1.1680198907852173,
+      "learning_rate": 0.0001999924861593172,
+      "loss": 0.1711,
+      "step": 823
+    },
+    {
+      "epoch": 0.012158951733093303,
+      "grad_norm": 0.8747446537017822,
+      "learning_rate": 0.00019999245705548674,
+      "loss": 0.1633,
+      "step": 824
+    },
+    {
+      "epoch": 0.012173707742478125,
+      "grad_norm": 1.3591049909591675,
+      "learning_rate": 0.0001999924425035715,
+      "loss": 0.1543,
+      "step": 825
+    },
+    {
+      "epoch": 0.012188463751862947,
+      "grad_norm": 0.7809590697288513,
+      "learning_rate": 0.00019999242795165628,
+      "loss": 0.2076,
+      "step": 826
+    },
+    {
+      "epoch": 0.012203219761247767,
+      "grad_norm": 1.4864180088043213,
+      "learning_rate": 0.00019999241339974105,
+      "loss": 0.2414,
+      "step": 827
+    },
+    {
+      "epoch": 0.01221797577063259,
+      "grad_norm": 2.2250685691833496,
+      "learning_rate": 0.0001999923842959106,
+      "loss": 0.2012,
+      "step": 828
+    },
+    {
+      "epoch": 0.012232731780017412,
+      "grad_norm": 1.8341047763824463,
+      "learning_rate": 0.00019999236974399537,
+      "loss": 0.2294,
+      "step": 829
+    },
+    {
+      "epoch": 0.012247487789402234,
+      "grad_norm": 2.336993932723999,
+      "learning_rate": 0.00019999235519208014,
+      "loss": 0.2665,
+      "step": 830
+    },
+    {
+      "epoch": 0.012262243798787056,
+      "grad_norm": 1.2305421829223633,
+      "learning_rate": 0.00019999232608824968,
+      "loss": 0.2372,
+      "step": 831
+    },
+    {
+      "epoch": 0.012276999808171879,
+      "grad_norm": 1.3784295320510864,
+      "learning_rate": 0.00019999231153633446,
+      "loss": 0.3309,
+      "step": 832
+    },
+    {
+      "epoch": 0.012291755817556699,
+      "grad_norm": 0.9471179842948914,
+      "learning_rate": 0.00019999229698441923,
+      "loss": 0.2573,
+      "step": 833
+    },
+    {
+      "epoch": 0.012306511826941521,
+      "grad_norm": 0.8644984364509583,
+      "learning_rate": 0.00019999226788058877,
+      "loss": 0.1812,
+      "step": 834
+    },
+    {
+      "epoch": 0.012321267836326344,
+      "grad_norm": 1.345884084701538,
+      "learning_rate": 0.00019999225332867354,
+      "loss": 0.2861,
+      "step": 835
+    },
+    {
+      "epoch": 0.012336023845711166,
+      "grad_norm": 4.202690124511719,
+      "learning_rate": 0.0001999922387767583,
+      "loss": 0.3206,
+      "step": 836
+    },
+    {
+      "epoch": 0.012350779855095988,
+      "grad_norm": 0.8438819050788879,
+      "learning_rate": 0.00019999222422484308,
+      "loss": 0.1899,
+      "step": 837
+    },
+    {
+      "epoch": 0.01236553586448081,
+      "grad_norm": 1.8235636949539185,
+      "learning_rate": 0.00019999219512101263,
+      "loss": 0.3505,
+      "step": 838
+    },
+    {
+      "epoch": 0.012380291873865632,
+      "grad_norm": 0.7939683198928833,
+      "learning_rate": 0.0001999921805690974,
+      "loss": 0.2743,
+      "step": 839
+    },
+    {
+      "epoch": 0.012395047883250453,
+      "grad_norm": 1.7612792253494263,
+      "learning_rate": 0.00019999216601718217,
+      "loss": 0.3142,
+      "step": 840
+    },
+    {
+      "epoch": 0.012409803892635275,
+      "grad_norm": 1.8200362920761108,
+      "learning_rate": 0.00019999213691335171,
+      "loss": 0.3512,
+      "step": 841
+    },
+    {
+      "epoch": 0.012424559902020097,
+      "grad_norm": 0.8324838280677795,
+      "learning_rate": 0.00019999212236143649,
+      "loss": 0.1305,
+      "step": 842
+    },
+    {
+      "epoch": 0.01243931591140492,
+      "grad_norm": 1.1254831552505493,
+      "learning_rate": 0.00019999210780952126,
+      "loss": 0.2469,
+      "step": 843
+    },
+    {
+      "epoch": 0.012454071920789742,
+      "grad_norm": 0.7088738679885864,
+      "learning_rate": 0.0001999920787056908,
+      "loss": 0.2798,
+      "step": 844
+    },
+    {
+      "epoch": 0.012468827930174564,
+      "grad_norm": 0.7736618518829346,
+      "learning_rate": 0.00019999206415377557,
+      "loss": 0.1676,
+      "step": 845
+    },
+    {
+      "epoch": 0.012483583939559386,
+      "grad_norm": 12.703507423400879,
+      "learning_rate": 0.00019999204960186034,
+      "loss": 0.3818,
+      "step": 846
+    },
+    {
+      "epoch": 0.012498339948944207,
+      "grad_norm": 0.668497622013092,
+      "learning_rate": 0.0001999920204980299,
+      "loss": 0.2096,
+      "step": 847
+    },
+    {
+      "epoch": 0.012513095958329029,
+      "grad_norm": 0.7278184294700623,
+      "learning_rate": 0.00019999200594611466,
+      "loss": 0.2179,
+      "step": 848
+    },
+    {
+      "epoch": 0.012527851967713851,
+      "grad_norm": 0.7589313983917236,
+      "learning_rate": 0.0001999919768422842,
+      "loss": 0.1714,
+      "step": 849
+    },
+    {
+      "epoch": 0.012542607977098674,
+      "grad_norm": 0.637834906578064,
+      "learning_rate": 0.00019999196229036897,
+      "loss": 0.1514,
+      "step": 850
+    },
+    {
+      "epoch": 0.012557363986483496,
+      "grad_norm": 2.347073793411255,
+      "learning_rate": 0.00019999194773845375,
+      "loss": 0.5262,
+      "step": 851
+    },
+    {
+      "epoch": 0.012572119995868318,
+      "grad_norm": 0.46010205149650574,
+      "learning_rate": 0.0001999919186346233,
+      "loss": 0.1787,
+      "step": 852
+    },
+    {
+      "epoch": 0.012586876005253139,
+      "grad_norm": 1.3159286975860596,
+      "learning_rate": 0.00019999190408270806,
+      "loss": 0.3263,
+      "step": 853
+    },
+    {
+      "epoch": 0.01260163201463796,
+      "grad_norm": 0.6969799995422363,
+      "learning_rate": 0.00019999188953079283,
+      "loss": 0.1956,
+      "step": 854
+    },
+    {
+      "epoch": 0.012616388024022783,
+      "grad_norm": 0.7132418751716614,
+      "learning_rate": 0.00019999186042696238,
+      "loss": 0.0918,
+      "step": 855
+    },
+    {
+      "epoch": 0.012631144033407605,
+      "grad_norm": 1.258521556854248,
+      "learning_rate": 0.00019999184587504715,
+      "loss": 0.2805,
+      "step": 856
+    },
+    {
+      "epoch": 0.012645900042792427,
+      "grad_norm": 2.1078953742980957,
+      "learning_rate": 0.00019999183132313192,
+      "loss": 0.3306,
+      "step": 857
+    },
+    {
+      "epoch": 0.01266065605217725,
+      "grad_norm": 1.695388674736023,
+      "learning_rate": 0.00019999180221930146,
+      "loss": 0.2416,
+      "step": 858
+    },
+    {
+      "epoch": 0.012675412061562072,
+      "grad_norm": 0.9387673735618591,
+      "learning_rate": 0.00019999178766738623,
+      "loss": 0.3334,
+      "step": 859
+    },
+    {
+      "epoch": 0.012690168070946892,
+      "grad_norm": 1.193365216255188,
+      "learning_rate": 0.000199991773115471,
+      "loss": 0.1776,
+      "step": 860
+    },
+    {
+      "epoch": 0.012704924080331715,
+      "grad_norm": 1.4889655113220215,
+      "learning_rate": 0.00019999174401164055,
+      "loss": 0.1784,
+      "step": 861
+    },
+    {
+      "epoch": 0.012719680089716537,
+      "grad_norm": 1.3445992469787598,
+      "learning_rate": 0.00019999172945972532,
+      "loss": 0.2482,
+      "step": 862
+    },
+    {
+      "epoch": 0.012734436099101359,
+      "grad_norm": 1.2509477138519287,
+      "learning_rate": 0.0001999917149078101,
+      "loss": 0.1961,
+      "step": 863
+    },
+    {
+      "epoch": 0.012749192108486181,
+      "grad_norm": 0.9405835270881653,
+      "learning_rate": 0.00019999168580397964,
+      "loss": 0.1089,
+      "step": 864
+    },
+    {
+      "epoch": 0.012763948117871004,
+      "grad_norm": 0.4583745300769806,
+      "learning_rate": 0.0001999916712520644,
+      "loss": 0.0451,
+      "step": 865
+    },
+    {
+      "epoch": 0.012778704127255826,
+      "grad_norm": 0.8631611466407776,
+      "learning_rate": 0.00019999165670014918,
+      "loss": 0.2,
+      "step": 866
+    },
+    {
+      "epoch": 0.012793460136640646,
+      "grad_norm": 2.4206740856170654,
+      "learning_rate": 0.00019999162759631872,
+      "loss": 0.3839,
+      "step": 867
+    },
+    {
+      "epoch": 0.012808216146025468,
+      "grad_norm": 4.0995659828186035,
+      "learning_rate": 0.00019999159849248827,
+      "loss": 0.3994,
+      "step": 868
+    },
+    {
+      "epoch": 0.01282297215541029,
+      "grad_norm": 0.8621058464050293,
+      "learning_rate": 0.00019999158394057304,
+      "loss": 0.1875,
+      "step": 869
+    },
+    {
+      "epoch": 0.012837728164795113,
+      "grad_norm": 1.5550447702407837,
+      "learning_rate": 0.0001999915693886578,
+      "loss": 0.2768,
+      "step": 870
+    },
+    {
+      "epoch": 0.012852484174179935,
+      "grad_norm": 2.0353689193725586,
+      "learning_rate": 0.00019999154028482735,
+      "loss": 0.3118,
+      "step": 871
+    },
+    {
+      "epoch": 0.012867240183564757,
+      "grad_norm": 1.4061765670776367,
+      "learning_rate": 0.00019999152573291212,
+      "loss": 0.3058,
+      "step": 872
+    },
+    {
+      "epoch": 0.012881996192949578,
+      "grad_norm": 1.2627537250518799,
+      "learning_rate": 0.0001999915111809969,
+      "loss": 0.3512,
+      "step": 873
+    },
+    {
+      "epoch": 0.0128967522023344,
+      "grad_norm": 2.3084962368011475,
+      "learning_rate": 0.00019999148207716644,
+      "loss": 0.3348,
+      "step": 874
+    },
+    {
+      "epoch": 0.012911508211719222,
+      "grad_norm": 0.8072152137756348,
+      "learning_rate": 0.0001999914675252512,
+      "loss": 0.2395,
+      "step": 875
+    },
+    {
+      "epoch": 0.012926264221104045,
+      "grad_norm": 1.2461738586425781,
+      "learning_rate": 0.00019999145297333598,
+      "loss": 0.4091,
+      "step": 876
+    },
+    {
+      "epoch": 0.012941020230488867,
+      "grad_norm": 0.8416983485221863,
+      "learning_rate": 0.00019999142386950552,
+      "loss": 0.2494,
+      "step": 877
+    },
+    {
+      "epoch": 0.012955776239873689,
+      "grad_norm": 0.7885262370109558,
+      "learning_rate": 0.0001999914093175903,
+      "loss": 0.2535,
+      "step": 878
+    },
+    {
+      "epoch": 0.012970532249258511,
+      "grad_norm": 0.6679831743240356,
+      "learning_rate": 0.00019999139476567507,
+      "loss": 0.2249,
+      "step": 879
+    },
+    {
+      "epoch": 0.012985288258643332,
+      "grad_norm": 1.0578850507736206,
+      "learning_rate": 0.0001999913656618446,
+      "loss": 0.2963,
+      "step": 880
+    },
+    {
+      "epoch": 0.013000044268028154,
+      "grad_norm": 0.6506802439689636,
+      "learning_rate": 0.00019999135110992938,
+      "loss": 0.2141,
+      "step": 881
+    },
+    {
+      "epoch": 0.013014800277412976,
+      "grad_norm": 0.920437753200531,
+      "learning_rate": 0.00019999132200609893,
+      "loss": 0.2699,
+      "step": 882
+    },
+    {
+      "epoch": 0.013029556286797798,
+      "grad_norm": 1.2478429079055786,
+      "learning_rate": 0.0001999913074541837,
+      "loss": 0.2938,
+      "step": 883
+    },
+    {
+      "epoch": 0.01304431229618262,
+      "grad_norm": 0.9770641922950745,
+      "learning_rate": 0.00019999127835035324,
+      "loss": 0.2247,
+      "step": 884
+    },
+    {
+      "epoch": 0.013059068305567443,
+      "grad_norm": 0.7141462564468384,
+      "learning_rate": 0.000199991263798438,
+      "loss": 0.1756,
+      "step": 885
+    },
+    {
+      "epoch": 0.013073824314952265,
+      "grad_norm": 0.6457951664924622,
+      "learning_rate": 0.00019999124924652278,
+      "loss": 0.2273,
+      "step": 886
+    },
+    {
+      "epoch": 0.013088580324337086,
+      "grad_norm": 1.0006299018859863,
+      "learning_rate": 0.00019999122014269233,
+      "loss": 0.1971,
+      "step": 887
+    },
+    {
+      "epoch": 0.013103336333721908,
+      "grad_norm": 1.2667323350906372,
+      "learning_rate": 0.0001999912055907771,
+      "loss": 0.1411,
+      "step": 888
+    },
+    {
+      "epoch": 0.01311809234310673,
+      "grad_norm": 1.0697591304779053,
+      "learning_rate": 0.00019999119103886187,
+      "loss": 0.2687,
+      "step": 889
+    },
+    {
+      "epoch": 0.013132848352491552,
+      "grad_norm": 0.8275360465049744,
+      "learning_rate": 0.00019999116193503141,
+      "loss": 0.1895,
+      "step": 890
+    },
+    {
+      "epoch": 0.013147604361876375,
+      "grad_norm": 1.5881019830703735,
+      "learning_rate": 0.00019999113283120096,
+      "loss": 0.1936,
+      "step": 891
+    },
+    {
+      "epoch": 0.013162360371261197,
+      "grad_norm": 11.928915023803711,
+      "learning_rate": 0.00019999113283120096,
+      "loss": 0.6702,
+      "step": 892
+    },
+    {
+      "epoch": 0.013177116380646017,
+      "grad_norm": 0.9647379517555237,
+      "learning_rate": 0.0001999911037273705,
+      "loss": 0.2089,
+      "step": 893
+    },
+    {
+      "epoch": 0.01319187239003084,
+      "grad_norm": 0.9707943797111511,
+      "learning_rate": 0.00019999107462354004,
+      "loss": 0.2171,
+      "step": 894
+    },
+    {
+      "epoch": 0.013206628399415662,
+      "grad_norm": 0.6461355686187744,
+      "learning_rate": 0.00019999106007162482,
+      "loss": 0.1915,
+      "step": 895
+    },
+    {
+      "epoch": 0.013221384408800484,
+      "grad_norm": 1.8797247409820557,
+      "learning_rate": 0.0001999910455197096,
+      "loss": 0.4694,
+      "step": 896
+    },
+    {
+      "epoch": 0.013236140418185306,
+      "grad_norm": 2.4890024662017822,
+      "learning_rate": 0.00019999101641587913,
+      "loss": 0.2185,
+      "step": 897
+    },
+    {
+      "epoch": 0.013250896427570128,
+      "grad_norm": 1.5101224184036255,
+      "learning_rate": 0.0001999910018639639,
+      "loss": 0.3527,
+      "step": 898
+    },
+    {
+      "epoch": 0.01326565243695495,
+      "grad_norm": 1.7768837213516235,
+      "learning_rate": 0.00019999097276013345,
+      "loss": 0.3919,
+      "step": 899
+    },
+    {
+      "epoch": 0.013280408446339771,
+      "grad_norm": 1.1590951681137085,
+      "learning_rate": 0.00019999095820821822,
+      "loss": 0.2561,
+      "step": 900
+    },
+    {
+      "epoch": 0.013280408446339771,
+      "eval_loss": 0.30002906918525696,
+      "eval_runtime": 28.4979,
+      "eval_samples_per_second": 4.772,
+      "eval_steps_per_second": 4.772,
+      "step": 900
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 4
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 7.533788331245568e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null