LCK_LLM6 / trainer_state.json
Syzseisus's picture
Upload folder using huggingface_hub
80aa7d8 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.07948599059415778,
"eval_steps": 500,
"global_step": 4800,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.001655958137378287,
"grad_norm": 0.19597935676574707,
"learning_rate": 0.00019966890756553004,
"loss": 2.1972,
"step": 100
},
{
"epoch": 0.003311916274756574,
"grad_norm": 0.25808241963386536,
"learning_rate": 0.00019933771577352244,
"loss": 1.9677,
"step": 200
},
{
"epoch": 0.0049678744121348616,
"grad_norm": 0.23811133205890656,
"learning_rate": 0.00019900652398151486,
"loss": 1.9341,
"step": 300
},
{
"epoch": 0.006623832549513148,
"grad_norm": 0.26714324951171875,
"learning_rate": 0.00019867533218950728,
"loss": 1.915,
"step": 400
},
{
"epoch": 0.008279790686891435,
"grad_norm": 0.23645658791065216,
"learning_rate": 0.0001983441403974997,
"loss": 1.8916,
"step": 500
},
{
"epoch": 0.009935748824269723,
"grad_norm": 0.2878512740135193,
"learning_rate": 0.00019801294860549213,
"loss": 1.9003,
"step": 600
},
{
"epoch": 0.01159170696164801,
"grad_norm": 0.2687942087650299,
"learning_rate": 0.00019768175681348456,
"loss": 1.876,
"step": 700
},
{
"epoch": 0.013247665099026296,
"grad_norm": 0.2722982168197632,
"learning_rate": 0.00019735056502147698,
"loss": 1.9004,
"step": 800
},
{
"epoch": 0.014903623236404583,
"grad_norm": 0.25342944264411926,
"learning_rate": 0.0001970193732294694,
"loss": 1.8947,
"step": 900
},
{
"epoch": 0.01655958137378287,
"grad_norm": 0.2900806963443756,
"learning_rate": 0.0001966881814374618,
"loss": 1.8795,
"step": 1000
},
{
"epoch": 0.018215539511161158,
"grad_norm": 0.24855603277683258,
"learning_rate": 0.00019635698964545422,
"loss": 1.8657,
"step": 1100
},
{
"epoch": 0.019871497648539446,
"grad_norm": 0.25272709131240845,
"learning_rate": 0.00019602579785344665,
"loss": 1.8687,
"step": 1200
},
{
"epoch": 0.02152745578591773,
"grad_norm": 0.31408464908599854,
"learning_rate": 0.00019569460606143904,
"loss": 1.8332,
"step": 1300
},
{
"epoch": 0.02318341392329602,
"grad_norm": 0.26880863308906555,
"learning_rate": 0.00019536341426943147,
"loss": 1.8603,
"step": 1400
},
{
"epoch": 0.024839372060674308,
"grad_norm": 0.2371913194656372,
"learning_rate": 0.0001950322224774239,
"loss": 1.8273,
"step": 1500
},
{
"epoch": 0.026495330198052593,
"grad_norm": 0.2510370910167694,
"learning_rate": 0.00019470103068541632,
"loss": 1.8524,
"step": 1600
},
{
"epoch": 0.02815128833543088,
"grad_norm": 0.26143962144851685,
"learning_rate": 0.00019436983889340874,
"loss": 1.8543,
"step": 1700
},
{
"epoch": 0.029807246472809166,
"grad_norm": 0.2438499480485916,
"learning_rate": 0.00019403864710140116,
"loss": 1.8411,
"step": 1800
},
{
"epoch": 0.03146320461018746,
"grad_norm": 0.2666601836681366,
"learning_rate": 0.0001937074553093936,
"loss": 1.8548,
"step": 1900
},
{
"epoch": 0.03311916274756574,
"grad_norm": 0.2752065062522888,
"learning_rate": 0.000193376263517386,
"loss": 1.8534,
"step": 2000
},
{
"epoch": 0.03477512088494403,
"grad_norm": 0.24849963188171387,
"learning_rate": 0.00019304507172537844,
"loss": 1.8476,
"step": 2100
},
{
"epoch": 0.036431079022322316,
"grad_norm": 0.2809307277202606,
"learning_rate": 0.00019271387993337083,
"loss": 1.8505,
"step": 2200
},
{
"epoch": 0.038087037159700604,
"grad_norm": 0.23209506273269653,
"learning_rate": 0.00019238268814136326,
"loss": 1.864,
"step": 2300
},
{
"epoch": 0.03974299529707889,
"grad_norm": 0.25108611583709717,
"learning_rate": 0.00019205149634935568,
"loss": 1.8411,
"step": 2400
},
{
"epoch": 0.041398953434457174,
"grad_norm": 0.2639986276626587,
"learning_rate": 0.0001917203045573481,
"loss": 1.8456,
"step": 2500
},
{
"epoch": 0.04305491157183546,
"grad_norm": 0.2927249073982239,
"learning_rate": 0.00019138911276534053,
"loss": 1.8361,
"step": 2600
},
{
"epoch": 0.04471086970921375,
"grad_norm": 0.2660035192966461,
"learning_rate": 0.00019105792097333295,
"loss": 1.8352,
"step": 2700
},
{
"epoch": 0.04636682784659204,
"grad_norm": 0.23683211207389832,
"learning_rate": 0.00019072672918132538,
"loss": 1.824,
"step": 2800
},
{
"epoch": 0.04802278598397033,
"grad_norm": 0.7001804709434509,
"learning_rate": 0.00019039553738931777,
"loss": 1.82,
"step": 2900
},
{
"epoch": 0.049678744121348616,
"grad_norm": 0.2500315010547638,
"learning_rate": 0.0001900643455973102,
"loss": 1.862,
"step": 3000
},
{
"epoch": 0.0513347022587269,
"grad_norm": 0.2476750761270523,
"learning_rate": 0.00018973315380530262,
"loss": 1.8247,
"step": 3100
},
{
"epoch": 0.052990660396105185,
"grad_norm": 0.23064357042312622,
"learning_rate": 0.00018940196201329502,
"loss": 1.8685,
"step": 3200
},
{
"epoch": 0.054646618533483474,
"grad_norm": 0.2495209276676178,
"learning_rate": 0.00018907077022128744,
"loss": 1.8214,
"step": 3300
},
{
"epoch": 0.05630257667086176,
"grad_norm": 0.25310614705085754,
"learning_rate": 0.00018873957842927987,
"loss": 1.84,
"step": 3400
},
{
"epoch": 0.05795853480824005,
"grad_norm": 0.24329665303230286,
"learning_rate": 0.0001884083866372723,
"loss": 1.7982,
"step": 3500
},
{
"epoch": 0.05961449294561833,
"grad_norm": 0.25951218605041504,
"learning_rate": 0.00018807719484526471,
"loss": 1.8078,
"step": 3600
},
{
"epoch": 0.06127045108299662,
"grad_norm": 0.24307559430599213,
"learning_rate": 0.00018774600305325714,
"loss": 1.8181,
"step": 3700
},
{
"epoch": 0.06292640922037492,
"grad_norm": 0.27577558159828186,
"learning_rate": 0.00018741481126124956,
"loss": 1.8158,
"step": 3800
},
{
"epoch": 0.06458236735775319,
"grad_norm": 0.21584127843379974,
"learning_rate": 0.00018708361946924199,
"loss": 1.8362,
"step": 3900
},
{
"epoch": 0.06623832549513148,
"grad_norm": 0.2558760344982147,
"learning_rate": 0.0001867524276772344,
"loss": 1.8425,
"step": 4000
},
{
"epoch": 0.06789428363250977,
"grad_norm": 0.2206682711839676,
"learning_rate": 0.0001864212358852268,
"loss": 1.8155,
"step": 4100
},
{
"epoch": 0.06955024176988805,
"grad_norm": 0.23946842551231384,
"learning_rate": 0.00018609004409321923,
"loss": 1.8349,
"step": 4200
},
{
"epoch": 0.07120619990726634,
"grad_norm": 0.22356823086738586,
"learning_rate": 0.00018575885230121165,
"loss": 1.8013,
"step": 4300
},
{
"epoch": 0.07286215804464463,
"grad_norm": 0.28327444195747375,
"learning_rate": 0.00018542766050920408,
"loss": 1.8193,
"step": 4400
},
{
"epoch": 0.07451811618202292,
"grad_norm": 0.259748637676239,
"learning_rate": 0.0001850964687171965,
"loss": 1.8401,
"step": 4500
},
{
"epoch": 0.07617407431940121,
"grad_norm": 0.24509303271770477,
"learning_rate": 0.00018476527692518893,
"loss": 1.8313,
"step": 4600
},
{
"epoch": 0.0778300324567795,
"grad_norm": 0.2799519896507263,
"learning_rate": 0.00018443408513318132,
"loss": 1.8437,
"step": 4700
},
{
"epoch": 0.07948599059415778,
"grad_norm": 0.25356101989746094,
"learning_rate": 0.00018410289334117375,
"loss": 1.7989,
"step": 4800
}
],
"logging_steps": 100,
"max_steps": 60388,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 800,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 3.318952218329088e+18,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}