QinLiuNLP's picture
Model save
3718184 verified
raw
history blame
14 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 10.0,
"eval_steps": 1000,
"global_step": 250,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04,
"grad_norm": 5.49031856887175,
"learning_rate": 4.0000000000000003e-07,
"logits/chosen": -1.73323655128479,
"logits/rejected": -1.963712453842163,
"logps/chosen": -64.71795654296875,
"logps/rejected": -92.56527709960938,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.4,
"grad_norm": 1.9493782971816778,
"learning_rate": 4.000000000000001e-06,
"logits/chosen": -1.7276591062545776,
"logits/rejected": -1.90531325340271,
"logps/chosen": -72.89968872070312,
"logps/rejected": -117.47108459472656,
"loss": 0.6205,
"rewards/accuracies": 0.6180555820465088,
"rewards/chosen": -0.09507845342159271,
"rewards/margins": 0.2017170786857605,
"rewards/rejected": -0.296795517206192,
"step": 10
},
{
"epoch": 0.8,
"grad_norm": 3.7554212607527964,
"learning_rate": 8.000000000000001e-06,
"logits/chosen": -1.4812278747558594,
"logits/rejected": -1.7664066553115845,
"logps/chosen": -64.52765655517578,
"logps/rejected": -272.7065734863281,
"loss": 0.3953,
"rewards/accuracies": 0.8374999761581421,
"rewards/chosen": -0.0347316637635231,
"rewards/margins": 1.7828114032745361,
"rewards/rejected": -1.8175432682037354,
"step": 20
},
{
"epoch": 1.2,
"grad_norm": 6.377266655785215,
"learning_rate": 9.987820251299121e-06,
"logits/chosen": -1.6151340007781982,
"logits/rejected": -1.9460217952728271,
"logps/chosen": -143.10971069335938,
"logps/rejected": -631.4593505859375,
"loss": 0.1279,
"rewards/accuracies": 0.981249988079071,
"rewards/chosen": -0.8146063685417175,
"rewards/margins": 4.596449851989746,
"rewards/rejected": -5.4110565185546875,
"step": 30
},
{
"epoch": 1.6,
"grad_norm": 0.5750895192621723,
"learning_rate": 9.890738003669029e-06,
"logits/chosen": -1.6781848669052124,
"logits/rejected": -2.1725101470947266,
"logps/chosen": -153.36740112304688,
"logps/rejected": -981.5718994140625,
"loss": 0.0112,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.9592069387435913,
"rewards/margins": 7.915135383605957,
"rewards/rejected": -8.87434196472168,
"step": 40
},
{
"epoch": 2.0,
"grad_norm": 15.42824961576695,
"learning_rate": 9.698463103929542e-06,
"logits/chosen": -2.032032012939453,
"logits/rejected": -2.422545909881592,
"logps/chosen": -291.60833740234375,
"logps/rejected": -1471.12451171875,
"loss": 0.0265,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": -2.294036388397217,
"rewards/margins": 11.53666877746582,
"rewards/rejected": -13.830705642700195,
"step": 50
},
{
"epoch": 2.4,
"grad_norm": 0.04134383974122578,
"learning_rate": 9.414737964294636e-06,
"logits/chosen": -1.654057264328003,
"logits/rejected": -2.1446919441223145,
"logps/chosen": -305.9764709472656,
"logps/rejected": -1340.496826171875,
"loss": 0.0101,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.476349353790283,
"rewards/margins": 9.981760025024414,
"rewards/rejected": -12.458109855651855,
"step": 60
},
{
"epoch": 2.8,
"grad_norm": 0.08981946731483229,
"learning_rate": 9.045084971874738e-06,
"logits/chosen": -1.8004175424575806,
"logits/rejected": -2.220939874649048,
"logps/chosen": -351.9838562011719,
"logps/rejected": -1534.653564453125,
"loss": 0.0004,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.8868424892425537,
"rewards/margins": 11.563664436340332,
"rewards/rejected": -14.450506210327148,
"step": 70
},
{
"epoch": 3.2,
"grad_norm": 0.004562484598212617,
"learning_rate": 8.596699001693257e-06,
"logits/chosen": -1.955934762954712,
"logits/rejected": -2.4210100173950195,
"logps/chosen": -273.1143493652344,
"logps/rejected": -1595.3917236328125,
"loss": 0.0003,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.1271727085113525,
"rewards/margins": 12.920974731445312,
"rewards/rejected": -15.048149108886719,
"step": 80
},
{
"epoch": 3.6,
"grad_norm": 0.1380036178242026,
"learning_rate": 8.078307376628292e-06,
"logits/chosen": -2.363739252090454,
"logits/rejected": -2.916693925857544,
"logps/chosen": -290.6210021972656,
"logps/rejected": -1820.208251953125,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.2858262062072754,
"rewards/margins": 15.026013374328613,
"rewards/rejected": -17.311840057373047,
"step": 90
},
{
"epoch": 4.0,
"grad_norm": 0.027548373376636768,
"learning_rate": 7.500000000000001e-06,
"logits/chosen": -1.8597825765609741,
"logits/rejected": -2.5159268379211426,
"logps/chosen": -259.3269348144531,
"logps/rejected": -1694.156005859375,
"loss": 0.0036,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.9733333587646484,
"rewards/margins": 14.052156448364258,
"rewards/rejected": -16.025489807128906,
"step": 100
},
{
"epoch": 4.4,
"grad_norm": 0.002852087112507335,
"learning_rate": 6.873032967079562e-06,
"logits/chosen": -1.1590913534164429,
"logits/rejected": -1.9738283157348633,
"logps/chosen": -243.7049560546875,
"logps/rejected": -1585.557373046875,
"loss": 0.0008,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.8130344152450562,
"rewards/margins": 13.158452033996582,
"rewards/rejected": -14.97148609161377,
"step": 110
},
{
"epoch": 4.8,
"grad_norm": 0.050302608930815555,
"learning_rate": 6.209609477998339e-06,
"logits/chosen": -1.1821445226669312,
"logits/rejected": -2.0527586936950684,
"logps/chosen": -265.8364562988281,
"logps/rejected": -1697.0533447265625,
"loss": 0.0002,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.049001693725586,
"rewards/margins": 14.01880168914795,
"rewards/rejected": -16.06780242919922,
"step": 120
},
{
"epoch": 5.2,
"grad_norm": 0.002082230483938349,
"learning_rate": 5.522642316338268e-06,
"logits/chosen": -1.4268571138381958,
"logits/rejected": -2.196958303451538,
"logps/chosen": -261.50567626953125,
"logps/rejected": -1654.785888671875,
"loss": 0.0002,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.0303101539611816,
"rewards/margins": 13.576850891113281,
"rewards/rejected": -15.607162475585938,
"step": 130
},
{
"epoch": 5.6,
"grad_norm": 0.0019893945992160438,
"learning_rate": 4.825502516487497e-06,
"logits/chosen": -1.7991822957992554,
"logits/rejected": -2.550363063812256,
"logps/chosen": -319.41717529296875,
"logps/rejected": -1847.844970703125,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.583193302154541,
"rewards/margins": 14.985641479492188,
"rewards/rejected": -17.568836212158203,
"step": 140
},
{
"epoch": 6.0,
"grad_norm": 0.0015741333192918123,
"learning_rate": 4.131759111665349e-06,
"logits/chosen": -1.9366722106933594,
"logits/rejected": -2.7011678218841553,
"logps/chosen": -317.91302490234375,
"logps/rejected": -1850.040771484375,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.5857417583465576,
"rewards/margins": 15.00117015838623,
"rewards/rejected": -17.586910247802734,
"step": 150
},
{
"epoch": 6.4,
"grad_norm": 0.0011046666078130404,
"learning_rate": 3.4549150281252635e-06,
"logits/chosen": -1.9223568439483643,
"logits/rejected": -2.68572735786438,
"logps/chosen": -357.874755859375,
"logps/rejected": -2010.9976806640625,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.9552831649780273,
"rewards/margins": 16.265609741210938,
"rewards/rejected": -19.22089195251465,
"step": 160
},
{
"epoch": 6.8,
"grad_norm": 0.0008850683483477801,
"learning_rate": 2.8081442660546126e-06,
"logits/chosen": -1.939117431640625,
"logits/rejected": -2.6780524253845215,
"logps/chosen": -341.6114196777344,
"logps/rejected": -1941.843017578125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.7991585731506348,
"rewards/margins": 15.720464706420898,
"rewards/rejected": -18.519622802734375,
"step": 170
},
{
"epoch": 7.2,
"grad_norm": 0.0006990799912761066,
"learning_rate": 2.204035482646267e-06,
"logits/chosen": -2.032839298248291,
"logits/rejected": -2.8106019496917725,
"logps/chosen": -401.89849853515625,
"logps/rejected": -2151.74072265625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -3.3739535808563232,
"rewards/margins": 17.275264739990234,
"rewards/rejected": -20.649219512939453,
"step": 180
},
{
"epoch": 7.6,
"grad_norm": 0.0009810237591607713,
"learning_rate": 1.6543469682057105e-06,
"logits/chosen": -2.005420446395874,
"logits/rejected": -2.788722038269043,
"logps/chosen": -371.88824462890625,
"logps/rejected": -2029.203125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -3.0968661308288574,
"rewards/margins": 16.302059173583984,
"rewards/rejected": -19.398929595947266,
"step": 190
},
{
"epoch": 8.0,
"grad_norm": 0.005038481428283606,
"learning_rate": 1.1697777844051105e-06,
"logits/chosen": -1.6506569385528564,
"logits/rejected": -2.4032554626464844,
"logps/chosen": -272.77264404296875,
"logps/rejected": -1795.190185546875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.1385960578918457,
"rewards/margins": 14.876760482788086,
"rewards/rejected": -17.01535987854004,
"step": 200
},
{
"epoch": 8.4,
"grad_norm": 0.0007269378594182772,
"learning_rate": 7.597595192178702e-07,
"logits/chosen": -1.87616765499115,
"logits/rejected": -2.6102375984191895,
"logps/chosen": -321.3392639160156,
"logps/rejected": -1848.580810546875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.6000113487243652,
"rewards/margins": 14.976901054382324,
"rewards/rejected": -17.576915740966797,
"step": 210
},
{
"epoch": 8.8,
"grad_norm": 0.0007139184876414333,
"learning_rate": 4.322727117869951e-07,
"logits/chosen": -1.9712779521942139,
"logits/rejected": -2.749927043914795,
"logps/chosen": -361.2076416015625,
"logps/rejected": -1984.336669921875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.9964027404785156,
"rewards/margins": 15.949694633483887,
"rewards/rejected": -18.946096420288086,
"step": 220
},
{
"epoch": 9.2,
"grad_norm": 0.0009109490304217519,
"learning_rate": 1.9369152030840553e-07,
"logits/chosen": -1.8861472606658936,
"logits/rejected": -2.6229307651519775,
"logps/chosen": -340.4961853027344,
"logps/rejected": -1894.4420166015625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.806462287902832,
"rewards/margins": 15.230853080749512,
"rewards/rejected": -18.037317276000977,
"step": 230
},
{
"epoch": 9.6,
"grad_norm": 0.0007160536583349626,
"learning_rate": 4.865965629214819e-08,
"logits/chosen": -1.8993374109268188,
"logits/rejected": -2.6610684394836426,
"logps/chosen": -320.7115783691406,
"logps/rejected": -1872.178466796875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.6092982292175293,
"rewards/margins": 15.210273742675781,
"rewards/rejected": -17.81957244873047,
"step": 240
},
{
"epoch": 10.0,
"grad_norm": 0.0006100741232287624,
"learning_rate": 0.0,
"logits/chosen": -1.9146867990493774,
"logits/rejected": -2.670241117477417,
"logps/chosen": -317.877685546875,
"logps/rejected": -1852.078857421875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.5683541297912598,
"rewards/margins": 15.033134460449219,
"rewards/rejected": -17.60148811340332,
"step": 250
},
{
"epoch": 10.0,
"step": 250,
"total_flos": 0.0,
"train_loss": 0.048202857348136605,
"train_runtime": 22140.6758,
"train_samples_per_second": 0.723,
"train_steps_per_second": 0.011
}
],
"logging_steps": 10,
"max_steps": 250,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}