|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 10.0, |
|
"eval_steps": 1000, |
|
"global_step": 250, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 5.49031856887175, |
|
"learning_rate": 4.0000000000000003e-07, |
|
"logits/chosen": -1.73323655128479, |
|
"logits/rejected": -1.963712453842163, |
|
"logps/chosen": -64.71795654296875, |
|
"logps/rejected": -92.56527709960938, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.9493782971816778, |
|
"learning_rate": 4.000000000000001e-06, |
|
"logits/chosen": -1.7276591062545776, |
|
"logits/rejected": -1.90531325340271, |
|
"logps/chosen": -72.89968872070312, |
|
"logps/rejected": -117.47108459472656, |
|
"loss": 0.6205, |
|
"rewards/accuracies": 0.6180555820465088, |
|
"rewards/chosen": -0.09507845342159271, |
|
"rewards/margins": 0.2017170786857605, |
|
"rewards/rejected": -0.296795517206192, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 3.7554212607527964, |
|
"learning_rate": 8.000000000000001e-06, |
|
"logits/chosen": -1.4812278747558594, |
|
"logits/rejected": -1.7664066553115845, |
|
"logps/chosen": -64.52765655517578, |
|
"logps/rejected": -272.7065734863281, |
|
"loss": 0.3953, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.0347316637635231, |
|
"rewards/margins": 1.7828114032745361, |
|
"rewards/rejected": -1.8175432682037354, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 6.377266655785215, |
|
"learning_rate": 9.987820251299121e-06, |
|
"logits/chosen": -1.6151340007781982, |
|
"logits/rejected": -1.9460217952728271, |
|
"logps/chosen": -143.10971069335938, |
|
"logps/rejected": -631.4593505859375, |
|
"loss": 0.1279, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -0.8146063685417175, |
|
"rewards/margins": 4.596449851989746, |
|
"rewards/rejected": -5.4110565185546875, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 0.5750895192621723, |
|
"learning_rate": 9.890738003669029e-06, |
|
"logits/chosen": -1.6781848669052124, |
|
"logits/rejected": -2.1725101470947266, |
|
"logps/chosen": -153.36740112304688, |
|
"logps/rejected": -981.5718994140625, |
|
"loss": 0.0112, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9592069387435913, |
|
"rewards/margins": 7.915135383605957, |
|
"rewards/rejected": -8.87434196472168, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 15.42824961576695, |
|
"learning_rate": 9.698463103929542e-06, |
|
"logits/chosen": -2.032032012939453, |
|
"logits/rejected": -2.422545909881592, |
|
"logps/chosen": -291.60833740234375, |
|
"logps/rejected": -1471.12451171875, |
|
"loss": 0.0265, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -2.294036388397217, |
|
"rewards/margins": 11.53666877746582, |
|
"rewards/rejected": -13.830705642700195, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"grad_norm": 0.04134383974122578, |
|
"learning_rate": 9.414737964294636e-06, |
|
"logits/chosen": -1.654057264328003, |
|
"logits/rejected": -2.1446919441223145, |
|
"logps/chosen": -305.9764709472656, |
|
"logps/rejected": -1340.496826171875, |
|
"loss": 0.0101, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.476349353790283, |
|
"rewards/margins": 9.981760025024414, |
|
"rewards/rejected": -12.458109855651855, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"grad_norm": 0.08981946731483229, |
|
"learning_rate": 9.045084971874738e-06, |
|
"logits/chosen": -1.8004175424575806, |
|
"logits/rejected": -2.220939874649048, |
|
"logps/chosen": -351.9838562011719, |
|
"logps/rejected": -1534.653564453125, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.8868424892425537, |
|
"rewards/margins": 11.563664436340332, |
|
"rewards/rejected": -14.450506210327148, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"grad_norm": 0.004562484598212617, |
|
"learning_rate": 8.596699001693257e-06, |
|
"logits/chosen": -1.955934762954712, |
|
"logits/rejected": -2.4210100173950195, |
|
"logps/chosen": -273.1143493652344, |
|
"logps/rejected": -1595.3917236328125, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.1271727085113525, |
|
"rewards/margins": 12.920974731445312, |
|
"rewards/rejected": -15.048149108886719, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"grad_norm": 0.1380036178242026, |
|
"learning_rate": 8.078307376628292e-06, |
|
"logits/chosen": -2.363739252090454, |
|
"logits/rejected": -2.916693925857544, |
|
"logps/chosen": -290.6210021972656, |
|
"logps/rejected": -1820.208251953125, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.2858262062072754, |
|
"rewards/margins": 15.026013374328613, |
|
"rewards/rejected": -17.311840057373047, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.027548373376636768, |
|
"learning_rate": 7.500000000000001e-06, |
|
"logits/chosen": -1.8597825765609741, |
|
"logits/rejected": -2.5159268379211426, |
|
"logps/chosen": -259.3269348144531, |
|
"logps/rejected": -1694.156005859375, |
|
"loss": 0.0036, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.9733333587646484, |
|
"rewards/margins": 14.052156448364258, |
|
"rewards/rejected": -16.025489807128906, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"grad_norm": 0.002852087112507335, |
|
"learning_rate": 6.873032967079562e-06, |
|
"logits/chosen": -1.1590913534164429, |
|
"logits/rejected": -1.9738283157348633, |
|
"logps/chosen": -243.7049560546875, |
|
"logps/rejected": -1585.557373046875, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.8130344152450562, |
|
"rewards/margins": 13.158452033996582, |
|
"rewards/rejected": -14.97148609161377, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"grad_norm": 0.050302608930815555, |
|
"learning_rate": 6.209609477998339e-06, |
|
"logits/chosen": -1.1821445226669312, |
|
"logits/rejected": -2.0527586936950684, |
|
"logps/chosen": -265.8364562988281, |
|
"logps/rejected": -1697.0533447265625, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.049001693725586, |
|
"rewards/margins": 14.01880168914795, |
|
"rewards/rejected": -16.06780242919922, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"grad_norm": 0.002082230483938349, |
|
"learning_rate": 5.522642316338268e-06, |
|
"logits/chosen": -1.4268571138381958, |
|
"logits/rejected": -2.196958303451538, |
|
"logps/chosen": -261.50567626953125, |
|
"logps/rejected": -1654.785888671875, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.0303101539611816, |
|
"rewards/margins": 13.576850891113281, |
|
"rewards/rejected": -15.607162475585938, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"grad_norm": 0.0019893945992160438, |
|
"learning_rate": 4.825502516487497e-06, |
|
"logits/chosen": -1.7991822957992554, |
|
"logits/rejected": -2.550363063812256, |
|
"logps/chosen": -319.41717529296875, |
|
"logps/rejected": -1847.844970703125, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.583193302154541, |
|
"rewards/margins": 14.985641479492188, |
|
"rewards/rejected": -17.568836212158203, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 0.0015741333192918123, |
|
"learning_rate": 4.131759111665349e-06, |
|
"logits/chosen": -1.9366722106933594, |
|
"logits/rejected": -2.7011678218841553, |
|
"logps/chosen": -317.91302490234375, |
|
"logps/rejected": -1850.040771484375, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.5857417583465576, |
|
"rewards/margins": 15.00117015838623, |
|
"rewards/rejected": -17.586910247802734, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"grad_norm": 0.0011046666078130404, |
|
"learning_rate": 3.4549150281252635e-06, |
|
"logits/chosen": -1.9223568439483643, |
|
"logits/rejected": -2.68572735786438, |
|
"logps/chosen": -357.874755859375, |
|
"logps/rejected": -2010.9976806640625, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.9552831649780273, |
|
"rewards/margins": 16.265609741210938, |
|
"rewards/rejected": -19.22089195251465, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"grad_norm": 0.0008850683483477801, |
|
"learning_rate": 2.8081442660546126e-06, |
|
"logits/chosen": -1.939117431640625, |
|
"logits/rejected": -2.6780524253845215, |
|
"logps/chosen": -341.6114196777344, |
|
"logps/rejected": -1941.843017578125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.7991585731506348, |
|
"rewards/margins": 15.720464706420898, |
|
"rewards/rejected": -18.519622802734375, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"grad_norm": 0.0006990799912761066, |
|
"learning_rate": 2.204035482646267e-06, |
|
"logits/chosen": -2.032839298248291, |
|
"logits/rejected": -2.8106019496917725, |
|
"logps/chosen": -401.89849853515625, |
|
"logps/rejected": -2151.74072265625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.3739535808563232, |
|
"rewards/margins": 17.275264739990234, |
|
"rewards/rejected": -20.649219512939453, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"grad_norm": 0.0009810237591607713, |
|
"learning_rate": 1.6543469682057105e-06, |
|
"logits/chosen": -2.005420446395874, |
|
"logits/rejected": -2.788722038269043, |
|
"logps/chosen": -371.88824462890625, |
|
"logps/rejected": -2029.203125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.0968661308288574, |
|
"rewards/margins": 16.302059173583984, |
|
"rewards/rejected": -19.398929595947266, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.005038481428283606, |
|
"learning_rate": 1.1697777844051105e-06, |
|
"logits/chosen": -1.6506569385528564, |
|
"logits/rejected": -2.4032554626464844, |
|
"logps/chosen": -272.77264404296875, |
|
"logps/rejected": -1795.190185546875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.1385960578918457, |
|
"rewards/margins": 14.876760482788086, |
|
"rewards/rejected": -17.01535987854004, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"grad_norm": 0.0007269378594182772, |
|
"learning_rate": 7.597595192178702e-07, |
|
"logits/chosen": -1.87616765499115, |
|
"logits/rejected": -2.6102375984191895, |
|
"logps/chosen": -321.3392639160156, |
|
"logps/rejected": -1848.580810546875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.6000113487243652, |
|
"rewards/margins": 14.976901054382324, |
|
"rewards/rejected": -17.576915740966797, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"grad_norm": 0.0007139184876414333, |
|
"learning_rate": 4.322727117869951e-07, |
|
"logits/chosen": -1.9712779521942139, |
|
"logits/rejected": -2.749927043914795, |
|
"logps/chosen": -361.2076416015625, |
|
"logps/rejected": -1984.336669921875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.9964027404785156, |
|
"rewards/margins": 15.949694633483887, |
|
"rewards/rejected": -18.946096420288086, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"grad_norm": 0.0009109490304217519, |
|
"learning_rate": 1.9369152030840553e-07, |
|
"logits/chosen": -1.8861472606658936, |
|
"logits/rejected": -2.6229307651519775, |
|
"logps/chosen": -340.4961853027344, |
|
"logps/rejected": -1894.4420166015625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.806462287902832, |
|
"rewards/margins": 15.230853080749512, |
|
"rewards/rejected": -18.037317276000977, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"grad_norm": 0.0007160536583349626, |
|
"learning_rate": 4.865965629214819e-08, |
|
"logits/chosen": -1.8993374109268188, |
|
"logits/rejected": -2.6610684394836426, |
|
"logps/chosen": -320.7115783691406, |
|
"logps/rejected": -1872.178466796875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.6092982292175293, |
|
"rewards/margins": 15.210273742675781, |
|
"rewards/rejected": -17.81957244873047, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 0.0006100741232287624, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -1.9146867990493774, |
|
"logits/rejected": -2.670241117477417, |
|
"logps/chosen": -317.877685546875, |
|
"logps/rejected": -1852.078857421875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.5683541297912598, |
|
"rewards/margins": 15.033134460449219, |
|
"rewards/rejected": -17.60148811340332, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 250, |
|
"total_flos": 0.0, |
|
"train_loss": 0.048202857348136605, |
|
"train_runtime": 22140.6758, |
|
"train_samples_per_second": 0.723, |
|
"train_steps_per_second": 0.011 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 250, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|