|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9998286203941731, |
|
"eval_steps": 500, |
|
"global_step": 2917, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.7123287671232876e-08, |
|
"logits/chosen": -2.21498966217041, |
|
"logits/rejected": -1.5619134902954102, |
|
"logps/chosen": -448.18634033203125, |
|
"logps/rejected": -230.1645965576172, |
|
"loss": 0.1703, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.7123287671232878e-07, |
|
"logits/chosen": -1.9158155918121338, |
|
"logits/rejected": -1.947864055633545, |
|
"logps/chosen": -236.8881072998047, |
|
"logps/rejected": -271.3336181640625, |
|
"loss": 0.1345, |
|
"rewards/accuracies": 0.3888888955116272, |
|
"rewards/chosen": 1.0350075172027573e-05, |
|
"rewards/margins": -1.4042092516319826e-05, |
|
"rewards/rejected": 2.4392174964305013e-05, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.4246575342465755e-07, |
|
"logits/chosen": -1.9916515350341797, |
|
"logits/rejected": -1.7161877155303955, |
|
"logps/chosen": -181.11163330078125, |
|
"logps/rejected": -196.61138916015625, |
|
"loss": 0.1196, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 4.240129783283919e-06, |
|
"rewards/margins": 0.00010243832366541028, |
|
"rewards/rejected": -9.819817205425352e-05, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.136986301369864e-07, |
|
"logits/chosen": -1.9248673915863037, |
|
"logits/rejected": -1.8731294870376587, |
|
"logps/chosen": -227.29898071289062, |
|
"logps/rejected": -272.0036315917969, |
|
"loss": 0.1104, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.00023775253794156015, |
|
"rewards/margins": 0.00018605976947583258, |
|
"rewards/rejected": -0.00042381230741739273, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6.849315068493151e-07, |
|
"logits/chosen": -1.9346742630004883, |
|
"logits/rejected": -1.6983258724212646, |
|
"logps/chosen": -284.8092346191406, |
|
"logps/rejected": -235.57955932617188, |
|
"loss": 0.103, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.00043622878729365766, |
|
"rewards/margins": 0.0012934322003275156, |
|
"rewards/rejected": -0.001729660900309682, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.561643835616439e-07, |
|
"logits/chosen": -1.997926950454712, |
|
"logits/rejected": -1.5733534097671509, |
|
"logps/chosen": -234.2779998779297, |
|
"logps/rejected": -186.01576232910156, |
|
"loss": 0.1645, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.004240738693624735, |
|
"rewards/margins": 0.0029823766089975834, |
|
"rewards/rejected": -0.007223114371299744, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0273972602739727e-06, |
|
"logits/chosen": -1.8750112056732178, |
|
"logits/rejected": -1.5471408367156982, |
|
"logps/chosen": -226.1807098388672, |
|
"logps/rejected": -231.8741455078125, |
|
"loss": 0.1159, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.026738092303276062, |
|
"rewards/margins": 0.011812428012490273, |
|
"rewards/rejected": -0.038550518453121185, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.1986301369863014e-06, |
|
"logits/chosen": -1.8785406351089478, |
|
"logits/rejected": -1.6199915409088135, |
|
"logps/chosen": -309.3338623046875, |
|
"logps/rejected": -310.3027038574219, |
|
"loss": 0.1245, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.06837661564350128, |
|
"rewards/margins": 0.023886824026703835, |
|
"rewards/rejected": -0.09226343780755997, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.3698630136986302e-06, |
|
"logits/chosen": -1.8794485330581665, |
|
"logits/rejected": -1.588679552078247, |
|
"logps/chosen": -331.4400939941406, |
|
"logps/rejected": -316.6481018066406, |
|
"loss": 0.1391, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.09261558204889297, |
|
"rewards/margins": 0.05595500394701958, |
|
"rewards/rejected": -0.14857056736946106, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.541095890410959e-06, |
|
"logits/chosen": -1.73800790309906, |
|
"logits/rejected": -1.5066587924957275, |
|
"logps/chosen": -349.1307678222656, |
|
"logps/rejected": -445.1373596191406, |
|
"loss": 0.1099, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.1359734833240509, |
|
"rewards/margins": 0.0638502761721611, |
|
"rewards/rejected": -0.1998237520456314, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.7123287671232877e-06, |
|
"logits/chosen": -1.9925429821014404, |
|
"logits/rejected": -1.5489239692687988, |
|
"logps/chosen": -431.8665466308594, |
|
"logps/rejected": -449.634521484375, |
|
"loss": 0.124, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.14928413927555084, |
|
"rewards/margins": 0.0672868937253952, |
|
"rewards/rejected": -0.21657104790210724, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.8835616438356166e-06, |
|
"logits/chosen": -1.938245177268982, |
|
"logits/rejected": -1.5735212564468384, |
|
"logps/chosen": -445.81585693359375, |
|
"logps/rejected": -423.18975830078125, |
|
"loss": 0.1018, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.15499410033226013, |
|
"rewards/margins": 0.051616422832012177, |
|
"rewards/rejected": -0.2066105306148529, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.0547945205479454e-06, |
|
"logits/chosen": -1.8546117544174194, |
|
"logits/rejected": -1.6232588291168213, |
|
"logps/chosen": -401.47039794921875, |
|
"logps/rejected": -448.80926513671875, |
|
"loss": 0.0749, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.13759753108024597, |
|
"rewards/margins": 0.0838770940899849, |
|
"rewards/rejected": -0.22147460281848907, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.2260273972602743e-06, |
|
"logits/chosen": -1.7494646310806274, |
|
"logits/rejected": -1.6120729446411133, |
|
"logps/chosen": -393.23095703125, |
|
"logps/rejected": -414.7491760253906, |
|
"loss": 0.067, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.1571560651063919, |
|
"rewards/margins": 0.04211575910449028, |
|
"rewards/rejected": -0.19927182793617249, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.3972602739726027e-06, |
|
"logits/chosen": -1.8314697742462158, |
|
"logits/rejected": -1.4660162925720215, |
|
"logps/chosen": -343.4485778808594, |
|
"logps/rejected": -441.86578369140625, |
|
"loss": 0.1026, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.11567596346139908, |
|
"rewards/margins": 0.09308116137981415, |
|
"rewards/rejected": -0.20875711739063263, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.568493150684932e-06, |
|
"logits/chosen": -1.769061803817749, |
|
"logits/rejected": -1.5675675868988037, |
|
"logps/chosen": -377.51739501953125, |
|
"logps/rejected": -394.83465576171875, |
|
"loss": 0.0902, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.15446873009204865, |
|
"rewards/margins": 0.04394759237766266, |
|
"rewards/rejected": -0.1984163224697113, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.7397260273972604e-06, |
|
"logits/chosen": -1.7472827434539795, |
|
"logits/rejected": -1.5711588859558105, |
|
"logps/chosen": -377.2109069824219, |
|
"logps/rejected": -495.3035583496094, |
|
"loss": 0.0971, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.1695915013551712, |
|
"rewards/margins": 0.08974287658929825, |
|
"rewards/rejected": -0.25933438539505005, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.9109589041095893e-06, |
|
"logits/chosen": -1.8495628833770752, |
|
"logits/rejected": -1.5801998376846313, |
|
"logps/chosen": -324.21337890625, |
|
"logps/rejected": -401.52374267578125, |
|
"loss": 0.1013, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.06990089267492294, |
|
"rewards/margins": 0.1042117103934288, |
|
"rewards/rejected": -0.17411258816719055, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.082191780821918e-06, |
|
"logits/chosen": -1.929395079612732, |
|
"logits/rejected": -1.5674490928649902, |
|
"logps/chosen": -332.16339111328125, |
|
"logps/rejected": -346.80450439453125, |
|
"loss": 0.087, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.08584436774253845, |
|
"rewards/margins": 0.055128611624240875, |
|
"rewards/rejected": -0.14097298681735992, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.253424657534247e-06, |
|
"logits/chosen": -2.0753486156463623, |
|
"logits/rejected": -1.6974204778671265, |
|
"logps/chosen": -361.5826721191406, |
|
"logps/rejected": -427.80426025390625, |
|
"loss": 0.0783, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.09311284124851227, |
|
"rewards/margins": 0.08312083035707474, |
|
"rewards/rejected": -0.1762336641550064, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.4246575342465754e-06, |
|
"logits/chosen": -2.206291675567627, |
|
"logits/rejected": -1.9346578121185303, |
|
"logps/chosen": -351.0352783203125, |
|
"logps/rejected": -328.7702941894531, |
|
"loss": 0.0932, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.09371860325336456, |
|
"rewards/margins": 0.030456852167844772, |
|
"rewards/rejected": -0.12417546659708023, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.5958904109589043e-06, |
|
"logits/chosen": -1.9507122039794922, |
|
"logits/rejected": -1.7119086980819702, |
|
"logps/chosen": -292.4835205078125, |
|
"logps/rejected": -361.983154296875, |
|
"loss": 0.0902, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.10735298693180084, |
|
"rewards/margins": 0.070334292948246, |
|
"rewards/rejected": -0.17768728733062744, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.767123287671233e-06, |
|
"logits/chosen": -1.8516258001327515, |
|
"logits/rejected": -1.7747443914413452, |
|
"logps/chosen": -324.3973388671875, |
|
"logps/rejected": -373.698486328125, |
|
"loss": 0.1124, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.1862211674451828, |
|
"rewards/margins": 0.040039997547864914, |
|
"rewards/rejected": -0.226261168718338, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.938356164383562e-06, |
|
"logits/chosen": -2.0984139442443848, |
|
"logits/rejected": -1.8558800220489502, |
|
"logps/chosen": -437.14459228515625, |
|
"logps/rejected": -443.85089111328125, |
|
"loss": 0.0886, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.15925905108451843, |
|
"rewards/margins": 0.06432916969060898, |
|
"rewards/rejected": -0.223588228225708, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.109589041095891e-06, |
|
"logits/chosen": -1.8379871845245361, |
|
"logits/rejected": -1.6939672231674194, |
|
"logps/chosen": -351.00531005859375, |
|
"logps/rejected": -395.9615783691406, |
|
"loss": 0.0919, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.1394365429878235, |
|
"rewards/margins": 0.06487082690000534, |
|
"rewards/rejected": -0.20430736243724823, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.28082191780822e-06, |
|
"logits/chosen": -2.0115137100219727, |
|
"logits/rejected": -1.811408281326294, |
|
"logps/chosen": -411.9342346191406, |
|
"logps/rejected": -407.1709289550781, |
|
"loss": 0.085, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.115334153175354, |
|
"rewards/margins": 0.04877592995762825, |
|
"rewards/rejected": -0.16411006450653076, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.4520547945205486e-06, |
|
"logits/chosen": -1.917514443397522, |
|
"logits/rejected": -1.6930125951766968, |
|
"logps/chosen": -409.4349365234375, |
|
"logps/rejected": -449.8536071777344, |
|
"loss": 0.0983, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.15566658973693848, |
|
"rewards/margins": 0.08868524432182312, |
|
"rewards/rejected": -0.24435186386108398, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.6232876712328774e-06, |
|
"logits/chosen": -1.9329277276992798, |
|
"logits/rejected": -1.6485719680786133, |
|
"logps/chosen": -373.9496765136719, |
|
"logps/rejected": -461.1018981933594, |
|
"loss": 0.0747, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.17361843585968018, |
|
"rewards/margins": 0.12690795958042145, |
|
"rewards/rejected": -0.30052638053894043, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.7945205479452054e-06, |
|
"logits/chosen": -2.2051990032196045, |
|
"logits/rejected": -1.7319234609603882, |
|
"logps/chosen": -330.0890197753906, |
|
"logps/rejected": -398.8971252441406, |
|
"loss": 0.1105, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.09975247085094452, |
|
"rewards/margins": 0.12828990817070007, |
|
"rewards/rejected": -0.2280423939228058, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.965753424657534e-06, |
|
"logits/chosen": -2.15606427192688, |
|
"logits/rejected": -2.102695941925049, |
|
"logps/chosen": -421.451171875, |
|
"logps/rejected": -486.7090759277344, |
|
"loss": 0.0951, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.15245939791202545, |
|
"rewards/margins": 0.05438145995140076, |
|
"rewards/rejected": -0.20684084296226501, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.99988541499203e-06, |
|
"logits/chosen": -1.9494987726211548, |
|
"logits/rejected": -1.7490851879119873, |
|
"logps/chosen": -476.6026916503906, |
|
"logps/rejected": -605.9563598632812, |
|
"loss": 0.1238, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.25119680166244507, |
|
"rewards/margins": 0.1370239108800888, |
|
"rewards/rejected": -0.38822072744369507, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.999419931399174e-06, |
|
"logits/chosen": -1.973769187927246, |
|
"logits/rejected": -2.039909839630127, |
|
"logps/chosen": -482.8154296875, |
|
"logps/rejected": -553.8546142578125, |
|
"loss": 0.0801, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.27585893869400024, |
|
"rewards/margins": 0.04923711344599724, |
|
"rewards/rejected": -0.325096070766449, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.998596454278661e-06, |
|
"logits/chosen": -2.062340021133423, |
|
"logits/rejected": -1.94599187374115, |
|
"logps/chosen": -504.60546875, |
|
"logps/rejected": -554.1131591796875, |
|
"loss": 0.0724, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.24708731472492218, |
|
"rewards/margins": 0.06553421169519424, |
|
"rewards/rejected": -0.31262150406837463, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.99741510157765e-06, |
|
"logits/chosen": -1.9648933410644531, |
|
"logits/rejected": -1.780860185623169, |
|
"logps/chosen": -620.1107788085938, |
|
"logps/rejected": -667.7522583007812, |
|
"loss": 0.1007, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.313930869102478, |
|
"rewards/margins": 0.06759389489889145, |
|
"rewards/rejected": -0.3815247416496277, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.995876042502048e-06, |
|
"logits/chosen": -2.1744627952575684, |
|
"logits/rejected": -1.8270336389541626, |
|
"logps/chosen": -484.35101318359375, |
|
"logps/rejected": -553.4263305664062, |
|
"loss": 0.1147, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.2436065375804901, |
|
"rewards/margins": 0.08232472836971283, |
|
"rewards/rejected": -0.32593125104904175, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.993979497492282e-06, |
|
"logits/chosen": -1.914698600769043, |
|
"logits/rejected": -1.7681747674942017, |
|
"logps/chosen": -472.70794677734375, |
|
"logps/rejected": -629.3675537109375, |
|
"loss": 0.1157, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.2542678713798523, |
|
"rewards/margins": 0.09860087186098099, |
|
"rewards/rejected": -0.3528687059879303, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.9917257381917115e-06, |
|
"logits/chosen": -2.0197696685791016, |
|
"logits/rejected": -1.7743446826934814, |
|
"logps/chosen": -639.1318359375, |
|
"logps/rejected": -686.443603515625, |
|
"loss": 0.1203, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.3869578242301941, |
|
"rewards/margins": 0.06751660257577896, |
|
"rewards/rejected": -0.45447444915771484, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.989115087407737e-06, |
|
"logits/chosen": -1.904762864112854, |
|
"logits/rejected": -1.8776410818099976, |
|
"logps/chosen": -539.9891357421875, |
|
"logps/rejected": -601.7970581054688, |
|
"loss": 0.0625, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.3085237145423889, |
|
"rewards/margins": 0.08412410318851471, |
|
"rewards/rejected": -0.3926478326320648, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.986147919065551e-06, |
|
"logits/chosen": -1.876813530921936, |
|
"logits/rejected": -1.7952611446380615, |
|
"logps/chosen": -488.16107177734375, |
|
"logps/rejected": -583.83056640625, |
|
"loss": 0.0983, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.25909119844436646, |
|
"rewards/margins": 0.04833118990063667, |
|
"rewards/rejected": -0.30742236971855164, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.982824658154589e-06, |
|
"logits/chosen": -2.1078734397888184, |
|
"logits/rejected": -1.9783456325531006, |
|
"logps/chosen": -396.71630859375, |
|
"logps/rejected": -487.6923828125, |
|
"loss": 0.0596, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.23540160059928894, |
|
"rewards/margins": 0.09165269136428833, |
|
"rewards/rejected": -0.32705432176589966, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.979145780667652e-06, |
|
"logits/chosen": -2.0578982830047607, |
|
"logits/rejected": -1.7134244441986084, |
|
"logps/chosen": -535.1932983398438, |
|
"logps/rejected": -577.6029663085938, |
|
"loss": 0.0977, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.24049289524555206, |
|
"rewards/margins": 0.0864410474896431, |
|
"rewards/rejected": -0.32693392038345337, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.975111813532733e-06, |
|
"logits/chosen": -1.844091773033142, |
|
"logits/rejected": -1.5848596096038818, |
|
"logps/chosen": -569.6875, |
|
"logps/rejected": -624.45849609375, |
|
"loss": 0.0983, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.30232542753219604, |
|
"rewards/margins": 0.08498513698577881, |
|
"rewards/rejected": -0.38731056451797485, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.970723334537547e-06, |
|
"logits/chosen": -1.7672590017318726, |
|
"logits/rejected": -1.5241564512252808, |
|
"logps/chosen": -493.00628662109375, |
|
"logps/rejected": -542.5633544921875, |
|
"loss": 0.0683, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.2543202340602875, |
|
"rewards/margins": 0.0733615979552269, |
|
"rewards/rejected": -0.32768186926841736, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.965980972246767e-06, |
|
"logits/chosen": -1.9794318675994873, |
|
"logits/rejected": -1.9301669597625732, |
|
"logps/chosen": -460.9219665527344, |
|
"logps/rejected": -573.4908447265625, |
|
"loss": 0.1151, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.26725929975509644, |
|
"rewards/margins": 0.08439052850008011, |
|
"rewards/rejected": -0.3516498804092407, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.960885405912001e-06, |
|
"logits/chosen": -1.9738355875015259, |
|
"logits/rejected": -1.8872768878936768, |
|
"logps/chosen": -490.4803771972656, |
|
"logps/rejected": -552.944091796875, |
|
"loss": 0.1051, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.22316575050354004, |
|
"rewards/margins": 0.07666633278131485, |
|
"rewards/rejected": -0.2998320460319519, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.955437365374499e-06, |
|
"logits/chosen": -2.042649507522583, |
|
"logits/rejected": -1.833367943763733, |
|
"logps/chosen": -427.7418518066406, |
|
"logps/rejected": -428.240966796875, |
|
"loss": 0.0845, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.15850776433944702, |
|
"rewards/margins": 0.04146740958094597, |
|
"rewards/rejected": -0.1999751627445221, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.949637630960618e-06, |
|
"logits/chosen": -2.2080771923065186, |
|
"logits/rejected": -1.9125267267227173, |
|
"logps/chosen": -363.9242858886719, |
|
"logps/rejected": -413.38140869140625, |
|
"loss": 0.1212, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.151325061917305, |
|
"rewards/margins": 0.08935161679983139, |
|
"rewards/rejected": -0.2406766712665558, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.943487033370056e-06, |
|
"logits/chosen": -2.1318681240081787, |
|
"logits/rejected": -1.7986198663711548, |
|
"logps/chosen": -613.80712890625, |
|
"logps/rejected": -721.6338500976562, |
|
"loss": 0.0967, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.3193725049495697, |
|
"rewards/margins": 0.10743044316768646, |
|
"rewards/rejected": -0.42680296301841736, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.936986453556871e-06, |
|
"logits/chosen": -2.0002779960632324, |
|
"logits/rejected": -1.736196756362915, |
|
"logps/chosen": -580.8267211914062, |
|
"logps/rejected": -735.5391845703125, |
|
"loss": 0.0949, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.3693394362926483, |
|
"rewards/margins": 0.12292595952749252, |
|
"rewards/rejected": -0.49226540327072144, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.930136822603299e-06, |
|
"logits/chosen": -1.894122838973999, |
|
"logits/rejected": -1.7605243921279907, |
|
"logps/chosen": -661.7432250976562, |
|
"logps/rejected": -752.2081909179688, |
|
"loss": 0.0547, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.40788716077804565, |
|
"rewards/margins": 0.11583086103200912, |
|
"rewards/rejected": -0.5237180590629578, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.922939121586396e-06, |
|
"logits/chosen": -1.8540306091308594, |
|
"logits/rejected": -1.6888984441757202, |
|
"logps/chosen": -703.7432861328125, |
|
"logps/rejected": -763.1231689453125, |
|
"loss": 0.1083, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.4497678875923157, |
|
"rewards/margins": 0.08937375247478485, |
|
"rewards/rejected": -0.5391416549682617, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.915394381437517e-06, |
|
"logits/chosen": -2.064244508743286, |
|
"logits/rejected": -1.787021279335022, |
|
"logps/chosen": -432.42877197265625, |
|
"logps/rejected": -491.94720458984375, |
|
"loss": 0.1268, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.30147069692611694, |
|
"rewards/margins": 0.08128456771373749, |
|
"rewards/rejected": -0.3827553391456604, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.907503682794656e-06, |
|
"logits/chosen": -2.1678388118743896, |
|
"logits/rejected": -1.9495502710342407, |
|
"logps/chosen": -532.3627319335938, |
|
"logps/rejected": -569.6402587890625, |
|
"loss": 0.106, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.314094603061676, |
|
"rewards/margins": 0.050387926399707794, |
|
"rewards/rejected": -0.36448249220848083, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.899268155847667e-06, |
|
"logits/chosen": -1.9529002904891968, |
|
"logits/rejected": -1.7975317239761353, |
|
"logps/chosen": -348.3830871582031, |
|
"logps/rejected": -417.27947998046875, |
|
"loss": 0.0803, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.20450131595134735, |
|
"rewards/margins": 0.07301940768957138, |
|
"rewards/rejected": -0.2775207459926605, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.890688980176381e-06, |
|
"logits/chosen": -2.286426067352295, |
|
"logits/rejected": -1.9330307245254517, |
|
"logps/chosen": -511.94647216796875, |
|
"logps/rejected": -562.6546630859375, |
|
"loss": 0.0752, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.24677078425884247, |
|
"rewards/margins": 0.09590072929859161, |
|
"rewards/rejected": -0.34267157316207886, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.881767384581658e-06, |
|
"logits/chosen": -2.3378746509552, |
|
"logits/rejected": -2.0311076641082764, |
|
"logps/chosen": -499.9852600097656, |
|
"logps/rejected": -580.5631103515625, |
|
"loss": 0.0586, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.24438512325286865, |
|
"rewards/margins": 0.08834482729434967, |
|
"rewards/rejected": -0.3327299654483795, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.872504646909387e-06, |
|
"logits/chosen": -2.234276056289673, |
|
"logits/rejected": -1.9043071269989014, |
|
"logps/chosen": -550.0889282226562, |
|
"logps/rejected": -577.0806884765625, |
|
"loss": 0.0994, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.29700541496276855, |
|
"rewards/margins": 0.07648201286792755, |
|
"rewards/rejected": -0.3734873831272125, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.8629020938674536e-06, |
|
"logits/chosen": -2.295952558517456, |
|
"logits/rejected": -1.7963495254516602, |
|
"logps/chosen": -415.9605407714844, |
|
"logps/rejected": -460.894287109375, |
|
"loss": 0.0775, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.14038066565990448, |
|
"rewards/margins": 0.0807698667049408, |
|
"rewards/rejected": -0.22115054726600647, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.852961100835717e-06, |
|
"logits/chosen": -2.271327257156372, |
|
"logits/rejected": -1.7637627124786377, |
|
"logps/chosen": -500.87481689453125, |
|
"logps/rejected": -481.28228759765625, |
|
"loss": 0.1147, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.21432165801525116, |
|
"rewards/margins": 0.062083542346954346, |
|
"rewards/rejected": -0.2764051854610443, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.84268309166902e-06, |
|
"logits/chosen": -1.9748462438583374, |
|
"logits/rejected": -1.959495186805725, |
|
"logps/chosen": -416.51531982421875, |
|
"logps/rejected": -488.2625427246094, |
|
"loss": 0.0904, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -0.24128298461437225, |
|
"rewards/margins": 0.043503545224666595, |
|
"rewards/rejected": -0.28478652238845825, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.832069538493237e-06, |
|
"logits/chosen": -2.1981587409973145, |
|
"logits/rejected": -1.8468116521835327, |
|
"logps/chosen": -520.8303833007812, |
|
"logps/rejected": -545.0802001953125, |
|
"loss": 0.0926, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.259421169757843, |
|
"rewards/margins": 0.06593780219554901, |
|
"rewards/rejected": -0.3253589868545532, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.821121961494431e-06, |
|
"logits/chosen": -2.008756160736084, |
|
"logits/rejected": -1.8565582036972046, |
|
"logps/chosen": -487.271240234375, |
|
"logps/rejected": -622.4865112304688, |
|
"loss": 0.1233, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.26701903343200684, |
|
"rewards/margins": 0.1124114841222763, |
|
"rewards/rejected": -0.37943053245544434, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.80984192870111e-06, |
|
"logits/chosen": -2.2143161296844482, |
|
"logits/rejected": -2.0251948833465576, |
|
"logps/chosen": -475.0022888183594, |
|
"logps/rejected": -536.0418090820312, |
|
"loss": 0.1049, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.23833520710468292, |
|
"rewards/margins": 0.08123020827770233, |
|
"rewards/rejected": -0.31956541538238525, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.798231055759643e-06, |
|
"logits/chosen": -2.1074166297912598, |
|
"logits/rejected": -1.8376226425170898, |
|
"logps/chosen": -575.6253662109375, |
|
"logps/rejected": -653.4076538085938, |
|
"loss": 0.0738, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.31133222579956055, |
|
"rewards/margins": 0.125440776348114, |
|
"rewards/rejected": -0.43677300214767456, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.786291005702841e-06, |
|
"logits/chosen": -1.996763825416565, |
|
"logits/rejected": -1.8434244394302368, |
|
"logps/chosen": -673.5606689453125, |
|
"logps/rejected": -735.9039306640625, |
|
"loss": 0.0861, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.4302713871002197, |
|
"rewards/margins": 0.07868895679712296, |
|
"rewards/rejected": -0.5089603662490845, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.7740234887117745e-06, |
|
"logits/chosen": -2.1286087036132812, |
|
"logits/rejected": -2.0892319679260254, |
|
"logps/chosen": -660.74267578125, |
|
"logps/rejected": -708.2584838867188, |
|
"loss": 0.0776, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.3790927827358246, |
|
"rewards/margins": 0.09251677989959717, |
|
"rewards/rejected": -0.47160959243774414, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.761430261870804e-06, |
|
"logits/chosen": -2.271576404571533, |
|
"logits/rejected": -2.0834946632385254, |
|
"logps/chosen": -636.5614624023438, |
|
"logps/rejected": -729.883544921875, |
|
"loss": 0.0955, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.3770856559276581, |
|
"rewards/margins": 0.0929422676563263, |
|
"rewards/rejected": -0.4700279235839844, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.748513128915928e-06, |
|
"logits/chosen": -2.1836562156677246, |
|
"logits/rejected": -1.9008392095565796, |
|
"logps/chosen": -611.7904663085938, |
|
"logps/rejected": -666.9830932617188, |
|
"loss": 0.0629, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.35665163397789, |
|
"rewards/margins": 0.07555453479290009, |
|
"rewards/rejected": -0.4322062134742737, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.735273939976425e-06, |
|
"logits/chosen": -2.0491878986358643, |
|
"logits/rejected": -1.8870747089385986, |
|
"logps/chosen": -617.8563842773438, |
|
"logps/rejected": -739.245361328125, |
|
"loss": 0.0922, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.3806142210960388, |
|
"rewards/margins": 0.11178413778543472, |
|
"rewards/rejected": -0.49239835143089294, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.721714591309859e-06, |
|
"logits/chosen": -2.241105079650879, |
|
"logits/rejected": -1.764789342880249, |
|
"logps/chosen": -493.39361572265625, |
|
"logps/rejected": -587.4572143554688, |
|
"loss": 0.0869, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.25177472829818726, |
|
"rewards/margins": 0.11156761646270752, |
|
"rewards/rejected": -0.3633423447608948, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.707837025030478e-06, |
|
"logits/chosen": -2.0533032417297363, |
|
"logits/rejected": -1.9060271978378296, |
|
"logps/chosen": -480.1116638183594, |
|
"logps/rejected": -561.89599609375, |
|
"loss": 0.0799, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.291046142578125, |
|
"rewards/margins": 0.0875721424818039, |
|
"rewards/rejected": -0.3786182999610901, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.693643228831046e-06, |
|
"logits/chosen": -2.1423745155334473, |
|
"logits/rejected": -1.8586080074310303, |
|
"logps/chosen": -486.69598388671875, |
|
"logps/rejected": -578.1256713867188, |
|
"loss": 0.0801, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.2516762316226959, |
|
"rewards/margins": 0.10918694734573364, |
|
"rewards/rejected": -0.3608631491661072, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.67913523569814e-06, |
|
"logits/chosen": -2.124239444732666, |
|
"logits/rejected": -1.8211021423339844, |
|
"logps/chosen": -505.65789794921875, |
|
"logps/rejected": -655.1524658203125, |
|
"loss": 0.0577, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.2613288164138794, |
|
"rewards/margins": 0.16436012089252472, |
|
"rewards/rejected": -0.4256889224052429, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.664315123620965e-06, |
|
"logits/chosen": -2.059915542602539, |
|
"logits/rejected": -1.8637025356292725, |
|
"logps/chosen": -627.88232421875, |
|
"logps/rejected": -771.10595703125, |
|
"loss": 0.0728, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.3556235432624817, |
|
"rewards/margins": 0.15885277092456818, |
|
"rewards/rejected": -0.5144763588905334, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.649185015293728e-06, |
|
"logits/chosen": -2.202380895614624, |
|
"logits/rejected": -1.7330690622329712, |
|
"logps/chosen": -586.8130493164062, |
|
"logps/rejected": -704.4319458007812, |
|
"loss": 0.0514, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.3131484091281891, |
|
"rewards/margins": 0.13473856449127197, |
|
"rewards/rejected": -0.44788694381713867, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.6337470778115946e-06, |
|
"logits/chosen": -2.2767772674560547, |
|
"logits/rejected": -1.9689744710922241, |
|
"logps/chosen": -588.1864013671875, |
|
"logps/rejected": -624.829345703125, |
|
"loss": 0.0587, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.2952669858932495, |
|
"rewards/margins": 0.10641799122095108, |
|
"rewards/rejected": -0.4016849398612976, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.6180035223603e-06, |
|
"logits/chosen": -2.1548593044281006, |
|
"logits/rejected": -1.7463791370391846, |
|
"logps/chosen": -608.1296997070312, |
|
"logps/rejected": -624.1170043945312, |
|
"loss": 0.0462, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.3367452323436737, |
|
"rewards/margins": 0.07480922341346741, |
|
"rewards/rejected": -0.4115544855594635, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.60195660389944e-06, |
|
"logits/chosen": -1.9999465942382812, |
|
"logits/rejected": -1.7375587224960327, |
|
"logps/chosen": -535.9517822265625, |
|
"logps/rejected": -687.1197509765625, |
|
"loss": 0.1019, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.3230445384979248, |
|
"rewards/margins": 0.11864666640758514, |
|
"rewards/rejected": -0.44169121980667114, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.585608620839487e-06, |
|
"logits/chosen": -2.0938560962677, |
|
"logits/rejected": -1.678571343421936, |
|
"logps/chosen": -540.8283081054688, |
|
"logps/rejected": -611.6381225585938, |
|
"loss": 0.0954, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.3455389142036438, |
|
"rewards/margins": 0.125118687748909, |
|
"rewards/rejected": -0.4706575870513916, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.56896191471259e-06, |
|
"logits/chosen": -2.2690582275390625, |
|
"logits/rejected": -1.8676296472549438, |
|
"logps/chosen": -589.1588745117188, |
|
"logps/rejected": -705.4854736328125, |
|
"loss": 0.0739, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.3118017613887787, |
|
"rewards/margins": 0.1364874541759491, |
|
"rewards/rejected": -0.4482892155647278, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.552018869837197e-06, |
|
"logits/chosen": -2.1564137935638428, |
|
"logits/rejected": -1.83013916015625, |
|
"logps/chosen": -609.3465576171875, |
|
"logps/rejected": -699.4984130859375, |
|
"loss": 0.0745, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.3426007628440857, |
|
"rewards/margins": 0.13322630524635315, |
|
"rewards/rejected": -0.47582703828811646, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.534781912976546e-06, |
|
"logits/chosen": -2.1592516899108887, |
|
"logits/rejected": -1.8255417346954346, |
|
"logps/chosen": -491.94366455078125, |
|
"logps/rejected": -572.9866943359375, |
|
"loss": 0.0519, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.29055461287498474, |
|
"rewards/margins": 0.10878726094961166, |
|
"rewards/rejected": -0.399341881275177, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.517253512991077e-06, |
|
"logits/chosen": -2.1750411987304688, |
|
"logits/rejected": -1.8149001598358154, |
|
"logps/chosen": -588.0299682617188, |
|
"logps/rejected": -729.2427368164062, |
|
"loss": 0.0431, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.3461843430995941, |
|
"rewards/margins": 0.1527387797832489, |
|
"rewards/rejected": -0.498923122882843, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.499436180484816e-06, |
|
"logits/chosen": -2.0803980827331543, |
|
"logits/rejected": -1.8419866561889648, |
|
"logps/chosen": -657.2296142578125, |
|
"logps/rejected": -715.0289916992188, |
|
"loss": 0.0468, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.3858215808868408, |
|
"rewards/margins": 0.1072002500295639, |
|
"rewards/rejected": -0.49302178621292114, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.481332467445784e-06, |
|
"logits/chosen": -2.1348459720611572, |
|
"logits/rejected": -1.8123409748077393, |
|
"logps/chosen": -555.2808837890625, |
|
"logps/rejected": -704.3569946289062, |
|
"loss": 0.0561, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.36617225408554077, |
|
"rewards/margins": 0.1446944773197174, |
|
"rewards/rejected": -0.5108667016029358, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.462944966880464e-06, |
|
"logits/chosen": -2.137538433074951, |
|
"logits/rejected": -1.9754664897918701, |
|
"logps/chosen": -614.5938110351562, |
|
"logps/rejected": -646.7432250976562, |
|
"loss": 0.0661, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.3780440390110016, |
|
"rewards/margins": 0.04127226397395134, |
|
"rewards/rejected": -0.41931629180908203, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.444276312442415e-06, |
|
"logits/chosen": -2.0289080142974854, |
|
"logits/rejected": -1.7629003524780273, |
|
"logps/chosen": -541.0709838867188, |
|
"logps/rejected": -660.1776123046875, |
|
"loss": 0.1268, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.3162756562232971, |
|
"rewards/margins": 0.12853361666202545, |
|
"rewards/rejected": -0.4448092579841614, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.425329178055044e-06, |
|
"logits/chosen": -2.194471597671509, |
|
"logits/rejected": -2.0296597480773926, |
|
"logps/chosen": -475.4537048339844, |
|
"logps/rejected": -510.2806701660156, |
|
"loss": 0.0885, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.2241251915693283, |
|
"rewards/margins": 0.07522304356098175, |
|
"rewards/rejected": -0.29934826493263245, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.40610627752862e-06, |
|
"logits/chosen": -2.2251474857330322, |
|
"logits/rejected": -1.7184536457061768, |
|
"logps/chosen": -499.7225036621094, |
|
"logps/rejected": -632.2643432617188, |
|
"loss": 0.0669, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.2541348338127136, |
|
"rewards/margins": 0.1577138453722, |
|
"rewards/rejected": -0.4118487238883972, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.386610364171575e-06, |
|
"logits/chosen": -2.0547173023223877, |
|
"logits/rejected": -1.9380995035171509, |
|
"logps/chosen": -583.1637573242188, |
|
"logps/rejected": -675.6350708007812, |
|
"loss": 0.0608, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.31417426466941833, |
|
"rewards/margins": 0.07938437163829803, |
|
"rewards/rejected": -0.3935586214065552, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.366844230396145e-06, |
|
"logits/chosen": -2.1797802448272705, |
|
"logits/rejected": -1.739689588546753, |
|
"logps/chosen": -619.1736450195312, |
|
"logps/rejected": -779.1910400390625, |
|
"loss": 0.0831, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.354561984539032, |
|
"rewards/margins": 0.16057774424552917, |
|
"rewards/rejected": -0.5151397585868835, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.346810707318409e-06, |
|
"logits/chosen": -2.101902723312378, |
|
"logits/rejected": -1.756699800491333, |
|
"logps/chosen": -550.2091064453125, |
|
"logps/rejected": -652.6905517578125, |
|
"loss": 0.0876, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.34942546486854553, |
|
"rewards/margins": 0.1116580218076706, |
|
"rewards/rejected": -0.4610835015773773, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.326512664352788e-06, |
|
"logits/chosen": -2.1261112689971924, |
|
"logits/rejected": -1.695481538772583, |
|
"logps/chosen": -602.4801635742188, |
|
"logps/rejected": -667.7581176757812, |
|
"loss": 0.0785, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.3986489176750183, |
|
"rewards/margins": 0.07721661776304245, |
|
"rewards/rejected": -0.47586554288864136, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.30595300880106e-06, |
|
"logits/chosen": -1.9621734619140625, |
|
"logits/rejected": -1.8363412618637085, |
|
"logps/chosen": -516.5333862304688, |
|
"logps/rejected": -650.1600952148438, |
|
"loss": 0.0876, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.3551289439201355, |
|
"rewards/margins": 0.12143947929143906, |
|
"rewards/rejected": -0.47656846046447754, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.285134685435941e-06, |
|
"logits/chosen": -2.111262321472168, |
|
"logits/rejected": -1.8083570003509521, |
|
"logps/chosen": -619.1475830078125, |
|
"logps/rejected": -671.5328369140625, |
|
"loss": 0.069, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.3735567033290863, |
|
"rewards/margins": 0.09376771748065948, |
|
"rewards/rejected": -0.467324435710907, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.264060676079302e-06, |
|
"logits/chosen": -1.897774338722229, |
|
"logits/rejected": -1.758368730545044, |
|
"logps/chosen": -648.8074951171875, |
|
"logps/rejected": -753.0172119140625, |
|
"loss": 0.099, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.3972818851470947, |
|
"rewards/margins": 0.11452829837799072, |
|
"rewards/rejected": -0.5118101239204407, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.242733999175087e-06, |
|
"logits/chosen": -2.1442208290100098, |
|
"logits/rejected": -1.8442842960357666, |
|
"logps/chosen": -574.822265625, |
|
"logps/rejected": -674.7249145507812, |
|
"loss": 0.0727, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.3319811522960663, |
|
"rewards/margins": 0.10243819653987885, |
|
"rewards/rejected": -0.4344193935394287, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.221157709356973e-06, |
|
"logits/chosen": -2.069833517074585, |
|
"logits/rejected": -2.053417682647705, |
|
"logps/chosen": -473.2567443847656, |
|
"logps/rejected": -551.2691650390625, |
|
"loss": 0.0748, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.28391993045806885, |
|
"rewards/margins": 0.07798723131418228, |
|
"rewards/rejected": -0.3619071841239929, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.199334897010857e-06, |
|
"logits/chosen": -2.428363561630249, |
|
"logits/rejected": -1.9904381036758423, |
|
"logps/chosen": -574.2955932617188, |
|
"logps/rejected": -636.1633911132812, |
|
"loss": 0.0508, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.29294562339782715, |
|
"rewards/margins": 0.11134348064661026, |
|
"rewards/rejected": -0.404289186000824, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.177268687832216e-06, |
|
"logits/chosen": -2.2618508338928223, |
|
"logits/rejected": -1.9453493356704712, |
|
"logps/chosen": -583.1436767578125, |
|
"logps/rejected": -659.47998046875, |
|
"loss": 0.0492, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.3153453767299652, |
|
"rewards/margins": 0.0904412493109703, |
|
"rewards/rejected": -0.4057866036891937, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.154962242378413e-06, |
|
"logits/chosen": -2.2178263664245605, |
|
"logits/rejected": -1.5476510524749756, |
|
"logps/chosen": -663.134765625, |
|
"logps/rejected": -688.6644287109375, |
|
"loss": 0.0723, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.3838910460472107, |
|
"rewards/margins": 0.10614491999149323, |
|
"rewards/rejected": -0.4900360107421875, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.132418755616006e-06, |
|
"logits/chosen": -2.1056065559387207, |
|
"logits/rejected": -1.8169822692871094, |
|
"logps/chosen": -689.2779541015625, |
|
"logps/rejected": -805.4830322265625, |
|
"loss": 0.0795, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.42020684480667114, |
|
"rewards/margins": 0.09372207522392273, |
|
"rewards/rejected": -0.5139288902282715, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.109641456463135e-06, |
|
"logits/chosen": -2.270031452178955, |
|
"logits/rejected": -2.0831856727600098, |
|
"logps/chosen": -583.760009765625, |
|
"logps/rejected": -599.2212524414062, |
|
"loss": 0.1385, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.3357154428958893, |
|
"rewards/margins": 0.05204144865274429, |
|
"rewards/rejected": -0.387756884098053, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.086633607327036e-06, |
|
"logits/chosen": -1.9891811609268188, |
|
"logits/rejected": -1.892112135887146, |
|
"logps/chosen": -592.84814453125, |
|
"logps/rejected": -705.7501831054688, |
|
"loss": 0.0726, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.40938377380371094, |
|
"rewards/margins": 0.07667826116085052, |
|
"rewards/rejected": -0.4860619902610779, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.06339850363677e-06, |
|
"logits/chosen": -2.229407548904419, |
|
"logits/rejected": -1.667838454246521, |
|
"logps/chosen": -679.7462158203125, |
|
"logps/rejected": -710.1227416992188, |
|
"loss": 0.0926, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.380540132522583, |
|
"rewards/margins": 0.09510533511638641, |
|
"rewards/rejected": -0.4756454527378082, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.039939473371213e-06, |
|
"logits/chosen": -2.238617420196533, |
|
"logits/rejected": -1.8673069477081299, |
|
"logps/chosen": -528.8187255859375, |
|
"logps/rejected": -692.1638793945312, |
|
"loss": 0.0882, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.32173532247543335, |
|
"rewards/margins": 0.16088572144508362, |
|
"rewards/rejected": -0.48262104392051697, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.01625987658239e-06, |
|
"logits/chosen": -2.209980010986328, |
|
"logits/rejected": -1.9042339324951172, |
|
"logps/chosen": -692.3306884765625, |
|
"logps/rejected": -672.4944458007812, |
|
"loss": 0.0636, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.3526589572429657, |
|
"rewards/margins": 0.04882120341062546, |
|
"rewards/rejected": -0.40148013830184937, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.992363104914211e-06, |
|
"logits/chosen": -2.256624698638916, |
|
"logits/rejected": -1.9974693059921265, |
|
"logps/chosen": -560.591796875, |
|
"logps/rejected": -578.005859375, |
|
"loss": 0.0446, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.27544528245925903, |
|
"rewards/margins": 0.07316794246435165, |
|
"rewards/rejected": -0.3486132025718689, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.9682525811166835e-06, |
|
"logits/chosen": -2.007150173187256, |
|
"logits/rejected": -1.7732871770858765, |
|
"logps/chosen": -560.3460693359375, |
|
"logps/rejected": -629.5783081054688, |
|
"loss": 0.0836, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.37917017936706543, |
|
"rewards/margins": 0.04371767118573189, |
|
"rewards/rejected": -0.4228878617286682, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.943931758555669e-06, |
|
"logits/chosen": -2.0913567543029785, |
|
"logits/rejected": -1.7354761362075806, |
|
"logps/chosen": -608.615478515625, |
|
"logps/rejected": -674.1383666992188, |
|
"loss": 0.0904, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.3448850214481354, |
|
"rewards/margins": 0.10200424492359161, |
|
"rewards/rejected": -0.4468892514705658, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.91940412071826e-06, |
|
"logits/chosen": -2.225922107696533, |
|
"logits/rejected": -1.8326537609100342, |
|
"logps/chosen": -651.896728515625, |
|
"logps/rejected": -687.8331298828125, |
|
"loss": 0.0807, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.3168756365776062, |
|
"rewards/margins": 0.10071317106485367, |
|
"rewards/rejected": -0.4175888001918793, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.894673180713829e-06, |
|
"logits/chosen": -2.0696487426757812, |
|
"logits/rejected": -1.9062414169311523, |
|
"logps/chosen": -542.2778930664062, |
|
"logps/rejected": -659.351318359375, |
|
"loss": 0.1094, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.29152894020080566, |
|
"rewards/margins": 0.09752384573221207, |
|
"rewards/rejected": -0.38905277848243713, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.869742480770855e-06, |
|
"logits/chosen": -2.371598958969116, |
|
"logits/rejected": -2.1790008544921875, |
|
"logps/chosen": -508.5203552246094, |
|
"logps/rejected": -568.6731567382812, |
|
"loss": 0.0912, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.2144095003604889, |
|
"rewards/margins": 0.08987125009298325, |
|
"rewards/rejected": -0.30428069829940796, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.844615591729558e-06, |
|
"logits/chosen": -2.045975685119629, |
|
"logits/rejected": -1.9603124856948853, |
|
"logps/chosen": -437.69635009765625, |
|
"logps/rejected": -593.5106201171875, |
|
"loss": 0.0675, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.2701892852783203, |
|
"rewards/margins": 0.0922120064496994, |
|
"rewards/rejected": -0.3624013364315033, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.819296112530448e-06, |
|
"logits/chosen": -1.885154128074646, |
|
"logits/rejected": -1.9763774871826172, |
|
"logps/chosen": -534.5982055664062, |
|
"logps/rejected": -644.5667114257812, |
|
"loss": 0.103, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.3009013533592224, |
|
"rewards/margins": 0.07459478080272675, |
|
"rewards/rejected": -0.37549614906311035, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.7937876696988505e-06, |
|
"logits/chosen": -2.228935718536377, |
|
"logits/rejected": -1.992790937423706, |
|
"logps/chosen": -595.5947265625, |
|
"logps/rejected": -676.4202880859375, |
|
"loss": 0.0764, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.33396145701408386, |
|
"rewards/margins": 0.09032727777957916, |
|
"rewards/rejected": -0.4242887496948242, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7680939168254733e-06, |
|
"logits/chosen": -2.148974657058716, |
|
"logits/rejected": -1.8327066898345947, |
|
"logps/chosen": -651.0219116210938, |
|
"logps/rejected": -701.9970092773438, |
|
"loss": 0.0987, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.37643447518348694, |
|
"rewards/margins": 0.08819916099309921, |
|
"rewards/rejected": -0.46463364362716675, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7422185340430983e-06, |
|
"logits/chosen": -2.2028675079345703, |
|
"logits/rejected": -2.0429883003234863, |
|
"logps/chosen": -584.5667114257812, |
|
"logps/rejected": -641.1693115234375, |
|
"loss": 0.0937, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.34211236238479614, |
|
"rewards/margins": 0.09406879544258118, |
|
"rewards/rejected": -0.4361811578273773, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.71616522749948e-06, |
|
"logits/chosen": -2.465292453765869, |
|
"logits/rejected": -2.052821159362793, |
|
"logps/chosen": -604.7883911132812, |
|
"logps/rejected": -693.3052978515625, |
|
"loss": 0.0774, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.33726420998573303, |
|
"rewards/margins": 0.13398997485637665, |
|
"rewards/rejected": -0.4712541103363037, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.6899377288265043e-06, |
|
"logits/chosen": -2.0992684364318848, |
|
"logits/rejected": -1.9688608646392822, |
|
"logps/chosen": -596.2180786132812, |
|
"logps/rejected": -685.1240844726562, |
|
"loss": 0.0809, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.3490615487098694, |
|
"rewards/margins": 0.11136557906866074, |
|
"rewards/rejected": -0.46042710542678833, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.6635397946057114e-06, |
|
"logits/chosen": -2.260376453399658, |
|
"logits/rejected": -1.8176358938217163, |
|
"logps/chosen": -605.2567138671875, |
|
"logps/rejected": -649.65673828125, |
|
"loss": 0.0525, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.3633373975753784, |
|
"rewards/margins": 0.11985437572002411, |
|
"rewards/rejected": -0.4831917881965637, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.6369752058302327e-06, |
|
"logits/chosen": -2.275251865386963, |
|
"logits/rejected": -1.9965429306030273, |
|
"logps/chosen": -539.2314453125, |
|
"logps/rejected": -636.8073120117188, |
|
"loss": 0.0995, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.3215656876564026, |
|
"rewards/margins": 0.13287541270256042, |
|
"rewards/rejected": -0.4544410705566406, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.610247767363239e-06, |
|
"logits/chosen": -1.9407621622085571, |
|
"logits/rejected": -1.8752870559692383, |
|
"logps/chosen": -587.7548828125, |
|
"logps/rejected": -671.2868041992188, |
|
"loss": 0.0756, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.3427480161190033, |
|
"rewards/margins": 0.06522272527217865, |
|
"rewards/rejected": -0.40797075629234314, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.5833613073929684e-06, |
|
"logits/chosen": -2.1943631172180176, |
|
"logits/rejected": -1.859442114830017, |
|
"logps/chosen": -558.7838134765625, |
|
"logps/rejected": -717.7572021484375, |
|
"loss": 0.0603, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.3341369032859802, |
|
"rewards/margins": 0.13402590155601501, |
|
"rewards/rejected": -0.46816277503967285, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.55631967688441e-06, |
|
"logits/chosen": -2.1805663108825684, |
|
"logits/rejected": -1.8173482418060303, |
|
"logps/chosen": -807.4880981445312, |
|
"logps/rejected": -837.0833740234375, |
|
"loss": 0.0518, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.45633822679519653, |
|
"rewards/margins": 0.08246854692697525, |
|
"rewards/rejected": -0.5388067364692688, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.5291267490277316e-06, |
|
"logits/chosen": -1.8296005725860596, |
|
"logits/rejected": -1.6457901000976562, |
|
"logps/chosen": -621.5354614257812, |
|
"logps/rejected": -701.6083374023438, |
|
"loss": 0.1176, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.3704153597354889, |
|
"rewards/margins": 0.11859778314828873, |
|
"rewards/rejected": -0.48901304602622986, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.501786418683515e-06, |
|
"logits/chosen": -2.0369582176208496, |
|
"logits/rejected": -1.9080692529678345, |
|
"logps/chosen": -765.410400390625, |
|
"logps/rejected": -825.2081298828125, |
|
"loss": 0.0802, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5283640623092651, |
|
"rewards/margins": 0.08725164830684662, |
|
"rewards/rejected": -0.615615725517273, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.474302601824896e-06, |
|
"logits/chosen": -2.2294676303863525, |
|
"logits/rejected": -1.7623846530914307, |
|
"logps/chosen": -824.7463989257812, |
|
"logps/rejected": -865.5594482421875, |
|
"loss": 0.104, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.5527879595756531, |
|
"rewards/margins": 0.07611383497714996, |
|
"rewards/rejected": -0.6289017796516418, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4466792349766767e-06, |
|
"logits/chosen": -2.3877675533294678, |
|
"logits/rejected": -2.136277198791504, |
|
"logps/chosen": -631.8321533203125, |
|
"logps/rejected": -653.0026245117188, |
|
"loss": 0.0999, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.3877051770687103, |
|
"rewards/margins": 0.06024375557899475, |
|
"rewards/rejected": -0.4479489326477051, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4189202746514938e-06, |
|
"logits/chosen": -2.127175807952881, |
|
"logits/rejected": -1.8073135614395142, |
|
"logps/chosen": -673.6165771484375, |
|
"logps/rejected": -750.7117309570312, |
|
"loss": 0.0823, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.4296782910823822, |
|
"rewards/margins": 0.1071913093328476, |
|
"rewards/rejected": -0.5368696451187134, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.391029696783127e-06, |
|
"logits/chosen": -1.9842946529388428, |
|
"logits/rejected": -1.5577259063720703, |
|
"logps/chosen": -650.3092041015625, |
|
"logps/rejected": -740.8306274414062, |
|
"loss": 0.0833, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.4263342022895813, |
|
"rewards/margins": 0.12290897220373154, |
|
"rewards/rejected": -0.549243152141571, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3630114961570187e-06, |
|
"logits/chosen": -2.326686382293701, |
|
"logits/rejected": -1.8404957056045532, |
|
"logps/chosen": -725.4781494140625, |
|
"logps/rejected": -822.6609497070312, |
|
"loss": 0.0923, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.4563368260860443, |
|
"rewards/margins": 0.14041298627853394, |
|
"rewards/rejected": -0.5967497825622559, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3348696858381023e-06, |
|
"logits/chosen": -2.081413984298706, |
|
"logits/rejected": -1.8651702404022217, |
|
"logps/chosen": -677.7269897460938, |
|
"logps/rejected": -756.968505859375, |
|
"loss": 0.0801, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.4635470509529114, |
|
"rewards/margins": 0.09350712597370148, |
|
"rewards/rejected": -0.5570541024208069, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.3066082965960082e-06, |
|
"logits/chosen": -2.1301093101501465, |
|
"logits/rejected": -2.035060405731201, |
|
"logps/chosen": -712.52685546875, |
|
"logps/rejected": -769.6886596679688, |
|
"loss": 0.0568, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.4348185062408447, |
|
"rewards/margins": 0.09993582218885422, |
|
"rewards/rejected": -0.5347543954849243, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.278231376327731e-06, |
|
"logits/chosen": -2.1865429878234863, |
|
"logits/rejected": -1.7728700637817383, |
|
"logps/chosen": -692.3992919921875, |
|
"logps/rejected": -818.3483276367188, |
|
"loss": 0.0822, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.4515460431575775, |
|
"rewards/margins": 0.13385489583015442, |
|
"rewards/rejected": -0.5854009985923767, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.249742989477851e-06, |
|
"logits/chosen": -2.199068546295166, |
|
"logits/rejected": -1.866813063621521, |
|
"logps/chosen": -817.2093505859375, |
|
"logps/rejected": -920.2403564453125, |
|
"loss": 0.0511, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5562337040901184, |
|
"rewards/margins": 0.11897413432598114, |
|
"rewards/rejected": -0.6752079129219055, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.2211472164563756e-06, |
|
"logits/chosen": -2.109049081802368, |
|
"logits/rejected": -1.7997972965240479, |
|
"logps/chosen": -705.8816528320312, |
|
"logps/rejected": -768.1865234375, |
|
"loss": 0.095, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.4462059438228607, |
|
"rewards/margins": 0.09233604371547699, |
|
"rewards/rejected": -0.5385419130325317, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.192448153054306e-06, |
|
"logits/chosen": -2.2047770023345947, |
|
"logits/rejected": -1.8513100147247314, |
|
"logps/chosen": -771.5567626953125, |
|
"logps/rejected": -856.609375, |
|
"loss": 0.0693, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5159443616867065, |
|
"rewards/margins": 0.11039619147777557, |
|
"rewards/rejected": -0.6263405680656433, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.16364990985699e-06, |
|
"logits/chosen": -2.357393980026245, |
|
"logits/rejected": -1.782231330871582, |
|
"logps/chosen": -714.89404296875, |
|
"logps/rejected": -804.54150390625, |
|
"loss": 0.0874, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.446491539478302, |
|
"rewards/margins": 0.16014492511749268, |
|
"rewards/rejected": -0.6066364645957947, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.134756611655362e-06, |
|
"logits/chosen": -2.3336434364318848, |
|
"logits/rejected": -2.126812219619751, |
|
"logps/chosen": -558.0953369140625, |
|
"logps/rejected": -690.7258911132812, |
|
"loss": 0.0954, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.36971205472946167, |
|
"rewards/margins": 0.10154370963573456, |
|
"rewards/rejected": -0.4712557792663574, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.1057723968551427e-06, |
|
"logits/chosen": -2.052511215209961, |
|
"logits/rejected": -1.6268789768218994, |
|
"logps/chosen": -709.0806884765625, |
|
"logps/rejected": -791.6183471679688, |
|
"loss": 0.0784, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.45772653818130493, |
|
"rewards/margins": 0.14719423651695251, |
|
"rewards/rejected": -0.6049207448959351, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.0767014168841e-06, |
|
"logits/chosen": -2.0302186012268066, |
|
"logits/rejected": -1.9973528385162354, |
|
"logps/chosen": -628.5819091796875, |
|
"logps/rejected": -709.375, |
|
"loss": 0.0716, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.34966421127319336, |
|
"rewards/margins": 0.07439250499010086, |
|
"rewards/rejected": -0.4240567088127136, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.047547835597432e-06, |
|
"logits/chosen": -1.909949541091919, |
|
"logits/rejected": -1.8792356252670288, |
|
"logps/chosen": -582.7291259765625, |
|
"logps/rejected": -683.0811767578125, |
|
"loss": 0.0874, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.3829612135887146, |
|
"rewards/margins": 0.07136234641075134, |
|
"rewards/rejected": -0.45432358980178833, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.0183158286813755e-06, |
|
"logits/chosen": -2.278263568878174, |
|
"logits/rejected": -1.8273910284042358, |
|
"logps/chosen": -655.0009765625, |
|
"logps/rejected": -684.890625, |
|
"loss": 0.075, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.3779233396053314, |
|
"rewards/margins": 0.10793854296207428, |
|
"rewards/rejected": -0.4858619272708893, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.989009583055121e-06, |
|
"logits/chosen": -2.129441022872925, |
|
"logits/rejected": -1.9885050058364868, |
|
"logps/chosen": -765.9420166015625, |
|
"logps/rejected": -878.65576171875, |
|
"loss": 0.0642, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.47016677260398865, |
|
"rewards/margins": 0.11287762969732285, |
|
"rewards/rejected": -0.5830444097518921, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.959633296271117e-06, |
|
"logits/chosen": -2.113435983657837, |
|
"logits/rejected": -1.8716232776641846, |
|
"logps/chosen": -611.4641723632812, |
|
"logps/rejected": -689.0220336914062, |
|
"loss": 0.0852, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.4303819537162781, |
|
"rewards/margins": 0.09832239896059036, |
|
"rewards/rejected": -0.5287044048309326, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9301911759138535e-06, |
|
"logits/chosen": -2.1812188625335693, |
|
"logits/rejected": -1.9601389169692993, |
|
"logps/chosen": -618.2063598632812, |
|
"logps/rejected": -735.6884155273438, |
|
"loss": 0.0752, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.4212234914302826, |
|
"rewards/margins": 0.11810547113418579, |
|
"rewards/rejected": -0.539328932762146, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.900687438997205e-06, |
|
"logits/chosen": -2.0657143592834473, |
|
"logits/rejected": -1.8039214611053467, |
|
"logps/chosen": -632.2223510742188, |
|
"logps/rejected": -755.59228515625, |
|
"loss": 0.0795, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.4010123312473297, |
|
"rewards/margins": 0.09653668105602264, |
|
"rewards/rejected": -0.49754899740219116, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.871126311360424e-06, |
|
"logits/chosen": -2.4564061164855957, |
|
"logits/rejected": -1.8799717426300049, |
|
"logps/chosen": -642.9153442382812, |
|
"logps/rejected": -700.4526977539062, |
|
"loss": 0.08, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.341757595539093, |
|
"rewards/margins": 0.12287576496601105, |
|
"rewards/rejected": -0.46463337540626526, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8415120270628756e-06, |
|
"logits/chosen": -2.2899577617645264, |
|
"logits/rejected": -1.8913564682006836, |
|
"logps/chosen": -625.5499877929688, |
|
"logps/rejected": -751.3653564453125, |
|
"loss": 0.059, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.34753698110580444, |
|
"rewards/margins": 0.17222611606121063, |
|
"rewards/rejected": -0.5197631120681763, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8118488277775852e-06, |
|
"logits/chosen": -2.1799449920654297, |
|
"logits/rejected": -2.2029454708099365, |
|
"logps/chosen": -541.7261352539062, |
|
"logps/rejected": -652.3367919921875, |
|
"loss": 0.0522, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.3303873538970947, |
|
"rewards/margins": 0.08029700815677643, |
|
"rewards/rejected": -0.4106842875480652, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7821409621837042e-06, |
|
"logits/chosen": -2.434246063232422, |
|
"logits/rejected": -1.9443886280059814, |
|
"logps/chosen": -614.01220703125, |
|
"logps/rejected": -698.8470458984375, |
|
"loss": 0.048, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.36497288942337036, |
|
"rewards/margins": 0.10476745665073395, |
|
"rewards/rejected": -0.4697403311729431, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7523926853579702e-06, |
|
"logits/chosen": -2.0561671257019043, |
|
"logits/rejected": -1.7254194021224976, |
|
"logps/chosen": -624.192626953125, |
|
"logps/rejected": -757.9207763671875, |
|
"loss": 0.0956, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.4151741564273834, |
|
"rewards/margins": 0.1256508082151413, |
|
"rewards/rejected": -0.5408250093460083, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.722608258165244e-06, |
|
"logits/chosen": -2.2351367473602295, |
|
"logits/rejected": -1.6919574737548828, |
|
"logps/chosen": -727.2100830078125, |
|
"logps/rejected": -801.911865234375, |
|
"loss": 0.094, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.43864089250564575, |
|
"rewards/margins": 0.1414739489555359, |
|
"rewards/rejected": -0.5801147818565369, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6927919466482293e-06, |
|
"logits/chosen": -2.0343658924102783, |
|
"logits/rejected": -1.7758781909942627, |
|
"logps/chosen": -717.9749755859375, |
|
"logps/rejected": -791.5612182617188, |
|
"loss": 0.0753, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.501410186290741, |
|
"rewards/margins": 0.08459311723709106, |
|
"rewards/rejected": -0.5860033631324768, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.662948021416441e-06, |
|
"logits/chosen": -2.232266426086426, |
|
"logits/rejected": -2.0454351902008057, |
|
"logps/chosen": -727.7106323242188, |
|
"logps/rejected": -814.9327392578125, |
|
"loss": 0.0557, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.46008116006851196, |
|
"rewards/margins": 0.10495243221521378, |
|
"rewards/rejected": -0.5650335550308228, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6330807570345253e-06, |
|
"logits/chosen": -2.1601340770721436, |
|
"logits/rejected": -1.9538816213607788, |
|
"logps/chosen": -685.18896484375, |
|
"logps/rejected": -795.1231689453125, |
|
"loss": 0.0791, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.4878101348876953, |
|
"rewards/margins": 0.1159614771604538, |
|
"rewards/rejected": -0.6037715673446655, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.6031944314100077e-06, |
|
"logits/chosen": -2.442682981491089, |
|
"logits/rejected": -2.2220892906188965, |
|
"logps/chosen": -678.3485717773438, |
|
"logps/rejected": -837.5185546875, |
|
"loss": 0.0906, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.4468511641025543, |
|
"rewards/margins": 0.12742993235588074, |
|
"rewards/rejected": -0.5742811560630798, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5732933251805716e-06, |
|
"logits/chosen": -2.238412380218506, |
|
"logits/rejected": -2.0841736793518066, |
|
"logps/chosen": -718.62060546875, |
|
"logps/rejected": -867.1959228515625, |
|
"loss": 0.0541, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.4921676218509674, |
|
"rewards/margins": 0.10790624469518661, |
|
"rewards/rejected": -0.6000738143920898, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.543381721100931e-06, |
|
"logits/chosen": -2.335407257080078, |
|
"logits/rejected": -2.028334140777588, |
|
"logps/chosen": -614.9449462890625, |
|
"logps/rejected": -788.1812133789062, |
|
"loss": 0.0811, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.41624245047569275, |
|
"rewards/margins": 0.17170578241348267, |
|
"rewards/rejected": -0.587948203086853, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.513463903429418e-06, |
|
"logits/chosen": -2.4693076610565186, |
|
"logits/rejected": -2.0430703163146973, |
|
"logps/chosen": -725.3846435546875, |
|
"logps/rejected": -755.0938720703125, |
|
"loss": 0.1021, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.4062575697898865, |
|
"rewards/margins": 0.10169617831707001, |
|
"rewards/rejected": -0.5079537630081177, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.483544157314338e-06, |
|
"logits/chosen": -2.293912410736084, |
|
"logits/rejected": -1.9852508306503296, |
|
"logps/chosen": -639.7050170898438, |
|
"logps/rejected": -763.0711059570312, |
|
"loss": 0.0737, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.4078396260738373, |
|
"rewards/margins": 0.1239791288971901, |
|
"rewards/rejected": -0.5318187475204468, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.453626768180214e-06, |
|
"logits/chosen": -2.122490167617798, |
|
"logits/rejected": -1.8506181240081787, |
|
"logps/chosen": -720.9524536132812, |
|
"logps/rejected": -728.7947998046875, |
|
"loss": 0.1032, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.4711574912071228, |
|
"rewards/margins": 0.0486263632774353, |
|
"rewards/rejected": -0.5197838544845581, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4237160211139697e-06, |
|
"logits/chosen": -2.0483882427215576, |
|
"logits/rejected": -1.869739294052124, |
|
"logps/chosen": -622.5816650390625, |
|
"logps/rejected": -701.8380126953125, |
|
"loss": 0.0577, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.41618186235427856, |
|
"rewards/margins": 0.08069188892841339, |
|
"rewards/rejected": -0.49687376618385315, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.393816200251187e-06, |
|
"logits/chosen": -2.0031468868255615, |
|
"logits/rejected": -1.54337739944458, |
|
"logps/chosen": -700.4393310546875, |
|
"logps/rejected": -752.5362548828125, |
|
"loss": 0.0727, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.4837239384651184, |
|
"rewards/margins": 0.10101475566625595, |
|
"rewards/rejected": -0.584738552570343, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3639315881624776e-06, |
|
"logits/chosen": -2.3299944400787354, |
|
"logits/rejected": -1.946599006652832, |
|
"logps/chosen": -644.5682373046875, |
|
"logps/rejected": -742.0753173828125, |
|
"loss": 0.0999, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.44712233543395996, |
|
"rewards/margins": 0.09644552320241928, |
|
"rewards/rejected": -0.5435678362846375, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.334066465240093e-06, |
|
"logits/chosen": -2.055642604827881, |
|
"logits/rejected": -1.5556762218475342, |
|
"logps/chosen": -778.634033203125, |
|
"logps/rejected": -831.9388427734375, |
|
"loss": 0.0767, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.4928356111049652, |
|
"rewards/margins": 0.1221829205751419, |
|
"rewards/rejected": -0.6150184869766235, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3042251090848357e-06, |
|
"logits/chosen": -2.259159564971924, |
|
"logits/rejected": -1.7662973403930664, |
|
"logps/chosen": -643.4910888671875, |
|
"logps/rejected": -777.8291015625, |
|
"loss": 0.0825, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.4092964231967926, |
|
"rewards/margins": 0.1640159636735916, |
|
"rewards/rejected": -0.5733123421669006, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2744117938933814e-06, |
|
"logits/chosen": -2.2976372241973877, |
|
"logits/rejected": -1.9439456462860107, |
|
"logps/chosen": -806.0792236328125, |
|
"logps/rejected": -835.6940307617188, |
|
"loss": 0.0729, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.49456721544265747, |
|
"rewards/margins": 0.07213100790977478, |
|
"rewards/rejected": -0.5666981935501099, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2446307898460807e-06, |
|
"logits/chosen": -2.1391043663024902, |
|
"logits/rejected": -1.7574889659881592, |
|
"logps/chosen": -778.1689453125, |
|
"logps/rejected": -875.1658935546875, |
|
"loss": 0.0758, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.5087161064147949, |
|
"rewards/margins": 0.12866072356700897, |
|
"rewards/rejected": -0.6373767852783203, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2148863624953364e-06, |
|
"logits/chosen": -2.1262030601501465, |
|
"logits/rejected": -1.8642327785491943, |
|
"logps/chosen": -721.8367919921875, |
|
"logps/rejected": -874.4264526367188, |
|
"loss": 0.0491, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.48331218957901, |
|
"rewards/margins": 0.12461258471012115, |
|
"rewards/rejected": -0.6079246997833252, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1851827721546483e-06, |
|
"logits/chosen": -2.0042014122009277, |
|
"logits/rejected": -1.7912979125976562, |
|
"logps/chosen": -779.1425170898438, |
|
"logps/rejected": -934.787109375, |
|
"loss": 0.046, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5522741079330444, |
|
"rewards/margins": 0.15596961975097656, |
|
"rewards/rejected": -0.7082436680793762, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.155524273288405e-06, |
|
"logits/chosen": -2.2937986850738525, |
|
"logits/rejected": -1.780461072921753, |
|
"logps/chosen": -812.2808837890625, |
|
"logps/rejected": -918.3689575195312, |
|
"loss": 0.0805, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.4778391420841217, |
|
"rewards/margins": 0.1493079662322998, |
|
"rewards/rejected": -0.6271471381187439, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.125915113902514e-06, |
|
"logits/chosen": -2.125365734100342, |
|
"logits/rejected": -1.9615271091461182, |
|
"logps/chosen": -657.5159912109375, |
|
"logps/rejected": -758.6436767578125, |
|
"loss": 0.0946, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.42783278226852417, |
|
"rewards/margins": 0.09773501008749008, |
|
"rewards/rejected": -0.5255678296089172, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.096359534935958e-06, |
|
"logits/chosen": -1.965488076210022, |
|
"logits/rejected": -1.7874730825424194, |
|
"logps/chosen": -719.5172119140625, |
|
"logps/rejected": -838.3024291992188, |
|
"loss": 0.0825, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.520493745803833, |
|
"rewards/margins": 0.09665954858064651, |
|
"rewards/rejected": -0.6171532869338989, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0668617696533603e-06, |
|
"logits/chosen": -2.1595165729522705, |
|
"logits/rejected": -1.795940637588501, |
|
"logps/chosen": -750.8433227539062, |
|
"logps/rejected": -791.1815795898438, |
|
"loss": 0.0898, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5119301080703735, |
|
"rewards/margins": 0.08175458759069443, |
|
"rewards/rejected": -0.5936846137046814, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0374260430386542e-06, |
|
"logits/chosen": -2.072263717651367, |
|
"logits/rejected": -1.8800386190414429, |
|
"logps/chosen": -735.1260986328125, |
|
"logps/rejected": -801.7227172851562, |
|
"loss": 0.0447, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.49476614594459534, |
|
"rewards/margins": 0.09266269207000732, |
|
"rewards/rejected": -0.5874288082122803, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.0080565711899327e-06, |
|
"logits/chosen": -1.9065930843353271, |
|
"logits/rejected": -1.6727092266082764, |
|
"logps/chosen": -684.2578125, |
|
"logps/rejected": -754.9581298828125, |
|
"loss": 0.0742, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.47697681188583374, |
|
"rewards/margins": 0.08060398697853088, |
|
"rewards/rejected": -0.5575807690620422, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.978757560715579e-06, |
|
"logits/chosen": -2.1670429706573486, |
|
"logits/rejected": -1.99936842918396, |
|
"logps/chosen": -735.7562866210938, |
|
"logps/rejected": -859.4212036132812, |
|
"loss": 0.0952, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.4949742257595062, |
|
"rewards/margins": 0.13264210522174835, |
|
"rewards/rejected": -0.627616286277771, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9495332081317466e-06, |
|
"logits/chosen": -2.0642776489257812, |
|
"logits/rejected": -1.8938987255096436, |
|
"logps/chosen": -819.5340576171875, |
|
"logps/rejected": -883.53564453125, |
|
"loss": 0.0872, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.5653868317604065, |
|
"rewards/margins": 0.07868463546037674, |
|
"rewards/rejected": -0.6440714597702026, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9203876992612904e-06, |
|
"logits/chosen": -2.0933823585510254, |
|
"logits/rejected": -1.7218735218048096, |
|
"logps/chosen": -661.1719360351562, |
|
"logps/rejected": -781.9850463867188, |
|
"loss": 0.1006, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.43831753730773926, |
|
"rewards/margins": 0.16069361567497253, |
|
"rewards/rejected": -0.5990111231803894, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.891325208634231e-06, |
|
"logits/chosen": -2.287635087966919, |
|
"logits/rejected": -1.9859931468963623, |
|
"logps/chosen": -612.9700927734375, |
|
"logps/rejected": -735.92919921875, |
|
"loss": 0.0608, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.4461449682712555, |
|
"rewards/margins": 0.13415846228599548, |
|
"rewards/rejected": -0.580303430557251, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8623498988898309e-06, |
|
"logits/chosen": -2.2971930503845215, |
|
"logits/rejected": -1.8358027935028076, |
|
"logps/chosen": -716.4727172851562, |
|
"logps/rejected": -849.65869140625, |
|
"loss": 0.0872, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.47384414076805115, |
|
"rewards/margins": 0.13949860632419586, |
|
"rewards/rejected": -0.6133427023887634, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.83346592018038e-06, |
|
"logits/chosen": -2.231283664703369, |
|
"logits/rejected": -1.9385484457015991, |
|
"logps/chosen": -650.612060546875, |
|
"logps/rejected": -773.4234008789062, |
|
"loss": 0.1013, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.4305783212184906, |
|
"rewards/margins": 0.13019639253616333, |
|
"rewards/rejected": -0.5607747435569763, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8046774095767652e-06, |
|
"logits/chosen": -2.3146252632141113, |
|
"logits/rejected": -2.038005828857422, |
|
"logps/chosen": -650.5593872070312, |
|
"logps/rejected": -713.4359130859375, |
|
"loss": 0.0792, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.4199472963809967, |
|
"rewards/margins": 0.11954204738140106, |
|
"rewards/rejected": -0.5394893884658813, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.775988490475914e-06, |
|
"logits/chosen": -1.9583438634872437, |
|
"logits/rejected": -1.9360641241073608, |
|
"logps/chosen": -607.3677978515625, |
|
"logps/rejected": -768.2495727539062, |
|
"loss": 0.0828, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.4246648848056793, |
|
"rewards/margins": 0.1216825619339943, |
|
"rewards/rejected": -0.546347439289093, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7474032720101991e-06, |
|
"logits/chosen": -2.302241802215576, |
|
"logits/rejected": -2.053952932357788, |
|
"logps/chosen": -578.2738647460938, |
|
"logps/rejected": -709.3789672851562, |
|
"loss": 0.0998, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.3617783188819885, |
|
"rewards/margins": 0.12743337452411652, |
|
"rewards/rejected": -0.48921164870262146, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7189258484588853e-06, |
|
"logits/chosen": -2.295841932296753, |
|
"logits/rejected": -1.8487634658813477, |
|
"logps/chosen": -805.4288330078125, |
|
"logps/rejected": -849.3688354492188, |
|
"loss": 0.0616, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5005868673324585, |
|
"rewards/margins": 0.10474538803100586, |
|
"rewards/rejected": -0.6053322553634644, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.6905602986617006e-06, |
|
"logits/chosen": -2.2122347354888916, |
|
"logits/rejected": -1.8240172863006592, |
|
"logps/chosen": -648.9810791015625, |
|
"logps/rejected": -798.175048828125, |
|
"loss": 0.0807, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.4233244061470032, |
|
"rewards/margins": 0.15655961632728577, |
|
"rewards/rejected": -0.5798839926719666, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.662310685434625e-06, |
|
"logits/chosen": -2.3458707332611084, |
|
"logits/rejected": -2.2734622955322266, |
|
"logps/chosen": -647.6605224609375, |
|
"logps/rejected": -782.6307373046875, |
|
"loss": 0.073, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.3885071873664856, |
|
"rewards/margins": 0.09876324236392975, |
|
"rewards/rejected": -0.48727044463157654, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6341810549879666e-06, |
|
"logits/chosen": -2.344203233718872, |
|
"logits/rejected": -2.0581679344177246, |
|
"logps/chosen": -571.4796752929688, |
|
"logps/rejected": -596.5723876953125, |
|
"loss": 0.0721, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.35855913162231445, |
|
"rewards/margins": 0.06313261389732361, |
|
"rewards/rejected": -0.4216917157173157, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6061754363468255e-06, |
|
"logits/chosen": -2.259507894515991, |
|
"logits/rejected": -2.1293258666992188, |
|
"logps/chosen": -650.2342529296875, |
|
"logps/rejected": -721.8099365234375, |
|
"loss": 0.0816, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.3676004707813263, |
|
"rewards/margins": 0.07684332877397537, |
|
"rewards/rejected": -0.44444379210472107, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5782978407740087e-06, |
|
"logits/chosen": -2.028473138809204, |
|
"logits/rejected": -2.022217273712158, |
|
"logps/chosen": -670.6177368164062, |
|
"logps/rejected": -752.2406616210938, |
|
"loss": 0.0834, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.44136539101600647, |
|
"rewards/margins": 0.08413775265216827, |
|
"rewards/rejected": -0.5255030393600464, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5505522611954977e-06, |
|
"logits/chosen": -2.2423367500305176, |
|
"logits/rejected": -1.7249571084976196, |
|
"logps/chosen": -652.0220947265625, |
|
"logps/rejected": -791.6546020507812, |
|
"loss": 0.1056, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.42029309272766113, |
|
"rewards/margins": 0.17177040874958038, |
|
"rewards/rejected": -0.5920634865760803, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.522942671628537e-06, |
|
"logits/chosen": -2.2815046310424805, |
|
"logits/rejected": -2.0123918056488037, |
|
"logps/chosen": -581.7095336914062, |
|
"logps/rejected": -690.0474853515625, |
|
"loss": 0.0764, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.37874311208724976, |
|
"rewards/margins": 0.09943266212940216, |
|
"rewards/rejected": -0.4781757891178131, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.495473026612435e-06, |
|
"logits/chosen": -2.2578682899475098, |
|
"logits/rejected": -1.8968786001205444, |
|
"logps/chosen": -684.0667724609375, |
|
"logps/rejected": -732.0203247070312, |
|
"loss": 0.0857, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.43941551446914673, |
|
"rewards/margins": 0.09977763891220093, |
|
"rewards/rejected": -0.5391931533813477, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4681472606421512e-06, |
|
"logits/chosen": -2.2577121257781982, |
|
"logits/rejected": -1.9029508829116821, |
|
"logps/chosen": -682.9171142578125, |
|
"logps/rejected": -752.029296875, |
|
"loss": 0.0774, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.46012359857559204, |
|
"rewards/margins": 0.10639479011297226, |
|
"rewards/rejected": -0.5665184259414673, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.4409692876047582e-06, |
|
"logits/chosen": -2.3715949058532715, |
|
"logits/rejected": -1.9981542825698853, |
|
"logps/chosen": -686.21630859375, |
|
"logps/rejected": -765.2901611328125, |
|
"loss": 0.0823, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.44159477949142456, |
|
"rewards/margins": 0.10385797917842865, |
|
"rewards/rejected": -0.5454527735710144, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.4139430002188486e-06, |
|
"logits/chosen": -2.1407675743103027, |
|
"logits/rejected": -2.0084660053253174, |
|
"logps/chosen": -565.1947631835938, |
|
"logps/rejected": -623.761474609375, |
|
"loss": 0.0826, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.380687952041626, |
|
"rewards/margins": 0.06562694162130356, |
|
"rewards/rejected": -0.44631490111351013, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.3870722694769858e-06, |
|
"logits/chosen": -2.301060199737549, |
|
"logits/rejected": -2.099889039993286, |
|
"logps/chosen": -667.0670166015625, |
|
"logps/rejected": -778.9105834960938, |
|
"loss": 0.0774, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.43501314520835876, |
|
"rewards/margins": 0.11666470766067505, |
|
"rewards/rejected": -0.5516778230667114, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3603609440912508e-06, |
|
"logits/chosen": -2.1625216007232666, |
|
"logits/rejected": -2.026824951171875, |
|
"logps/chosen": -466.3900451660156, |
|
"logps/rejected": -600.93896484375, |
|
"loss": 0.1211, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.3264867663383484, |
|
"rewards/margins": 0.1292310655117035, |
|
"rewards/rejected": -0.4557178020477295, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3338128499419925e-06, |
|
"logits/chosen": -2.277644634246826, |
|
"logits/rejected": -1.7413593530654907, |
|
"logps/chosen": -672.1463623046875, |
|
"logps/rejected": -718.1644287109375, |
|
"loss": 0.0756, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.4492368698120117, |
|
"rewards/margins": 0.10236699879169464, |
|
"rewards/rejected": -0.5516039133071899, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3074317895298492e-06, |
|
"logits/chosen": -2.2224433422088623, |
|
"logits/rejected": -1.9981178045272827, |
|
"logps/chosen": -816.6317138671875, |
|
"logps/rejected": -843.4306640625, |
|
"loss": 0.0589, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.539585292339325, |
|
"rewards/margins": 0.07050606608390808, |
|
"rewards/rejected": -0.6100913882255554, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2812215414311036e-06, |
|
"logits/chosen": -2.049561023712158, |
|
"logits/rejected": -1.8878101110458374, |
|
"logps/chosen": -747.5322265625, |
|
"logps/rejected": -847.0015869140625, |
|
"loss": 0.0788, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.5520612597465515, |
|
"rewards/margins": 0.08974529802799225, |
|
"rewards/rejected": -0.6418064832687378, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2551858597564859e-06, |
|
"logits/chosen": -2.118635654449463, |
|
"logits/rejected": -1.993412971496582, |
|
"logps/chosen": -758.1939697265625, |
|
"logps/rejected": -849.9517822265625, |
|
"loss": 0.0765, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.5349586009979248, |
|
"rewards/margins": 0.06833256781101227, |
|
"rewards/rejected": -0.6032911539077759, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2293284736134605e-06, |
|
"logits/chosen": -2.226203441619873, |
|
"logits/rejected": -2.0049796104431152, |
|
"logps/chosen": -663.0206298828125, |
|
"logps/rejected": -744.8146362304688, |
|
"loss": 0.0652, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.4578720033168793, |
|
"rewards/margins": 0.0849481076002121, |
|
"rewards/rejected": -0.5428200960159302, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.2036530865721115e-06, |
|
"logits/chosen": -2.1977055072784424, |
|
"logits/rejected": -1.923651099205017, |
|
"logps/chosen": -752.8314208984375, |
|
"logps/rejected": -866.5847778320312, |
|
"loss": 0.0559, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.5004793405532837, |
|
"rewards/margins": 0.11119532585144043, |
|
"rewards/rejected": -0.6116746664047241, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.178163376134671e-06, |
|
"logits/chosen": -2.203216552734375, |
|
"logits/rejected": -2.1631100177764893, |
|
"logps/chosen": -730.8253784179688, |
|
"logps/rejected": -790.9908447265625, |
|
"loss": 0.086, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.49312084913253784, |
|
"rewards/margins": 0.06093892455101013, |
|
"rewards/rejected": -0.5540598034858704, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.152862993208794e-06, |
|
"logits/chosen": -2.1542184352874756, |
|
"logits/rejected": -1.721289038658142, |
|
"logps/chosen": -694.8465576171875, |
|
"logps/rejected": -716.5363159179688, |
|
"loss": 0.0579, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.4218994081020355, |
|
"rewards/margins": 0.10558446496725082, |
|
"rewards/rejected": -0.5274838805198669, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.1277555615846339e-06, |
|
"logits/chosen": -2.0153145790100098, |
|
"logits/rejected": -1.8428608179092407, |
|
"logps/chosen": -670.2786865234375, |
|
"logps/rejected": -854.0362548828125, |
|
"loss": 0.0693, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.43655434250831604, |
|
"rewards/margins": 0.15288135409355164, |
|
"rewards/rejected": -0.5894356966018677, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.1028446774158021e-06, |
|
"logits/chosen": -2.2273738384246826, |
|
"logits/rejected": -2.0033624172210693, |
|
"logps/chosen": -650.3299560546875, |
|
"logps/rejected": -771.5960693359375, |
|
"loss": 0.0669, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.44391244649887085, |
|
"rewards/margins": 0.1265546977519989, |
|
"rewards/rejected": -0.5704671740531921, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.0781339087042955e-06, |
|
"logits/chosen": -2.233987808227539, |
|
"logits/rejected": -1.9722740650177002, |
|
"logps/chosen": -668.952392578125, |
|
"logps/rejected": -746.8803100585938, |
|
"loss": 0.092, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.4446966052055359, |
|
"rewards/margins": 0.10449746996164322, |
|
"rewards/rejected": -0.5491940975189209, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.053626794789441e-06, |
|
"logits/chosen": -2.167900562286377, |
|
"logits/rejected": -2.1640636920928955, |
|
"logps/chosen": -722.8687744140625, |
|
"logps/rejected": -834.3153076171875, |
|
"loss": 0.0647, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.5201176404953003, |
|
"rewards/margins": 0.06028919294476509, |
|
"rewards/rejected": -0.5804067850112915, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.029326845840961e-06, |
|
"logits/chosen": -2.3251118659973145, |
|
"logits/rejected": -1.9303410053253174, |
|
"logps/chosen": -679.392822265625, |
|
"logps/rejected": -753.8246459960938, |
|
"loss": 0.056, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.43752750754356384, |
|
"rewards/margins": 0.13090887665748596, |
|
"rewards/rejected": -0.5684363842010498, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0052375423562038e-06, |
|
"logits/chosen": -2.242652416229248, |
|
"logits/rejected": -2.0578553676605225, |
|
"logps/chosen": -684.14013671875, |
|
"logps/rejected": -813.5594482421875, |
|
"loss": 0.0601, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.4639926850795746, |
|
"rewards/margins": 0.08700541406869888, |
|
"rewards/rejected": -0.5509980916976929, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.813623346616325e-07, |
|
"logits/chosen": -1.9593162536621094, |
|
"logits/rejected": -1.3148808479309082, |
|
"logps/chosen": -727.7343139648438, |
|
"logps/rejected": -799.2838134765625, |
|
"loss": 0.0755, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.43542367219924927, |
|
"rewards/margins": 0.15335293114185333, |
|
"rewards/rejected": -0.5887765884399414, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.577046424186336e-07, |
|
"logits/chosen": -2.250488758087158, |
|
"logits/rejected": -2.152696132659912, |
|
"logps/chosen": -743.4170532226562, |
|
"logps/rejected": -749.8002319335938, |
|
"loss": 0.0858, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.45544886589050293, |
|
"rewards/margins": 0.04140906408429146, |
|
"rewards/rejected": -0.4968579411506653, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.342678541337155e-07, |
|
"logits/chosen": -2.17391037940979, |
|
"logits/rejected": -1.7850377559661865, |
|
"logps/chosen": -670.3488159179688, |
|
"logps/rejected": -719.6155395507812, |
|
"loss": 0.1111, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.47339048981666565, |
|
"rewards/margins": 0.07163342088460922, |
|
"rewards/rejected": -0.5450239181518555, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.110553266731676e-07, |
|
"logits/chosen": -1.9487330913543701, |
|
"logits/rejected": -1.920854926109314, |
|
"logps/chosen": -684.4945068359375, |
|
"logps/rejected": -799.9359130859375, |
|
"loss": 0.0455, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.47916141152381897, |
|
"rewards/margins": 0.1000744104385376, |
|
"rewards/rejected": -0.5792357921600342, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.880703847822603e-07, |
|
"logits/chosen": -2.07055401802063, |
|
"logits/rejected": -1.928727149963379, |
|
"logps/chosen": -658.1580200195312, |
|
"logps/rejected": -768.7928466796875, |
|
"loss": 0.0684, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.4428200125694275, |
|
"rewards/margins": 0.12602929770946503, |
|
"rewards/rejected": -0.5688492655754089, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.653163206090326e-07, |
|
"logits/chosen": -2.4357573986053467, |
|
"logits/rejected": -1.9457374811172485, |
|
"logps/chosen": -623.8139038085938, |
|
"logps/rejected": -643.5352783203125, |
|
"loss": 0.0653, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.37632665038108826, |
|
"rewards/margins": 0.08694492280483246, |
|
"rewards/rejected": -0.4632716178894043, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.427963932327621e-07, |
|
"logits/chosen": -2.18113112449646, |
|
"logits/rejected": -2.0196382999420166, |
|
"logps/chosen": -602.2532958984375, |
|
"logps/rejected": -781.3187255859375, |
|
"loss": 0.0677, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.4085915982723236, |
|
"rewards/margins": 0.16642124950885773, |
|
"rewards/rejected": -0.5750128030776978, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.205138281971617e-07, |
|
"logits/chosen": -2.0964841842651367, |
|
"logits/rejected": -1.816980004310608, |
|
"logps/chosen": -691.4669189453125, |
|
"logps/rejected": -711.4818725585938, |
|
"loss": 0.0626, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.4612464904785156, |
|
"rewards/margins": 0.0856751948595047, |
|
"rewards/rejected": -0.5469216704368591, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.984718170483813e-07, |
|
"logits/chosen": -2.1438546180725098, |
|
"logits/rejected": -1.992221474647522, |
|
"logps/chosen": -613.9456787109375, |
|
"logps/rejected": -792.9467163085938, |
|
"loss": 0.1208, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.41083821654319763, |
|
"rewards/margins": 0.1366868019104004, |
|
"rewards/rejected": -0.5475250482559204, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.766735168778853e-07, |
|
"logits/chosen": -2.3303608894348145, |
|
"logits/rejected": -1.964838981628418, |
|
"logps/chosen": -724.9031982421875, |
|
"logps/rejected": -794.0396728515625, |
|
"loss": 0.065, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.4382435381412506, |
|
"rewards/margins": 0.1007271558046341, |
|
"rewards/rejected": -0.5389707088470459, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.551220498702547e-07, |
|
"logits/chosen": -2.219709873199463, |
|
"logits/rejected": -1.8147242069244385, |
|
"logps/chosen": -690.845458984375, |
|
"logps/rejected": -772.4855346679688, |
|
"loss": 0.0714, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.4500810205936432, |
|
"rewards/margins": 0.10746470838785172, |
|
"rewards/rejected": -0.5575457215309143, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.338205028560003e-07, |
|
"logits/chosen": -2.296119213104248, |
|
"logits/rejected": -1.979353904724121, |
|
"logps/chosen": -654.2385864257812, |
|
"logps/rejected": -730.311767578125, |
|
"loss": 0.0731, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.4028875231742859, |
|
"rewards/margins": 0.1070215255022049, |
|
"rewards/rejected": -0.5099090337753296, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.127719268694294e-07, |
|
"logits/chosen": -2.161729097366333, |
|
"logits/rejected": -1.8845264911651611, |
|
"logps/chosen": -690.8424682617188, |
|
"logps/rejected": -765.8018798828125, |
|
"loss": 0.0763, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.45226621627807617, |
|
"rewards/margins": 0.1182960644364357, |
|
"rewards/rejected": -0.5705623626708984, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.919793367116453e-07, |
|
"logits/chosen": -2.2758870124816895, |
|
"logits/rejected": -2.1513137817382812, |
|
"logps/chosen": -645.7116088867188, |
|
"logps/rejected": -750.9886474609375, |
|
"loss": 0.0744, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.39909255504608154, |
|
"rewards/margins": 0.11400028318166733, |
|
"rewards/rejected": -0.5130928158760071, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.714457105187383e-07, |
|
"logits/chosen": -2.358992338180542, |
|
"logits/rejected": -1.8777456283569336, |
|
"logps/chosen": -718.3897094726562, |
|
"logps/rejected": -846.4613037109375, |
|
"loss": 0.069, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.4483721852302551, |
|
"rewards/margins": 0.1424846649169922, |
|
"rewards/rejected": -0.5908567905426025, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.511739893352226e-07, |
|
"logits/chosen": -2.1870875358581543, |
|
"logits/rejected": -2.080655336380005, |
|
"logps/chosen": -697.439208984375, |
|
"logps/rejected": -725.9364013671875, |
|
"loss": 0.0663, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.4647183418273926, |
|
"rewards/margins": 0.0482785627245903, |
|
"rewards/rejected": -0.5129969120025635, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.311670766927869e-07, |
|
"logits/chosen": -1.9962774515151978, |
|
"logits/rejected": -1.9848453998565674, |
|
"logps/chosen": -651.7489013671875, |
|
"logps/rejected": -729.2185668945312, |
|
"loss": 0.1025, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.46112656593322754, |
|
"rewards/margins": 0.07899859547615051, |
|
"rewards/rejected": -0.5401251912117004, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 6.114278381944253e-07, |
|
"logits/chosen": -2.1914873123168945, |
|
"logits/rejected": -2.372349500656128, |
|
"logps/chosen": -576.5204467773438, |
|
"logps/rejected": -652.2879638671875, |
|
"loss": 0.0874, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.38175448775291443, |
|
"rewards/margins": 0.045350007712841034, |
|
"rewards/rejected": -0.42710447311401367, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.91959101103988e-07, |
|
"logits/chosen": -2.443941593170166, |
|
"logits/rejected": -2.3585076332092285, |
|
"logps/chosen": -623.3038940429688, |
|
"logps/rejected": -749.5606689453125, |
|
"loss": 0.0805, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.40153616666793823, |
|
"rewards/margins": 0.12084267288446426, |
|
"rewards/rejected": -0.5223788619041443, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.727636539412368e-07, |
|
"logits/chosen": -2.2379660606384277, |
|
"logits/rejected": -1.8004734516143799, |
|
"logps/chosen": -637.1851806640625, |
|
"logps/rejected": -713.89697265625, |
|
"loss": 0.0514, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.3866319954395294, |
|
"rewards/margins": 0.12998130917549133, |
|
"rewards/rejected": -0.5166133642196655, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.538442460824417e-07, |
|
"logits/chosen": -2.189680576324463, |
|
"logits/rejected": -1.9954487085342407, |
|
"logps/chosen": -629.7688598632812, |
|
"logps/rejected": -715.0001831054688, |
|
"loss": 0.0985, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.4157083034515381, |
|
"rewards/margins": 0.10353025048971176, |
|
"rewards/rejected": -0.5192385911941528, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.352035873665817e-07, |
|
"logits/chosen": -2.3851559162139893, |
|
"logits/rejected": -2.140918016433716, |
|
"logps/chosen": -579.01123046875, |
|
"logps/rejected": -623.9471435546875, |
|
"loss": 0.0898, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.35611358284950256, |
|
"rewards/margins": 0.07924910634756088, |
|
"rewards/rejected": -0.43536263704299927, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.168443477072207e-07, |
|
"logits/chosen": -2.320765256881714, |
|
"logits/rejected": -1.9919421672821045, |
|
"logps/chosen": -578.265625, |
|
"logps/rejected": -717.7510375976562, |
|
"loss": 0.065, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.3810497522354126, |
|
"rewards/margins": 0.13789904117584229, |
|
"rewards/rejected": -0.5189487338066101, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.987691567100866e-07, |
|
"logits/chosen": -2.205519199371338, |
|
"logits/rejected": -1.9962717294692993, |
|
"logps/chosen": -694.5599365234375, |
|
"logps/rejected": -781.102783203125, |
|
"loss": 0.0873, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.4531136155128479, |
|
"rewards/margins": 0.08138440549373627, |
|
"rewards/rejected": -0.5344979763031006, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.809806032964351e-07, |
|
"logits/chosen": -2.2204413414001465, |
|
"logits/rejected": -2.0109105110168457, |
|
"logps/chosen": -601.0872802734375, |
|
"logps/rejected": -644.5216064453125, |
|
"loss": 0.1044, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -0.3811626136302948, |
|
"rewards/margins": 0.06587550789117813, |
|
"rewards/rejected": -0.44703811407089233, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.634812353322371e-07, |
|
"logits/chosen": -2.3256497383117676, |
|
"logits/rejected": -1.8949310779571533, |
|
"logps/chosen": -661.0736083984375, |
|
"logps/rejected": -761.2631225585938, |
|
"loss": 0.0912, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.4190675616264343, |
|
"rewards/margins": 0.11600930988788605, |
|
"rewards/rejected": -0.535076916217804, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.462735592632439e-07, |
|
"logits/chosen": -2.003680944442749, |
|
"logits/rejected": -1.8513685464859009, |
|
"logps/chosen": -706.0535278320312, |
|
"logps/rejected": -843.9226684570312, |
|
"loss": 0.0716, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.45064783096313477, |
|
"rewards/margins": 0.1290721446275711, |
|
"rewards/rejected": -0.5797199606895447, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.293600397559897e-07, |
|
"logits/chosen": -2.1723484992980957, |
|
"logits/rejected": -1.9190336465835571, |
|
"logps/chosen": -624.4666137695312, |
|
"logps/rejected": -677.8482055664062, |
|
"loss": 0.0984, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.41719359159469604, |
|
"rewards/margins": 0.05301886796951294, |
|
"rewards/rejected": -0.470212459564209, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.1274309934477454e-07, |
|
"logits/chosen": -2.106175422668457, |
|
"logits/rejected": -1.8884546756744385, |
|
"logps/chosen": -612.33544921875, |
|
"logps/rejected": -652.5370483398438, |
|
"loss": 0.0818, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.39127975702285767, |
|
"rewards/margins": 0.054196156561374664, |
|
"rewards/rejected": -0.44547590613365173, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.964251180846826e-07, |
|
"logits/chosen": -2.3211159706115723, |
|
"logits/rejected": -1.9752849340438843, |
|
"logps/chosen": -544.6336669921875, |
|
"logps/rejected": -604.1102294921875, |
|
"loss": 0.0891, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.34942182898521423, |
|
"rewards/margins": 0.07262709736824036, |
|
"rewards/rejected": -0.422048956155777, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.8040843321068746e-07, |
|
"logits/chosen": -2.1404693126678467, |
|
"logits/rejected": -1.9147093296051025, |
|
"logps/chosen": -709.58154296875, |
|
"logps/rejected": -804.4060668945312, |
|
"loss": 0.0657, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.48798832297325134, |
|
"rewards/margins": 0.10476765781641006, |
|
"rewards/rejected": -0.5927559733390808, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.646953388028854e-07, |
|
"logits/chosen": -2.04292631149292, |
|
"logits/rejected": -1.5764219760894775, |
|
"logps/chosen": -687.10546875, |
|
"logps/rejected": -760.7067260742188, |
|
"loss": 0.0816, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.4793581962585449, |
|
"rewards/margins": 0.11864249408245087, |
|
"rewards/rejected": -0.598000705242157, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.4928808545791614e-07, |
|
"logits/chosen": -2.073615550994873, |
|
"logits/rejected": -2.1788148880004883, |
|
"logps/chosen": -638.221435546875, |
|
"logps/rejected": -761.6395263671875, |
|
"loss": 0.0891, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.44060444831848145, |
|
"rewards/margins": 0.09731185436248779, |
|
"rewards/rejected": -0.5379163026809692, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.341888799666068e-07, |
|
"logits/chosen": -2.262629747390747, |
|
"logits/rejected": -1.9284954071044922, |
|
"logps/chosen": -686.7388916015625, |
|
"logps/rejected": -743.3775634765625, |
|
"loss": 0.0855, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.4059697091579437, |
|
"rewards/margins": 0.09988965839147568, |
|
"rewards/rejected": -0.505859375, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.1939988499789075e-07, |
|
"logits/chosen": -2.0978739261627197, |
|
"logits/rejected": -1.8978351354599, |
|
"logps/chosen": -708.4168701171875, |
|
"logps/rejected": -820.39404296875, |
|
"loss": 0.0743, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.49596619606018066, |
|
"rewards/margins": 0.1125420555472374, |
|
"rewards/rejected": -0.6085082292556763, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.0492321878904913e-07, |
|
"logits/chosen": -2.175656795501709, |
|
"logits/rejected": -1.7882719039916992, |
|
"logps/chosen": -786.6962280273438, |
|
"logps/rejected": -890.03466796875, |
|
"loss": 0.1061, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.5101443529129028, |
|
"rewards/margins": 0.11035291850566864, |
|
"rewards/rejected": -0.6204972267150879, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.907609548423135e-07, |
|
"logits/chosen": -2.3059334754943848, |
|
"logits/rejected": -2.0130183696746826, |
|
"logps/chosen": -542.1705322265625, |
|
"logps/rejected": -715.5118408203125, |
|
"loss": 0.0811, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.36655205488204956, |
|
"rewards/margins": 0.14779023826122284, |
|
"rewards/rejected": -0.5143422484397888, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.7691512162787567e-07, |
|
"logits/chosen": -2.0405287742614746, |
|
"logits/rejected": -2.0915091037750244, |
|
"logps/chosen": -702.5558471679688, |
|
"logps/rejected": -788.6998291015625, |
|
"loss": 0.0696, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.4405028223991394, |
|
"rewards/margins": 0.09354646503925323, |
|
"rewards/rejected": -0.5340492129325867, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.6338770229335176e-07, |
|
"logits/chosen": -2.1023449897766113, |
|
"logits/rejected": -1.8752168416976929, |
|
"logps/chosen": -760.4970092773438, |
|
"logps/rejected": -833.1339721679688, |
|
"loss": 0.0622, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.46654874086380005, |
|
"rewards/margins": 0.10432098805904388, |
|
"rewards/rejected": -0.5708697438240051, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.501806343797303e-07, |
|
"logits/chosen": -2.3457446098327637, |
|
"logits/rejected": -2.3163278102874756, |
|
"logps/chosen": -633.0409545898438, |
|
"logps/rejected": -750.288818359375, |
|
"loss": 0.093, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.43354344367980957, |
|
"rewards/margins": 0.08344617486000061, |
|
"rewards/rejected": -0.516989529132843, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.3729580954386183e-07, |
|
"logits/chosen": -2.204981803894043, |
|
"logits/rejected": -1.9701652526855469, |
|
"logps/chosen": -688.1578979492188, |
|
"logps/rejected": -808.5591430664062, |
|
"loss": 0.061, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.44313064217567444, |
|
"rewards/margins": 0.14626091718673706, |
|
"rewards/rejected": -0.5893915891647339, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.2473507328751086e-07, |
|
"logits/chosen": -1.9686768054962158, |
|
"logits/rejected": -1.7817811965942383, |
|
"logps/chosen": -637.0069580078125, |
|
"logps/rejected": -730.8543090820312, |
|
"loss": 0.0792, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.3913404643535614, |
|
"rewards/margins": 0.11009415239095688, |
|
"rewards/rejected": -0.5014346241950989, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.1250022469302745e-07, |
|
"logits/chosen": -2.3968021869659424, |
|
"logits/rejected": -1.9710814952850342, |
|
"logps/chosen": -783.5001220703125, |
|
"logps/rejected": -783.8637084960938, |
|
"loss": 0.0559, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.4583209156990051, |
|
"rewards/margins": 0.07906268537044525, |
|
"rewards/rejected": -0.537383496761322, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.0059301616566107e-07, |
|
"logits/chosen": -1.9925310611724854, |
|
"logits/rejected": -2.1229825019836426, |
|
"logps/chosen": -628.9017944335938, |
|
"logps/rejected": -814.9282836914062, |
|
"loss": 0.0987, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.48036131262779236, |
|
"rewards/margins": 0.1412503570318222, |
|
"rewards/rejected": -0.6216117143630981, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.8901515318256318e-07, |
|
"logits/chosen": -2.3602724075317383, |
|
"logits/rejected": -1.8974645137786865, |
|
"logps/chosen": -728.95947265625, |
|
"logps/rejected": -866.9889526367188, |
|
"loss": 0.053, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.4281562268733978, |
|
"rewards/margins": 0.11615494638681412, |
|
"rewards/rejected": -0.5443111658096313, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.7776829404851092e-07, |
|
"logits/chosen": -2.2596402168273926, |
|
"logits/rejected": -1.877781867980957, |
|
"logps/chosen": -777.1273193359375, |
|
"logps/rejected": -868.2491455078125, |
|
"loss": 0.0797, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.4920421242713928, |
|
"rewards/margins": 0.14914286136627197, |
|
"rewards/rejected": -0.64118492603302, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.6685404965838647e-07, |
|
"logits/chosen": -2.3345799446105957, |
|
"logits/rejected": -2.0257980823516846, |
|
"logps/chosen": -681.239990234375, |
|
"logps/rejected": -695.14501953125, |
|
"loss": 0.0789, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.4106011986732483, |
|
"rewards/margins": 0.07871082425117493, |
|
"rewards/rejected": -0.48931199312210083, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.5627398326644811e-07, |
|
"logits/chosen": -2.2587788105010986, |
|
"logits/rejected": -2.0362513065338135, |
|
"logps/chosen": -705.2887573242188, |
|
"logps/rejected": -740.9500122070312, |
|
"loss": 0.0835, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.45956987142562866, |
|
"rewards/margins": 0.06559441983699799, |
|
"rewards/rejected": -0.5251643061637878, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.460296102624248e-07, |
|
"logits/chosen": -2.258861541748047, |
|
"logits/rejected": -2.2292912006378174, |
|
"logps/chosen": -662.3797607421875, |
|
"logps/rejected": -802.5979614257812, |
|
"loss": 0.0791, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.43469151854515076, |
|
"rewards/margins": 0.11645804345607758, |
|
"rewards/rejected": -0.5511494874954224, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.3612239795446348e-07, |
|
"logits/chosen": -2.229052782058716, |
|
"logits/rejected": -1.8262239694595337, |
|
"logps/chosen": -585.1064453125, |
|
"logps/rejected": -665.6681518554688, |
|
"loss": 0.0482, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.3851460814476013, |
|
"rewards/margins": 0.09434196352958679, |
|
"rewards/rejected": -0.4794880449771881, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.2655376535896852e-07, |
|
"logits/chosen": -2.274597644805908, |
|
"logits/rejected": -1.9262031316757202, |
|
"logps/chosen": -641.3375244140625, |
|
"logps/rejected": -756.7232666015625, |
|
"loss": 0.0618, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.3911328911781311, |
|
"rewards/margins": 0.13705289363861084, |
|
"rewards/rejected": -0.5281857252120972, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.1732508299735379e-07, |
|
"logits/chosen": -2.2501373291015625, |
|
"logits/rejected": -1.8707962036132812, |
|
"logps/chosen": -603.0183715820312, |
|
"logps/rejected": -690.1578369140625, |
|
"loss": 0.0705, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.39655619859695435, |
|
"rewards/margins": 0.1162206381559372, |
|
"rewards/rejected": -0.5127768516540527, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.0843767269974131e-07, |
|
"logits/chosen": -2.2717010974884033, |
|
"logits/rejected": -2.084096908569336, |
|
"logps/chosen": -690.6661987304688, |
|
"logps/rejected": -738.0612182617188, |
|
"loss": 0.0704, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.44601020216941833, |
|
"rewards/margins": 0.0691831111907959, |
|
"rewards/rejected": -0.5151932239532471, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.989280741563689e-08, |
|
"logits/chosen": -2.2434730529785156, |
|
"logits/rejected": -1.928205132484436, |
|
"logps/chosen": -684.7159423828125, |
|
"logps/rejected": -753.5374755859375, |
|
"loss": 0.0737, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.42067623138427734, |
|
"rewards/margins": 0.11073021590709686, |
|
"rewards/rejected": -0.5314064621925354, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.169171103160123e-08, |
|
"logits/chosen": -2.2055764198303223, |
|
"logits/rejected": -1.9301769733428955, |
|
"logps/chosen": -670.9398803710938, |
|
"logps/rejected": -797.8900756835938, |
|
"loss": 0.0828, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.4384661316871643, |
|
"rewards/margins": 0.12283768504858017, |
|
"rewards/rejected": -0.5613037347793579, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 8.383555819595601e-08, |
|
"logits/chosen": -2.2271904945373535, |
|
"logits/rejected": -2.005030393600464, |
|
"logps/chosen": -687.7010498046875, |
|
"logps/rejected": -819.2864379882812, |
|
"loss": 0.0767, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.4591870903968811, |
|
"rewards/margins": 0.14114715158939362, |
|
"rewards/rejected": -0.6003342270851135, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.632547415053482e-08, |
|
"logits/chosen": -2.4016735553741455, |
|
"logits/rejected": -2.1341352462768555, |
|
"logps/chosen": -751.8325805664062, |
|
"logps/rejected": -827.7970581054688, |
|
"loss": 0.1146, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.48245877027511597, |
|
"rewards/margins": 0.08054832369089127, |
|
"rewards/rejected": -0.5630070567131042, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 6.916253456951572e-08, |
|
"logits/chosen": -2.2689526081085205, |
|
"logits/rejected": -2.0502147674560547, |
|
"logps/chosen": -725.8679809570312, |
|
"logps/rejected": -830.0611572265625, |
|
"loss": 0.042, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.46661239862442017, |
|
"rewards/margins": 0.1116294115781784, |
|
"rewards/rejected": -0.578241765499115, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.23477654053517e-08, |
|
"logits/chosen": -2.389768600463867, |
|
"logits/rejected": -1.788630723953247, |
|
"logps/chosen": -633.7202758789062, |
|
"logps/rejected": -682.6315307617188, |
|
"loss": 0.078, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.37593162059783936, |
|
"rewards/margins": 0.1151694804430008, |
|
"rewards/rejected": -0.49110108613967896, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.588214274182158e-08, |
|
"logits/chosen": -2.29005765914917, |
|
"logits/rejected": -1.8733975887298584, |
|
"logps/chosen": -736.537841796875, |
|
"logps/rejected": -748.2416381835938, |
|
"loss": 0.1075, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.42465394735336304, |
|
"rewards/margins": 0.09247386455535889, |
|
"rewards/rejected": -0.5171278119087219, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.9766592654227344e-08, |
|
"logits/chosen": -2.290553092956543, |
|
"logits/rejected": -1.8525292873382568, |
|
"logps/chosen": -776.4290161132812, |
|
"logps/rejected": -816.3228149414062, |
|
"loss": 0.0501, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.49286454916000366, |
|
"rewards/margins": 0.10129784047603607, |
|
"rewards/rejected": -0.5941623449325562, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.400199107674946e-08, |
|
"logits/chosen": -2.1630892753601074, |
|
"logits/rejected": -1.940159559249878, |
|
"logps/chosen": -665.2659912109375, |
|
"logps/rejected": -710.6292724609375, |
|
"loss": 0.0637, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.4292607307434082, |
|
"rewards/margins": 0.062006641179323196, |
|
"rewards/rejected": -0.4912673532962799, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.8589163676986674e-08, |
|
"logits/chosen": -2.389782190322876, |
|
"logits/rejected": -2.072308301925659, |
|
"logps/chosen": -739.9847412109375, |
|
"logps/rejected": -799.0582885742188, |
|
"loss": 0.0508, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.42423492670059204, |
|
"rewards/margins": 0.11466383934020996, |
|
"rewards/rejected": -0.538898766040802, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.3528885737696136e-08, |
|
"logits/chosen": -2.229954957962036, |
|
"logits/rejected": -2.001962661743164, |
|
"logps/chosen": -763.1449584960938, |
|
"logps/rejected": -890.884765625, |
|
"loss": 0.0776, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.4881008267402649, |
|
"rewards/margins": 0.1101212278008461, |
|
"rewards/rejected": -0.598222017288208, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.8821882045748928e-08, |
|
"logits/chosen": -2.1927006244659424, |
|
"logits/rejected": -1.9519379138946533, |
|
"logps/chosen": -609.7821044921875, |
|
"logps/rejected": -670.4013671875, |
|
"loss": 0.1037, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.43954816460609436, |
|
"rewards/margins": 0.08661254495382309, |
|
"rewards/rejected": -0.526160717010498, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.4468826788316967e-08, |
|
"logits/chosen": -2.2692456245422363, |
|
"logits/rejected": -1.8533859252929688, |
|
"logps/chosen": -694.7283935546875, |
|
"logps/rejected": -838.3411865234375, |
|
"loss": 0.0369, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.41949623823165894, |
|
"rewards/margins": 0.1585858017206192, |
|
"rewards/rejected": -0.5780820846557617, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.0470343456310827e-08, |
|
"logits/chosen": -2.1465401649475098, |
|
"logits/rejected": -1.92649245262146, |
|
"logps/chosen": -801.5432739257812, |
|
"logps/rejected": -876.83203125, |
|
"loss": 0.0925, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5086441040039062, |
|
"rewards/margins": 0.1165408119559288, |
|
"rewards/rejected": -0.6251848936080933, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.682700475507476e-08, |
|
"logits/chosen": -2.319272518157959, |
|
"logits/rejected": -2.125800609588623, |
|
"logps/chosen": -730.833740234375, |
|
"logps/rejected": -750.211669921875, |
|
"loss": 0.08, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.44253936409950256, |
|
"rewards/margins": 0.06718467175960541, |
|
"rewards/rejected": -0.5097240209579468, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.3539332522359282e-08, |
|
"logits/chosen": -2.2746429443359375, |
|
"logits/rejected": -1.9740593433380127, |
|
"logps/chosen": -795.7684936523438, |
|
"logps/rejected": -879.1627197265625, |
|
"loss": 0.0623, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.519045352935791, |
|
"rewards/margins": 0.11925999075174332, |
|
"rewards/rejected": -0.6383053660392761, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.0607797653577333e-08, |
|
"logits/chosen": -2.160733938217163, |
|
"logits/rejected": -1.9417308568954468, |
|
"logps/chosen": -561.7853393554688, |
|
"logps/rejected": -670.7401733398438, |
|
"loss": 0.0739, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.3653794825077057, |
|
"rewards/margins": 0.09239096939563751, |
|
"rewards/rejected": -0.457770437002182, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 8.032820034357126e-09, |
|
"logits/chosen": -2.2955965995788574, |
|
"logits/rejected": -2.136737585067749, |
|
"logps/chosen": -670.2855224609375, |
|
"logps/rejected": -771.9925537109375, |
|
"loss": 0.0742, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.4561973214149475, |
|
"rewards/margins": 0.10345951467752457, |
|
"rewards/rejected": -0.5596567988395691, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5.814768480403021e-09, |
|
"logits/chosen": -2.0290589332580566, |
|
"logits/rejected": -1.981529951095581, |
|
"logps/chosen": -576.2341918945312, |
|
"logps/rejected": -762.9359130859375, |
|
"loss": 0.0692, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.4308190941810608, |
|
"rewards/margins": 0.13548357784748077, |
|
"rewards/rejected": -0.5663026571273804, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.953960684668634e-09, |
|
"logits/chosen": -2.110337018966675, |
|
"logits/rejected": -2.0485644340515137, |
|
"logps/chosen": -660.661865234375, |
|
"logps/rejected": -738.81884765625, |
|
"loss": 0.0598, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.44688519835472107, |
|
"rewards/margins": 0.08198593556880951, |
|
"rewards/rejected": -0.5288710594177246, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.4506631718534956e-09, |
|
"logits/chosen": -2.2245066165924072, |
|
"logits/rejected": -1.9781780242919922, |
|
"logps/chosen": -732.562255859375, |
|
"logps/rejected": -833.1080932617188, |
|
"loss": 0.0685, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.48917946219444275, |
|
"rewards/margins": 0.0765211433172226, |
|
"rewards/rejected": -0.5657006502151489, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.3050912602297071e-09, |
|
"logits/chosen": -2.0510215759277344, |
|
"logits/rejected": -1.8791606426239014, |
|
"logps/chosen": -708.2174072265625, |
|
"logps/rejected": -814.8040161132812, |
|
"loss": 0.079, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.46390801668167114, |
|
"rewards/margins": 0.11161540448665619, |
|
"rewards/rejected": -0.5755234360694885, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 5.1740903080022e-10, |
|
"logits/chosen": -2.4098868370056152, |
|
"logits/rejected": -1.9928724765777588, |
|
"logps/chosen": -596.1256713867188, |
|
"logps/rejected": -630.7376708984375, |
|
"loss": 0.0822, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.3659174144268036, |
|
"rewards/margins": 0.11456866562366486, |
|
"rewards/rejected": -0.48048609495162964, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 8.772930379846723e-11, |
|
"logits/chosen": -2.240025043487549, |
|
"logits/rejected": -2.1385819911956787, |
|
"logps/chosen": -616.0072021484375, |
|
"logps/rejected": -749.4381103515625, |
|
"loss": 0.0707, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.4194551110267639, |
|
"rewards/margins": 0.09669093787670135, |
|
"rewards/rejected": -0.5161460041999817, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 2917, |
|
"total_flos": 0.0, |
|
"train_loss": 0.0819665433958372, |
|
"train_runtime": 16805.1619, |
|
"train_samples_per_second": 1.389, |
|
"train_steps_per_second": 0.174 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2917, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|