|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.6001919846412287, |
|
"eval_steps": 500, |
|
"global_step": 2501, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.9920318725099604e-08, |
|
"logits/chosen": -1.8077198266983032, |
|
"logits/rejected": -1.711557388305664, |
|
"logps/chosen": -187.02471923828125, |
|
"logps/rejected": -122.2266616821289, |
|
"loss": 0.4697, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.9920318725099604e-07, |
|
"logits/chosen": -1.91868257522583, |
|
"logits/rejected": -1.382498860359192, |
|
"logps/chosen": -176.21807861328125, |
|
"logps/rejected": -120.50502014160156, |
|
"loss": 0.4051, |
|
"rewards/accuracies": 0.2777777910232544, |
|
"rewards/chosen": -4.647710011340678e-05, |
|
"rewards/margins": -6.936895078979433e-05, |
|
"rewards/rejected": 2.2891843400429934e-05, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 3.9840637450199207e-07, |
|
"logits/chosen": -1.933166265487671, |
|
"logits/rejected": -1.376651406288147, |
|
"logps/chosen": -183.34518432617188, |
|
"logps/rejected": -109.8729476928711, |
|
"loss": 0.3902, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 5.835342381033115e-05, |
|
"rewards/margins": 5.425453491625376e-05, |
|
"rewards/rejected": 4.098887529835338e-06, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.976095617529881e-07, |
|
"logits/chosen": -1.8762012720108032, |
|
"logits/rejected": -1.4956092834472656, |
|
"logps/chosen": -173.73521423339844, |
|
"logps/rejected": -129.2222137451172, |
|
"loss": 0.3999, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.000283640343695879, |
|
"rewards/margins": 0.0003057140565942973, |
|
"rewards/rejected": -2.2073701984481886e-05, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 7.968127490039841e-07, |
|
"logits/chosen": -2.074092388153076, |
|
"logits/rejected": -1.6071268320083618, |
|
"logps/chosen": -128.0385284423828, |
|
"logps/rejected": -99.65340423583984, |
|
"loss": 0.3735, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.0004021901695523411, |
|
"rewards/margins": 0.0006805313169024885, |
|
"rewards/rejected": -0.0002783412055578083, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.9601593625498e-07, |
|
"logits/chosen": -2.022669792175293, |
|
"logits/rejected": -1.4573835134506226, |
|
"logps/chosen": -151.2292022705078, |
|
"logps/rejected": -106.66209411621094, |
|
"loss": 0.383, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.0010000061010941863, |
|
"rewards/margins": 0.0013790394878014922, |
|
"rewards/rejected": -0.00037903329939581454, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.1952191235059762e-06, |
|
"logits/chosen": -1.9887306690216064, |
|
"logits/rejected": -1.5061299800872803, |
|
"logps/chosen": -206.5648193359375, |
|
"logps/rejected": -130.1720733642578, |
|
"loss": 0.4111, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.0012637332547456026, |
|
"rewards/margins": 0.0010847109369933605, |
|
"rewards/rejected": 0.00017902204126585275, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.3944223107569721e-06, |
|
"logits/chosen": -2.064723253250122, |
|
"logits/rejected": -1.4734152555465698, |
|
"logps/chosen": -200.28292846679688, |
|
"logps/rejected": -123.68900299072266, |
|
"loss": 0.3915, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.00973748043179512, |
|
"rewards/margins": 0.007851692847907543, |
|
"rewards/rejected": -0.017589174211025238, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.5936254980079683e-06, |
|
"logits/chosen": -1.9502429962158203, |
|
"logits/rejected": -1.3335387706756592, |
|
"logps/chosen": -223.71151733398438, |
|
"logps/rejected": -220.52392578125, |
|
"loss": 0.3923, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.063636913895607, |
|
"rewards/margins": 0.03966347128152847, |
|
"rewards/rejected": -0.10330037772655487, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.7928286852589644e-06, |
|
"logits/chosen": -1.8217380046844482, |
|
"logits/rejected": -1.2573997974395752, |
|
"logps/chosen": -345.07183837890625, |
|
"logps/rejected": -365.8221740722656, |
|
"loss": 0.2872, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.15853700041770935, |
|
"rewards/margins": 0.059460896998643875, |
|
"rewards/rejected": -0.2179979383945465, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.99203187250996e-06, |
|
"logits/chosen": -1.9064254760742188, |
|
"logits/rejected": -1.2212843894958496, |
|
"logps/chosen": -434.5439453125, |
|
"logps/rejected": -479.3141174316406, |
|
"loss": 0.279, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.2425861656665802, |
|
"rewards/margins": 0.1260627657175064, |
|
"rewards/rejected": -0.3686489164829254, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.1912350597609563e-06, |
|
"logits/chosen": -1.9829845428466797, |
|
"logits/rejected": -1.3048018217086792, |
|
"logps/chosen": -418.07952880859375, |
|
"logps/rejected": -516.9057006835938, |
|
"loss": 0.2641, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.2715674340724945, |
|
"rewards/margins": 0.14631584286689758, |
|
"rewards/rejected": -0.4178832471370697, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.3904382470119524e-06, |
|
"logits/chosen": -1.9264233112335205, |
|
"logits/rejected": -1.4916644096374512, |
|
"logps/chosen": -516.511474609375, |
|
"logps/rejected": -570.0408325195312, |
|
"loss": 0.3119, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.3569049537181854, |
|
"rewards/margins": 0.09669794887304306, |
|
"rewards/rejected": -0.4536028802394867, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.589641434262948e-06, |
|
"logits/chosen": -2.02325177192688, |
|
"logits/rejected": -1.567561388015747, |
|
"logps/chosen": -412.42327880859375, |
|
"logps/rejected": -471.11962890625, |
|
"loss": 0.2955, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.23843295872211456, |
|
"rewards/margins": 0.11186476051807404, |
|
"rewards/rejected": -0.350297749042511, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.7888446215139443e-06, |
|
"logits/chosen": -2.2830417156219482, |
|
"logits/rejected": -1.7622215747833252, |
|
"logps/chosen": -320.74603271484375, |
|
"logps/rejected": -390.9023742675781, |
|
"loss": 0.3239, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.15108834207057953, |
|
"rewards/margins": 0.13633789122104645, |
|
"rewards/rejected": -0.2874262034893036, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.9880478087649404e-06, |
|
"logits/chosen": -2.1028332710266113, |
|
"logits/rejected": -1.609794020652771, |
|
"logps/chosen": -509.71051025390625, |
|
"logps/rejected": -607.0804443359375, |
|
"loss": 0.2944, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.3575127124786377, |
|
"rewards/margins": 0.14793363213539124, |
|
"rewards/rejected": -0.5054463148117065, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.1872509960159366e-06, |
|
"logits/chosen": -2.0964839458465576, |
|
"logits/rejected": -1.6341785192489624, |
|
"logps/chosen": -634.71337890625, |
|
"logps/rejected": -707.5382080078125, |
|
"loss": 0.275, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.45250073075294495, |
|
"rewards/margins": 0.1270226687192917, |
|
"rewards/rejected": -0.5795234441757202, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.3864541832669323e-06, |
|
"logits/chosen": -1.9589221477508545, |
|
"logits/rejected": -1.5987298488616943, |
|
"logps/chosen": -550.2916870117188, |
|
"logps/rejected": -589.5011596679688, |
|
"loss": 0.3143, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.42545145750045776, |
|
"rewards/margins": 0.08496164530515671, |
|
"rewards/rejected": -0.5104131102561951, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.585657370517929e-06, |
|
"logits/chosen": -2.2417616844177246, |
|
"logits/rejected": -1.566329836845398, |
|
"logps/chosen": -465.1465759277344, |
|
"logps/rejected": -618.5088500976562, |
|
"loss": 0.2378, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.28945469856262207, |
|
"rewards/margins": 0.20885801315307617, |
|
"rewards/rejected": -0.49831271171569824, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.7848605577689246e-06, |
|
"logits/chosen": -2.0054221153259277, |
|
"logits/rejected": -1.608533263206482, |
|
"logps/chosen": -474.73468017578125, |
|
"logps/rejected": -527.5888061523438, |
|
"loss": 0.3241, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.3396565020084381, |
|
"rewards/margins": 0.0798039585351944, |
|
"rewards/rejected": -0.4194604754447937, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.98406374501992e-06, |
|
"logits/chosen": -2.1822266578674316, |
|
"logits/rejected": -1.7768001556396484, |
|
"logps/chosen": -381.39166259765625, |
|
"logps/rejected": -489.23114013671875, |
|
"loss": 0.2525, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.2405729591846466, |
|
"rewards/margins": 0.13961976766586304, |
|
"rewards/rejected": -0.38019272685050964, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.183266932270917e-06, |
|
"logits/chosen": -2.0433428287506104, |
|
"logits/rejected": -1.155823826789856, |
|
"logps/chosen": -570.4938354492188, |
|
"logps/rejected": -718.9401245117188, |
|
"loss": 0.2815, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.4118042588233948, |
|
"rewards/margins": 0.2083522379398346, |
|
"rewards/rejected": -0.620156466960907, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.382470119521913e-06, |
|
"logits/chosen": -2.0706868171691895, |
|
"logits/rejected": -1.3509011268615723, |
|
"logps/chosen": -564.2838745117188, |
|
"logps/rejected": -744.248779296875, |
|
"loss": 0.261, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.40354689955711365, |
|
"rewards/margins": 0.22655579447746277, |
|
"rewards/rejected": -0.6301027536392212, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.581673306772908e-06, |
|
"logits/chosen": -2.1797804832458496, |
|
"logits/rejected": -1.5733859539031982, |
|
"logps/chosen": -329.7751159667969, |
|
"logps/rejected": -467.71807861328125, |
|
"loss": 0.2674, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.1671646535396576, |
|
"rewards/margins": 0.19905708730220795, |
|
"rewards/rejected": -0.36622172594070435, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.780876494023905e-06, |
|
"logits/chosen": -2.288848876953125, |
|
"logits/rejected": -1.6748888492584229, |
|
"logps/chosen": -571.8269653320312, |
|
"logps/rejected": -707.6842041015625, |
|
"loss": 0.2649, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.4129388928413391, |
|
"rewards/margins": 0.19371375441551208, |
|
"rewards/rejected": -0.6066526174545288, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.980079681274901e-06, |
|
"logits/chosen": -2.222977876663208, |
|
"logits/rejected": -1.6339671611785889, |
|
"logps/chosen": -457.69000244140625, |
|
"logps/rejected": -580.9161987304688, |
|
"loss": 0.2866, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.3078981339931488, |
|
"rewards/margins": 0.1683819591999054, |
|
"rewards/rejected": -0.4762801229953766, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.999802610509541e-06, |
|
"logits/chosen": -2.17555570602417, |
|
"logits/rejected": -1.641728401184082, |
|
"logps/chosen": -571.9801025390625, |
|
"logps/rejected": -752.2948608398438, |
|
"loss": 0.2416, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.40048933029174805, |
|
"rewards/margins": 0.21740679442882538, |
|
"rewards/rejected": -0.6178960800170898, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.9991203164860365e-06, |
|
"logits/chosen": -2.3382885456085205, |
|
"logits/rejected": -1.802610158920288, |
|
"logps/chosen": -461.989013671875, |
|
"logps/rejected": -589.45166015625, |
|
"loss": 0.2291, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.31522998213768005, |
|
"rewards/margins": 0.16316869854927063, |
|
"rewards/rejected": -0.4783986508846283, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.997950814005098e-06, |
|
"logits/chosen": -2.5300729274749756, |
|
"logits/rejected": -1.6759153604507446, |
|
"logps/chosen": -527.8553466796875, |
|
"logps/rejected": -689.076904296875, |
|
"loss": 0.2481, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.318958580493927, |
|
"rewards/margins": 0.24714262783527374, |
|
"rewards/rejected": -0.5661011934280396, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.99629433106355e-06, |
|
"logits/chosen": -2.233731746673584, |
|
"logits/rejected": -1.6700172424316406, |
|
"logps/chosen": -671.8790283203125, |
|
"logps/rejected": -758.0708618164062, |
|
"loss": 0.2699, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.4771367013454437, |
|
"rewards/margins": 0.15160521864891052, |
|
"rewards/rejected": -0.628741979598999, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.994151190596025e-06, |
|
"logits/chosen": -2.2926723957061768, |
|
"logits/rejected": -1.6014070510864258, |
|
"logps/chosen": -357.16583251953125, |
|
"logps/rejected": -530.6362915039062, |
|
"loss": 0.2773, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.20203897356987, |
|
"rewards/margins": 0.20360472798347473, |
|
"rewards/rejected": -0.4056437015533447, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.9915218104120024e-06, |
|
"logits/chosen": -2.1675281524658203, |
|
"logits/rejected": -1.540875792503357, |
|
"logps/chosen": -517.7607421875, |
|
"logps/rejected": -669.5948486328125, |
|
"loss": 0.2334, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.3637324273586273, |
|
"rewards/margins": 0.20930609107017517, |
|
"rewards/rejected": -0.5730385780334473, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.98840670311436e-06, |
|
"logits/chosen": -2.3536510467529297, |
|
"logits/rejected": -1.794704794883728, |
|
"logps/chosen": -533.6524658203125, |
|
"logps/rejected": -621.9884643554688, |
|
"loss": 0.2721, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.3811626136302948, |
|
"rewards/margins": 0.13614344596862793, |
|
"rewards/rejected": -0.5173059701919556, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.984806475999437e-06, |
|
"logits/chosen": -2.2430691719055176, |
|
"logits/rejected": -1.409332036972046, |
|
"logps/chosen": -616.1778564453125, |
|
"logps/rejected": -776.3978271484375, |
|
"loss": 0.2444, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.42925572395324707, |
|
"rewards/margins": 0.21815767884254456, |
|
"rewards/rejected": -0.647413432598114, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.980721830938645e-06, |
|
"logits/chosen": -2.1990833282470703, |
|
"logits/rejected": -1.544798493385315, |
|
"logps/chosen": -605.9384765625, |
|
"logps/rejected": -732.4371337890625, |
|
"loss": 0.2803, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.439105361700058, |
|
"rewards/margins": 0.172859787940979, |
|
"rewards/rejected": -0.6119651198387146, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.9761535642416284e-06, |
|
"logits/chosen": -2.2408649921417236, |
|
"logits/rejected": -1.799768090248108, |
|
"logps/chosen": -489.03533935546875, |
|
"logps/rejected": -665.1949462890625, |
|
"loss": 0.2405, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.37104225158691406, |
|
"rewards/margins": 0.19040416181087494, |
|
"rewards/rejected": -0.5614464282989502, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.9711025665010335e-06, |
|
"logits/chosen": -2.2574844360351562, |
|
"logits/rejected": -1.8442182540893555, |
|
"logps/chosen": -408.14654541015625, |
|
"logps/rejected": -561.5911865234375, |
|
"loss": 0.2777, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.269236296415329, |
|
"rewards/margins": 0.16201291978359222, |
|
"rewards/rejected": -0.4312492311000824, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.965569822418878e-06, |
|
"logits/chosen": -2.0681309700012207, |
|
"logits/rejected": -1.4381722211837769, |
|
"logps/chosen": -559.8223876953125, |
|
"logps/rejected": -734.8892211914062, |
|
"loss": 0.2241, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.43346816301345825, |
|
"rewards/margins": 0.1917620450258255, |
|
"rewards/rejected": -0.6252301931381226, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.9595564106145825e-06, |
|
"logits/chosen": -2.303969383239746, |
|
"logits/rejected": -1.733120322227478, |
|
"logps/chosen": -418.6346740722656, |
|
"logps/rejected": -560.189208984375, |
|
"loss": 0.2358, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.2271452397108078, |
|
"rewards/margins": 0.19738546013832092, |
|
"rewards/rejected": -0.42453068494796753, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.953063503414692e-06, |
|
"logits/chosen": -2.2448253631591797, |
|
"logits/rejected": -1.8518972396850586, |
|
"logps/chosen": -494.76055908203125, |
|
"logps/rejected": -628.48388671875, |
|
"loss": 0.2679, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.335178941488266, |
|
"rewards/margins": 0.17853963375091553, |
|
"rewards/rejected": -0.5137186050415039, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.946092366624333e-06, |
|
"logits/chosen": -2.2507317066192627, |
|
"logits/rejected": -1.619484305381775, |
|
"logps/chosen": -526.4837646484375, |
|
"logps/rejected": -697.7025146484375, |
|
"loss": 0.2529, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.3731151223182678, |
|
"rewards/margins": 0.21801939606666565, |
|
"rewards/rejected": -0.5911344885826111, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.938644359280433e-06, |
|
"logits/chosen": -2.375333070755005, |
|
"logits/rejected": -1.6055479049682617, |
|
"logps/chosen": -563.16552734375, |
|
"logps/rejected": -741.4131469726562, |
|
"loss": 0.212, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.36431047320365906, |
|
"rewards/margins": 0.2534615397453308, |
|
"rewards/rejected": -0.6177719831466675, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.930720933386782e-06, |
|
"logits/chosen": -2.233098268508911, |
|
"logits/rejected": -1.6022119522094727, |
|
"logps/chosen": -426.771728515625, |
|
"logps/rejected": -549.2544555664062, |
|
"loss": 0.2503, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.2603939175605774, |
|
"rewards/margins": 0.18492767214775085, |
|
"rewards/rejected": -0.445321649312973, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.922323633630957e-06, |
|
"logits/chosen": -2.1745898723602295, |
|
"logits/rejected": -1.5291332006454468, |
|
"logps/chosen": -516.7222290039062, |
|
"logps/rejected": -726.829345703125, |
|
"loss": 0.2005, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.3355748653411865, |
|
"rewards/margins": 0.27592363953590393, |
|
"rewards/rejected": -0.6114985346794128, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.913454097083185e-06, |
|
"logits/chosen": -2.2320826053619385, |
|
"logits/rejected": -1.747312307357788, |
|
"logps/chosen": -674.8052368164062, |
|
"logps/rejected": -752.8465576171875, |
|
"loss": 0.2891, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.5011820793151855, |
|
"rewards/margins": 0.12799863517284393, |
|
"rewards/rejected": -0.6291807889938354, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.904114052877189e-06, |
|
"logits/chosen": -2.121434211730957, |
|
"logits/rejected": -1.4196488857269287, |
|
"logps/chosen": -554.5970458984375, |
|
"logps/rejected": -728.1629028320312, |
|
"loss": 0.2619, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.39592963457107544, |
|
"rewards/margins": 0.23849153518676758, |
|
"rewards/rejected": -0.6344212293624878, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.894305321873092e-06, |
|
"logits/chosen": -2.1138224601745605, |
|
"logits/rejected": -1.6369972229003906, |
|
"logps/chosen": -714.6239624023438, |
|
"logps/rejected": -836.7276611328125, |
|
"loss": 0.2451, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.5520918965339661, |
|
"rewards/margins": 0.17846426367759705, |
|
"rewards/rejected": -0.7305561304092407, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.884029816302441e-06, |
|
"logits/chosen": -2.2457010746002197, |
|
"logits/rejected": -1.7463334798812866, |
|
"logps/chosen": -603.1580810546875, |
|
"logps/rejected": -735.0208740234375, |
|
"loss": 0.2874, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.4632132947444916, |
|
"rewards/margins": 0.17302510142326355, |
|
"rewards/rejected": -0.6362384557723999, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.873289539395404e-06, |
|
"logits/chosen": -2.1492977142333984, |
|
"logits/rejected": -1.6171681880950928, |
|
"logps/chosen": -582.4373779296875, |
|
"logps/rejected": -740.5318603515625, |
|
"loss": 0.2167, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.42827337980270386, |
|
"rewards/margins": 0.2127343863248825, |
|
"rewards/rejected": -0.6410078406333923, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.862086584990246e-06, |
|
"logits/chosen": -2.2339184284210205, |
|
"logits/rejected": -1.7799112796783447, |
|
"logps/chosen": -582.9187622070312, |
|
"logps/rejected": -674.3992309570312, |
|
"loss": 0.2649, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.3971683382987976, |
|
"rewards/margins": 0.1706150472164154, |
|
"rewards/rejected": -0.5677834153175354, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.850423137125126e-06, |
|
"logits/chosen": -1.98675537109375, |
|
"logits/rejected": -1.467492699623108, |
|
"logps/chosen": -641.2584228515625, |
|
"logps/rejected": -804.790771484375, |
|
"loss": 0.2572, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.4882410168647766, |
|
"rewards/margins": 0.21470656991004944, |
|
"rewards/rejected": -0.7029476165771484, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.838301469612315e-06, |
|
"logits/chosen": -2.24493408203125, |
|
"logits/rejected": -1.498622179031372, |
|
"logps/chosen": -557.9942626953125, |
|
"logps/rejected": -705.88623046875, |
|
"loss": 0.2415, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.3977532982826233, |
|
"rewards/margins": 0.20435115694999695, |
|
"rewards/rejected": -0.6021044850349426, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.825723945594912e-06, |
|
"logits/chosen": -2.1898727416992188, |
|
"logits/rejected": -1.737510323524475, |
|
"logps/chosen": -415.2431640625, |
|
"logps/rejected": -563.3753051757812, |
|
"loss": 0.285, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.27554893493652344, |
|
"rewards/margins": 0.1814078390598297, |
|
"rewards/rejected": -0.4569567143917084, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.812693017086145e-06, |
|
"logits/chosen": -2.2709362506866455, |
|
"logits/rejected": -1.7534446716308594, |
|
"logps/chosen": -521.100341796875, |
|
"logps/rejected": -715.1611938476562, |
|
"loss": 0.2564, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.3482024073600769, |
|
"rewards/margins": 0.23160001635551453, |
|
"rewards/rejected": -0.5798024535179138, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.799211224491348e-06, |
|
"logits/chosen": -2.000866651535034, |
|
"logits/rejected": -1.438919186592102, |
|
"logps/chosen": -606.0504760742188, |
|
"logps/rejected": -769.3602294921875, |
|
"loss": 0.2518, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5148371458053589, |
|
"rewards/margins": 0.1749372035264969, |
|
"rewards/rejected": -0.6897743344306946, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.7852811961126974e-06, |
|
"logits/chosen": -2.2116503715515137, |
|
"logits/rejected": -1.419684648513794, |
|
"logps/chosen": -566.8517456054688, |
|
"logps/rejected": -797.03076171875, |
|
"loss": 0.2348, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.42510828375816345, |
|
"rewards/margins": 0.28039130568504333, |
|
"rewards/rejected": -0.7054997086524963, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.770905647636828e-06, |
|
"logits/chosen": -2.19708514213562, |
|
"logits/rejected": -1.514418601989746, |
|
"logps/chosen": -372.2975769042969, |
|
"logps/rejected": -572.015625, |
|
"loss": 0.2529, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.22482657432556152, |
|
"rewards/margins": 0.2627830505371094, |
|
"rewards/rejected": -0.4876096844673157, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.756087381605399e-06, |
|
"logits/chosen": -2.212829113006592, |
|
"logits/rejected": -1.5567867755889893, |
|
"logps/chosen": -451.62091064453125, |
|
"logps/rejected": -675.35498046875, |
|
"loss": 0.2243, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.28890347480773926, |
|
"rewards/margins": 0.2642940580844879, |
|
"rewards/rejected": -0.5531975030899048, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.740829286868732e-06, |
|
"logits/chosen": -2.0651936531066895, |
|
"logits/rejected": -1.282173991203308, |
|
"logps/chosen": -608.8034057617188, |
|
"logps/rejected": -814.4363403320312, |
|
"loss": 0.2309, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.4570806920528412, |
|
"rewards/margins": 0.2720951437950134, |
|
"rewards/rejected": -0.729175865650177, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.725134338022631e-06, |
|
"logits/chosen": -1.976910948753357, |
|
"logits/rejected": -1.3392863273620605, |
|
"logps/chosen": -673.251220703125, |
|
"logps/rejected": -844.18408203125, |
|
"loss": 0.2425, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.4962734580039978, |
|
"rewards/margins": 0.2366379052400589, |
|
"rewards/rejected": -0.7329114675521851, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.709005594828471e-06, |
|
"logits/chosen": -2.177516460418701, |
|
"logits/rejected": -1.4188392162322998, |
|
"logps/chosen": -349.5711669921875, |
|
"logps/rejected": -489.1685485839844, |
|
"loss": 0.268, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.18546082079410553, |
|
"rewards/margins": 0.21840114891529083, |
|
"rewards/rejected": -0.4038619101047516, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.692446201616692e-06, |
|
"logits/chosen": -2.171480894088745, |
|
"logits/rejected": -1.486372470855713, |
|
"logps/chosen": -489.7513732910156, |
|
"logps/rejected": -671.7752685546875, |
|
"loss": 0.2265, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.3359985947608948, |
|
"rewards/margins": 0.2379292994737625, |
|
"rewards/rejected": -0.5739278197288513, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.675459386673815e-06, |
|
"logits/chosen": -2.188599109649658, |
|
"logits/rejected": -1.417234182357788, |
|
"logps/chosen": -669.4722290039062, |
|
"logps/rejected": -851.4942626953125, |
|
"loss": 0.2386, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.4645751118659973, |
|
"rewards/margins": 0.2636135220527649, |
|
"rewards/rejected": -0.7281886339187622, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.658048461613068e-06, |
|
"logits/chosen": -2.186264753341675, |
|
"logits/rejected": -1.3317312002182007, |
|
"logps/chosen": -558.1051635742188, |
|
"logps/rejected": -807.9237060546875, |
|
"loss": 0.2096, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.4056881070137024, |
|
"rewards/margins": 0.2980197072029114, |
|
"rewards/rejected": -0.7037078738212585, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.640216820728791e-06, |
|
"logits/chosen": -2.0650200843811035, |
|
"logits/rejected": -1.5434041023254395, |
|
"logps/chosen": -591.6378173828125, |
|
"logps/rejected": -726.2674560546875, |
|
"loss": 0.3041, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.440895140171051, |
|
"rewards/margins": 0.15948018431663513, |
|
"rewards/rejected": -0.6003752946853638, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.621967940334705e-06, |
|
"logits/chosen": -2.1776533126831055, |
|
"logits/rejected": -1.505947470664978, |
|
"logps/chosen": -533.233154296875, |
|
"logps/rejected": -758.1294555664062, |
|
"loss": 0.2585, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.34916678071022034, |
|
"rewards/margins": 0.2798806130886078, |
|
"rewards/rejected": -0.6290473341941833, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.603305378086201e-06, |
|
"logits/chosen": -1.815281629562378, |
|
"logits/rejected": -1.1680887937545776, |
|
"logps/chosen": -623.07275390625, |
|
"logps/rejected": -776.311279296875, |
|
"loss": 0.2435, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.4895709156990051, |
|
"rewards/margins": 0.18619278073310852, |
|
"rewards/rejected": -0.675763726234436, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.584232772286769e-06, |
|
"logits/chosen": -1.9976768493652344, |
|
"logits/rejected": -1.2251824140548706, |
|
"logps/chosen": -644.8419189453125, |
|
"logps/rejected": -831.3948364257812, |
|
"loss": 0.2896, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.4800949990749359, |
|
"rewards/margins": 0.2344970703125, |
|
"rewards/rejected": -0.7145919799804688, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.5647538411786965e-06, |
|
"logits/chosen": -2.1248276233673096, |
|
"logits/rejected": -1.5544915199279785, |
|
"logps/chosen": -530.0264282226562, |
|
"logps/rejected": -631.1822509765625, |
|
"loss": 0.2546, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.36536192893981934, |
|
"rewards/margins": 0.15929196774959564, |
|
"rewards/rejected": -0.5246539115905762, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.544872382218202e-06, |
|
"logits/chosen": -2.3693809509277344, |
|
"logits/rejected": -1.6609468460083008, |
|
"logps/chosen": -424.1357421875, |
|
"logps/rejected": -568.0796508789062, |
|
"loss": 0.2299, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.257046639919281, |
|
"rewards/margins": 0.19673588871955872, |
|
"rewards/rejected": -0.45378249883651733, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.5245922713351e-06, |
|
"logits/chosen": -2.135702610015869, |
|
"logits/rejected": -1.4124939441680908, |
|
"logps/chosen": -638.5548095703125, |
|
"logps/rejected": -831.3570556640625, |
|
"loss": 0.2649, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.46429723501205444, |
|
"rewards/margins": 0.2442464828491211, |
|
"rewards/rejected": -0.7085437774658203, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.503917462177192e-06, |
|
"logits/chosen": -2.142075300216675, |
|
"logits/rejected": -1.5568357706069946, |
|
"logps/chosen": -572.1000366210938, |
|
"logps/rejected": -753.2780151367188, |
|
"loss": 0.292, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.44795316457748413, |
|
"rewards/margins": 0.20818133652210236, |
|
"rewards/rejected": -0.6561344861984253, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.482851985339487e-06, |
|
"logits/chosen": -2.1371376514434814, |
|
"logits/rejected": -1.4664158821105957, |
|
"logps/chosen": -545.5193481445312, |
|
"logps/rejected": -744.4154052734375, |
|
"loss": 0.239, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.3687899112701416, |
|
"rewards/margins": 0.2610931992530823, |
|
"rewards/rejected": -0.6298831701278687, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.461399947578434e-06, |
|
"logits/chosen": -2.133291721343994, |
|
"logits/rejected": -1.604524850845337, |
|
"logps/chosen": -517.1326293945312, |
|
"logps/rejected": -649.8372802734375, |
|
"loss": 0.231, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.3689945638179779, |
|
"rewards/margins": 0.1885087788105011, |
|
"rewards/rejected": -0.5575034022331238, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.439565531011299e-06, |
|
"logits/chosen": -1.8887875080108643, |
|
"logits/rejected": -1.270674467086792, |
|
"logps/chosen": -613.1268310546875, |
|
"logps/rejected": -801.4429321289062, |
|
"loss": 0.2473, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.4629307687282562, |
|
"rewards/margins": 0.22329919040203094, |
|
"rewards/rejected": -0.6862298846244812, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.417352992300854e-06, |
|
"logits/chosen": -2.3292813301086426, |
|
"logits/rejected": -1.6490875482559204, |
|
"logps/chosen": -425.7843322753906, |
|
"logps/rejected": -597.09814453125, |
|
"loss": 0.2591, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.2748299241065979, |
|
"rewards/margins": 0.2304181158542633, |
|
"rewards/rejected": -0.5052480101585388, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.3947666618255335e-06, |
|
"logits/chosen": -2.14275860786438, |
|
"logits/rejected": -1.552585244178772, |
|
"logps/chosen": -411.56024169921875, |
|
"logps/rejected": -597.5908813476562, |
|
"loss": 0.2509, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.2517482042312622, |
|
"rewards/margins": 0.2175951898097992, |
|
"rewards/rejected": -0.4693434238433838, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.3718109428352155e-06, |
|
"logits/chosen": -2.094531536102295, |
|
"logits/rejected": -1.4484500885009766, |
|
"logps/chosen": -561.7736206054688, |
|
"logps/rejected": -734.1261596679688, |
|
"loss": 0.2477, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.4113832116127014, |
|
"rewards/margins": 0.2228744924068451, |
|
"rewards/rejected": -0.6342577338218689, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.348490310592801e-06, |
|
"logits/chosen": -1.8707062005996704, |
|
"logits/rejected": -1.506484866142273, |
|
"logps/chosen": -714.3576049804688, |
|
"logps/rejected": -866.392578125, |
|
"loss": 0.2457, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.5910379886627197, |
|
"rewards/margins": 0.16303986310958862, |
|
"rewards/rejected": -0.7540777921676636, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.3248093115017544e-06, |
|
"logits/chosen": -2.125237464904785, |
|
"logits/rejected": -1.4492751359939575, |
|
"logps/chosen": -582.9632568359375, |
|
"logps/rejected": -715.1638793945312, |
|
"loss": 0.2496, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.3824686110019684, |
|
"rewards/margins": 0.22166451811790466, |
|
"rewards/rejected": -0.6041331887245178, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.3007725622197675e-06, |
|
"logits/chosen": -2.2058329582214355, |
|
"logits/rejected": -1.5816318988800049, |
|
"logps/chosen": -570.4851684570312, |
|
"logps/rejected": -719.0206909179688, |
|
"loss": 0.227, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.38881629705429077, |
|
"rewards/margins": 0.21982832252979279, |
|
"rewards/rejected": -0.6086446046829224, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.27638474875874e-06, |
|
"logits/chosen": -2.1510353088378906, |
|
"logits/rejected": -1.3720935583114624, |
|
"logps/chosen": -377.4300537109375, |
|
"logps/rejected": -588.8758544921875, |
|
"loss": 0.2153, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.23323938250541687, |
|
"rewards/margins": 0.264992356300354, |
|
"rewards/rejected": -0.4982317090034485, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.25165062557123e-06, |
|
"logits/chosen": -2.1816329956054688, |
|
"logits/rejected": -1.862624168395996, |
|
"logps/chosen": -489.541015625, |
|
"logps/rejected": -573.84814453125, |
|
"loss": 0.2876, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.33602797985076904, |
|
"rewards/margins": 0.1257353574037552, |
|
"rewards/rejected": -0.46176332235336304, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.226575014623557e-06, |
|
"logits/chosen": -2.121556520462036, |
|
"logits/rejected": -1.500382661819458, |
|
"logps/chosen": -569.2263793945312, |
|
"logps/rejected": -745.4718017578125, |
|
"loss": 0.2127, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.37383171916007996, |
|
"rewards/margins": 0.24332182109355927, |
|
"rewards/rejected": -0.617153525352478, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.201162804455764e-06, |
|
"logits/chosen": -2.158127784729004, |
|
"logits/rejected": -1.5402179956436157, |
|
"logps/chosen": -477.3233337402344, |
|
"logps/rejected": -729.618408203125, |
|
"loss": 0.2182, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.336378812789917, |
|
"rewards/margins": 0.27327030897140503, |
|
"rewards/rejected": -0.609649121761322, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.175418949228571e-06, |
|
"logits/chosen": -2.0661299228668213, |
|
"logits/rejected": -1.3702499866485596, |
|
"logps/chosen": -421.61676025390625, |
|
"logps/rejected": -608.9027709960938, |
|
"loss": 0.2314, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.28428909182548523, |
|
"rewards/margins": 0.23253202438354492, |
|
"rewards/rejected": -0.5168210864067078, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.149348467757566e-06, |
|
"logits/chosen": -2.2505486011505127, |
|
"logits/rejected": -1.6682631969451904, |
|
"logps/chosen": -519.7850341796875, |
|
"logps/rejected": -675.0706787109375, |
|
"loss": 0.2582, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.40990179777145386, |
|
"rewards/margins": 0.17124588787555695, |
|
"rewards/rejected": -0.5811477303504944, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.122956442534765e-06, |
|
"logits/chosen": -2.070268154144287, |
|
"logits/rejected": -1.589167833328247, |
|
"logps/chosen": -569.9495239257812, |
|
"logps/rejected": -677.8106689453125, |
|
"loss": 0.2566, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.4036158621311188, |
|
"rewards/margins": 0.1584577113389969, |
|
"rewards/rejected": -0.5620735883712769, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.096248018737781e-06, |
|
"logits/chosen": -1.9538896083831787, |
|
"logits/rejected": -1.506744623184204, |
|
"logps/chosen": -598.489990234375, |
|
"logps/rejected": -718.8297729492188, |
|
"loss": 0.2701, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.4573654234409332, |
|
"rewards/margins": 0.15690357983112335, |
|
"rewards/rejected": -0.614268958568573, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.069228403226751e-06, |
|
"logits/chosen": -2.2838289737701416, |
|
"logits/rejected": -1.5676209926605225, |
|
"logps/chosen": -536.0494384765625, |
|
"logps/rejected": -721.7430419921875, |
|
"loss": 0.2196, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.37890490889549255, |
|
"rewards/margins": 0.2322990447282791, |
|
"rewards/rejected": -0.6112040281295776, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.041902863529257e-06, |
|
"logits/chosen": -2.2310588359832764, |
|
"logits/rejected": -1.7672712802886963, |
|
"logps/chosen": -479.8710021972656, |
|
"logps/rejected": -604.5220947265625, |
|
"loss": 0.2552, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.3082687258720398, |
|
"rewards/margins": 0.17145316302776337, |
|
"rewards/rejected": -0.47972187399864197, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.014276726813404e-06, |
|
"logits/chosen": -2.1795742511749268, |
|
"logits/rejected": -1.4126112461090088, |
|
"logps/chosen": -480.098876953125, |
|
"logps/rejected": -701.5238037109375, |
|
"loss": 0.2253, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.32372376322746277, |
|
"rewards/margins": 0.2762491703033447, |
|
"rewards/rejected": -0.5999729037284851, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.986355378849284e-06, |
|
"logits/chosen": -2.3270981311798096, |
|
"logits/rejected": -2.0575973987579346, |
|
"logps/chosen": -562.2911376953125, |
|
"logps/rejected": -678.2664794921875, |
|
"loss": 0.255, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.39332714676856995, |
|
"rewards/margins": 0.1472180336713791, |
|
"rewards/rejected": -0.5405451655387878, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.958144262959004e-06, |
|
"logits/chosen": -2.204744815826416, |
|
"logits/rejected": -1.6555640697479248, |
|
"logps/chosen": -619.665283203125, |
|
"logps/rejected": -708.1978759765625, |
|
"loss": 0.4841, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.48071688413619995, |
|
"rewards/margins": 0.11553032696247101, |
|
"rewards/rejected": -0.5962471961975098, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.929648878955507e-06, |
|
"logits/chosen": -2.1643776893615723, |
|
"logits/rejected": -1.7133562564849854, |
|
"logps/chosen": -498.3556213378906, |
|
"logps/rejected": -624.0385131835938, |
|
"loss": 0.2892, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.3023318946361542, |
|
"rewards/margins": 0.18938633799552917, |
|
"rewards/rejected": -0.49171820282936096, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.900874782070362e-06, |
|
"logits/chosen": -2.2862777709960938, |
|
"logits/rejected": -1.5697494745254517, |
|
"logps/chosen": -523.8767700195312, |
|
"logps/rejected": -712.3031005859375, |
|
"loss": 0.2906, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.35215550661087036, |
|
"rewards/margins": 0.26922211050987244, |
|
"rewards/rejected": -0.6213775873184204, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.871827581870772e-06, |
|
"logits/chosen": -1.9373928308486938, |
|
"logits/rejected": -1.2519636154174805, |
|
"logps/chosen": -612.5736083984375, |
|
"logps/rejected": -769.370361328125, |
|
"loss": 0.2162, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.44734010100364685, |
|
"rewards/margins": 0.20580251514911652, |
|
"rewards/rejected": -0.6531426906585693, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.842512941165968e-06, |
|
"logits/chosen": -1.9552310705184937, |
|
"logits/rejected": -1.2225010395050049, |
|
"logps/chosen": -595.6505126953125, |
|
"logps/rejected": -807.1351318359375, |
|
"loss": 0.2339, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.4424334168434143, |
|
"rewards/margins": 0.2755126357078552, |
|
"rewards/rejected": -0.7179459929466248, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.8129365749032398e-06, |
|
"logits/chosen": -2.179598331451416, |
|
"logits/rejected": -1.5930414199829102, |
|
"logps/chosen": -470.39666748046875, |
|
"logps/rejected": -708.7127075195312, |
|
"loss": 0.2389, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.3327002227306366, |
|
"rewards/margins": 0.247235506772995, |
|
"rewards/rejected": -0.5799357295036316, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.783104249053793e-06, |
|
"logits/chosen": -1.876232385635376, |
|
"logits/rejected": -1.1954014301300049, |
|
"logps/chosen": -482.48809814453125, |
|
"logps/rejected": -653.3897705078125, |
|
"loss": 0.2342, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.3456036448478699, |
|
"rewards/margins": 0.20723696053028107, |
|
"rewards/rejected": -0.5528405904769897, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.7530217794886607e-06, |
|
"logits/chosen": -2.168401002883911, |
|
"logits/rejected": -1.362849473953247, |
|
"logps/chosen": -565.8056030273438, |
|
"logps/rejected": -753.4979248046875, |
|
"loss": 0.2181, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.3963169455528259, |
|
"rewards/margins": 0.2578599452972412, |
|
"rewards/rejected": -0.6541768908500671, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.722695030844891e-06, |
|
"logits/chosen": -1.9112541675567627, |
|
"logits/rejected": -1.4224046468734741, |
|
"logps/chosen": -536.9829711914062, |
|
"logps/rejected": -687.1131591796875, |
|
"loss": 0.2746, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.41709670424461365, |
|
"rewards/margins": 0.18000951409339905, |
|
"rewards/rejected": -0.5971062183380127, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.6921299153822198e-06, |
|
"logits/chosen": -2.2641046047210693, |
|
"logits/rejected": -1.6229751110076904, |
|
"logps/chosen": -529.2923583984375, |
|
"logps/rejected": -726.6102905273438, |
|
"loss": 0.228, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.34389495849609375, |
|
"rewards/margins": 0.2508707046508789, |
|
"rewards/rejected": -0.5947656035423279, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.66133239183047e-06, |
|
"logits/chosen": -2.1011762619018555, |
|
"logits/rejected": -1.4804975986480713, |
|
"logps/chosen": -495.906494140625, |
|
"logps/rejected": -683.6201171875, |
|
"loss": 0.2441, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.3397464454174042, |
|
"rewards/margins": 0.23895248770713806, |
|
"rewards/rejected": -0.5786989331245422, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.630308464227877e-06, |
|
"logits/chosen": -2.093890428543091, |
|
"logits/rejected": -1.4402543306350708, |
|
"logps/chosen": -405.9877014160156, |
|
"logps/rejected": -528.1480712890625, |
|
"loss": 0.2503, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.20128004252910614, |
|
"rewards/margins": 0.19632327556610107, |
|
"rewards/rejected": -0.3976033329963684, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.5990641807506e-06, |
|
"logits/chosen": -2.3380658626556396, |
|
"logits/rejected": -1.6520893573760986, |
|
"logps/chosen": -430.703369140625, |
|
"logps/rejected": -646.5703735351562, |
|
"loss": 0.2577, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.27441808581352234, |
|
"rewards/margins": 0.26531141996383667, |
|
"rewards/rejected": -0.5397294759750366, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.5676056325336084e-06, |
|
"logits/chosen": -2.1818904876708984, |
|
"logits/rejected": -1.784393310546875, |
|
"logps/chosen": -520.1884155273438, |
|
"logps/rejected": -674.11962890625, |
|
"loss": 0.2316, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.3729000687599182, |
|
"rewards/margins": 0.16966374218463898, |
|
"rewards/rejected": -0.5425638556480408, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.535938952483211e-06, |
|
"logits/chosen": -2.2213704586029053, |
|
"logits/rejected": -1.418710708618164, |
|
"logps/chosen": -560.0448608398438, |
|
"logps/rejected": -815.6527709960938, |
|
"loss": 0.2124, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.40334635972976685, |
|
"rewards/margins": 0.2945135533809662, |
|
"rewards/rejected": -0.6978598833084106, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.5040703140814254e-06, |
|
"logits/chosen": -2.221543788909912, |
|
"logits/rejected": -1.6487659215927124, |
|
"logps/chosen": -634.6618041992188, |
|
"logps/rejected": -839.3564453125, |
|
"loss": 0.2324, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.4737940728664398, |
|
"rewards/margins": 0.24561241269111633, |
|
"rewards/rejected": -0.7194064855575562, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.4720059301824527e-06, |
|
"logits/chosen": -2.189147710800171, |
|
"logits/rejected": -1.7664272785186768, |
|
"logps/chosen": -526.344970703125, |
|
"logps/rejected": -650.4627075195312, |
|
"loss": 0.2229, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.36051493883132935, |
|
"rewards/margins": 0.19068099558353424, |
|
"rewards/rejected": -0.5511959791183472, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.439752051801467e-06, |
|
"logits/chosen": -2.276291608810425, |
|
"logits/rejected": -1.5065466165542603, |
|
"logps/chosen": -466.7132263183594, |
|
"logps/rejected": -688.0767822265625, |
|
"loss": 0.2285, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.31362882256507874, |
|
"rewards/margins": 0.2555429935455322, |
|
"rewards/rejected": -0.5691717863082886, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.407314966895966e-06, |
|
"logits/chosen": -2.2229745388031006, |
|
"logits/rejected": -1.6873805522918701, |
|
"logps/chosen": -499.5079650878906, |
|
"logps/rejected": -678.7784423828125, |
|
"loss": 0.248, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.34916746616363525, |
|
"rewards/margins": 0.20373527705669403, |
|
"rewards/rejected": -0.5529027581214905, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.3747009991399226e-06, |
|
"logits/chosen": -2.0915369987487793, |
|
"logits/rejected": -1.515972375869751, |
|
"logps/chosen": -462.2139587402344, |
|
"logps/rejected": -621.6764526367188, |
|
"loss": 0.2303, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.34132781624794006, |
|
"rewards/margins": 0.19509322941303253, |
|
"rewards/rejected": -0.536421000957489, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.341916506690971e-06, |
|
"logits/chosen": -2.1414732933044434, |
|
"logits/rejected": -1.5188627243041992, |
|
"logps/chosen": -487.9198303222656, |
|
"logps/rejected": -658.6336059570312, |
|
"loss": 0.2358, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.3202676773071289, |
|
"rewards/margins": 0.2106558084487915, |
|
"rewards/rejected": -0.5309234857559204, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.308967880950874e-06, |
|
"logits/chosen": -2.0529282093048096, |
|
"logits/rejected": -1.4871912002563477, |
|
"logps/chosen": -542.2448120117188, |
|
"logps/rejected": -726.5451049804688, |
|
"loss": 0.2379, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.41947245597839355, |
|
"rewards/margins": 0.209178164601326, |
|
"rewards/rejected": -0.6286506056785583, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.275861545319504e-06, |
|
"logits/chosen": -2.241400718688965, |
|
"logits/rejected": -1.7179415225982666, |
|
"logps/chosen": -475.1095275878906, |
|
"logps/rejected": -658.5631103515625, |
|
"loss": 0.254, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.31501632928848267, |
|
"rewards/margins": 0.21485765278339386, |
|
"rewards/rejected": -0.5298739671707153, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.2426039539425875e-06, |
|
"logits/chosen": -2.236320972442627, |
|
"logits/rejected": -1.3771488666534424, |
|
"logps/chosen": -501.77960205078125, |
|
"logps/rejected": -705.9559326171875, |
|
"loss": 0.2485, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.33146998286247253, |
|
"rewards/margins": 0.2820337116718292, |
|
"rewards/rejected": -0.613503634929657, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.2092015904534614e-06, |
|
"logits/chosen": -2.1324212551116943, |
|
"logits/rejected": -1.3361310958862305, |
|
"logps/chosen": -465.21234130859375, |
|
"logps/rejected": -626.5828857421875, |
|
"loss": 0.222, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.30111175775527954, |
|
"rewards/margins": 0.2380938082933426, |
|
"rewards/rejected": -0.5392054915428162, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.17566096670907e-06, |
|
"logits/chosen": -2.1174681186676025, |
|
"logits/rejected": -1.8057496547698975, |
|
"logps/chosen": -514.4780883789062, |
|
"logps/rejected": -612.9591064453125, |
|
"loss": 0.2773, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.3825177252292633, |
|
"rewards/margins": 0.13299870491027832, |
|
"rewards/rejected": -0.5155164003372192, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.14198862152047e-06, |
|
"logits/chosen": -2.3264975547790527, |
|
"logits/rejected": -1.8200445175170898, |
|
"logps/chosen": -509.34332275390625, |
|
"logps/rejected": -662.3193359375, |
|
"loss": 0.2653, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.3371141850948334, |
|
"rewards/margins": 0.19747108221054077, |
|
"rewards/rejected": -0.5345852971076965, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.1081911193780734e-06, |
|
"logits/chosen": -2.1914255619049072, |
|
"logits/rejected": -1.6171748638153076, |
|
"logps/chosen": -607.6774291992188, |
|
"logps/rejected": -785.1390991210938, |
|
"loss": 0.2038, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.4608604907989502, |
|
"rewards/margins": 0.20961742103099823, |
|
"rewards/rejected": -0.6704779267311096, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.074275049171889e-06, |
|
"logits/chosen": -2.2372031211853027, |
|
"logits/rejected": -1.4800232648849487, |
|
"logps/chosen": -483.6255798339844, |
|
"logps/rejected": -684.0525512695312, |
|
"loss": 0.2085, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.3340616226196289, |
|
"rewards/margins": 0.26013877987861633, |
|
"rewards/rejected": -0.5942003726959229, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.0402470229070057e-06, |
|
"logits/chosen": -2.0542514324188232, |
|
"logits/rejected": -1.2669802904129028, |
|
"logps/chosen": -501.363037109375, |
|
"logps/rejected": -668.2828979492188, |
|
"loss": 0.2264, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.32397252321243286, |
|
"rewards/margins": 0.25711455941200256, |
|
"rewards/rejected": -0.5810869932174683, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.006113674414565e-06, |
|
"logits/chosen": -2.124558925628662, |
|
"logits/rejected": -1.5115940570831299, |
|
"logps/chosen": -526.3839721679688, |
|
"logps/rejected": -727.1844482421875, |
|
"loss": 0.2239, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.3876585364341736, |
|
"rewards/margins": 0.2264205515384674, |
|
"rewards/rejected": -0.6140791177749634, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.9718816580584885e-06, |
|
"logits/chosen": -2.330988645553589, |
|
"logits/rejected": -1.7894699573516846, |
|
"logps/chosen": -450.5931701660156, |
|
"logps/rejected": -639.8587036132812, |
|
"loss": 0.2195, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.300833135843277, |
|
"rewards/margins": 0.2428937703371048, |
|
"rewards/rejected": -0.5437268614768982, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.9375576474381907e-06, |
|
"logits/chosen": -2.09294056892395, |
|
"logits/rejected": -1.5990426540374756, |
|
"logps/chosen": -523.2689208984375, |
|
"logps/rejected": -714.5638427734375, |
|
"loss": 0.2505, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.36846724152565, |
|
"rewards/margins": 0.24714866280555725, |
|
"rewards/rejected": -0.6156159043312073, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.9031483340875523e-06, |
|
"logits/chosen": -2.161059617996216, |
|
"logits/rejected": -1.649753212928772, |
|
"logps/chosen": -595.0687255859375, |
|
"logps/rejected": -741.806396484375, |
|
"loss": 0.2356, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.42591404914855957, |
|
"rewards/margins": 0.19824771583080292, |
|
"rewards/rejected": -0.6241617202758789, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.868660426170388e-06, |
|
"logits/chosen": -2.0810506343841553, |
|
"logits/rejected": -1.3984508514404297, |
|
"logps/chosen": -519.595458984375, |
|
"logps/rejected": -709.3812255859375, |
|
"loss": 0.2182, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.37194812297821045, |
|
"rewards/margins": 0.21791160106658936, |
|
"rewards/rejected": -0.5898597836494446, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.8341006471726817e-06, |
|
"logits/chosen": -1.937578558921814, |
|
"logits/rejected": -1.3747153282165527, |
|
"logps/chosen": -516.6207885742188, |
|
"logps/rejected": -695.8414306640625, |
|
"loss": 0.2206, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.3487696647644043, |
|
"rewards/margins": 0.2523016929626465, |
|
"rewards/rejected": -0.6010713577270508, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.7994757345918244e-06, |
|
"logits/chosen": -2.414841890335083, |
|
"logits/rejected": -1.7823702096939087, |
|
"logps/chosen": -453.10357666015625, |
|
"logps/rejected": -609.0830078125, |
|
"loss": 0.2223, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.29341357946395874, |
|
"rewards/margins": 0.1968296319246292, |
|
"rewards/rejected": -0.49024319648742676, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.76479243862313e-06, |
|
"logits/chosen": -2.3372766971588135, |
|
"logits/rejected": -1.365880012512207, |
|
"logps/chosen": -455.2708435058594, |
|
"logps/rejected": -712.4964599609375, |
|
"loss": 0.2234, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.29985883831977844, |
|
"rewards/margins": 0.31078290939331055, |
|
"rewards/rejected": -0.6106417775154114, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.7300575208438684e-06, |
|
"logits/chosen": -2.193761110305786, |
|
"logits/rejected": -1.7236446142196655, |
|
"logps/chosen": -438.6455078125, |
|
"logps/rejected": -565.2931518554688, |
|
"loss": 0.2211, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.28494128584861755, |
|
"rewards/margins": 0.18237076699733734, |
|
"rewards/rejected": -0.4673120379447937, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.695277752895084e-06, |
|
"logits/chosen": -2.1613688468933105, |
|
"logits/rejected": -1.6745634078979492, |
|
"logps/chosen": -445.5487365722656, |
|
"logps/rejected": -583.8851318359375, |
|
"loss": 0.2158, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.30961447954177856, |
|
"rewards/margins": 0.17475193738937378, |
|
"rewards/rejected": -0.48436641693115234, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.6604599151614514e-06, |
|
"logits/chosen": -2.3241307735443115, |
|
"logits/rejected": -1.718146562576294, |
|
"logps/chosen": -472.390380859375, |
|
"logps/rejected": -670.6461791992188, |
|
"loss": 0.2484, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.30457672476768494, |
|
"rewards/margins": 0.2522759437561035, |
|
"rewards/rejected": -0.5568526983261108, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.625610795449424e-06, |
|
"logits/chosen": -2.118846893310547, |
|
"logits/rejected": -1.4407285451889038, |
|
"logps/chosen": -462.6249084472656, |
|
"logps/rejected": -711.793212890625, |
|
"loss": 0.2517, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.32376161217689514, |
|
"rewards/margins": 0.27978068590164185, |
|
"rewards/rejected": -0.6035423278808594, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.59073718766394e-06, |
|
"logits/chosen": -2.1999683380126953, |
|
"logits/rejected": -1.4900109767913818, |
|
"logps/chosen": -476.880859375, |
|
"logps/rejected": -675.8462524414062, |
|
"loss": 0.2466, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.3132093548774719, |
|
"rewards/margins": 0.25780683755874634, |
|
"rewards/rejected": -0.571016252040863, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.5558458904839345e-06, |
|
"logits/chosen": -2.192030191421509, |
|
"logits/rejected": -1.7015453577041626, |
|
"logps/chosen": -497.59429931640625, |
|
"logps/rejected": -690.6241455078125, |
|
"loss": 0.2367, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.3215945065021515, |
|
"rewards/margins": 0.2261447012424469, |
|
"rewards/rejected": -0.5477392673492432, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.5209437060369266e-06, |
|
"logits/chosen": -2.1700994968414307, |
|
"logits/rejected": -1.501390814781189, |
|
"logps/chosen": -551.3470458984375, |
|
"logps/rejected": -779.6153564453125, |
|
"loss": 0.2324, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.39861801266670227, |
|
"rewards/margins": 0.272796094417572, |
|
"rewards/rejected": -0.6714141368865967, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.4860374385729298e-06, |
|
"logits/chosen": -2.3210222721099854, |
|
"logits/rejected": -1.647146224975586, |
|
"logps/chosen": -461.9217224121094, |
|
"logps/rejected": -660.2238159179688, |
|
"loss": 0.2164, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.30266645550727844, |
|
"rewards/margins": 0.2639373540878296, |
|
"rewards/rejected": -0.5666038990020752, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.4511338931379475e-06, |
|
"logits/chosen": -2.270345687866211, |
|
"logits/rejected": -1.7757459878921509, |
|
"logps/chosen": -534.710693359375, |
|
"logps/rejected": -685.7799072265625, |
|
"loss": 0.1999, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.3926665186882019, |
|
"rewards/margins": 0.20033708214759827, |
|
"rewards/rejected": -0.5930036306381226, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 2.4162398742473216e-06, |
|
"logits/chosen": -2.436246395111084, |
|
"logits/rejected": -1.9106261730194092, |
|
"logps/chosen": -395.49920654296875, |
|
"logps/rejected": -564.9219970703125, |
|
"loss": 0.2506, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.19471552968025208, |
|
"rewards/margins": 0.22500737011432648, |
|
"rewards/rejected": -0.41972288489341736, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 2.381362184559173e-06, |
|
"logits/chosen": -2.3186497688293457, |
|
"logits/rejected": -1.7353124618530273, |
|
"logps/chosen": -545.6030883789062, |
|
"logps/rejected": -732.5777587890625, |
|
"loss": 0.2682, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.37879544496536255, |
|
"rewards/margins": 0.24574975669384003, |
|
"rewards/rejected": -0.6245452165603638, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 2.3465076235482117e-06, |
|
"logits/chosen": -2.099091053009033, |
|
"logits/rejected": -1.251534104347229, |
|
"logps/chosen": -591.8385009765625, |
|
"logps/rejected": -823.31201171875, |
|
"loss": 0.2263, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.3871614336967468, |
|
"rewards/margins": 0.3113124966621399, |
|
"rewards/rejected": -0.6984738707542419, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 2.3116829861801687e-06, |
|
"logits/chosen": -2.2782912254333496, |
|
"logits/rejected": -1.6930453777313232, |
|
"logps/chosen": -549.5288696289062, |
|
"logps/rejected": -727.2321166992188, |
|
"loss": 0.244, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.4014182984828949, |
|
"rewards/margins": 0.21473488211631775, |
|
"rewards/rejected": -0.6161531209945679, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.276895061587099e-06, |
|
"logits/chosen": -2.3278706073760986, |
|
"logits/rejected": -1.6391801834106445, |
|
"logps/chosen": -570.4884643554688, |
|
"logps/rejected": -818.1508178710938, |
|
"loss": 0.1976, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.40508347749710083, |
|
"rewards/margins": 0.28309813141822815, |
|
"rewards/rejected": -0.6881815791130066, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.242150631743832e-06, |
|
"logits/chosen": -2.3615341186523438, |
|
"logits/rejected": -1.725358009338379, |
|
"logps/chosen": -478.43603515625, |
|
"logps/rejected": -650.2882080078125, |
|
"loss": 0.2222, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.31791216135025024, |
|
"rewards/margins": 0.2205561101436615, |
|
"rewards/rejected": -0.5384682416915894, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.207456470145807e-06, |
|
"logits/chosen": -2.35868239402771, |
|
"logits/rejected": -1.982661247253418, |
|
"logps/chosen": -456.50439453125, |
|
"logps/rejected": -552.9801635742188, |
|
"loss": 0.291, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.29267337918281555, |
|
"rewards/margins": 0.14309945702552795, |
|
"rewards/rejected": -0.4357728064060211, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.17281934048857e-06, |
|
"logits/chosen": -2.1577529907226562, |
|
"logits/rejected": -1.2531640529632568, |
|
"logps/chosen": -470.220947265625, |
|
"logps/rejected": -691.4623413085938, |
|
"loss": 0.2374, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.2939762473106384, |
|
"rewards/margins": 0.2891116142272949, |
|
"rewards/rejected": -0.5830878615379333, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.1382459953491773e-06, |
|
"logits/chosen": -2.153989315032959, |
|
"logits/rejected": -1.712892770767212, |
|
"logps/chosen": -509.06103515625, |
|
"logps/rejected": -677.8247680664062, |
|
"loss": 0.2359, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.3281121850013733, |
|
"rewards/margins": 0.20805349946022034, |
|
"rewards/rejected": -0.5361656546592712, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.103743174869769e-06, |
|
"logits/chosen": -1.9424854516983032, |
|
"logits/rejected": -1.422446370124817, |
|
"logps/chosen": -697.8045654296875, |
|
"logps/rejected": -815.5999755859375, |
|
"loss": 0.2559, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5232383012771606, |
|
"rewards/margins": 0.18479886651039124, |
|
"rewards/rejected": -0.7080371379852295, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.0693176054435586e-06, |
|
"logits/chosen": -2.266700506210327, |
|
"logits/rejected": -1.5664805173873901, |
|
"logps/chosen": -565.6947021484375, |
|
"logps/rejected": -702.6282348632812, |
|
"loss": 0.2324, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.3801000118255615, |
|
"rewards/margins": 0.20919163525104523, |
|
"rewards/rejected": -0.5892916321754456, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.034975998403517e-06, |
|
"logits/chosen": -2.202317953109741, |
|
"logits/rejected": -1.178363561630249, |
|
"logps/chosen": -471.96514892578125, |
|
"logps/rejected": -716.80322265625, |
|
"loss": 0.2517, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.29619717597961426, |
|
"rewards/margins": 0.32016804814338684, |
|
"rewards/rejected": -0.6163652539253235, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.0007250487139827e-06, |
|
"logits/chosen": -2.1976194381713867, |
|
"logits/rejected": -1.7854173183441162, |
|
"logps/chosen": -409.1556396484375, |
|
"logps/rejected": -538.7450561523438, |
|
"loss": 0.2324, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.2653912305831909, |
|
"rewards/margins": 0.16031914949417114, |
|
"rewards/rejected": -0.42571038007736206, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.9665714336654604e-06, |
|
"logits/chosen": -2.2585816383361816, |
|
"logits/rejected": -1.4220424890518188, |
|
"logps/chosen": -477.648681640625, |
|
"logps/rejected": -702.324462890625, |
|
"loss": 0.2246, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.31048738956451416, |
|
"rewards/margins": 0.29150474071502686, |
|
"rewards/rejected": -0.6019921898841858, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.9325218115728756e-06, |
|
"logits/chosen": -2.259849786758423, |
|
"logits/rejected": -1.4995836019515991, |
|
"logps/chosen": -520.2024536132812, |
|
"logps/rejected": -719.3076782226562, |
|
"loss": 0.2124, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.3540114760398865, |
|
"rewards/margins": 0.2679198682308197, |
|
"rewards/rejected": -0.6219313740730286, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.8985828204775206e-06, |
|
"logits/chosen": -2.2383124828338623, |
|
"logits/rejected": -1.5292937755584717, |
|
"logps/chosen": -460.6908264160156, |
|
"logps/rejected": -634.41552734375, |
|
"loss": 0.1988, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.31010597944259644, |
|
"rewards/margins": 0.2322625368833542, |
|
"rewards/rejected": -0.5423685312271118, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.8647610768529581e-06, |
|
"logits/chosen": -2.3701467514038086, |
|
"logits/rejected": -1.669344186782837, |
|
"logps/chosen": -515.9610595703125, |
|
"logps/rejected": -702.8071899414062, |
|
"loss": 0.2465, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.3375261425971985, |
|
"rewards/margins": 0.22942647337913513, |
|
"rewards/rejected": -0.566952645778656, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.8310631743151187e-06, |
|
"logits/chosen": -2.3348028659820557, |
|
"logits/rejected": -1.6338012218475342, |
|
"logps/chosen": -478.33428955078125, |
|
"logps/rejected": -730.6288452148438, |
|
"loss": 0.2324, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.33109721541404724, |
|
"rewards/margins": 0.2853388786315918, |
|
"rewards/rejected": -0.6164361238479614, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.7974956823368728e-06, |
|
"logits/chosen": -2.156118631362915, |
|
"logits/rejected": -1.6093488931655884, |
|
"logps/chosen": -582.0015258789062, |
|
"logps/rejected": -735.3878784179688, |
|
"loss": 0.2101, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.4120170474052429, |
|
"rewards/margins": 0.207681804895401, |
|
"rewards/rejected": -0.6196987628936768, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.7640651449672913e-06, |
|
"logits/chosen": -2.3183345794677734, |
|
"logits/rejected": -1.6045589447021484, |
|
"logps/chosen": -474.8514709472656, |
|
"logps/rejected": -684.0841064453125, |
|
"loss": 0.2061, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.28908270597457886, |
|
"rewards/margins": 0.27025845646858215, |
|
"rewards/rejected": -0.5593411326408386, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.7307780795558743e-06, |
|
"logits/chosen": -2.1290550231933594, |
|
"logits/rejected": -1.5637315511703491, |
|
"logps/chosen": -484.809814453125, |
|
"logps/rejected": -644.54052734375, |
|
"loss": 0.2349, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.3229495882987976, |
|
"rewards/margins": 0.21575050055980682, |
|
"rewards/rejected": -0.5387001633644104, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.6976409754819767e-06, |
|
"logits/chosen": -2.2557132244110107, |
|
"logits/rejected": -1.787325143814087, |
|
"logps/chosen": -505.43115234375, |
|
"logps/rejected": -684.2761840820312, |
|
"loss": 0.2373, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.3303944170475006, |
|
"rewards/margins": 0.22589488327503204, |
|
"rewards/rejected": -0.5562892556190491, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.6646602928896962e-06, |
|
"logits/chosen": -2.1125569343566895, |
|
"logits/rejected": -1.5178847312927246, |
|
"logps/chosen": -604.7052612304688, |
|
"logps/rejected": -748.1799926757812, |
|
"loss": 0.2035, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.4366299510002136, |
|
"rewards/margins": 0.2077966034412384, |
|
"rewards/rejected": -0.6444265246391296, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.6318424614284525e-06, |
|
"logits/chosen": -2.0228872299194336, |
|
"logits/rejected": -1.6469089984893799, |
|
"logps/chosen": -619.0574951171875, |
|
"logps/rejected": -747.111328125, |
|
"loss": 0.2531, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.4670810103416443, |
|
"rewards/margins": 0.15774844586849213, |
|
"rewards/rejected": -0.6248295307159424, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.5991938789995138e-06, |
|
"logits/chosen": -2.178020477294922, |
|
"logits/rejected": -1.616796851158142, |
|
"logps/chosen": -625.3834838867188, |
|
"logps/rejected": -798.6873168945312, |
|
"loss": 0.2581, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.46300649642944336, |
|
"rewards/margins": 0.23273572325706482, |
|
"rewards/rejected": -0.6957422494888306, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.5667209105087134e-06, |
|
"logits/chosen": -2.1452012062072754, |
|
"logits/rejected": -1.4882639646530151, |
|
"logps/chosen": -637.6602783203125, |
|
"logps/rejected": -835.0618896484375, |
|
"loss": 0.2314, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.4847962260246277, |
|
"rewards/margins": 0.23071709275245667, |
|
"rewards/rejected": -0.7155133485794067, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.5344298866256002e-06, |
|
"logits/chosen": -2.1361632347106934, |
|
"logits/rejected": -1.393336296081543, |
|
"logps/chosen": -598.9241333007812, |
|
"logps/rejected": -825.7525634765625, |
|
"loss": 0.2541, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.4569925367832184, |
|
"rewards/margins": 0.26417863368988037, |
|
"rewards/rejected": -0.7211712002754211, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.502327102549262e-06, |
|
"logits/chosen": -2.1626908779144287, |
|
"logits/rejected": -1.562538743019104, |
|
"logps/chosen": -468.4820251464844, |
|
"logps/rejected": -660.3966064453125, |
|
"loss": 0.204, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.3353859782218933, |
|
"rewards/margins": 0.2233034074306488, |
|
"rewards/rejected": -0.5586894154548645, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.4704188167810635e-06, |
|
"logits/chosen": -2.2278897762298584, |
|
"logits/rejected": -1.6712696552276611, |
|
"logps/chosen": -513.513916015625, |
|
"logps/rejected": -695.208984375, |
|
"loss": 0.2159, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.362549364566803, |
|
"rewards/margins": 0.23459453880786896, |
|
"rewards/rejected": -0.5971439480781555, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.438711249904536e-06, |
|
"logits/chosen": -2.2296204566955566, |
|
"logits/rejected": -1.5326087474822998, |
|
"logps/chosen": -495.2484436035156, |
|
"logps/rejected": -728.9078369140625, |
|
"loss": 0.2267, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.3783426582813263, |
|
"rewards/margins": 0.26764681935310364, |
|
"rewards/rejected": -0.6459894180297852, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.4072105833726685e-06, |
|
"logits/chosen": -2.308741569519043, |
|
"logits/rejected": -1.57771897315979, |
|
"logps/chosen": -545.0296630859375, |
|
"logps/rejected": -739.1373291015625, |
|
"loss": 0.2682, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.3485683798789978, |
|
"rewards/margins": 0.25783371925354004, |
|
"rewards/rejected": -0.6064020991325378, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.375922958302815e-06, |
|
"logits/chosen": -2.177499532699585, |
|
"logits/rejected": -1.6153056621551514, |
|
"logps/chosen": -575.4890747070312, |
|
"logps/rejected": -719.1101684570312, |
|
"loss": 0.257, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.4291258454322815, |
|
"rewards/margins": 0.1913391649723053, |
|
"rewards/rejected": -0.6204649209976196, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.3448544742794792e-06, |
|
"logits/chosen": -2.359710931777954, |
|
"logits/rejected": -1.8718398809432983, |
|
"logps/chosen": -518.01318359375, |
|
"logps/rejected": -663.861083984375, |
|
"loss": 0.2177, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.39127427339553833, |
|
"rewards/margins": 0.15787231922149658, |
|
"rewards/rejected": -0.5491466522216797, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.3140111881651773e-06, |
|
"logits/chosen": -1.9541170597076416, |
|
"logits/rejected": -1.3082023859024048, |
|
"logps/chosen": -529.1683349609375, |
|
"logps/rejected": -741.0734252929688, |
|
"loss": 0.219, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.3982730805873871, |
|
"rewards/margins": 0.24865877628326416, |
|
"rewards/rejected": -0.6469318866729736, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.2833991129196508e-06, |
|
"logits/chosen": -2.241741418838501, |
|
"logits/rejected": -1.4685299396514893, |
|
"logps/chosen": -470.96209716796875, |
|
"logps/rejected": -709.0288696289062, |
|
"loss": 0.216, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.3294115662574768, |
|
"rewards/margins": 0.2778538763523102, |
|
"rewards/rejected": -0.6072654724121094, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.2530242164276236e-06, |
|
"logits/chosen": -2.0970499515533447, |
|
"logits/rejected": -1.3942879438400269, |
|
"logps/chosen": -486.5403747558594, |
|
"logps/rejected": -718.138671875, |
|
"loss": 0.2207, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.31325727701187134, |
|
"rewards/margins": 0.3117991089820862, |
|
"rewards/rejected": -0.6250563859939575, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.2228924203353507e-06, |
|
"logits/chosen": -2.068192720413208, |
|
"logits/rejected": -1.585180401802063, |
|
"logps/chosen": -533.9257202148438, |
|
"logps/rejected": -647.5416870117188, |
|
"loss": 0.2262, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.390627920627594, |
|
"rewards/margins": 0.16129513084888458, |
|
"rewards/rejected": -0.5519230365753174, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.1930095988961837e-06, |
|
"logits/chosen": -2.2689132690429688, |
|
"logits/rejected": -1.6284101009368896, |
|
"logps/chosen": -489.23468017578125, |
|
"logps/rejected": -698.836181640625, |
|
"loss": 0.2103, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.31348201632499695, |
|
"rewards/margins": 0.2569728493690491, |
|
"rewards/rejected": -0.5704549551010132, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.1633815778253721e-06, |
|
"logits/chosen": -2.223635673522949, |
|
"logits/rejected": -1.5692812204360962, |
|
"logps/chosen": -564.4153442382812, |
|
"logps/rejected": -758.5940551757812, |
|
"loss": 0.2683, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.3882458806037903, |
|
"rewards/margins": 0.24454763531684875, |
|
"rewards/rejected": -0.6327935457229614, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.1340141331643276e-06, |
|
"logits/chosen": -2.226630926132202, |
|
"logits/rejected": -1.6583023071289062, |
|
"logps/chosen": -499.40155029296875, |
|
"logps/rejected": -738.2061767578125, |
|
"loss": 0.2232, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.3272217810153961, |
|
"rewards/margins": 0.2891238331794739, |
|
"rewards/rejected": -0.6163456439971924, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.1049129901545756e-06, |
|
"logits/chosen": -2.193066358566284, |
|
"logits/rejected": -1.6954580545425415, |
|
"logps/chosen": -497.5428161621094, |
|
"logps/rejected": -683.050048828125, |
|
"loss": 0.2054, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.3609580397605896, |
|
"rewards/margins": 0.21754273772239685, |
|
"rewards/rejected": -0.5785007476806641, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.0760838221216065e-06, |
|
"logits/chosen": -2.311552047729492, |
|
"logits/rejected": -1.7207205295562744, |
|
"logps/chosen": -450.60504150390625, |
|
"logps/rejected": -628.2011108398438, |
|
"loss": 0.2457, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.25561460852622986, |
|
"rewards/margins": 0.24946312606334686, |
|
"rewards/rejected": -0.5050776600837708, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.0475322493688506e-06, |
|
"logits/chosen": -2.1966607570648193, |
|
"logits/rejected": -1.5143920183181763, |
|
"logps/chosen": -420.51885986328125, |
|
"logps/rejected": -698.9932250976562, |
|
"loss": 0.2089, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.26709431409835815, |
|
"rewards/margins": 0.31104663014411926, |
|
"rewards/rejected": -0.578140914440155, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.0192638380819884e-06, |
|
"logits/chosen": -2.3707401752471924, |
|
"logits/rejected": -1.5773122310638428, |
|
"logps/chosen": -453.8907165527344, |
|
"logps/rejected": -664.8104248046875, |
|
"loss": 0.2373, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.2851904630661011, |
|
"rewards/margins": 0.2743096947669983, |
|
"rewards/rejected": -0.5595001578330994, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 9.912840992438087e-07, |
|
"logits/chosen": -2.176928758621216, |
|
"logits/rejected": -1.5890274047851562, |
|
"logps/chosen": -573.779541015625, |
|
"logps/rejected": -796.2474975585938, |
|
"loss": 0.2305, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.4253336787223816, |
|
"rewards/margins": 0.26153475046157837, |
|
"rewards/rejected": -0.6868684887886047, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 9.63598487559839e-07, |
|
"logits/chosen": -2.2372374534606934, |
|
"logits/rejected": -1.546007752418518, |
|
"logps/chosen": -464.493408203125, |
|
"logps/rejected": -669.3597412109375, |
|
"loss": 0.208, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.30372193455696106, |
|
"rewards/margins": 0.278639018535614, |
|
"rewards/rejected": -0.5823609828948975, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 9.362124003949324e-07, |
|
"logits/chosen": -2.1051459312438965, |
|
"logits/rejected": -1.6941722631454468, |
|
"logps/chosen": -516.5181884765625, |
|
"logps/rejected": -681.1585693359375, |
|
"loss": 0.2474, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.36375877261161804, |
|
"rewards/margins": 0.18948772549629211, |
|
"rewards/rejected": -0.5532464981079102, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 9.091311767210453e-07, |
|
"logits/chosen": -2.1879124641418457, |
|
"logits/rejected": -1.6842693090438843, |
|
"logps/chosen": -510.8837890625, |
|
"logps/rejected": -665.6920166015625, |
|
"loss": 0.2358, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.36585649847984314, |
|
"rewards/margins": 0.18732208013534546, |
|
"rewards/rejected": -0.553178608417511, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 8.823600960763901e-07, |
|
"logits/chosen": -2.130765199661255, |
|
"logits/rejected": -1.5883699655532837, |
|
"logps/chosen": -534.0338745117188, |
|
"logps/rejected": -741.09423828125, |
|
"loss": 0.2077, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.38128194212913513, |
|
"rewards/margins": 0.25697097182273865, |
|
"rewards/rejected": -0.6382529139518738, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 8.559043775361816e-07, |
|
"logits/chosen": -2.1295900344848633, |
|
"logits/rejected": -1.5722177028656006, |
|
"logps/chosen": -551.2114868164062, |
|
"logps/rejected": -690.095947265625, |
|
"loss": 0.2276, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.3628248870372772, |
|
"rewards/margins": 0.2208392173051834, |
|
"rewards/rejected": -0.5836641192436218, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 8.297691786951706e-07, |
|
"logits/chosen": -2.266829013824463, |
|
"logits/rejected": -1.4247468709945679, |
|
"logps/chosen": -547.1033935546875, |
|
"logps/rejected": -776.6325073242188, |
|
"loss": 0.2145, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.35183387994766235, |
|
"rewards/margins": 0.31444767117500305, |
|
"rewards/rejected": -0.666281521320343, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 8.039595946621551e-07, |
|
"logits/chosen": -2.2304577827453613, |
|
"logits/rejected": -1.3978160619735718, |
|
"logps/chosen": -538.714599609375, |
|
"logps/rejected": -793.1671142578125, |
|
"loss": 0.2318, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.39072176814079285, |
|
"rewards/margins": 0.2959004342556, |
|
"rewards/rejected": -0.6866222620010376, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 7.784806570666795e-07, |
|
"logits/chosen": -2.147185802459717, |
|
"logits/rejected": -1.5393598079681396, |
|
"logps/chosen": -481.76373291015625, |
|
"logps/rejected": -670.560791015625, |
|
"loss": 0.2545, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.3331444263458252, |
|
"rewards/margins": 0.2202250212430954, |
|
"rewards/rejected": -0.553369402885437, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 7.533373330781127e-07, |
|
"logits/chosen": -2.355670928955078, |
|
"logits/rejected": -1.5707635879516602, |
|
"logps/chosen": -561.0064697265625, |
|
"logps/rejected": -760.4944458007812, |
|
"loss": 0.2233, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.3677171766757965, |
|
"rewards/margins": 0.2857670485973358, |
|
"rewards/rejected": -0.6534842252731323, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 7.285345244372843e-07, |
|
"logits/chosen": -2.2503583431243896, |
|
"logits/rejected": -1.3964335918426514, |
|
"logps/chosen": -503.65777587890625, |
|
"logps/rejected": -756.98828125, |
|
"loss": 0.1685, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.33578115701675415, |
|
"rewards/margins": 0.312565416097641, |
|
"rewards/rejected": -0.6483466029167175, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 7.040770665008853e-07, |
|
"logits/chosen": -2.2794625759124756, |
|
"logits/rejected": -1.6694520711898804, |
|
"logps/chosen": -546.9520263671875, |
|
"logps/rejected": -695.8685302734375, |
|
"loss": 0.2262, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.3904178738594055, |
|
"rewards/margins": 0.2141040861606598, |
|
"rewards/rejected": -0.6045219302177429, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 6.799697272987976e-07, |
|
"logits/chosen": -2.1750683784484863, |
|
"logits/rejected": -1.3738349676132202, |
|
"logps/chosen": -540.0596923828125, |
|
"logps/rejected": -722.267578125, |
|
"loss": 0.2455, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.3699941039085388, |
|
"rewards/margins": 0.2555733621120453, |
|
"rewards/rejected": -0.6255674958229065, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 6.562172066045655e-07, |
|
"logits/chosen": -2.167599678039551, |
|
"logits/rejected": -1.6329807043075562, |
|
"logps/chosen": -441.4002380371094, |
|
"logps/rejected": -602.337646484375, |
|
"loss": 0.2318, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.31705886125564575, |
|
"rewards/margins": 0.21183231472969055, |
|
"rewards/rejected": -0.5288912057876587, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 6.328241350191619e-07, |
|
"logits/chosen": -2.2226500511169434, |
|
"logits/rejected": -1.537512183189392, |
|
"logps/chosen": -480.624755859375, |
|
"logps/rejected": -686.3212890625, |
|
"loss": 0.2092, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.30479687452316284, |
|
"rewards/margins": 0.2621714472770691, |
|
"rewards/rejected": -0.5669684410095215, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 6.097950730682426e-07, |
|
"logits/chosen": -2.176600456237793, |
|
"logits/rejected": -1.5454185009002686, |
|
"logps/chosen": -503.0887756347656, |
|
"logps/rejected": -687.1947021484375, |
|
"loss": 0.2479, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.3592723608016968, |
|
"rewards/margins": 0.23496529459953308, |
|
"rewards/rejected": -0.5942376255989075, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5.871345103130646e-07, |
|
"logits/chosen": -2.087590217590332, |
|
"logits/rejected": -1.4282548427581787, |
|
"logps/chosen": -597.8372802734375, |
|
"logps/rejected": -783.7901611328125, |
|
"loss": 0.2177, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.44616231322288513, |
|
"rewards/margins": 0.21901002526283264, |
|
"rewards/rejected": -0.665172278881073, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5.64846864475237e-07, |
|
"logits/chosen": -2.1877074241638184, |
|
"logits/rejected": -1.913442611694336, |
|
"logps/chosen": -512.3074951171875, |
|
"logps/rejected": -631.2486572265625, |
|
"loss": 0.2673, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.35744622349739075, |
|
"rewards/margins": 0.16726166009902954, |
|
"rewards/rejected": -0.5247078537940979, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5.429364805754758e-07, |
|
"logits/chosen": -2.0919928550720215, |
|
"logits/rejected": -1.5754165649414062, |
|
"logps/chosen": -502.09405517578125, |
|
"logps/rejected": -650.5390014648438, |
|
"loss": 0.2226, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.3595220446586609, |
|
"rewards/margins": 0.19984325766563416, |
|
"rewards/rejected": -0.5593653321266174, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5.214076300865359e-07, |
|
"logits/chosen": -1.9238027334213257, |
|
"logits/rejected": -1.2277315855026245, |
|
"logps/chosen": -580.2691650390625, |
|
"logps/rejected": -834.8424072265625, |
|
"loss": 0.2114, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.4341210722923279, |
|
"rewards/margins": 0.30377626419067383, |
|
"rewards/rejected": -0.7378972768783569, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5.002645101004766e-07, |
|
"logits/chosen": -2.335980176925659, |
|
"logits/rejected": -1.380081057548523, |
|
"logps/chosen": -504.81024169921875, |
|
"logps/rejected": -794.1038208007812, |
|
"loss": 0.1986, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.3392624855041504, |
|
"rewards/margins": 0.3661222457885742, |
|
"rewards/rejected": -0.7053847908973694, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.795112425104323e-07, |
|
"logits/chosen": -2.2008166313171387, |
|
"logits/rejected": -1.5865943431854248, |
|
"logps/chosen": -581.0762939453125, |
|
"logps/rejected": -769.32177734375, |
|
"loss": 0.2619, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.4084292948246002, |
|
"rewards/margins": 0.25177350640296936, |
|
"rewards/rejected": -0.6602028012275696, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.591518732070402e-07, |
|
"logits/chosen": -1.9570486545562744, |
|
"logits/rejected": -1.4179704189300537, |
|
"logps/chosen": -534.230224609375, |
|
"logps/rejected": -729.9030151367188, |
|
"loss": 0.2396, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.3860064148902893, |
|
"rewards/margins": 0.24045896530151367, |
|
"rewards/rejected": -0.6264654397964478, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.391903712896861e-07, |
|
"logits/chosen": -2.1633055210113525, |
|
"logits/rejected": -1.5320649147033691, |
|
"logps/chosen": -577.50439453125, |
|
"logps/rejected": -789.090087890625, |
|
"loss": 0.242, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.4344731271266937, |
|
"rewards/margins": 0.2326418161392212, |
|
"rewards/rejected": -0.6671148538589478, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.196306282927187e-07, |
|
"logits/chosen": -2.198305130004883, |
|
"logits/rejected": -1.6743977069854736, |
|
"logps/chosen": -516.3706665039062, |
|
"logps/rejected": -713.3460693359375, |
|
"loss": 0.2211, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.3524255156517029, |
|
"rewards/margins": 0.25238287448883057, |
|
"rewards/rejected": -0.6048084497451782, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.0047645742679275e-07, |
|
"logits/chosen": -2.15580415725708, |
|
"logits/rejected": -1.6634889841079712, |
|
"logps/chosen": -539.8309326171875, |
|
"logps/rejected": -740.0956420898438, |
|
"loss": 0.218, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.36082297563552856, |
|
"rewards/margins": 0.23072440922260284, |
|
"rewards/rejected": -0.5915473103523254, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 3.817315928354695e-07, |
|
"logits/chosen": -2.187629222869873, |
|
"logits/rejected": -1.617875099182129, |
|
"logps/chosen": -528.2720336914062, |
|
"logps/rejected": -703.1224365234375, |
|
"loss": 0.2308, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.36435794830322266, |
|
"rewards/margins": 0.23637576401233673, |
|
"rewards/rejected": -0.6007336378097534, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 3.633996888672428e-07, |
|
"logits/chosen": -2.1398205757141113, |
|
"logits/rejected": -1.48526132106781, |
|
"logps/chosen": -582.8170166015625, |
|
"logps/rejected": -740.50390625, |
|
"loss": 0.206, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.4385349154472351, |
|
"rewards/margins": 0.2139698714017868, |
|
"rewards/rejected": -0.6525048017501831, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 3.4548431936311275e-07, |
|
"logits/chosen": -2.3196969032287598, |
|
"logits/rejected": -1.9413297176361084, |
|
"logps/chosen": -515.4064331054688, |
|
"logps/rejected": -631.7554931640625, |
|
"loss": 0.24, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.3508912920951843, |
|
"rewards/margins": 0.1308077871799469, |
|
"rewards/rejected": -0.4816990792751312, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 3.2798897695986155e-07, |
|
"logits/chosen": -2.225336790084839, |
|
"logits/rejected": -1.4455909729003906, |
|
"logps/chosen": -562.00439453125, |
|
"logps/rejected": -792.2058715820312, |
|
"loss": 0.2268, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.3739345073699951, |
|
"rewards/margins": 0.29291829466819763, |
|
"rewards/rejected": -0.6668527722358704, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 3.1091707240915704e-07, |
|
"logits/chosen": -2.318101167678833, |
|
"logits/rejected": -1.5058709383010864, |
|
"logps/chosen": -528.3710327148438, |
|
"logps/rejected": -773.945068359375, |
|
"loss": 0.2322, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.3821602761745453, |
|
"rewards/margins": 0.2865941524505615, |
|
"rewards/rejected": -0.6687543988227844, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.942719339126171e-07, |
|
"logits/chosen": -2.114053249359131, |
|
"logits/rejected": -1.4541417360305786, |
|
"logps/chosen": -575.40625, |
|
"logps/rejected": -743.8985595703125, |
|
"loss": 0.2631, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.4056618809700012, |
|
"rewards/margins": 0.24168558418750763, |
|
"rewards/rejected": -0.6473473906517029, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.780568064729716e-07, |
|
"logits/chosen": -2.1276280879974365, |
|
"logits/rejected": -1.5844396352767944, |
|
"logps/chosen": -556.7276000976562, |
|
"logps/rejected": -722.495849609375, |
|
"loss": 0.232, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.40463319420814514, |
|
"rewards/margins": 0.21296298503875732, |
|
"rewards/rejected": -0.6175961494445801, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.622748512614437e-07, |
|
"logits/chosen": -2.328648328781128, |
|
"logits/rejected": -1.7766300439834595, |
|
"logps/chosen": -508.13250732421875, |
|
"logps/rejected": -652.4500122070312, |
|
"loss": 0.2499, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.35677647590637207, |
|
"rewards/margins": 0.18978366255760193, |
|
"rewards/rejected": -0.5465601682662964, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.4692914500147185e-07, |
|
"logits/chosen": -2.1308367252349854, |
|
"logits/rejected": -1.7297455072402954, |
|
"logps/chosen": -539.8509521484375, |
|
"logps/rejected": -701.5316162109375, |
|
"loss": 0.2199, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.37622612714767456, |
|
"rewards/margins": 0.193180650472641, |
|
"rewards/rejected": -0.5694067478179932, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.320226793688979e-07, |
|
"logits/chosen": -2.176285982131958, |
|
"logits/rejected": -1.585010290145874, |
|
"logps/chosen": -597.8599853515625, |
|
"logps/rejected": -753.0974731445312, |
|
"loss": 0.2317, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.4297494888305664, |
|
"rewards/margins": 0.20314817130565643, |
|
"rewards/rejected": -0.632897675037384, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.1755836040873197e-07, |
|
"logits/chosen": -2.09965181350708, |
|
"logits/rejected": -1.5018677711486816, |
|
"logps/chosen": -551.79541015625, |
|
"logps/rejected": -750.493408203125, |
|
"loss": 0.2168, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.4179263114929199, |
|
"rewards/margins": 0.248566672205925, |
|
"rewards/rejected": -0.6664929389953613, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.0353900796861503e-07, |
|
"logits/chosen": -2.2555556297302246, |
|
"logits/rejected": -1.8257776498794556, |
|
"logps/chosen": -505.36663818359375, |
|
"logps/rejected": -631.3004760742188, |
|
"loss": 0.2511, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.35016122460365295, |
|
"rewards/margins": 0.18649618327617645, |
|
"rewards/rejected": -0.5366573929786682, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.8996735514908327e-07, |
|
"logits/chosen": -2.083270311355591, |
|
"logits/rejected": -1.2722394466400146, |
|
"logps/chosen": -549.7798461914062, |
|
"logps/rejected": -800.9820556640625, |
|
"loss": 0.2384, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.3795488774776459, |
|
"rewards/margins": 0.324424147605896, |
|
"rewards/rejected": -0.7039730548858643, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.7684604777074427e-07, |
|
"logits/chosen": -2.2368104457855225, |
|
"logits/rejected": -1.5557681322097778, |
|
"logps/chosen": -581.6041259765625, |
|
"logps/rejected": -770.4801635742188, |
|
"loss": 0.2392, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.4050823152065277, |
|
"rewards/margins": 0.2399863749742508, |
|
"rewards/rejected": -0.6450687646865845, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.6417764385846996e-07, |
|
"logits/chosen": -2.2670297622680664, |
|
"logits/rejected": -1.6125434637069702, |
|
"logps/chosen": -544.0134887695312, |
|
"logps/rejected": -734.3864135742188, |
|
"loss": 0.2371, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.38986533880233765, |
|
"rewards/margins": 0.23339907824993134, |
|
"rewards/rejected": -0.6232645511627197, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.5196461314270438e-07, |
|
"logits/chosen": -2.25602650642395, |
|
"logits/rejected": -1.743583083152771, |
|
"logps/chosen": -564.7320556640625, |
|
"logps/rejected": -716.3707275390625, |
|
"loss": 0.2454, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.42164283990859985, |
|
"rewards/margins": 0.19018793106079102, |
|
"rewards/rejected": -0.6118307709693909, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.4020933657798385e-07, |
|
"logits/chosen": -2.139263868331909, |
|
"logits/rejected": -1.414298415184021, |
|
"logps/chosen": -474.62567138671875, |
|
"logps/rejected": -724.90234375, |
|
"loss": 0.2306, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.3453265130519867, |
|
"rewards/margins": 0.2758641839027405, |
|
"rewards/rejected": -0.6211907267570496, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.2891410587876714e-07, |
|
"logits/chosen": -2.2542724609375, |
|
"logits/rejected": -1.5322940349578857, |
|
"logps/chosen": -551.5656127929688, |
|
"logps/rejected": -730.9732055664062, |
|
"loss": 0.2194, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.37710070610046387, |
|
"rewards/margins": 0.2374318391084671, |
|
"rewards/rejected": -0.614532470703125, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.180811230726589e-07, |
|
"logits/chosen": -2.2804083824157715, |
|
"logits/rejected": -1.5980250835418701, |
|
"logps/chosen": -603.2000122070312, |
|
"logps/rejected": -805.7347412109375, |
|
"loss": 0.2254, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.42967596650123596, |
|
"rewards/margins": 0.24143275618553162, |
|
"rewards/rejected": -0.6711087226867676, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.0771250007112155e-07, |
|
"logits/chosen": -1.982797384262085, |
|
"logits/rejected": -1.3462848663330078, |
|
"logps/chosen": -626.7324829101562, |
|
"logps/rejected": -776.6309814453125, |
|
"loss": 0.2424, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.46235284209251404, |
|
"rewards/margins": 0.2094097137451172, |
|
"rewards/rejected": -0.6717625856399536, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.781025825775392e-08, |
|
"logits/chosen": -2.231228828430176, |
|
"logits/rejected": -1.511236310005188, |
|
"logps/chosen": -614.570068359375, |
|
"logps/rejected": -825.9508666992188, |
|
"loss": 0.2251, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.45310935378074646, |
|
"rewards/margins": 0.26085203886032104, |
|
"rewards/rejected": -0.7139613628387451, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 8.837632809421681e-08, |
|
"logits/chosen": -2.0968010425567627, |
|
"logits/rejected": -1.3801376819610596, |
|
"logps/chosen": -569.6008911132812, |
|
"logps/rejected": -795.4178466796875, |
|
"loss": 0.2706, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.4094913601875305, |
|
"rewards/margins": 0.26325908303260803, |
|
"rewards/rejected": -0.6727504134178162, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 7.941254874388904e-08, |
|
"logits/chosen": -2.363359212875366, |
|
"logits/rejected": -1.888514757156372, |
|
"logps/chosen": -576.6600341796875, |
|
"logps/rejected": -702.62060546875, |
|
"loss": 0.2567, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.43606749176979065, |
|
"rewards/margins": 0.16073410212993622, |
|
"rewards/rejected": -0.5968016386032104, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 7.092066771331507e-08, |
|
"logits/chosen": -2.1661031246185303, |
|
"logits/rejected": -1.3824275732040405, |
|
"logps/chosen": -557.127685546875, |
|
"logps/rejected": -710.2548828125, |
|
"loss": 0.2119, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.38099274039268494, |
|
"rewards/margins": 0.22092202305793762, |
|
"rewards/rejected": -0.6019147634506226, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 6.29023405115281e-08, |
|
"logits/chosen": -2.1738715171813965, |
|
"logits/rejected": -1.355930209159851, |
|
"logps/chosen": -592.2274169921875, |
|
"logps/rejected": -788.6678466796875, |
|
"loss": 0.2163, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.40638837218284607, |
|
"rewards/margins": 0.2868625521659851, |
|
"rewards/rejected": -0.6932509541511536, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 5.535913032730295e-08, |
|
"logits/chosen": -2.406480312347412, |
|
"logits/rejected": -1.549068570137024, |
|
"logps/chosen": -548.6936645507812, |
|
"logps/rejected": -776.6002807617188, |
|
"loss": 0.1999, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.3767138421535492, |
|
"rewards/margins": 0.2962132692337036, |
|
"rewards/rejected": -0.6729270815849304, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.829250772441091e-08, |
|
"logits/chosen": -2.178439140319824, |
|
"logits/rejected": -1.8580322265625, |
|
"logps/chosen": -604.1091918945312, |
|
"logps/rejected": -721.529541015625, |
|
"loss": 0.2414, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.45432454347610474, |
|
"rewards/margins": 0.1475105583667755, |
|
"rewards/rejected": -0.6018351316452026, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.170385035493108e-08, |
|
"logits/chosen": -2.346930503845215, |
|
"logits/rejected": -1.8971471786499023, |
|
"logps/chosen": -603.0961303710938, |
|
"logps/rejected": -768.2886352539062, |
|
"loss": 0.2649, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.43998581171035767, |
|
"rewards/margins": 0.20180657505989075, |
|
"rewards/rejected": -0.6417924165725708, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 3.5594442690671806e-08, |
|
"logits/chosen": -2.013385772705078, |
|
"logits/rejected": -1.435459852218628, |
|
"logps/chosen": -645.1131591796875, |
|
"logps/rejected": -806.417724609375, |
|
"loss": 0.2246, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.48335081338882446, |
|
"rewards/margins": 0.2265961617231369, |
|
"rewards/rejected": -0.7099469900131226, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.9965475772762154e-08, |
|
"logits/chosen": -2.311368465423584, |
|
"logits/rejected": -1.5600301027297974, |
|
"logps/chosen": -517.8285522460938, |
|
"logps/rejected": -704.8583374023438, |
|
"loss": 0.2205, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.3128499984741211, |
|
"rewards/margins": 0.2409205138683319, |
|
"rewards/rejected": -0.5537704825401306, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.48180469794565e-08, |
|
"logits/chosen": -2.281245470046997, |
|
"logits/rejected": -1.7727069854736328, |
|
"logps/chosen": -474.0367126464844, |
|
"logps/rejected": -638.2462768554688, |
|
"loss": 0.2309, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.33747315406799316, |
|
"rewards/margins": 0.18603594601154327, |
|
"rewards/rejected": -0.5235090851783752, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.015315981219651e-08, |
|
"logits/chosen": -2.094449281692505, |
|
"logits/rejected": -1.5543745756149292, |
|
"logps/chosen": -585.0030517578125, |
|
"logps/rejected": -759.0601806640625, |
|
"loss": 0.2485, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.4290609359741211, |
|
"rewards/margins": 0.21084634959697723, |
|
"rewards/rejected": -0.6399072408676147, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.5971723699979015e-08, |
|
"logits/chosen": -2.2383382320404053, |
|
"logits/rejected": -1.490106463432312, |
|
"logps/chosen": -590.9389038085938, |
|
"logps/rejected": -763.5640869140625, |
|
"loss": 0.2426, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.4045529365539551, |
|
"rewards/margins": 0.24901354312896729, |
|
"rewards/rejected": -0.6535664796829224, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.2274553822058944e-08, |
|
"logits/chosen": -2.32003116607666, |
|
"logits/rejected": -1.4864509105682373, |
|
"logps/chosen": -516.053955078125, |
|
"logps/rejected": -737.1478271484375, |
|
"loss": 0.2119, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.3795922100543976, |
|
"rewards/margins": 0.26055005192756653, |
|
"rewards/rejected": -0.6401422619819641, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 9.062370949029231e-09, |
|
"logits/chosen": -2.3300509452819824, |
|
"logits/rejected": -1.7241268157958984, |
|
"logps/chosen": -578.6694946289062, |
|
"logps/rejected": -768.1864013671875, |
|
"loss": 0.2388, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.40254122018814087, |
|
"rewards/margins": 0.2296265810728073, |
|
"rewards/rejected": -0.6321677565574646, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 6.3358013023062656e-09, |
|
"logits/chosen": -2.0109941959381104, |
|
"logits/rejected": -1.2714914083480835, |
|
"logps/chosen": -602.9906005859375, |
|
"logps/rejected": -774.6351318359375, |
|
"loss": 0.2411, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.4527587294578552, |
|
"rewards/margins": 0.22364509105682373, |
|
"rewards/rejected": -0.676403820514679, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.095376432044218e-09, |
|
"logits/chosen": -2.153900623321533, |
|
"logits/rejected": -1.431398630142212, |
|
"logps/chosen": -501.6558532714844, |
|
"logps/rejected": -703.2398681640625, |
|
"loss": 0.2246, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.3516218066215515, |
|
"rewards/margins": 0.26861336827278137, |
|
"rewards/rejected": -0.6202351450920105, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.3415331135115404e-09, |
|
"logits/chosen": -2.1722538471221924, |
|
"logits/rejected": -1.4859097003936768, |
|
"logps/chosen": -573.3174438476562, |
|
"logps/rejected": -785.3301391601562, |
|
"loss": 0.2381, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.41808199882507324, |
|
"rewards/margins": 0.2717761993408203, |
|
"rewards/rejected": -0.6898581981658936, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.0746132619374184e-09, |
|
"logits/chosen": -2.1704633235931396, |
|
"logits/rejected": -1.4999759197235107, |
|
"logps/chosen": -571.4072265625, |
|
"logps/rejected": -791.2116088867188, |
|
"loss": 0.2218, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.41102513670921326, |
|
"rewards/margins": 0.2840521037578583, |
|
"rewards/rejected": -0.6950772404670715, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.9486386585786395e-10, |
|
"logits/chosen": -2.255235433578491, |
|
"logits/rejected": -1.5667657852172852, |
|
"logps/chosen": -458.48248291015625, |
|
"logps/rejected": -655.1217651367188, |
|
"loss": 0.2506, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.30341845750808716, |
|
"rewards/margins": 0.2525646388530731, |
|
"rewards/rejected": -0.5559830665588379, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.4369389622913575e-12, |
|
"logits/chosen": -2.230522394180298, |
|
"logits/rejected": -1.6053917407989502, |
|
"logps/chosen": -498.0244140625, |
|
"logps/rejected": -680.3582763671875, |
|
"loss": 0.2225, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.36517006158828735, |
|
"rewards/margins": 0.2226562201976776, |
|
"rewards/rejected": -0.5878263115882874, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"step": 2501, |
|
"total_flos": 0.0, |
|
"train_loss": 0.2318923625944615, |
|
"train_runtime": 76628.4869, |
|
"train_samples_per_second": 0.391, |
|
"train_steps_per_second": 0.033 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2501, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|