diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,10544 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 7500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 6.666666666666667e-09, + "logits/chosen": -2.4099323749542236, + "logits/rejected": -1.6240229606628418, + "logps/chosen": -448.31744384765625, + "logps/rejected": -191.33251953125, + "loss": 0.4075, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 6.666666666666668e-08, + "logits/chosen": -1.989020824432373, + "logits/rejected": -1.5890716314315796, + "logps/chosen": -236.1018829345703, + "logps/rejected": -226.4254913330078, + "loss": 0.3306, + "rewards/accuracies": 0.4166666567325592, + "rewards/chosen": -7.88598208600888e-06, + "rewards/margins": 3.9829301385907456e-05, + "rewards/rejected": -4.771529711433686e-05, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 1.3333333333333336e-07, + "logits/chosen": -1.8541256189346313, + "logits/rejected": -1.5651360750198364, + "logps/chosen": -270.60076904296875, + "logps/rejected": -270.3322448730469, + "loss": 0.3308, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -5.58321007702034e-05, + "rewards/margins": 6.728438893333077e-05, + "rewards/rejected": -0.00012311647878959775, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 2.0000000000000002e-07, + "logits/chosen": -2.009821653366089, + "logits/rejected": -1.5267605781555176, + "logps/chosen": -289.7004699707031, + "logps/rejected": -240.9877166748047, + "loss": 0.3801, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": 6.77972930134274e-05, + "rewards/margins": 0.00017511224723421037, + "rewards/rejected": -0.00010731497604865581, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 2.666666666666667e-07, + "logits/chosen": -2.0199921131134033, + "logits/rejected": -1.3087493181228638, + "logps/chosen": -318.3033752441406, + "logps/rejected": -240.2353057861328, + "loss": 0.3182, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.0001012681022984907, + "rewards/margins": 0.00041467478149570525, + "rewards/rejected": -0.0003134066646452993, + "step": 40 + }, + { + "epoch": 0.01, + "learning_rate": 3.3333333333333335e-07, + "logits/chosen": -1.5997120141983032, + "logits/rejected": -1.3897509574890137, + "logps/chosen": -229.5198211669922, + "logps/rejected": -240.71969604492188, + "loss": 0.3759, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 0.0002661711478140205, + "rewards/margins": 0.0016337096458300948, + "rewards/rejected": -0.0013675385853275657, + "step": 50 + }, + { + "epoch": 0.01, + "learning_rate": 4.0000000000000003e-07, + "logits/chosen": -2.0922536849975586, + "logits/rejected": -1.6669349670410156, + "logps/chosen": -247.25009155273438, + "logps/rejected": -203.55384826660156, + "loss": 0.3801, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.0004775059060193598, + "rewards/margins": 0.0029451469890773296, + "rewards/rejected": -0.0034226528368890285, + "step": 60 + }, + { + "epoch": 0.01, + "learning_rate": 4.666666666666667e-07, + "logits/chosen": -1.8292672634124756, + "logits/rejected": -1.3837965726852417, + "logps/chosen": -267.9219970703125, + "logps/rejected": -246.97720336914062, + "loss": 0.3532, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 0.00016045381198637187, + "rewards/margins": 0.007861686870455742, + "rewards/rejected": -0.007701232098042965, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 5.333333333333335e-07, + "logits/chosen": -1.9224233627319336, + "logits/rejected": -1.5631624460220337, + "logps/chosen": -271.2846374511719, + "logps/rejected": -298.92950439453125, + "loss": 0.2886, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.011401079595088959, + "rewards/margins": 0.021195482462644577, + "rewards/rejected": -0.032596562057733536, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 6.000000000000001e-07, + "logits/chosen": -1.8209606409072876, + "logits/rejected": -1.4652128219604492, + "logps/chosen": -242.1433868408203, + "logps/rejected": -284.43731689453125, + "loss": 0.3295, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.030289877206087112, + "rewards/margins": 0.016816267743706703, + "rewards/rejected": -0.047106143087148666, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 6.666666666666667e-07, + "logits/chosen": -2.0971648693084717, + "logits/rejected": -1.5691124200820923, + "logps/chosen": -373.49371337890625, + "logps/rejected": -374.1169128417969, + "loss": 0.2423, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.04245720058679581, + "rewards/margins": 0.04919002950191498, + "rewards/rejected": -0.09164722263813019, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 7.333333333333334e-07, + "logits/chosen": -2.0052523612976074, + "logits/rejected": -1.4158036708831787, + "logps/chosen": -284.71112060546875, + "logps/rejected": -388.4095458984375, + "loss": 0.1985, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.03486671671271324, + "rewards/margins": 0.10158306360244751, + "rewards/rejected": -0.13644976913928986, + "step": 110 + }, + { + "epoch": 0.02, + "learning_rate": 8.000000000000001e-07, + "logits/chosen": -1.7910232543945312, + "logits/rejected": -1.195896863937378, + "logps/chosen": -349.8062438964844, + "logps/rejected": -492.1412048339844, + "loss": 0.2283, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.07517056912183762, + "rewards/margins": 0.15838217735290527, + "rewards/rejected": -0.2335527241230011, + "step": 120 + }, + { + "epoch": 0.02, + "learning_rate": 8.666666666666668e-07, + "logits/chosen": -1.8132175207138062, + "logits/rejected": -1.30657958984375, + "logps/chosen": -379.88336181640625, + "logps/rejected": -567.0684814453125, + "loss": 0.1626, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.11514802277088165, + "rewards/margins": 0.23259301483631134, + "rewards/rejected": -0.347741037607193, + "step": 130 + }, + { + "epoch": 0.02, + "learning_rate": 9.333333333333334e-07, + "logits/chosen": -1.6623605489730835, + "logits/rejected": -1.2550121545791626, + "logps/chosen": -436.0331115722656, + "logps/rejected": -693.096923828125, + "loss": 0.1626, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.14200234413146973, + "rewards/margins": 0.25166502594947815, + "rewards/rejected": -0.3936673700809479, + "step": 140 + }, + { + "epoch": 0.02, + "learning_rate": 1.0000000000000002e-06, + "logits/chosen": -1.7733418941497803, + "logits/rejected": -1.16778564453125, + "logps/chosen": -511.66925048828125, + "logps/rejected": -699.0992431640625, + "loss": 0.1233, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.21348969638347626, + "rewards/margins": 0.27515870332717896, + "rewards/rejected": -0.48864835500717163, + "step": 150 + }, + { + "epoch": 0.02, + "learning_rate": 1.066666666666667e-06, + "logits/chosen": -1.879869818687439, + "logits/rejected": -1.2550278902053833, + "logps/chosen": -482.166259765625, + "logps/rejected": -760.3292236328125, + "loss": 0.124, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.2403174340724945, + "rewards/margins": 0.2997317612171173, + "rewards/rejected": -0.5400491952896118, + "step": 160 + }, + { + "epoch": 0.02, + "learning_rate": 1.1333333333333334e-06, + "logits/chosen": -1.6282508373260498, + "logits/rejected": -0.9627411961555481, + "logps/chosen": -414.8555603027344, + "logps/rejected": -705.8746337890625, + "loss": 0.1528, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.14894428849220276, + "rewards/margins": 0.37094196677207947, + "rewards/rejected": -0.5198861956596375, + "step": 170 + }, + { + "epoch": 0.02, + "learning_rate": 1.2000000000000002e-06, + "logits/chosen": -1.5305386781692505, + "logits/rejected": -1.1183087825775146, + "logps/chosen": -443.141845703125, + "logps/rejected": -795.3884887695312, + "loss": 0.1274, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.16620799899101257, + "rewards/margins": 0.3535141348838806, + "rewards/rejected": -0.5197221040725708, + "step": 180 + }, + { + "epoch": 0.03, + "learning_rate": 1.2666666666666669e-06, + "logits/chosen": -1.6709671020507812, + "logits/rejected": -1.0611721277236938, + "logps/chosen": -524.5181274414062, + "logps/rejected": -810.8128662109375, + "loss": 0.1407, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.2099526822566986, + "rewards/margins": 0.3740180432796478, + "rewards/rejected": -0.583970844745636, + "step": 190 + }, + { + "epoch": 0.03, + "learning_rate": 1.3333333333333334e-06, + "logits/chosen": -1.864699363708496, + "logits/rejected": -1.0785900354385376, + "logps/chosen": -482.0804748535156, + "logps/rejected": -730.0054931640625, + "loss": 0.1423, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.18910348415374756, + "rewards/margins": 0.29500436782836914, + "rewards/rejected": -0.4841078817844391, + "step": 200 + }, + { + "epoch": 0.03, + "learning_rate": 1.4000000000000001e-06, + "logits/chosen": -1.492980718612671, + "logits/rejected": -0.9422389268875122, + "logps/chosen": -467.66241455078125, + "logps/rejected": -867.6456298828125, + "loss": 0.131, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.2241428643465042, + "rewards/margins": 0.3977489471435547, + "rewards/rejected": -0.6218917965888977, + "step": 210 + }, + { + "epoch": 0.03, + "learning_rate": 1.4666666666666669e-06, + "logits/chosen": -1.6000795364379883, + "logits/rejected": -0.9295433163642883, + "logps/chosen": -429.8138732910156, + "logps/rejected": -812.4957885742188, + "loss": 0.0933, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.15654727816581726, + "rewards/margins": 0.4246586263179779, + "rewards/rejected": -0.5812059640884399, + "step": 220 + }, + { + "epoch": 0.03, + "learning_rate": 1.5333333333333334e-06, + "logits/chosen": -1.561963438987732, + "logits/rejected": -1.016340970993042, + "logps/chosen": -496.5228576660156, + "logps/rejected": -820.2783203125, + "loss": 0.1253, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.23068983852863312, + "rewards/margins": 0.3521668314933777, + "rewards/rejected": -0.5828567743301392, + "step": 230 + }, + { + "epoch": 0.03, + "learning_rate": 1.6000000000000001e-06, + "logits/chosen": -1.543303370475769, + "logits/rejected": -1.0427916049957275, + "logps/chosen": -444.46014404296875, + "logps/rejected": -828.2269287109375, + "loss": 0.0941, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1384342759847641, + "rewards/margins": 0.4045810103416443, + "rewards/rejected": -0.5430153012275696, + "step": 240 + }, + { + "epoch": 0.03, + "learning_rate": 1.6666666666666667e-06, + "logits/chosen": -1.546133279800415, + "logits/rejected": -1.1522371768951416, + "logps/chosen": -533.6563110351562, + "logps/rejected": -807.0501098632812, + "loss": 0.1711, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.2938057780265808, + "rewards/margins": 0.25221148133277893, + "rewards/rejected": -0.5460172295570374, + "step": 250 + }, + { + "epoch": 0.03, + "learning_rate": 1.7333333333333336e-06, + "logits/chosen": -1.8788810968399048, + "logits/rejected": -1.0959275960922241, + "logps/chosen": -480.61639404296875, + "logps/rejected": -740.069091796875, + "loss": 0.1395, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.1702469289302826, + "rewards/margins": 0.3451237082481384, + "rewards/rejected": -0.5153706073760986, + "step": 260 + }, + { + "epoch": 0.04, + "learning_rate": 1.8000000000000001e-06, + "logits/chosen": -1.5190702676773071, + "logits/rejected": -0.980027973651886, + "logps/chosen": -489.94207763671875, + "logps/rejected": -871.6253662109375, + "loss": 0.0945, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.2337626963853836, + "rewards/margins": 0.3760890066623688, + "rewards/rejected": -0.6098517179489136, + "step": 270 + }, + { + "epoch": 0.04, + "learning_rate": 1.8666666666666669e-06, + "logits/chosen": -1.468842625617981, + "logits/rejected": -1.388031005859375, + "logps/chosen": -381.8927307128906, + "logps/rejected": -781.1408081054688, + "loss": 0.176, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.19682732224464417, + "rewards/margins": 0.3542131185531616, + "rewards/rejected": -0.5510404109954834, + "step": 280 + }, + { + "epoch": 0.04, + "learning_rate": 1.9333333333333336e-06, + "logits/chosen": -1.6719022989273071, + "logits/rejected": -0.883672833442688, + "logps/chosen": -480.5074768066406, + "logps/rejected": -683.6339111328125, + "loss": 0.1495, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.15120992064476013, + "rewards/margins": 0.3173143267631531, + "rewards/rejected": -0.4685242176055908, + "step": 290 + }, + { + "epoch": 0.04, + "learning_rate": 2.0000000000000003e-06, + "logits/chosen": -1.5708786249160767, + "logits/rejected": -0.9583484530448914, + "logps/chosen": -463.66143798828125, + "logps/rejected": -833.3902587890625, + "loss": 0.1112, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1888360232114792, + "rewards/margins": 0.4084080159664154, + "rewards/rejected": -0.5972440838813782, + "step": 300 + }, + { + "epoch": 0.04, + "learning_rate": 2.0666666666666666e-06, + "logits/chosen": -1.4761362075805664, + "logits/rejected": -0.9715279340744019, + "logps/chosen": -498.23876953125, + "logps/rejected": -934.1094970703125, + "loss": 0.1048, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.23782452940940857, + "rewards/margins": 0.4102650582790375, + "rewards/rejected": -0.648089587688446, + "step": 310 + }, + { + "epoch": 0.04, + "learning_rate": 2.133333333333334e-06, + "logits/chosen": -1.631696343421936, + "logits/rejected": -0.9176268577575684, + "logps/chosen": -534.7830810546875, + "logps/rejected": -852.8056640625, + "loss": 0.1289, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.23324842751026154, + "rewards/margins": 0.3649081587791443, + "rewards/rejected": -0.5981565713882446, + "step": 320 + }, + { + "epoch": 0.04, + "learning_rate": 2.2e-06, + "logits/chosen": -1.6708920001983643, + "logits/rejected": -1.3186357021331787, + "logps/chosen": -372.9091796875, + "logps/rejected": -703.5186767578125, + "loss": 0.1738, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13428106904029846, + "rewards/margins": 0.3291914463043213, + "rewards/rejected": -0.463472455739975, + "step": 330 + }, + { + "epoch": 0.05, + "learning_rate": 2.266666666666667e-06, + "logits/chosen": -1.4466753005981445, + "logits/rejected": -1.0592036247253418, + "logps/chosen": -432.8397521972656, + "logps/rejected": -774.4104614257812, + "loss": 0.1597, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.19159728288650513, + "rewards/margins": 0.31294533610343933, + "rewards/rejected": -0.5045426487922668, + "step": 340 + }, + { + "epoch": 0.05, + "learning_rate": 2.3333333333333336e-06, + "logits/chosen": -1.584106206893921, + "logits/rejected": -1.0651090145111084, + "logps/chosen": -403.37353515625, + "logps/rejected": -676.6839599609375, + "loss": 0.1674, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.1616145223379135, + "rewards/margins": 0.2902247905731201, + "rewards/rejected": -0.4518393576145172, + "step": 350 + }, + { + "epoch": 0.05, + "learning_rate": 2.4000000000000003e-06, + "logits/chosen": -1.2375500202178955, + "logits/rejected": -0.8195334672927856, + "logps/chosen": -413.3699645996094, + "logps/rejected": -646.4727172851562, + "loss": 0.1935, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.17713095247745514, + "rewards/margins": 0.27770182490348816, + "rewards/rejected": -0.4548327922821045, + "step": 360 + }, + { + "epoch": 0.05, + "learning_rate": 2.466666666666667e-06, + "logits/chosen": -1.9084575176239014, + "logits/rejected": -1.1361953020095825, + "logps/chosen": -406.1421813964844, + "logps/rejected": -712.7696533203125, + "loss": 0.1213, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.10945934057235718, + "rewards/margins": 0.37037864327430725, + "rewards/rejected": -0.47983798384666443, + "step": 370 + }, + { + "epoch": 0.05, + "learning_rate": 2.5333333333333338e-06, + "logits/chosen": -1.6088730096817017, + "logits/rejected": -0.9428955316543579, + "logps/chosen": -603.7337646484375, + "logps/rejected": -854.6677856445312, + "loss": 0.1224, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.32599180936813354, + "rewards/margins": 0.3368051052093506, + "rewards/rejected": -0.6627969145774841, + "step": 380 + }, + { + "epoch": 0.05, + "learning_rate": 2.6e-06, + "logits/chosen": -1.5955326557159424, + "logits/rejected": -1.1725094318389893, + "logps/chosen": -557.5750732421875, + "logps/rejected": -774.2070922851562, + "loss": 0.1827, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.3110761344432831, + "rewards/margins": 0.2598307728767395, + "rewards/rejected": -0.570906937122345, + "step": 390 + }, + { + "epoch": 0.05, + "learning_rate": 2.666666666666667e-06, + "logits/chosen": -1.635376214981079, + "logits/rejected": -0.9140356779098511, + "logps/chosen": -605.5277709960938, + "logps/rejected": -973.1331787109375, + "loss": 0.0749, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.30397385358810425, + "rewards/margins": 0.42590442299842834, + "rewards/rejected": -0.729878306388855, + "step": 400 + }, + { + "epoch": 0.05, + "learning_rate": 2.7333333333333336e-06, + "logits/chosen": -1.6633880138397217, + "logits/rejected": -1.0514023303985596, + "logps/chosen": -480.266845703125, + "logps/rejected": -835.7138671875, + "loss": 0.1532, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.21217575669288635, + "rewards/margins": 0.3710072338581085, + "rewards/rejected": -0.5831829309463501, + "step": 410 + }, + { + "epoch": 0.06, + "learning_rate": 2.8000000000000003e-06, + "logits/chosen": -1.7376649379730225, + "logits/rejected": -1.1299479007720947, + "logps/chosen": -487.1092834472656, + "logps/rejected": -886.4588012695312, + "loss": 0.1114, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.20204193890094757, + "rewards/margins": 0.41378122568130493, + "rewards/rejected": -0.6158231496810913, + "step": 420 + }, + { + "epoch": 0.06, + "learning_rate": 2.866666666666667e-06, + "logits/chosen": -1.5146900415420532, + "logits/rejected": -1.1276448965072632, + "logps/chosen": -339.6864929199219, + "logps/rejected": -572.2979125976562, + "loss": 0.1681, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.10513390600681305, + "rewards/margins": 0.25415563583374023, + "rewards/rejected": -0.3592894971370697, + "step": 430 + }, + { + "epoch": 0.06, + "learning_rate": 2.9333333333333338e-06, + "logits/chosen": -1.6274223327636719, + "logits/rejected": -1.0441734790802002, + "logps/chosen": -395.390869140625, + "logps/rejected": -655.0814208984375, + "loss": 0.1743, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.18642660975456238, + "rewards/margins": 0.28394168615341187, + "rewards/rejected": -0.47036832571029663, + "step": 440 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "logits/chosen": -1.8228683471679688, + "logits/rejected": -1.2179882526397705, + "logps/chosen": -464.56201171875, + "logps/rejected": -823.7962646484375, + "loss": 0.1176, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.21438221633434296, + "rewards/margins": 0.38342320919036865, + "rewards/rejected": -0.597805380821228, + "step": 450 + }, + { + "epoch": 0.06, + "learning_rate": 3.066666666666667e-06, + "logits/chosen": -1.4820787906646729, + "logits/rejected": -0.9871037602424622, + "logps/chosen": -505.381103515625, + "logps/rejected": -717.4215087890625, + "loss": 0.1718, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.28233593702316284, + "rewards/margins": 0.2751788794994354, + "rewards/rejected": -0.5575148463249207, + "step": 460 + }, + { + "epoch": 0.06, + "learning_rate": 3.133333333333334e-06, + "logits/chosen": -1.724001169204712, + "logits/rejected": -0.8987816572189331, + "logps/chosen": -523.7393798828125, + "logps/rejected": -955.359375, + "loss": 0.0896, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.23277001082897186, + "rewards/margins": 0.503928542137146, + "rewards/rejected": -0.7366985082626343, + "step": 470 + }, + { + "epoch": 0.06, + "learning_rate": 3.2000000000000003e-06, + "logits/chosen": -1.648781180381775, + "logits/rejected": -1.0982370376586914, + "logps/chosen": -436.5008239746094, + "logps/rejected": -818.0286865234375, + "loss": 0.1631, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.21771135926246643, + "rewards/margins": 0.38935723900794983, + "rewards/rejected": -0.607068657875061, + "step": 480 + }, + { + "epoch": 0.07, + "learning_rate": 3.266666666666667e-06, + "logits/chosen": -1.3881748914718628, + "logits/rejected": -0.9845792651176453, + "logps/chosen": -424.2598571777344, + "logps/rejected": -879.8922119140625, + "loss": 0.1189, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.16997823119163513, + "rewards/margins": 0.43233782052993774, + "rewards/rejected": -0.6023160219192505, + "step": 490 + }, + { + "epoch": 0.07, + "learning_rate": 3.3333333333333333e-06, + "logits/chosen": -1.9221470355987549, + "logits/rejected": -1.1122715473175049, + "logps/chosen": -424.117919921875, + "logps/rejected": -606.3838500976562, + "loss": 0.1658, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.12460688501596451, + "rewards/margins": 0.3144245743751526, + "rewards/rejected": -0.4390315115451813, + "step": 500 + }, + { + "epoch": 0.07, + "learning_rate": 3.4000000000000005e-06, + "logits/chosen": -1.4596699476242065, + "logits/rejected": -1.0453931093215942, + "logps/chosen": -382.4856262207031, + "logps/rejected": -710.4122314453125, + "loss": 0.1403, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.1713198721408844, + "rewards/margins": 0.3306172490119934, + "rewards/rejected": -0.5019370913505554, + "step": 510 + }, + { + "epoch": 0.07, + "learning_rate": 3.4666666666666672e-06, + "logits/chosen": -1.5380425453186035, + "logits/rejected": -1.0979361534118652, + "logps/chosen": -400.77777099609375, + "logps/rejected": -663.2037353515625, + "loss": 0.16, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.12989595532417297, + "rewards/margins": 0.27567845582962036, + "rewards/rejected": -0.40557441115379333, + "step": 520 + }, + { + "epoch": 0.07, + "learning_rate": 3.5333333333333335e-06, + "logits/chosen": -1.3848693370819092, + "logits/rejected": -1.0763051509857178, + "logps/chosen": -434.2381286621094, + "logps/rejected": -745.5271606445312, + "loss": 0.154, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.20348243415355682, + "rewards/margins": 0.31607288122177124, + "rewards/rejected": -0.519555389881134, + "step": 530 + }, + { + "epoch": 0.07, + "learning_rate": 3.6000000000000003e-06, + "logits/chosen": -1.5418269634246826, + "logits/rejected": -0.9006346464157104, + "logps/chosen": -563.9154052734375, + "logps/rejected": -912.6988525390625, + "loss": 0.1521, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.31098824739456177, + "rewards/margins": 0.3480641841888428, + "rewards/rejected": -0.6590523719787598, + "step": 540 + }, + { + "epoch": 0.07, + "learning_rate": 3.6666666666666666e-06, + "logits/chosen": -1.5205752849578857, + "logits/rejected": -1.076106071472168, + "logps/chosen": -494.74298095703125, + "logps/rejected": -874.4945068359375, + "loss": 0.1266, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.23468537628650665, + "rewards/margins": 0.3964490294456482, + "rewards/rejected": -0.6311343908309937, + "step": 550 + }, + { + "epoch": 0.07, + "learning_rate": 3.7333333333333337e-06, + "logits/chosen": -1.3866068124771118, + "logits/rejected": -0.9945683479309082, + "logps/chosen": -443.53277587890625, + "logps/rejected": -780.4207763671875, + "loss": 0.1463, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.21203942596912384, + "rewards/margins": 0.3310268521308899, + "rewards/rejected": -0.5430662631988525, + "step": 560 + }, + { + "epoch": 0.08, + "learning_rate": 3.8000000000000005e-06, + "logits/chosen": -1.5126006603240967, + "logits/rejected": -0.922463059425354, + "logps/chosen": -485.6847229003906, + "logps/rejected": -759.709716796875, + "loss": 0.1369, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.2078860104084015, + "rewards/margins": 0.3498801589012146, + "rewards/rejected": -0.5577661395072937, + "step": 570 + }, + { + "epoch": 0.08, + "learning_rate": 3.866666666666667e-06, + "logits/chosen": -1.4890224933624268, + "logits/rejected": -1.0690945386886597, + "logps/chosen": -372.1873474121094, + "logps/rejected": -693.4901123046875, + "loss": 0.1531, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.1699579507112503, + "rewards/margins": 0.333346426486969, + "rewards/rejected": -0.5033043622970581, + "step": 580 + }, + { + "epoch": 0.08, + "learning_rate": 3.9333333333333335e-06, + "logits/chosen": -1.496760606765747, + "logits/rejected": -1.0878002643585205, + "logps/chosen": -333.7672424316406, + "logps/rejected": -653.5516357421875, + "loss": 0.1251, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.11113238334655762, + "rewards/margins": 0.3392719626426697, + "rewards/rejected": -0.4504043459892273, + "step": 590 + }, + { + "epoch": 0.08, + "learning_rate": 4.000000000000001e-06, + "logits/chosen": -1.385391354560852, + "logits/rejected": -0.9585914611816406, + "logps/chosen": -380.42291259765625, + "logps/rejected": -723.8530883789062, + "loss": 0.1679, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.15061721205711365, + "rewards/margins": 0.342263400554657, + "rewards/rejected": -0.492880642414093, + "step": 600 + }, + { + "epoch": 0.08, + "learning_rate": 4.066666666666667e-06, + "logits/chosen": -1.3731095790863037, + "logits/rejected": -1.0338678359985352, + "logps/chosen": -412.2789001464844, + "logps/rejected": -753.2153930664062, + "loss": 0.1668, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.19600458443164825, + "rewards/margins": 0.33788540959358215, + "rewards/rejected": -0.5338899493217468, + "step": 610 + }, + { + "epoch": 0.08, + "learning_rate": 4.133333333333333e-06, + "logits/chosen": -1.396970510482788, + "logits/rejected": -0.7782880067825317, + "logps/chosen": -517.6131591796875, + "logps/rejected": -849.923828125, + "loss": 0.1735, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.22774609923362732, + "rewards/margins": 0.35508590936660767, + "rewards/rejected": -0.5828320384025574, + "step": 620 + }, + { + "epoch": 0.08, + "learning_rate": 4.2000000000000004e-06, + "logits/chosen": -1.1911531686782837, + "logits/rejected": -0.7422036528587341, + "logps/chosen": -549.0075073242188, + "logps/rejected": -880.8527221679688, + "loss": 0.1548, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.3094066083431244, + "rewards/margins": 0.3175147473812103, + "rewards/rejected": -0.6269214153289795, + "step": 630 + }, + { + "epoch": 0.09, + "learning_rate": 4.266666666666668e-06, + "logits/chosen": -1.3914610147476196, + "logits/rejected": -0.7553210854530334, + "logps/chosen": -550.94580078125, + "logps/rejected": -705.796875, + "loss": 0.1998, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.282638818025589, + "rewards/margins": 0.24997854232788086, + "rewards/rejected": -0.5326173901557922, + "step": 640 + }, + { + "epoch": 0.09, + "learning_rate": 4.333333333333334e-06, + "logits/chosen": -1.5466039180755615, + "logits/rejected": -0.8693670034408569, + "logps/chosen": -531.4341430664062, + "logps/rejected": -816.57958984375, + "loss": 0.1839, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.2846863865852356, + "rewards/margins": 0.29854658246040344, + "rewards/rejected": -0.5832329392433167, + "step": 650 + }, + { + "epoch": 0.09, + "learning_rate": 4.4e-06, + "logits/chosen": -1.7242590188980103, + "logits/rejected": -1.2059741020202637, + "logps/chosen": -398.84722900390625, + "logps/rejected": -687.1949462890625, + "loss": 0.1864, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15967541933059692, + "rewards/margins": 0.31971973180770874, + "rewards/rejected": -0.4793950915336609, + "step": 660 + }, + { + "epoch": 0.09, + "learning_rate": 4.4666666666666665e-06, + "logits/chosen": -1.4636131525039673, + "logits/rejected": -0.9360952377319336, + "logps/chosen": -465.987060546875, + "logps/rejected": -890.33544921875, + "loss": 0.1079, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.211115762591362, + "rewards/margins": 0.4524083137512207, + "rewards/rejected": -0.6635240316390991, + "step": 670 + }, + { + "epoch": 0.09, + "learning_rate": 4.533333333333334e-06, + "logits/chosen": -1.55280339717865, + "logits/rejected": -0.9204519391059875, + "logps/chosen": -589.2633666992188, + "logps/rejected": -837.0107421875, + "loss": 0.1781, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.2600969672203064, + "rewards/margins": 0.3369695544242859, + "rewards/rejected": -0.5970665216445923, + "step": 680 + }, + { + "epoch": 0.09, + "learning_rate": 4.600000000000001e-06, + "logits/chosen": -1.4817430973052979, + "logits/rejected": -0.7725855708122253, + "logps/chosen": -692.3911743164062, + "logps/rejected": -1051.6116943359375, + "loss": 0.1705, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.3202987313270569, + "rewards/margins": 0.4163621962070465, + "rewards/rejected": -0.7366609573364258, + "step": 690 + }, + { + "epoch": 0.09, + "learning_rate": 4.666666666666667e-06, + "logits/chosen": -1.4252475500106812, + "logits/rejected": -0.8196362257003784, + "logps/chosen": -674.8631591796875, + "logps/rejected": -991.6558837890625, + "loss": 0.1071, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.37729111313819885, + "rewards/margins": 0.34081095457077026, + "rewards/rejected": -0.718101978302002, + "step": 700 + }, + { + "epoch": 0.09, + "learning_rate": 4.7333333333333335e-06, + "logits/chosen": -1.3881539106369019, + "logits/rejected": -0.8435044288635254, + "logps/chosen": -610.1460571289062, + "logps/rejected": -940.4801635742188, + "loss": 0.1247, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.3053412437438965, + "rewards/margins": 0.35758185386657715, + "rewards/rejected": -0.6629230976104736, + "step": 710 + }, + { + "epoch": 0.1, + "learning_rate": 4.800000000000001e-06, + "logits/chosen": -1.4027553796768188, + "logits/rejected": -0.835735023021698, + "logps/chosen": -514.1181030273438, + "logps/rejected": -825.6218872070312, + "loss": 0.1403, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.24213871359825134, + "rewards/margins": 0.3390628397464752, + "rewards/rejected": -0.581201434135437, + "step": 720 + }, + { + "epoch": 0.1, + "learning_rate": 4.866666666666667e-06, + "logits/chosen": -1.5231796503067017, + "logits/rejected": -0.9179169535636902, + "logps/chosen": -494.64447021484375, + "logps/rejected": -796.8231201171875, + "loss": 0.1334, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.21210741996765137, + "rewards/margins": 0.3426913917064667, + "rewards/rejected": -0.5547988414764404, + "step": 730 + }, + { + "epoch": 0.1, + "learning_rate": 4.933333333333334e-06, + "logits/chosen": -1.555768609046936, + "logits/rejected": -0.8261078000068665, + "logps/chosen": -448.23956298828125, + "logps/rejected": -778.2855834960938, + "loss": 0.0803, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.17634207010269165, + "rewards/margins": 0.4066527485847473, + "rewards/rejected": -0.582994818687439, + "step": 740 + }, + { + "epoch": 0.1, + "learning_rate": 5e-06, + "logits/chosen": -1.5711801052093506, + "logits/rejected": -1.0917423963546753, + "logps/chosen": -538.0059814453125, + "logps/rejected": -896.7288818359375, + "loss": 0.1498, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.23624293506145477, + "rewards/margins": 0.36364543437957764, + "rewards/rejected": -0.5998883843421936, + "step": 750 + }, + { + "epoch": 0.1, + "learning_rate": 4.999972922944898e-06, + "logits/chosen": -1.443225383758545, + "logits/rejected": -0.8622598648071289, + "logps/chosen": -394.50592041015625, + "logps/rejected": -769.7340087890625, + "loss": 0.0854, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.13901114463806152, + "rewards/margins": 0.40866619348526, + "rewards/rejected": -0.5476772785186768, + "step": 760 + }, + { + "epoch": 0.1, + "learning_rate": 4.999891692366121e-06, + "logits/chosen": -1.4831535816192627, + "logits/rejected": -1.0045164823532104, + "logps/chosen": -486.68145751953125, + "logps/rejected": -841.0699462890625, + "loss": 0.1678, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.22009320557117462, + "rewards/margins": 0.36181217432022095, + "rewards/rejected": -0.5819053649902344, + "step": 770 + }, + { + "epoch": 0.1, + "learning_rate": 4.999756310023261e-06, + "logits/chosen": -1.7487876415252686, + "logits/rejected": -1.058393955230713, + "logps/chosen": -549.0352783203125, + "logps/rejected": -802.88671875, + "loss": 0.1662, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.24724209308624268, + "rewards/margins": 0.32394474744796753, + "rewards/rejected": -0.5711868405342102, + "step": 780 + }, + { + "epoch": 0.11, + "learning_rate": 4.99956677884892e-06, + "logits/chosen": -1.243823766708374, + "logits/rejected": -1.0226433277130127, + "logps/chosen": -490.24554443359375, + "logps/rejected": -882.51611328125, + "loss": 0.132, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.2924804091453552, + "rewards/margins": 0.3418129086494446, + "rewards/rejected": -0.6342933773994446, + "step": 790 + }, + { + "epoch": 0.11, + "learning_rate": 4.999323102948655e-06, + "logits/chosen": -1.3914577960968018, + "logits/rejected": -0.9622076153755188, + "logps/chosen": -560.9547119140625, + "logps/rejected": -822.1790161132812, + "loss": 0.2206, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.3030504584312439, + "rewards/margins": 0.3117767572402954, + "rewards/rejected": -0.6148272752761841, + "step": 800 + }, + { + "epoch": 0.11, + "learning_rate": 4.999025287600886e-06, + "logits/chosen": -1.7494767904281616, + "logits/rejected": -1.1633926630020142, + "logps/chosen": -481.3182678222656, + "logps/rejected": -820.7761840820312, + "loss": 0.1194, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.14612558484077454, + "rewards/margins": 0.40427789092063904, + "rewards/rejected": -0.5504035353660583, + "step": 810 + }, + { + "epoch": 0.11, + "learning_rate": 4.998673339256785e-06, + "logits/chosen": -1.7987353801727295, + "logits/rejected": -1.194678544998169, + "logps/chosen": -485.6026916503906, + "logps/rejected": -903.0382690429688, + "loss": 0.106, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.20885272324085236, + "rewards/margins": 0.3845108449459076, + "rewards/rejected": -0.5933635830879211, + "step": 820 + }, + { + "epoch": 0.11, + "learning_rate": 4.99826726554013e-06, + "logits/chosen": -1.6719648838043213, + "logits/rejected": -1.136561393737793, + "logps/chosen": -405.9174499511719, + "logps/rejected": -852.544921875, + "loss": 0.097, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.15679454803466797, + "rewards/margins": 0.4471038281917572, + "rewards/rejected": -0.6038983464241028, + "step": 830 + }, + { + "epoch": 0.11, + "learning_rate": 4.997807075247147e-06, + "logits/chosen": -1.4109543561935425, + "logits/rejected": -0.9229291081428528, + "logps/chosen": -410.027587890625, + "logps/rejected": -742.806884765625, + "loss": 0.1399, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.13022413849830627, + "rewards/margins": 0.38399913907051086, + "rewards/rejected": -0.5142232179641724, + "step": 840 + }, + { + "epoch": 0.11, + "learning_rate": 4.997292778346312e-06, + "logits/chosen": -1.8231170177459717, + "logits/rejected": -1.1286346912384033, + "logps/chosen": -409.29254150390625, + "logps/rejected": -768.8543701171875, + "loss": 0.1378, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.13088057935237885, + "rewards/margins": 0.37201347947120667, + "rewards/rejected": -0.5028941035270691, + "step": 850 + }, + { + "epoch": 0.11, + "learning_rate": 4.996724385978142e-06, + "logits/chosen": -1.8869422674179077, + "logits/rejected": -1.3146274089813232, + "logps/chosen": -461.17913818359375, + "logps/rejected": -739.4716186523438, + "loss": 0.1808, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15798479318618774, + "rewards/margins": 0.3153396248817444, + "rewards/rejected": -0.4733244478702545, + "step": 860 + }, + { + "epoch": 0.12, + "learning_rate": 4.996101910454953e-06, + "logits/chosen": -1.6006109714508057, + "logits/rejected": -0.9469090700149536, + "logps/chosen": -449.0116271972656, + "logps/rejected": -766.9967651367188, + "loss": 0.1518, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.14297400414943695, + "rewards/margins": 0.36727023124694824, + "rewards/rejected": -0.5102442502975464, + "step": 870 + }, + { + "epoch": 0.12, + "learning_rate": 4.995425365260585e-06, + "logits/chosen": -1.4800479412078857, + "logits/rejected": -0.9261878728866577, + "logps/chosen": -470.6463317871094, + "logps/rejected": -783.7489624023438, + "loss": 0.1492, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.18247491121292114, + "rewards/margins": 0.3636634945869446, + "rewards/rejected": -0.546138346195221, + "step": 880 + }, + { + "epoch": 0.12, + "learning_rate": 4.994694765050121e-06, + "logits/chosen": -1.275754451751709, + "logits/rejected": -0.8973702192306519, + "logps/chosen": -363.8408508300781, + "logps/rejected": -709.4940185546875, + "loss": 0.1327, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1413012444972992, + "rewards/margins": 0.36497193574905396, + "rewards/rejected": -0.5062731504440308, + "step": 890 + }, + { + "epoch": 0.12, + "learning_rate": 4.993910125649561e-06, + "logits/chosen": -1.463733434677124, + "logits/rejected": -1.1680874824523926, + "logps/chosen": -325.5857849121094, + "logps/rejected": -539.0804443359375, + "loss": 0.2243, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.13075824081897736, + "rewards/margins": 0.24207767844200134, + "rewards/rejected": -0.3728359341621399, + "step": 900 + }, + { + "epoch": 0.12, + "learning_rate": 4.993071464055486e-06, + "logits/chosen": -1.7845207452774048, + "logits/rejected": -1.0263665914535522, + "logps/chosen": -360.773193359375, + "logps/rejected": -632.8513793945312, + "loss": 0.2036, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.11185532808303833, + "rewards/margins": 0.32573041319847107, + "rewards/rejected": -0.4375857412815094, + "step": 910 + }, + { + "epoch": 0.12, + "learning_rate": 4.992178798434684e-06, + "logits/chosen": -1.693821668624878, + "logits/rejected": -1.128700852394104, + "logps/chosen": -323.9156494140625, + "logps/rejected": -669.1837158203125, + "loss": 0.155, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.07970709353685379, + "rewards/margins": 0.3461817800998688, + "rewards/rejected": -0.42588886618614197, + "step": 920 + }, + { + "epoch": 0.12, + "learning_rate": 4.9912321481237616e-06, + "logits/chosen": -1.5555380582809448, + "logits/rejected": -0.9651791453361511, + "logps/chosen": -371.4592590332031, + "logps/rejected": -629.7036743164062, + "loss": 0.1393, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.11751563847064972, + "rewards/margins": 0.30828914046287537, + "rewards/rejected": -0.4258047640323639, + "step": 930 + }, + { + "epoch": 0.13, + "learning_rate": 4.990231533628719e-06, + "logits/chosen": -1.428524136543274, + "logits/rejected": -0.9469925165176392, + "logps/chosen": -374.8126220703125, + "logps/rejected": -788.1290283203125, + "loss": 0.1347, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.12096371501684189, + "rewards/margins": 0.4053443372249603, + "rewards/rejected": -0.5263080596923828, + "step": 940 + }, + { + "epoch": 0.13, + "learning_rate": 4.989176976624511e-06, + "logits/chosen": -1.436598539352417, + "logits/rejected": -0.9486812353134155, + "logps/chosen": -430.09332275390625, + "logps/rejected": -847.3265380859375, + "loss": 0.0798, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.12556599080562592, + "rewards/margins": 0.45097970962524414, + "rewards/rejected": -0.5765457153320312, + "step": 950 + }, + { + "epoch": 0.13, + "learning_rate": 4.988068499954578e-06, + "logits/chosen": -1.656904935836792, + "logits/rejected": -0.7772586941719055, + "logps/chosen": -555.0128784179688, + "logps/rejected": -860.6650390625, + "loss": 0.1112, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.20253200829029083, + "rewards/margins": 0.4072926938533783, + "rewards/rejected": -0.6098247170448303, + "step": 960 + }, + { + "epoch": 0.13, + "learning_rate": 4.986906127630346e-06, + "logits/chosen": -1.301117181777954, + "logits/rejected": -0.9449092149734497, + "logps/chosen": -494.9105529785156, + "logps/rejected": -889.8714599609375, + "loss": 0.1113, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.22420816123485565, + "rewards/margins": 0.3903779685497284, + "rewards/rejected": -0.6145861148834229, + "step": 970 + }, + { + "epoch": 0.13, + "learning_rate": 4.985689884830711e-06, + "logits/chosen": -1.3350300788879395, + "logits/rejected": -0.9063273668289185, + "logps/chosen": -417.68878173828125, + "logps/rejected": -705.7192993164062, + "loss": 0.1779, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.22058455646038055, + "rewards/margins": 0.28054580092430115, + "rewards/rejected": -0.5011304020881653, + "step": 980 + }, + { + "epoch": 0.13, + "learning_rate": 4.984419797901491e-06, + "logits/chosen": -1.6169216632843018, + "logits/rejected": -0.7380484938621521, + "logps/chosen": -452.4661560058594, + "logps/rejected": -739.5595703125, + "loss": 0.1124, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.1508241593837738, + "rewards/margins": 0.3955535888671875, + "rewards/rejected": -0.5463777780532837, + "step": 990 + }, + { + "epoch": 0.13, + "learning_rate": 4.983095894354858e-06, + "logits/chosen": -1.2634952068328857, + "logits/rejected": -1.00887930393219, + "logps/chosen": -380.5688781738281, + "logps/rejected": -815.0224609375, + "loss": 0.1, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.13798674941062927, + "rewards/margins": 0.43448323011398315, + "rewards/rejected": -0.5724700093269348, + "step": 1000 + }, + { + "epoch": 0.13, + "learning_rate": 4.981718202868738e-06, + "logits/chosen": -1.3051574230194092, + "logits/rejected": -0.9665368795394897, + "logps/chosen": -449.41912841796875, + "logps/rejected": -758.8711547851562, + "loss": 0.1449, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.17802168428897858, + "rewards/margins": 0.330491304397583, + "rewards/rejected": -0.5085129737854004, + "step": 1010 + }, + { + "epoch": 0.14, + "learning_rate": 4.980286753286196e-06, + "logits/chosen": -1.7392864227294922, + "logits/rejected": -0.7400856614112854, + "logps/chosen": -542.4926147460938, + "logps/rejected": -784.757568359375, + "loss": 0.1234, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.1896502673625946, + "rewards/margins": 0.3813712000846863, + "rewards/rejected": -0.5710214376449585, + "step": 1020 + }, + { + "epoch": 0.14, + "learning_rate": 4.978801576614779e-06, + "logits/chosen": -1.7218875885009766, + "logits/rejected": -1.2126208543777466, + "logps/chosen": -534.4451293945312, + "logps/rejected": -855.2717895507812, + "loss": 0.1815, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.22779861092567444, + "rewards/margins": 0.35777026414871216, + "rewards/rejected": -0.5855687856674194, + "step": 1030 + }, + { + "epoch": 0.14, + "learning_rate": 4.97726270502586e-06, + "logits/chosen": -1.4519014358520508, + "logits/rejected": -1.0071544647216797, + "logps/chosen": -378.66204833984375, + "logps/rejected": -685.9154663085938, + "loss": 0.1499, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.1705239862203598, + "rewards/margins": 0.3545297682285309, + "rewards/rejected": -0.5250537991523743, + "step": 1040 + }, + { + "epoch": 0.14, + "learning_rate": 4.975670171853926e-06, + "logits/chosen": -1.462651252746582, + "logits/rejected": -1.1279051303863525, + "logps/chosen": -422.732666015625, + "logps/rejected": -821.091796875, + "loss": 0.1274, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.20711009204387665, + "rewards/margins": 0.3589819073677063, + "rewards/rejected": -0.5660920143127441, + "step": 1050 + }, + { + "epoch": 0.14, + "learning_rate": 4.974024011595864e-06, + "logits/chosen": -1.5038468837738037, + "logits/rejected": -0.8036036491394043, + "logps/chosen": -486.71124267578125, + "logps/rejected": -806.163330078125, + "loss": 0.1787, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.2330636978149414, + "rewards/margins": 0.34660226106643677, + "rewards/rejected": -0.5796659588813782, + "step": 1060 + }, + { + "epoch": 0.14, + "learning_rate": 4.97232425991021e-06, + "logits/chosen": -1.6467853784561157, + "logits/rejected": -0.9985973238945007, + "logps/chosen": -512.62548828125, + "logps/rejected": -888.6848754882812, + "loss": 0.0914, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.21965539455413818, + "rewards/margins": 0.3955782353878021, + "rewards/rejected": -0.6152336597442627, + "step": 1070 + }, + { + "epoch": 0.14, + "learning_rate": 4.970570953616383e-06, + "logits/chosen": -1.4452916383743286, + "logits/rejected": -0.927303671836853, + "logps/chosen": -471.254638671875, + "logps/rejected": -731.012939453125, + "loss": 0.1654, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.22571448981761932, + "rewards/margins": 0.2892727255821228, + "rewards/rejected": -0.5149871706962585, + "step": 1080 + }, + { + "epoch": 0.15, + "learning_rate": 4.9687641306938766e-06, + "logits/chosen": -1.5185739994049072, + "logits/rejected": -1.169559359550476, + "logps/chosen": -446.5347595214844, + "logps/rejected": -802.6253051757812, + "loss": 0.1374, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.22027269005775452, + "rewards/margins": 0.3299613296985626, + "rewards/rejected": -0.5502340197563171, + "step": 1090 + }, + { + "epoch": 0.15, + "learning_rate": 4.966903830281449e-06, + "logits/chosen": -1.5317347049713135, + "logits/rejected": -0.922308087348938, + "logps/chosen": -376.71490478515625, + "logps/rejected": -660.5279541015625, + "loss": 0.1419, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.14628192782402039, + "rewards/margins": 0.3379586637020111, + "rewards/rejected": -0.4842405915260315, + "step": 1100 + }, + { + "epoch": 0.15, + "learning_rate": 4.964990092676263e-06, + "logits/chosen": -1.753617525100708, + "logits/rejected": -1.197157621383667, + "logps/chosen": -461.98699951171875, + "logps/rejected": -775.8490600585938, + "loss": 0.1432, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.17882071435451508, + "rewards/margins": 0.3258904814720154, + "rewards/rejected": -0.5047112703323364, + "step": 1110 + }, + { + "epoch": 0.15, + "learning_rate": 4.9630229593330226e-06, + "logits/chosen": -1.414660930633545, + "logits/rejected": -1.0446290969848633, + "logps/chosen": -500.90802001953125, + "logps/rejected": -849.2281494140625, + "loss": 0.1148, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.23089566826820374, + "rewards/margins": 0.3633985221385956, + "rewards/rejected": -0.5942941904067993, + "step": 1120 + }, + { + "epoch": 0.15, + "learning_rate": 4.96100247286307e-06, + "logits/chosen": -1.594772219657898, + "logits/rejected": -0.9971159100532532, + "logps/chosen": -482.41131591796875, + "logps/rejected": -744.8736572265625, + "loss": 0.1841, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.19598089158535004, + "rewards/margins": 0.3241138756275177, + "rewards/rejected": -0.5200947523117065, + "step": 1130 + }, + { + "epoch": 0.15, + "learning_rate": 4.958928677033465e-06, + "logits/chosen": -1.541329264640808, + "logits/rejected": -1.0629053115844727, + "logps/chosen": -504.20794677734375, + "logps/rejected": -874.9547729492188, + "loss": 0.1078, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.2284460961818695, + "rewards/margins": 0.3831626772880554, + "rewards/rejected": -0.6116088032722473, + "step": 1140 + }, + { + "epoch": 0.15, + "learning_rate": 4.956801616766033e-06, + "logits/chosen": -1.495168924331665, + "logits/rejected": -0.9355745315551758, + "logps/chosen": -517.480224609375, + "logps/rejected": -883.2626953125, + "loss": 0.1319, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.2589607536792755, + "rewards/margins": 0.37442725896835327, + "rewards/rejected": -0.6333879828453064, + "step": 1150 + }, + { + "epoch": 0.15, + "learning_rate": 4.954621338136399e-06, + "logits/chosen": -1.3089743852615356, + "logits/rejected": -0.8916622996330261, + "logps/chosen": -547.32666015625, + "logps/rejected": -806.1666870117188, + "loss": 0.167, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.2952585518360138, + "rewards/margins": 0.29442209005355835, + "rewards/rejected": -0.5896806120872498, + "step": 1160 + }, + { + "epoch": 0.16, + "learning_rate": 4.9523878883729794e-06, + "logits/chosen": -1.5897184610366821, + "logits/rejected": -1.0826232433319092, + "logps/chosen": -606.2791748046875, + "logps/rejected": -918.5662231445312, + "loss": 0.1267, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.32889145612716675, + "rewards/margins": 0.3194637894630432, + "rewards/rejected": -0.64835524559021, + "step": 1170 + }, + { + "epoch": 0.16, + "learning_rate": 4.95010131585597e-06, + "logits/chosen": -1.5232877731323242, + "logits/rejected": -0.9521480798721313, + "logps/chosen": -433.63641357421875, + "logps/rejected": -817.1611938476562, + "loss": 0.1077, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.2032690793275833, + "rewards/margins": 0.39957109093666077, + "rewards/rejected": -0.6028401851654053, + "step": 1180 + }, + { + "epoch": 0.16, + "learning_rate": 4.94776167011629e-06, + "logits/chosen": -1.4146318435668945, + "logits/rejected": -1.2307662963867188, + "logps/chosen": -374.317626953125, + "logps/rejected": -775.4901123046875, + "loss": 0.1527, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.20076608657836914, + "rewards/margins": 0.33227279782295227, + "rewards/rejected": -0.5330389738082886, + "step": 1190 + }, + { + "epoch": 0.16, + "learning_rate": 4.9453690018345144e-06, + "logits/chosen": -1.8757169246673584, + "logits/rejected": -1.1736624240875244, + "logps/chosen": -530.3184814453125, + "logps/rejected": -783.8491821289062, + "loss": 0.1378, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.17494268715381622, + "rewards/margins": 0.35382869839668274, + "rewards/rejected": -0.5287714004516602, + "step": 1200 + }, + { + "epoch": 0.16, + "learning_rate": 4.94292336283977e-06, + "logits/chosen": -1.5193207263946533, + "logits/rejected": -0.9281194806098938, + "logps/chosen": -563.0369873046875, + "logps/rejected": -860.80419921875, + "loss": 0.1374, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.25554361939430237, + "rewards/margins": 0.3764990270137787, + "rewards/rejected": -0.6320425868034363, + "step": 1210 + }, + { + "epoch": 0.16, + "learning_rate": 4.940424806108619e-06, + "logits/chosen": -1.1913942098617554, + "logits/rejected": -1.1959201097488403, + "logps/chosen": -644.5512084960938, + "logps/rejected": -949.6307373046875, + "loss": 0.1714, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.4327170252799988, + "rewards/margins": 0.29346293210983276, + "rewards/rejected": -0.7261799573898315, + "step": 1220 + }, + { + "epoch": 0.16, + "learning_rate": 4.937873385763909e-06, + "logits/chosen": -1.3897337913513184, + "logits/rejected": -0.5832411050796509, + "logps/chosen": -696.0330200195312, + "logps/rejected": -1003.8894653320312, + "loss": 0.1041, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.33212608098983765, + "rewards/margins": 0.4552842080593109, + "rewards/rejected": -0.7874102592468262, + "step": 1230 + }, + { + "epoch": 0.17, + "learning_rate": 4.935269157073597e-06, + "logits/chosen": -1.3500540256500244, + "logits/rejected": -0.8013589978218079, + "logps/chosen": -672.7722778320312, + "logps/rejected": -932.7517700195312, + "loss": 0.1232, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.4343793988227844, + "rewards/margins": 0.2973952293395996, + "rewards/rejected": -0.731774628162384, + "step": 1240 + }, + { + "epoch": 0.17, + "learning_rate": 4.93261217644956e-06, + "logits/chosen": -1.6776320934295654, + "logits/rejected": -1.179521918296814, + "logps/chosen": -562.4392700195312, + "logps/rejected": -992.1153564453125, + "loss": 0.1318, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.29206374287605286, + "rewards/margins": 0.37819522619247437, + "rewards/rejected": -0.6702588796615601, + "step": 1250 + }, + { + "epoch": 0.17, + "learning_rate": 4.9299025014463665e-06, + "logits/chosen": -1.5510917901992798, + "logits/rejected": -1.1057939529418945, + "logps/chosen": -603.5531005859375, + "logps/rejected": -955.1824951171875, + "loss": 0.1113, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.3010428547859192, + "rewards/margins": 0.3675137460231781, + "rewards/rejected": -0.6685565710067749, + "step": 1260 + }, + { + "epoch": 0.17, + "learning_rate": 4.92714019076003e-06, + "logits/chosen": -1.5722512006759644, + "logits/rejected": -0.7237231135368347, + "logps/chosen": -585.212890625, + "logps/rejected": -873.8582153320312, + "loss": 0.1326, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.30967992544174194, + "rewards/margins": 0.38483959436416626, + "rewards/rejected": -0.6945194602012634, + "step": 1270 + }, + { + "epoch": 0.17, + "learning_rate": 4.924325304226745e-06, + "logits/chosen": -1.391479730606079, + "logits/rejected": -0.8646553754806519, + "logps/chosen": -601.2059936523438, + "logps/rejected": -808.3594970703125, + "loss": 0.2136, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.3519865870475769, + "rewards/margins": 0.26446038484573364, + "rewards/rejected": -0.6164470314979553, + "step": 1280 + }, + { + "epoch": 0.17, + "learning_rate": 4.921457902821578e-06, + "logits/chosen": -1.5498051643371582, + "logits/rejected": -0.9521937370300293, + "logps/chosen": -450.6568908691406, + "logps/rejected": -783.283203125, + "loss": 0.114, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.1782601922750473, + "rewards/margins": 0.3781951367855072, + "rewards/rejected": -0.5564553737640381, + "step": 1290 + }, + { + "epoch": 0.17, + "learning_rate": 4.91853804865716e-06, + "logits/chosen": -1.7239599227905273, + "logits/rejected": -0.9669686555862427, + "logps/chosen": -506.9222106933594, + "logps/rejected": -948.07421875, + "loss": 0.0635, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.22643208503723145, + "rewards/margins": 0.4678976535797119, + "rewards/rejected": -0.6943297386169434, + "step": 1300 + }, + { + "epoch": 0.17, + "learning_rate": 4.915565804982332e-06, + "logits/chosen": -1.7978935241699219, + "logits/rejected": -1.182894229888916, + "logps/chosen": -494.53582763671875, + "logps/rejected": -797.4810791015625, + "loss": 0.197, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.2301880419254303, + "rewards/margins": 0.3509904742240906, + "rewards/rejected": -0.5811785459518433, + "step": 1310 + }, + { + "epoch": 0.18, + "learning_rate": 4.912541236180779e-06, + "logits/chosen": -1.486143708229065, + "logits/rejected": -1.1926538944244385, + "logps/chosen": -456.87127685546875, + "logps/rejected": -752.5570678710938, + "loss": 0.1695, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.24015769362449646, + "rewards/margins": 0.29379063844680786, + "rewards/rejected": -0.5339483618736267, + "step": 1320 + }, + { + "epoch": 0.18, + "learning_rate": 4.909464407769633e-06, + "logits/chosen": -1.9261629581451416, + "logits/rejected": -1.1128313541412354, + "logps/chosen": -486.73992919921875, + "logps/rejected": -843.7420043945312, + "loss": 0.0968, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.22261467576026917, + "rewards/margins": 0.41170936822891235, + "rewards/rejected": -0.6343240737915039, + "step": 1330 + }, + { + "epoch": 0.18, + "learning_rate": 4.9063353863980565e-06, + "logits/chosen": -1.572691559791565, + "logits/rejected": -1.1048139333724976, + "logps/chosen": -556.69677734375, + "logps/rejected": -925.6715698242188, + "loss": 0.1952, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.3395165205001831, + "rewards/margins": 0.3469579815864563, + "rewards/rejected": -0.6864745020866394, + "step": 1340 + }, + { + "epoch": 0.18, + "learning_rate": 4.903154239845798e-06, + "logits/chosen": -1.7576971054077148, + "logits/rejected": -1.2093678712844849, + "logps/chosen": -494.8274841308594, + "logps/rejected": -986.6290283203125, + "loss": 0.1029, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.23781287670135498, + "rewards/margins": 0.46980786323547363, + "rewards/rejected": -0.7076207399368286, + "step": 1350 + }, + { + "epoch": 0.18, + "learning_rate": 4.899921037021719e-06, + "logits/chosen": -1.593591332435608, + "logits/rejected": -1.111011266708374, + "logps/chosen": -531.3776245117188, + "logps/rejected": -887.5836181640625, + "loss": 0.1081, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.26948198676109314, + "rewards/margins": 0.4102948307991028, + "rewards/rejected": -0.6797767877578735, + "step": 1360 + }, + { + "epoch": 0.18, + "learning_rate": 4.896635847962311e-06, + "logits/chosen": -1.5916521549224854, + "logits/rejected": -1.0768035650253296, + "logps/chosen": -480.07305908203125, + "logps/rejected": -845.6593627929688, + "loss": 0.0772, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.2070520669221878, + "rewards/margins": 0.4083788990974426, + "rewards/rejected": -0.6154308915138245, + "step": 1370 + }, + { + "epoch": 0.18, + "learning_rate": 4.893298743830168e-06, + "logits/chosen": -1.479468822479248, + "logits/rejected": -1.1445562839508057, + "logps/chosen": -425.95770263671875, + "logps/rejected": -745.1259765625, + "loss": 0.168, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.21790972352027893, + "rewards/margins": 0.3140547573566437, + "rewards/rejected": -0.5319644808769226, + "step": 1380 + }, + { + "epoch": 0.19, + "learning_rate": 4.889909796912454e-06, + "logits/chosen": -1.646630883216858, + "logits/rejected": -0.9626755714416504, + "logps/chosen": -495.00518798828125, + "logps/rejected": -861.8333740234375, + "loss": 0.1344, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.24911876022815704, + "rewards/margins": 0.4423271715641022, + "rewards/rejected": -0.6914458870887756, + "step": 1390 + }, + { + "epoch": 0.19, + "learning_rate": 4.88646908061933e-06, + "logits/chosen": -1.7141090631484985, + "logits/rejected": -1.0960562229156494, + "logps/chosen": -524.4849853515625, + "logps/rejected": -900.6906127929688, + "loss": 0.158, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.23685124516487122, + "rewards/margins": 0.42406564950942993, + "rewards/rejected": -0.660916805267334, + "step": 1400 + }, + { + "epoch": 0.19, + "learning_rate": 4.882976669482368e-06, + "logits/chosen": -1.5529906749725342, + "logits/rejected": -1.1421276330947876, + "logps/chosen": -408.2734375, + "logps/rejected": -759.225341796875, + "loss": 0.1603, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.1565893143415451, + "rewards/margins": 0.382378488779068, + "rewards/rejected": -0.5389677882194519, + "step": 1410 + }, + { + "epoch": 0.19, + "learning_rate": 4.879432639152935e-06, + "logits/chosen": -1.637101173400879, + "logits/rejected": -1.1991854906082153, + "logps/chosen": -459.44793701171875, + "logps/rejected": -865.1060791015625, + "loss": 0.1332, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.20239762961864471, + "rewards/margins": 0.38929039239883423, + "rewards/rejected": -0.5916879177093506, + "step": 1420 + }, + { + "epoch": 0.19, + "learning_rate": 4.875837066400553e-06, + "logits/chosen": -1.6581170558929443, + "logits/rejected": -1.0959937572479248, + "logps/chosen": -405.64141845703125, + "logps/rejected": -753.9737548828125, + "loss": 0.1626, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.16078518331050873, + "rewards/margins": 0.3797248899936676, + "rewards/rejected": -0.5405100584030151, + "step": 1430 + }, + { + "epoch": 0.19, + "learning_rate": 4.8721900291112415e-06, + "logits/chosen": -1.581157922744751, + "logits/rejected": -1.0542665719985962, + "logps/chosen": -461.44122314453125, + "logps/rejected": -759.1900634765625, + "loss": 0.1771, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.21266725659370422, + "rewards/margins": 0.32838940620422363, + "rewards/rejected": -0.5410566329956055, + "step": 1440 + }, + { + "epoch": 0.19, + "learning_rate": 4.868491606285823e-06, + "logits/chosen": -1.7201025485992432, + "logits/rejected": -1.107848048210144, + "logps/chosen": -427.12225341796875, + "logps/rejected": -793.6038208007812, + "loss": 0.1358, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.144453227519989, + "rewards/margins": 0.3804222643375397, + "rewards/rejected": -0.5248754620552063, + "step": 1450 + }, + { + "epoch": 0.19, + "learning_rate": 4.864741878038218e-06, + "logits/chosen": -1.6883983612060547, + "logits/rejected": -1.1231297254562378, + "logps/chosen": -460.866943359375, + "logps/rejected": -855.5372924804688, + "loss": 0.1019, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.16945675015449524, + "rewards/margins": 0.4335222840309143, + "rewards/rejected": -0.6029790639877319, + "step": 1460 + }, + { + "epoch": 0.2, + "learning_rate": 4.860940925593703e-06, + "logits/chosen": -1.944753885269165, + "logits/rejected": -1.1425764560699463, + "logps/chosen": -432.50830078125, + "logps/rejected": -708.9544067382812, + "loss": 0.1625, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.14104752242565155, + "rewards/margins": 0.3777088224887848, + "rewards/rejected": -0.5187563896179199, + "step": 1470 + }, + { + "epoch": 0.2, + "learning_rate": 4.857088831287158e-06, + "logits/chosen": -1.645452857017517, + "logits/rejected": -1.108695149421692, + "logps/chosen": -394.0192565917969, + "logps/rejected": -793.5757446289062, + "loss": 0.1427, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.1653439849615097, + "rewards/margins": 0.4196528494358063, + "rewards/rejected": -0.5849968194961548, + "step": 1480 + }, + { + "epoch": 0.2, + "learning_rate": 4.85318567856128e-06, + "logits/chosen": -1.715916633605957, + "logits/rejected": -1.1867271661758423, + "logps/chosen": -391.3207702636719, + "logps/rejected": -817.5634155273438, + "loss": 0.1369, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.13050571084022522, + "rewards/margins": 0.3892734944820404, + "rewards/rejected": -0.5197792649269104, + "step": 1490 + }, + { + "epoch": 0.2, + "learning_rate": 4.849231551964771e-06, + "logits/chosen": -1.7923057079315186, + "logits/rejected": -1.1524606943130493, + "logps/chosen": -476.6368103027344, + "logps/rejected": -782.6160278320312, + "loss": 0.1402, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.21068593859672546, + "rewards/margins": 0.3372471332550049, + "rewards/rejected": -0.5479329824447632, + "step": 1500 + }, + { + "epoch": 0.2, + "learning_rate": 4.8452265371505176e-06, + "logits/chosen": -1.6856279373168945, + "logits/rejected": -1.022018313407898, + "logps/chosen": -529.304443359375, + "logps/rejected": -827.2320556640625, + "loss": 0.1235, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.21702179312705994, + "rewards/margins": 0.35849815607070923, + "rewards/rejected": -0.5755199193954468, + "step": 1510 + }, + { + "epoch": 0.2, + "learning_rate": 4.841170720873723e-06, + "logits/chosen": -1.4261338710784912, + "logits/rejected": -1.0991995334625244, + "logps/chosen": -479.01416015625, + "logps/rejected": -837.9700317382812, + "loss": 0.1442, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.2580581605434418, + "rewards/margins": 0.31764236092567444, + "rewards/rejected": -0.575700581073761, + "step": 1520 + }, + { + "epoch": 0.2, + "learning_rate": 4.837064190990036e-06, + "logits/chosen": -1.6842906475067139, + "logits/rejected": -1.2078498601913452, + "logps/chosen": -509.49359130859375, + "logps/rejected": -809.8397216796875, + "loss": 0.1369, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.27093303203582764, + "rewards/margins": 0.3115740120410919, + "rewards/rejected": -0.5825070142745972, + "step": 1530 + }, + { + "epoch": 0.21, + "learning_rate": 4.832907036453647e-06, + "logits/chosen": -1.6339679956436157, + "logits/rejected": -1.1635912656784058, + "logps/chosen": -478.7958984375, + "logps/rejected": -798.2124633789062, + "loss": 0.1894, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.22973430156707764, + "rewards/margins": 0.3353918194770813, + "rewards/rejected": -0.5651262402534485, + "step": 1540 + }, + { + "epoch": 0.21, + "learning_rate": 4.828699347315357e-06, + "logits/chosen": -1.6856197118759155, + "logits/rejected": -1.1501435041427612, + "logps/chosen": -434.57763671875, + "logps/rejected": -643.5542602539062, + "loss": 0.1656, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.16013285517692566, + "rewards/margins": 0.29741623997688293, + "rewards/rejected": -0.4575490951538086, + "step": 1550 + }, + { + "epoch": 0.21, + "learning_rate": 4.824441214720629e-06, + "logits/chosen": -1.6989209651947021, + "logits/rejected": -1.1924196481704712, + "logps/chosen": -405.2452697753906, + "logps/rejected": -653.5948486328125, + "loss": 0.1908, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.18102996051311493, + "rewards/margins": 0.27397722005844116, + "rewards/rejected": -0.4550072252750397, + "step": 1560 + }, + { + "epoch": 0.21, + "learning_rate": 4.8201327309076176e-06, + "logits/chosen": -1.582778811454773, + "logits/rejected": -1.004863977432251, + "logps/chosen": -476.71990966796875, + "logps/rejected": -729.3921508789062, + "loss": 0.1627, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.20964249968528748, + "rewards/margins": 0.2946922183036804, + "rewards/rejected": -0.5043348073959351, + "step": 1570 + }, + { + "epoch": 0.21, + "learning_rate": 4.815773989205165e-06, + "logits/chosen": -1.5439220666885376, + "logits/rejected": -1.1240843534469604, + "logps/chosen": -545.4619140625, + "logps/rejected": -922.7950439453125, + "loss": 0.1253, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.29893285036087036, + "rewards/margins": 0.38949456810951233, + "rewards/rejected": -0.6884275078773499, + "step": 1580 + }, + { + "epoch": 0.21, + "learning_rate": 4.811365084030784e-06, + "logits/chosen": -1.7266323566436768, + "logits/rejected": -0.853527843952179, + "logps/chosen": -715.392578125, + "logps/rejected": -1047.7681884765625, + "loss": 0.109, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.3249026834964752, + "rewards/margins": 0.3879953920841217, + "rewards/rejected": -0.7128980159759521, + "step": 1590 + }, + { + "epoch": 0.21, + "learning_rate": 4.806906110888606e-06, + "logits/chosen": -1.355763554573059, + "logits/rejected": -0.7085272669792175, + "logps/chosen": -526.99658203125, + "logps/rejected": -888.4259643554688, + "loss": 0.1378, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.2837275564670563, + "rewards/margins": 0.399003803730011, + "rewards/rejected": -0.6827312707901001, + "step": 1600 + }, + { + "epoch": 0.21, + "learning_rate": 4.8023971663673235e-06, + "logits/chosen": -1.3139488697052002, + "logits/rejected": -0.9653435945510864, + "logps/chosen": -558.3027954101562, + "logps/rejected": -852.5972900390625, + "loss": 0.1833, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.3123556673526764, + "rewards/margins": 0.30963483452796936, + "rewards/rejected": -0.6219905018806458, + "step": 1610 + }, + { + "epoch": 0.22, + "learning_rate": 4.7978383481380865e-06, + "logits/chosen": -1.6188299655914307, + "logits/rejected": -0.9288069605827332, + "logps/chosen": -449.55841064453125, + "logps/rejected": -813.2579345703125, + "loss": 0.1403, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.20635005831718445, + "rewards/margins": 0.38359805941581726, + "rewards/rejected": -0.5899481177330017, + "step": 1620 + }, + { + "epoch": 0.22, + "learning_rate": 4.793229754952393e-06, + "logits/chosen": -1.500043511390686, + "logits/rejected": -0.805435299873352, + "logps/chosen": -551.26171875, + "logps/rejected": -906.537109375, + "loss": 0.0924, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.2886804938316345, + "rewards/margins": 0.3959423899650574, + "rewards/rejected": -0.6846228837966919, + "step": 1630 + }, + { + "epoch": 0.22, + "learning_rate": 4.788571486639948e-06, + "logits/chosen": -1.3259161710739136, + "logits/rejected": -0.6448807716369629, + "logps/chosen": -577.3439331054688, + "logps/rejected": -947.9227294921875, + "loss": 0.113, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.32035812735557556, + "rewards/margins": 0.4091481566429138, + "rewards/rejected": -0.7295061945915222, + "step": 1640 + }, + { + "epoch": 0.22, + "learning_rate": 4.783863644106502e-06, + "logits/chosen": -1.44963538646698, + "logits/rejected": -0.7897195219993591, + "logps/chosen": -593.3853759765625, + "logps/rejected": -874.451171875, + "loss": 0.172, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.32261866331100464, + "rewards/margins": 0.33006590604782104, + "rewards/rejected": -0.6526845693588257, + "step": 1650 + }, + { + "epoch": 0.22, + "learning_rate": 4.779106329331665e-06, + "logits/chosen": -1.610004186630249, + "logits/rejected": -0.855453610420227, + "logps/chosen": -560.9100952148438, + "logps/rejected": -858.4417114257812, + "loss": 0.1716, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.2593235373497009, + "rewards/margins": 0.3451550602912903, + "rewards/rejected": -0.6044785380363464, + "step": 1660 + }, + { + "epoch": 0.22, + "learning_rate": 4.774299645366696e-06, + "logits/chosen": -1.5046002864837646, + "logits/rejected": -0.9667149782180786, + "logps/chosen": -527.2569580078125, + "logps/rejected": -902.4674072265625, + "loss": 0.1106, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.27475467324256897, + "rewards/margins": 0.3705506920814514, + "rewards/rejected": -0.645305335521698, + "step": 1670 + }, + { + "epoch": 0.22, + "learning_rate": 4.769443696332272e-06, + "logits/chosen": -1.348170518875122, + "logits/rejected": -0.6280630230903625, + "logps/chosen": -596.0364990234375, + "logps/rejected": -971.1904296875, + "loss": 0.0834, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.2669234573841095, + "rewards/margins": 0.4294804036617279, + "rewards/rejected": -0.6964038610458374, + "step": 1680 + }, + { + "epoch": 0.23, + "learning_rate": 4.764538587416233e-06, + "logits/chosen": -1.2824132442474365, + "logits/rejected": -0.9618009328842163, + "logps/chosen": -526.2830810546875, + "logps/rejected": -815.99951171875, + "loss": 0.1573, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.30147814750671387, + "rewards/margins": 0.2752738893032074, + "rewards/rejected": -0.5767520666122437, + "step": 1690 + }, + { + "epoch": 0.23, + "learning_rate": 4.759584424871302e-06, + "logits/chosen": -1.0407376289367676, + "logits/rejected": -0.5895905494689941, + "logps/chosen": -549.6045532226562, + "logps/rejected": -836.8787231445312, + "loss": 0.1874, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.3209471106529236, + "rewards/margins": 0.32041606307029724, + "rewards/rejected": -0.6413631439208984, + "step": 1700 + }, + { + "epoch": 0.23, + "learning_rate": 4.754581316012785e-06, + "logits/chosen": -1.4987887144088745, + "logits/rejected": -0.8465301394462585, + "logps/chosen": -541.6981201171875, + "logps/rejected": -859.7433471679688, + "loss": 0.1707, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.2668864130973816, + "rewards/margins": 0.3836662769317627, + "rewards/rejected": -0.6505526900291443, + "step": 1710 + }, + { + "epoch": 0.23, + "learning_rate": 4.749529369216246e-06, + "logits/chosen": -1.3862401247024536, + "logits/rejected": -0.8952510952949524, + "logps/chosen": -439.4287109375, + "logps/rejected": -859.2282104492188, + "loss": 0.0808, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.18136066198349, + "rewards/margins": 0.40953293442726135, + "rewards/rejected": -0.5908936262130737, + "step": 1720 + }, + { + "epoch": 0.23, + "learning_rate": 4.744428693915158e-06, + "logits/chosen": -1.365352988243103, + "logits/rejected": -0.8398834466934204, + "logps/chosen": -441.2855529785156, + "logps/rejected": -844.7384643554688, + "loss": 0.1138, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.21025002002716064, + "rewards/margins": 0.3863261342048645, + "rewards/rejected": -0.5965762138366699, + "step": 1730 + }, + { + "epoch": 0.23, + "learning_rate": 4.7392794005985324e-06, + "logits/chosen": -1.448047399520874, + "logits/rejected": -0.8834174275398254, + "logps/chosen": -370.3255615234375, + "logps/rejected": -726.6725463867188, + "loss": 0.1543, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.16698488593101501, + "rewards/margins": 0.37776023149490356, + "rewards/rejected": -0.5447450876235962, + "step": 1740 + }, + { + "epoch": 0.23, + "learning_rate": 4.734081600808531e-06, + "logits/chosen": -1.7213389873504639, + "logits/rejected": -0.924578845500946, + "logps/chosen": -447.802490234375, + "logps/rejected": -833.771484375, + "loss": 0.1591, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.15919806063175201, + "rewards/margins": 0.4283002018928528, + "rewards/rejected": -0.5874982476234436, + "step": 1750 + }, + { + "epoch": 0.23, + "learning_rate": 4.7288354071380415e-06, + "logits/chosen": -1.3697175979614258, + "logits/rejected": -0.7719752788543701, + "logps/chosen": -551.510009765625, + "logps/rejected": -905.880859375, + "loss": 0.1401, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.2881290018558502, + "rewards/margins": 0.4137204587459564, + "rewards/rejected": -0.7018495202064514, + "step": 1760 + }, + { + "epoch": 0.24, + "learning_rate": 4.723540933228245e-06, + "logits/chosen": -1.226289987564087, + "logits/rejected": -0.81830233335495, + "logps/chosen": -660.1695556640625, + "logps/rejected": -888.3117065429688, + "loss": 0.1612, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.38691434264183044, + "rewards/margins": 0.27954649925231934, + "rewards/rejected": -0.6664608120918274, + "step": 1770 + }, + { + "epoch": 0.24, + "learning_rate": 4.7181982937661485e-06, + "logits/chosen": -1.4922593832015991, + "logits/rejected": -0.7460156679153442, + "logps/chosen": -632.2621459960938, + "logps/rejected": -992.2732543945312, + "loss": 0.0865, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.31351277232170105, + "rewards/margins": 0.4019736647605896, + "rewards/rejected": -0.7154864072799683, + "step": 1780 + }, + { + "epoch": 0.24, + "learning_rate": 4.712807604482108e-06, + "logits/chosen": -1.1576956510543823, + "logits/rejected": -0.8577936291694641, + "logps/chosen": -587.1032104492188, + "logps/rejected": -970.9803466796875, + "loss": 0.1084, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.3230358064174652, + "rewards/margins": 0.38744255900382996, + "rewards/rejected": -0.7104784250259399, + "step": 1790 + }, + { + "epoch": 0.24, + "learning_rate": 4.707368982147318e-06, + "logits/chosen": -1.1999406814575195, + "logits/rejected": -0.9700831174850464, + "logps/chosen": -542.734130859375, + "logps/rejected": -997.7930908203125, + "loss": 0.1223, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.28139543533325195, + "rewards/margins": 0.39619675278663635, + "rewards/rejected": -0.6775921583175659, + "step": 1800 + }, + { + "epoch": 0.24, + "learning_rate": 4.701882544571277e-06, + "logits/chosen": -1.3693358898162842, + "logits/rejected": -0.8610717058181763, + "logps/chosen": -425.99969482421875, + "logps/rejected": -809.958740234375, + "loss": 0.1192, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.2034183293581009, + "rewards/margins": 0.40131768584251404, + "rewards/rejected": -0.6047360301017761, + "step": 1810 + }, + { + "epoch": 0.24, + "learning_rate": 4.696348410599244e-06, + "logits/chosen": -1.654017686843872, + "logits/rejected": -0.8782070279121399, + "logps/chosen": -553.987060546875, + "logps/rejected": -852.6536254882812, + "loss": 0.1298, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.18976715207099915, + "rewards/margins": 0.4038692116737366, + "rewards/rejected": -0.5936363935470581, + "step": 1820 + }, + { + "epoch": 0.24, + "learning_rate": 4.690766700109659e-06, + "logits/chosen": -1.4825284481048584, + "logits/rejected": -0.9183340072631836, + "logps/chosen": -488.19488525390625, + "logps/rejected": -913.3912963867188, + "loss": 0.1293, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.2699379622936249, + "rewards/margins": 0.39716458320617676, + "rewards/rejected": -0.6671024560928345, + "step": 1830 + }, + { + "epoch": 0.25, + "learning_rate": 4.685137534011549e-06, + "logits/chosen": -1.2411205768585205, + "logits/rejected": -0.8965455889701843, + "logps/chosen": -512.8843994140625, + "logps/rejected": -809.3594970703125, + "loss": 0.1386, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.24077479541301727, + "rewards/margins": 0.34711533784866333, + "rewards/rejected": -0.5878901481628418, + "step": 1840 + }, + { + "epoch": 0.25, + "learning_rate": 4.679461034241906e-06, + "logits/chosen": -1.602836012840271, + "logits/rejected": -0.8313294649124146, + "logps/chosen": -574.6096801757812, + "logps/rejected": -949.3818359375, + "loss": 0.1248, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.22495639324188232, + "rewards/margins": 0.43183308839797974, + "rewards/rejected": -0.6567894220352173, + "step": 1850 + }, + { + "epoch": 0.25, + "learning_rate": 4.673737323763048e-06, + "logits/chosen": -1.4966367483139038, + "logits/rejected": -1.098854422569275, + "logps/chosen": -454.702880859375, + "logps/rejected": -771.0753784179688, + "loss": 0.1334, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.15163853764533997, + "rewards/margins": 0.3771851360797882, + "rewards/rejected": -0.5288236737251282, + "step": 1860 + }, + { + "epoch": 0.25, + "learning_rate": 4.667966526559953e-06, + "logits/chosen": -1.396028995513916, + "logits/rejected": -0.7422189712524414, + "logps/chosen": -537.6190185546875, + "logps/rejected": -792.0523071289062, + "loss": 0.1821, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.18353763222694397, + "rewards/margins": 0.3589433431625366, + "rewards/rejected": -0.5424809455871582, + "step": 1870 + }, + { + "epoch": 0.25, + "learning_rate": 4.662148767637578e-06, + "logits/chosen": -1.3878698348999023, + "logits/rejected": -0.8569218516349792, + "logps/chosen": -450.87225341796875, + "logps/rejected": -836.5355224609375, + "loss": 0.1115, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.17101620137691498, + "rewards/margins": 0.4047500491142273, + "rewards/rejected": -0.5757663249969482, + "step": 1880 + }, + { + "epoch": 0.25, + "learning_rate": 4.656284173018144e-06, + "logits/chosen": -1.3037761449813843, + "logits/rejected": -1.0120489597320557, + "logps/chosen": -332.4677429199219, + "logps/rejected": -664.6961669921875, + "loss": 0.2034, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.1323479413986206, + "rewards/margins": 0.31846508383750916, + "rewards/rejected": -0.45081305503845215, + "step": 1890 + }, + { + "epoch": 0.25, + "learning_rate": 4.650372869738415e-06, + "logits/chosen": -1.6151546239852905, + "logits/rejected": -0.8685706257820129, + "logps/chosen": -459.19580078125, + "logps/rejected": -817.8629760742188, + "loss": 0.1344, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.14569053053855896, + "rewards/margins": 0.4179355204105377, + "rewards/rejected": -0.5636261105537415, + "step": 1900 + }, + { + "epoch": 0.25, + "learning_rate": 4.644414985846934e-06, + "logits/chosen": -1.3685284852981567, + "logits/rejected": -0.6978548765182495, + "logps/chosen": -474.23370361328125, + "logps/rejected": -859.4371948242188, + "loss": 0.1026, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.19393648207187653, + "rewards/margins": 0.4319811761379242, + "rewards/rejected": -0.6259176135063171, + "step": 1910 + }, + { + "epoch": 0.26, + "learning_rate": 4.638410650401267e-06, + "logits/chosen": -1.1842671632766724, + "logits/rejected": -0.7414307594299316, + "logps/chosen": -456.84503173828125, + "logps/rejected": -706.1348266601562, + "loss": 0.2247, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.24000349640846252, + "rewards/margins": 0.2802012264728546, + "rewards/rejected": -0.5202046632766724, + "step": 1920 + }, + { + "epoch": 0.26, + "learning_rate": 4.632359993465188e-06, + "logits/chosen": -1.2892394065856934, + "logits/rejected": -0.9212859869003296, + "logps/chosen": -491.70135498046875, + "logps/rejected": -833.9127197265625, + "loss": 0.1346, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.2496131956577301, + "rewards/margins": 0.34575051069259644, + "rewards/rejected": -0.5953637361526489, + "step": 1930 + }, + { + "epoch": 0.26, + "learning_rate": 4.626263146105875e-06, + "logits/chosen": -1.3242267370224, + "logits/rejected": -0.7134020328521729, + "logps/chosen": -457.72332763671875, + "logps/rejected": -758.073486328125, + "loss": 0.1219, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.1769895851612091, + "rewards/margins": 0.3537253141403198, + "rewards/rejected": -0.5307148694992065, + "step": 1940 + }, + { + "epoch": 0.26, + "learning_rate": 4.620120240391065e-06, + "logits/chosen": -1.515119194984436, + "logits/rejected": -0.9801965951919556, + "logps/chosen": -454.9673767089844, + "logps/rejected": -697.6735229492188, + "loss": 0.1353, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.1867002248764038, + "rewards/margins": 0.3209150433540344, + "rewards/rejected": -0.5076152682304382, + "step": 1950 + }, + { + "epoch": 0.26, + "learning_rate": 4.613931409386196e-06, + "logits/chosen": -1.4317365884780884, + "logits/rejected": -1.0157676935195923, + "logps/chosen": -565.1531982421875, + "logps/rejected": -884.1183471679688, + "loss": 0.1517, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.26452139019966125, + "rewards/margins": 0.3587113320827484, + "rewards/rejected": -0.6232327222824097, + "step": 1960 + }, + { + "epoch": 0.26, + "learning_rate": 4.607696787151522e-06, + "logits/chosen": -1.3605797290802002, + "logits/rejected": -0.8848182559013367, + "logps/chosen": -434.24066162109375, + "logps/rejected": -841.8572387695312, + "loss": 0.107, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.2228735238313675, + "rewards/margins": 0.3770410418510437, + "rewards/rejected": -0.5999146103858948, + "step": 1970 + }, + { + "epoch": 0.26, + "learning_rate": 4.601416508739211e-06, + "logits/chosen": -1.5537292957305908, + "logits/rejected": -0.7724756002426147, + "logps/chosen": -470.87945556640625, + "logps/rejected": -794.3897705078125, + "loss": 0.1274, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.18849892914295197, + "rewards/margins": 0.3892095685005188, + "rewards/rejected": -0.5777084827423096, + "step": 1980 + }, + { + "epoch": 0.27, + "learning_rate": 4.595090710190419e-06, + "logits/chosen": -1.3826183080673218, + "logits/rejected": -0.9825338125228882, + "logps/chosen": -487.48529052734375, + "logps/rejected": -996.5350341796875, + "loss": 0.1142, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.23563413321971893, + "rewards/margins": 0.4019049108028412, + "rewards/rejected": -0.6375390291213989, + "step": 1990 + }, + { + "epoch": 0.27, + "learning_rate": 4.588719528532342e-06, + "logits/chosen": -1.4032509326934814, + "logits/rejected": -0.8891399502754211, + "logps/chosen": -607.7633056640625, + "logps/rejected": -910.2550048828125, + "loss": 0.1593, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.2815513014793396, + "rewards/margins": 0.3732183575630188, + "rewards/rejected": -0.6547696590423584, + "step": 2000 + }, + { + "epoch": 0.27, + "learning_rate": 4.582303101775249e-06, + "logits/chosen": -1.3333938121795654, + "logits/rejected": -0.7552350759506226, + "logps/chosen": -540.1254272460938, + "logps/rejected": -880.9000854492188, + "loss": 0.1156, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.27906858921051025, + "rewards/margins": 0.4015675485134125, + "rewards/rejected": -0.6806361675262451, + "step": 2010 + }, + { + "epoch": 0.27, + "learning_rate": 4.575841568909494e-06, + "logits/chosen": -1.3045923709869385, + "logits/rejected": -0.6709948778152466, + "logps/chosen": -446.7435607910156, + "logps/rejected": -876.2479248046875, + "loss": 0.1014, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.16647282242774963, + "rewards/margins": 0.4480370879173279, + "rewards/rejected": -0.6145098805427551, + "step": 2020 + }, + { + "epoch": 0.27, + "learning_rate": 4.569335069902502e-06, + "logits/chosen": -1.4322322607040405, + "logits/rejected": -0.5658160448074341, + "logps/chosen": -449.83154296875, + "logps/rejected": -833.4611206054688, + "loss": 0.1121, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.17377746105194092, + "rewards/margins": 0.4472419321537018, + "rewards/rejected": -0.6210194230079651, + "step": 2030 + }, + { + "epoch": 0.27, + "learning_rate": 4.562783745695738e-06, + "logits/chosen": -1.2313501834869385, + "logits/rejected": -0.840812087059021, + "logps/chosen": -346.85137939453125, + "logps/rejected": -779.2326049804688, + "loss": 0.1071, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.16269828379154205, + "rewards/margins": 0.4053496718406677, + "rewards/rejected": -0.5680479407310486, + "step": 2040 + }, + { + "epoch": 0.27, + "learning_rate": 4.556187738201656e-06, + "logits/chosen": -1.3862967491149902, + "logits/rejected": -0.760252833366394, + "logps/chosen": -532.8507690429688, + "logps/rejected": -826.9495239257812, + "loss": 0.1067, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.2203497439622879, + "rewards/margins": 0.38376516103744507, + "rewards/rejected": -0.604114830493927, + "step": 2050 + }, + { + "epoch": 0.27, + "learning_rate": 4.549547190300622e-06, + "logits/chosen": -1.2185680866241455, + "logits/rejected": -0.8623320460319519, + "logps/chosen": -493.34564208984375, + "logps/rejected": -827.0408935546875, + "loss": 0.1204, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.2509461045265198, + "rewards/margins": 0.3551548421382904, + "rewards/rejected": -0.6061009168624878, + "step": 2060 + }, + { + "epoch": 0.28, + "learning_rate": 4.542862245837821e-06, + "logits/chosen": -1.5642473697662354, + "logits/rejected": -0.926191508769989, + "logps/chosen": -411.05242919921875, + "logps/rejected": -723.1922607421875, + "loss": 0.1455, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.18251535296440125, + "rewards/margins": 0.3536899983882904, + "rewards/rejected": -0.5362053513526917, + "step": 2070 + }, + { + "epoch": 0.28, + "learning_rate": 4.536133049620143e-06, + "logits/chosen": -1.5234981775283813, + "logits/rejected": -0.7424275279045105, + "logps/chosen": -530.3670043945312, + "logps/rejected": -836.0511474609375, + "loss": 0.1389, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.16530658304691315, + "rewards/margins": 0.42894306778907776, + "rewards/rejected": -0.5942496061325073, + "step": 2080 + }, + { + "epoch": 0.28, + "learning_rate": 4.529359747413038e-06, + "logits/chosen": -1.338952660560608, + "logits/rejected": -0.7675120234489441, + "logps/chosen": -538.5235595703125, + "logps/rejected": -848.5220947265625, + "loss": 0.1503, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.2846592962741852, + "rewards/margins": 0.3030379116535187, + "rewards/rejected": -0.5876971483230591, + "step": 2090 + }, + { + "epoch": 0.28, + "learning_rate": 4.522542485937369e-06, + "logits/chosen": -1.3365002870559692, + "logits/rejected": -0.7856238484382629, + "logps/chosen": -400.2157287597656, + "logps/rejected": -783.48681640625, + "loss": 0.0966, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.16450873017311096, + "rewards/margins": 0.3819323182106018, + "rewards/rejected": -0.5464409589767456, + "step": 2100 + }, + { + "epoch": 0.28, + "learning_rate": 4.515681412866228e-06, + "logits/chosen": -1.3786782026290894, + "logits/rejected": -0.7613228559494019, + "logps/chosen": -449.18212890625, + "logps/rejected": -853.5227661132812, + "loss": 0.1186, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.19289013743400574, + "rewards/margins": 0.3786999583244324, + "rewards/rejected": -0.5715900659561157, + "step": 2110 + }, + { + "epoch": 0.28, + "learning_rate": 4.508776676821739e-06, + "logits/chosen": -1.6430130004882812, + "logits/rejected": -0.9073891639709473, + "logps/chosen": -417.95391845703125, + "logps/rejected": -733.4771728515625, + "loss": 0.1272, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.10977999866008759, + "rewards/margins": 0.3991335332393646, + "rewards/rejected": -0.5089135766029358, + "step": 2120 + }, + { + "epoch": 0.28, + "learning_rate": 4.501828427371834e-06, + "logits/chosen": -1.4118962287902832, + "logits/rejected": -0.835736095905304, + "logps/chosen": -472.13134765625, + "logps/rejected": -701.7862548828125, + "loss": 0.1591, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.17745926976203918, + "rewards/margins": 0.3164125084877014, + "rewards/rejected": -0.4938717782497406, + "step": 2130 + }, + { + "epoch": 0.29, + "learning_rate": 4.494836815027022e-06, + "logits/chosen": -1.6626451015472412, + "logits/rejected": -0.9878584742546082, + "logps/chosen": -423.9879455566406, + "logps/rejected": -770.7366333007812, + "loss": 0.1042, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.13481102883815765, + "rewards/margins": 0.4097488522529602, + "rewards/rejected": -0.5445598363876343, + "step": 2140 + }, + { + "epoch": 0.29, + "learning_rate": 4.48780199123712e-06, + "logits/chosen": -1.0662163496017456, + "logits/rejected": -0.7782390713691711, + "logps/chosen": -492.75469970703125, + "logps/rejected": -793.8905639648438, + "loss": 0.1767, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.25908443331718445, + "rewards/margins": 0.32769250869750977, + "rewards/rejected": -0.5867769122123718, + "step": 2150 + }, + { + "epoch": 0.29, + "learning_rate": 4.4807241083879774e-06, + "logits/chosen": -1.431119680404663, + "logits/rejected": -1.0311700105667114, + "logps/chosen": -495.00115966796875, + "logps/rejected": -914.7777099609375, + "loss": 0.1157, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.22871288657188416, + "rewards/margins": 0.43868550658226013, + "rewards/rejected": -0.6673983931541443, + "step": 2160 + }, + { + "epoch": 0.29, + "learning_rate": 4.473603319798173e-06, + "logits/chosen": -1.7834135293960571, + "logits/rejected": -1.212897539138794, + "logps/chosen": -476.74267578125, + "logps/rejected": -959.1222534179688, + "loss": 0.1058, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.19958916306495667, + "rewards/margins": 0.4401358962059021, + "rewards/rejected": -0.6397250294685364, + "step": 2170 + }, + { + "epoch": 0.29, + "learning_rate": 4.466439779715696e-06, + "logits/chosen": -1.570049524307251, + "logits/rejected": -1.1138522624969482, + "logps/chosen": -457.2566833496094, + "logps/rejected": -823.2316284179688, + "loss": 0.1634, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.2262900322675705, + "rewards/margins": 0.37429413199424744, + "rewards/rejected": -0.6005841493606567, + "step": 2180 + }, + { + "epoch": 0.29, + "learning_rate": 4.4592336433146e-06, + "logits/chosen": -1.434529423713684, + "logits/rejected": -0.9157952070236206, + "logps/chosen": -503.70184326171875, + "logps/rejected": -816.2783203125, + "loss": 0.1488, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.17298392951488495, + "rewards/margins": 0.39118996262550354, + "rewards/rejected": -0.5641738772392273, + "step": 2190 + }, + { + "epoch": 0.29, + "learning_rate": 4.451985066691649e-06, + "logits/chosen": -1.9089066982269287, + "logits/rejected": -1.133928656578064, + "logps/chosen": -472.3794860839844, + "logps/rejected": -772.7537841796875, + "loss": 0.1542, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.15726697444915771, + "rewards/margins": 0.359056293964386, + "rewards/rejected": -0.5163232088088989, + "step": 2200 + }, + { + "epoch": 0.29, + "learning_rate": 4.444694206862929e-06, + "logits/chosen": -1.5789159536361694, + "logits/rejected": -1.0540728569030762, + "logps/chosen": -390.03387451171875, + "logps/rejected": -772.8970947265625, + "loss": 0.1576, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.14799469709396362, + "rewards/margins": 0.37714943289756775, + "rewards/rejected": -0.5251442193984985, + "step": 2210 + }, + { + "epoch": 0.3, + "learning_rate": 4.437361221760449e-06, + "logits/chosen": -1.6248416900634766, + "logits/rejected": -1.0704140663146973, + "logps/chosen": -492.60638427734375, + "logps/rejected": -778.5485229492188, + "loss": 0.1399, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.2461838722229004, + "rewards/margins": 0.3204324543476105, + "rewards/rejected": -0.5666162967681885, + "step": 2220 + }, + { + "epoch": 0.3, + "learning_rate": 4.4299862702287255e-06, + "logits/chosen": -1.6715940237045288, + "logits/rejected": -0.9992098808288574, + "logps/chosen": -545.5618896484375, + "logps/rejected": -752.6533813476562, + "loss": 0.1674, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.2320774346590042, + "rewards/margins": 0.3299207091331482, + "rewards/rejected": -0.5619980692863464, + "step": 2230 + }, + { + "epoch": 0.3, + "learning_rate": 4.422569512021332e-06, + "logits/chosen": -1.3779773712158203, + "logits/rejected": -0.849908173084259, + "logps/chosen": -454.3763122558594, + "logps/rejected": -809.4392700195312, + "loss": 0.1371, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.20665785670280457, + "rewards/margins": 0.37678617238998413, + "rewards/rejected": -0.5834440588951111, + "step": 2240 + }, + { + "epoch": 0.3, + "learning_rate": 4.415111107797445e-06, + "logits/chosen": -1.6013469696044922, + "logits/rejected": -0.9178832769393921, + "logps/chosen": -450.80322265625, + "logps/rejected": -827.7667846679688, + "loss": 0.0834, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.17798402905464172, + "rewards/margins": 0.4255714416503906, + "rewards/rejected": -0.60355544090271, + "step": 2250 + }, + { + "epoch": 0.3, + "learning_rate": 4.407611219118363e-06, + "logits/chosen": -1.5318782329559326, + "logits/rejected": -1.0508818626403809, + "logps/chosen": -577.3057861328125, + "logps/rejected": -969.0447387695312, + "loss": 0.1826, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.2945424020290375, + "rewards/margins": 0.3278708755970001, + "rewards/rejected": -0.6224132776260376, + "step": 2260 + }, + { + "epoch": 0.3, + "learning_rate": 4.4000700084440046e-06, + "logits/chosen": -1.736802339553833, + "logits/rejected": -1.0055776834487915, + "logps/chosen": -491.3179626464844, + "logps/rejected": -798.9041137695312, + "loss": 0.0974, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.24061992764472961, + "rewards/margins": 0.3673352599143982, + "rewards/rejected": -0.6079551577568054, + "step": 2270 + }, + { + "epoch": 0.3, + "learning_rate": 4.3924876391293915e-06, + "logits/chosen": -1.4131683111190796, + "logits/rejected": -0.8145586848258972, + "logps/chosen": -452.830078125, + "logps/rejected": -853.2399291992188, + "loss": 0.1319, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.19131293892860413, + "rewards/margins": 0.4480679929256439, + "rewards/rejected": -0.639380931854248, + "step": 2280 + }, + { + "epoch": 0.31, + "learning_rate": 4.384864275421109e-06, + "logits/chosen": -1.515782117843628, + "logits/rejected": -0.9318229556083679, + "logps/chosen": -366.37353515625, + "logps/rejected": -722.2740478515625, + "loss": 0.137, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.1316198855638504, + "rewards/margins": 0.4034051299095154, + "rewards/rejected": -0.5350250005722046, + "step": 2290 + }, + { + "epoch": 0.31, + "learning_rate": 4.377200082453748e-06, + "logits/chosen": -1.5804684162139893, + "logits/rejected": -1.0182714462280273, + "logps/chosen": -350.4043273925781, + "logps/rejected": -682.8316040039062, + "loss": 0.1649, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.14684131741523743, + "rewards/margins": 0.34305766224861145, + "rewards/rejected": -0.4898989200592041, + "step": 2300 + }, + { + "epoch": 0.31, + "learning_rate": 4.36949522624633e-06, + "logits/chosen": -1.346221685409546, + "logits/rejected": -1.045738697052002, + "logps/chosen": -370.6365661621094, + "logps/rejected": -878.205078125, + "loss": 0.0781, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.1513518989086151, + "rewards/margins": 0.43637341260910034, + "rewards/rejected": -0.5877252817153931, + "step": 2310 + }, + { + "epoch": 0.31, + "learning_rate": 4.361749873698707e-06, + "logits/chosen": -1.887648344039917, + "logits/rejected": -1.1761292219161987, + "logps/chosen": -420.795166015625, + "logps/rejected": -686.5606689453125, + "loss": 0.1384, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1394852101802826, + "rewards/margins": 0.3560732901096344, + "rewards/rejected": -0.495558500289917, + "step": 2320 + }, + { + "epoch": 0.31, + "learning_rate": 4.353964192587949e-06, + "logits/chosen": -1.4805281162261963, + "logits/rejected": -0.7832016348838806, + "logps/chosen": -443.5951232910156, + "logps/rejected": -762.2675170898438, + "loss": 0.1246, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.21405291557312012, + "rewards/margins": 0.3591863512992859, + "rewards/rejected": -0.573239266872406, + "step": 2330 + }, + { + "epoch": 0.31, + "learning_rate": 4.346138351564711e-06, + "logits/chosen": -1.7262485027313232, + "logits/rejected": -0.9207326769828796, + "logps/chosen": -474.5480041503906, + "logps/rejected": -702.486572265625, + "loss": 0.1708, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.2004108726978302, + "rewards/margins": 0.30196017026901245, + "rewards/rejected": -0.5023710131645203, + "step": 2340 + }, + { + "epoch": 0.31, + "learning_rate": 4.338272520149572e-06, + "logits/chosen": -1.532149314880371, + "logits/rejected": -0.9204761385917664, + "logps/chosen": -534.96728515625, + "logps/rejected": -887.0525512695312, + "loss": 0.1276, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.19786028563976288, + "rewards/margins": 0.4368208944797516, + "rewards/rejected": -0.6346812844276428, + "step": 2350 + }, + { + "epoch": 0.31, + "learning_rate": 4.330366868729376e-06, + "logits/chosen": -1.6435654163360596, + "logits/rejected": -1.03916335105896, + "logps/chosen": -494.278564453125, + "logps/rejected": -798.6319580078125, + "loss": 0.1529, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.2153468132019043, + "rewards/margins": 0.342166543006897, + "rewards/rejected": -0.5575133562088013, + "step": 2360 + }, + { + "epoch": 0.32, + "learning_rate": 4.322421568553529e-06, + "logits/chosen": -1.6959123611450195, + "logits/rejected": -1.2254985570907593, + "logps/chosen": -438.72869873046875, + "logps/rejected": -706.9481811523438, + "loss": 0.1368, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.180532768368721, + "rewards/margins": 0.3147241473197937, + "rewards/rejected": -0.4952569007873535, + "step": 2370 + }, + { + "epoch": 0.32, + "learning_rate": 4.3144367917302964e-06, + "logits/chosen": -1.3080450296401978, + "logits/rejected": -0.740618884563446, + "logps/chosen": -504.75067138671875, + "logps/rejected": -789.192138671875, + "loss": 0.1527, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.20803742110729218, + "rewards/margins": 0.37267953157424927, + "rewards/rejected": -0.5807169675827026, + "step": 2380 + }, + { + "epoch": 0.32, + "learning_rate": 4.30641271122307e-06, + "logits/chosen": -1.7245194911956787, + "logits/rejected": -1.1383044719696045, + "logps/chosen": -500.14764404296875, + "logps/rejected": -779.827880859375, + "loss": 0.1317, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.24187901616096497, + "rewards/margins": 0.34209081530570984, + "rewards/rejected": -0.5839698314666748, + "step": 2390 + }, + { + "epoch": 0.32, + "learning_rate": 4.2983495008466285e-06, + "logits/chosen": -1.6434742212295532, + "logits/rejected": -1.0891433954238892, + "logps/chosen": -534.0972290039062, + "logps/rejected": -861.4666137695312, + "loss": 0.128, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.22732210159301758, + "rewards/margins": 0.38719356060028076, + "rewards/rejected": -0.6145156621932983, + "step": 2400 + }, + { + "epoch": 0.32, + "learning_rate": 4.290247335263362e-06, + "logits/chosen": -1.530216932296753, + "logits/rejected": -0.9373283386230469, + "logps/chosen": -495.4457092285156, + "logps/rejected": -882.18359375, + "loss": 0.1107, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.19321858882904053, + "rewards/margins": 0.4353697896003723, + "rewards/rejected": -0.6285883188247681, + "step": 2410 + }, + { + "epoch": 0.32, + "learning_rate": 4.2821063899795015e-06, + "logits/chosen": -1.4912149906158447, + "logits/rejected": -0.9739956855773926, + "logps/chosen": -488.48431396484375, + "logps/rejected": -824.3746948242188, + "loss": 0.1278, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.21902771294116974, + "rewards/margins": 0.3857320547103882, + "rewards/rejected": -0.6047598123550415, + "step": 2420 + }, + { + "epoch": 0.32, + "learning_rate": 4.273926841341303e-06, + "logits/chosen": -1.606318712234497, + "logits/rejected": -0.9916576147079468, + "logps/chosen": -363.5210876464844, + "logps/rejected": -840.6154174804688, + "loss": 0.0952, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15150922536849976, + "rewards/margins": 0.46653613448143005, + "rewards/rejected": -0.6180453300476074, + "step": 2430 + }, + { + "epoch": 0.33, + "learning_rate": 4.265708866531238e-06, + "logits/chosen": -1.608624815940857, + "logits/rejected": -0.9720064401626587, + "logps/chosen": -510.4712829589844, + "logps/rejected": -808.0747680664062, + "loss": 0.1947, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.24383282661437988, + "rewards/margins": 0.3697541356086731, + "rewards/rejected": -0.613586962223053, + "step": 2440 + }, + { + "epoch": 0.33, + "learning_rate": 4.257452643564155e-06, + "logits/chosen": -1.499190092086792, + "logits/rejected": -0.8431995511054993, + "logps/chosen": -606.6427001953125, + "logps/rejected": -963.7305908203125, + "loss": 0.1502, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.30497798323631287, + "rewards/margins": 0.37246131896972656, + "rewards/rejected": -0.677439272403717, + "step": 2450 + }, + { + "epoch": 0.33, + "learning_rate": 4.249158351283414e-06, + "logits/chosen": -1.7652698755264282, + "logits/rejected": -1.0435216426849365, + "logps/chosen": -533.5185546875, + "logps/rejected": -752.1669921875, + "loss": 0.1436, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.24484989047050476, + "rewards/margins": 0.30627909302711487, + "rewards/rejected": -0.5511289834976196, + "step": 2460 + }, + { + "epoch": 0.33, + "learning_rate": 4.240826169357024e-06, + "logits/chosen": -1.1542727947235107, + "logits/rejected": -0.8217121958732605, + "logps/chosen": -569.7604370117188, + "logps/rejected": -882.7029418945312, + "loss": 0.1906, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.34456318616867065, + "rewards/margins": 0.310018926858902, + "rewards/rejected": -0.654582142829895, + "step": 2470 + }, + { + "epoch": 0.33, + "learning_rate": 4.232456278273743e-06, + "logits/chosen": -1.4869928359985352, + "logits/rejected": -0.7625919580459595, + "logps/chosen": -582.61083984375, + "logps/rejected": -920.8674926757812, + "loss": 0.0979, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.300833523273468, + "rewards/margins": 0.3972635865211487, + "rewards/rejected": -0.6980971097946167, + "step": 2480 + }, + { + "epoch": 0.33, + "learning_rate": 4.224048859339175e-06, + "logits/chosen": -1.5701301097869873, + "logits/rejected": -0.9477971792221069, + "logps/chosen": -546.9784545898438, + "logps/rejected": -904.2532348632812, + "loss": 0.1018, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.2308419942855835, + "rewards/margins": 0.4231252074241638, + "rewards/rejected": -0.6539672613143921, + "step": 2490 + }, + { + "epoch": 0.33, + "learning_rate": 4.215604094671835e-06, + "logits/chosen": -1.3893605470657349, + "logits/rejected": -0.9853037595748901, + "logps/chosen": -458.06365966796875, + "logps/rejected": -872.8928833007812, + "loss": 0.0986, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.226444274187088, + "rewards/margins": 0.43268585205078125, + "rewards/rejected": -0.6591302156448364, + "step": 2500 + }, + { + "epoch": 0.33, + "learning_rate": 4.207122167199209e-06, + "logits/chosen": -1.5319633483886719, + "logits/rejected": -1.021113634109497, + "logps/chosen": -471.55902099609375, + "logps/rejected": -743.827392578125, + "loss": 0.1398, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1666310578584671, + "rewards/margins": 0.346027672290802, + "rewards/rejected": -0.5126587152481079, + "step": 2510 + }, + { + "epoch": 0.34, + "learning_rate": 4.198603260653792e-06, + "logits/chosen": -1.6109545230865479, + "logits/rejected": -0.9327214360237122, + "logps/chosen": -447.84344482421875, + "logps/rejected": -748.9298095703125, + "loss": 0.1313, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.13613386452198029, + "rewards/margins": 0.39785856008529663, + "rewards/rejected": -0.5339924097061157, + "step": 2520 + }, + { + "epoch": 0.34, + "learning_rate": 4.1900475595691044e-06, + "logits/chosen": -1.4914381504058838, + "logits/rejected": -0.8920121192932129, + "logps/chosen": -384.0286865234375, + "logps/rejected": -717.90478515625, + "loss": 0.1452, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13352106511592865, + "rewards/margins": 0.37515324354171753, + "rewards/rejected": -0.5086743235588074, + "step": 2530 + }, + { + "epoch": 0.34, + "learning_rate": 4.181455249275701e-06, + "logits/chosen": -1.5109646320343018, + "logits/rejected": -1.1194543838500977, + "logps/chosen": -518.5322265625, + "logps/rejected": -869.0886840820312, + "loss": 0.1341, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.21598049998283386, + "rewards/margins": 0.38135606050491333, + "rewards/rejected": -0.5973365902900696, + "step": 2540 + }, + { + "epoch": 0.34, + "learning_rate": 4.172826515897146e-06, + "logits/chosen": -1.7286665439605713, + "logits/rejected": -1.1569503545761108, + "logps/chosen": -389.3125915527344, + "logps/rejected": -790.7874755859375, + "loss": 0.1067, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.17809465527534485, + "rewards/margins": 0.4077723026275635, + "rewards/rejected": -0.5858669877052307, + "step": 2550 + }, + { + "epoch": 0.34, + "learning_rate": 4.1641615463459926e-06, + "logits/chosen": -1.7263851165771484, + "logits/rejected": -1.102418065071106, + "logps/chosen": -431.767822265625, + "logps/rejected": -833.9580078125, + "loss": 0.0802, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.13353870809078217, + "rewards/margins": 0.4436104893684387, + "rewards/rejected": -0.5771491527557373, + "step": 2560 + }, + { + "epoch": 0.34, + "learning_rate": 4.1554605283197255e-06, + "logits/chosen": -1.7348525524139404, + "logits/rejected": -1.1465692520141602, + "logps/chosen": -418.4853515625, + "logps/rejected": -732.1386108398438, + "loss": 0.1604, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.14343003928661346, + "rewards/margins": 0.34780198335647583, + "rewards/rejected": -0.4912320077419281, + "step": 2570 + }, + { + "epoch": 0.34, + "learning_rate": 4.146723650296701e-06, + "logits/chosen": -1.5681244134902954, + "logits/rejected": -0.9826623797416687, + "logps/chosen": -454.61669921875, + "logps/rejected": -774.7626342773438, + "loss": 0.1508, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.15054744482040405, + "rewards/margins": 0.34130164980888367, + "rewards/rejected": -0.4918491244316101, + "step": 2580 + }, + { + "epoch": 0.35, + "learning_rate": 4.1379511015320625e-06, + "logits/chosen": -1.6330792903900146, + "logits/rejected": -1.0874810218811035, + "logps/chosen": -515.1699829101562, + "logps/rejected": -785.629638671875, + "loss": 0.1298, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.21226203441619873, + "rewards/margins": 0.3242810070514679, + "rewards/rejected": -0.5365430116653442, + "step": 2590 + }, + { + "epoch": 0.35, + "learning_rate": 4.129143072053639e-06, + "logits/chosen": -1.873373031616211, + "logits/rejected": -1.1326647996902466, + "logps/chosen": -452.2235412597656, + "logps/rejected": -815.4119873046875, + "loss": 0.1293, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.178654745221138, + "rewards/margins": 0.3714352250099182, + "rewards/rejected": -0.550089955329895, + "step": 2600 + }, + { + "epoch": 0.35, + "learning_rate": 4.120299752657828e-06, + "logits/chosen": -1.548407793045044, + "logits/rejected": -1.000870943069458, + "logps/chosen": -445.26361083984375, + "logps/rejected": -791.4591064453125, + "loss": 0.1351, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.2141580581665039, + "rewards/margins": 0.3528524339199066, + "rewards/rejected": -0.5670104622840881, + "step": 2610 + }, + { + "epoch": 0.35, + "learning_rate": 4.111421334905468e-06, + "logits/chosen": -1.5858871936798096, + "logits/rejected": -1.104501485824585, + "logps/chosen": -441.08575439453125, + "logps/rejected": -841.6727294921875, + "loss": 0.1148, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.17988021671772003, + "rewards/margins": 0.41606348752975464, + "rewards/rejected": -0.5959437489509583, + "step": 2620 + }, + { + "epoch": 0.35, + "learning_rate": 4.102508011117684e-06, + "logits/chosen": -1.5839554071426392, + "logits/rejected": -0.9679198265075684, + "logps/chosen": -413.9541931152344, + "logps/rejected": -723.2825927734375, + "loss": 0.1373, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.14662650227546692, + "rewards/margins": 0.3743670582771301, + "rewards/rejected": -0.5209935307502747, + "step": 2630 + }, + { + "epoch": 0.35, + "learning_rate": 4.093559974371725e-06, + "logits/chosen": -1.7011744976043701, + "logits/rejected": -1.1305286884307861, + "logps/chosen": -472.72796630859375, + "logps/rejected": -883.6267700195312, + "loss": 0.1277, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.16943880915641785, + "rewards/margins": 0.43496638536453247, + "rewards/rejected": -0.6044051647186279, + "step": 2640 + }, + { + "epoch": 0.35, + "learning_rate": 4.084577418496775e-06, + "logits/chosen": -1.632817029953003, + "logits/rejected": -1.0543255805969238, + "logps/chosen": -532.606201171875, + "logps/rejected": -842.3258056640625, + "loss": 0.1031, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.21153993904590607, + "rewards/margins": 0.35972151160240173, + "rewards/rejected": -0.5712614059448242, + "step": 2650 + }, + { + "epoch": 0.35, + "learning_rate": 4.075560538069767e-06, + "logits/chosen": -1.6261268854141235, + "logits/rejected": -1.127497911453247, + "logps/chosen": -509.52850341796875, + "logps/rejected": -826.4234619140625, + "loss": 0.1434, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.22948768734931946, + "rewards/margins": 0.346964955329895, + "rewards/rejected": -0.5764526128768921, + "step": 2660 + }, + { + "epoch": 0.36, + "learning_rate": 4.066509528411151e-06, + "logits/chosen": -1.5209250450134277, + "logits/rejected": -1.0693763494491577, + "logps/chosen": -490.5738220214844, + "logps/rejected": -792.5563354492188, + "loss": 0.1522, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.23105911910533905, + "rewards/margins": 0.33465856313705444, + "rewards/rejected": -0.5657176375389099, + "step": 2670 + }, + { + "epoch": 0.36, + "learning_rate": 4.05742458558068e-06, + "logits/chosen": -1.447706937789917, + "logits/rejected": -1.0158064365386963, + "logps/chosen": -509.220947265625, + "logps/rejected": -778.8126220703125, + "loss": 0.1688, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.22236888110637665, + "rewards/margins": 0.2937421202659607, + "rewards/rejected": -0.5161110162734985, + "step": 2680 + }, + { + "epoch": 0.36, + "learning_rate": 4.048305906373151e-06, + "logits/chosen": -1.3585022687911987, + "logits/rejected": -1.0554983615875244, + "logps/chosen": -467.7109375, + "logps/rejected": -855.8693237304688, + "loss": 0.0999, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.24606266617774963, + "rewards/margins": 0.36643314361572266, + "rewards/rejected": -0.6124957799911499, + "step": 2690 + }, + { + "epoch": 0.36, + "learning_rate": 4.039153688314146e-06, + "logits/chosen": -1.6710975170135498, + "logits/rejected": -1.0430233478546143, + "logps/chosen": -529.962890625, + "logps/rejected": -842.4357299804688, + "loss": 0.1731, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.2704309821128845, + "rewards/margins": 0.31774449348449707, + "rewards/rejected": -0.5881755352020264, + "step": 2700 + }, + { + "epoch": 0.36, + "learning_rate": 4.029968129655757e-06, + "logits/chosen": -1.5244982242584229, + "logits/rejected": -0.917557418346405, + "logps/chosen": -492.8162536621094, + "logps/rejected": -695.8026733398438, + "loss": 0.1951, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.24431486427783966, + "rewards/margins": 0.2869857847690582, + "rewards/rejected": -0.5313006639480591, + "step": 2710 + }, + { + "epoch": 0.36, + "learning_rate": 4.020749429372286e-06, + "logits/chosen": -1.7436425685882568, + "logits/rejected": -1.2350785732269287, + "logps/chosen": -436.5732421875, + "logps/rejected": -712.8914794921875, + "loss": 0.1449, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.21191930770874023, + "rewards/margins": 0.32732638716697693, + "rewards/rejected": -0.5392457246780396, + "step": 2720 + }, + { + "epoch": 0.36, + "learning_rate": 4.011497787155938e-06, + "logits/chosen": -1.5413177013397217, + "logits/rejected": -0.9734029769897461, + "logps/chosen": -423.8431091308594, + "logps/rejected": -793.4420166015625, + "loss": 0.1218, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.16747328639030457, + "rewards/margins": 0.3857666850090027, + "rewards/rejected": -0.5532399415969849, + "step": 2730 + }, + { + "epoch": 0.37, + "learning_rate": 4.002213403412492e-06, + "logits/chosen": -1.5266112089157104, + "logits/rejected": -0.9985775947570801, + "logps/chosen": -449.9827575683594, + "logps/rejected": -859.7175903320312, + "loss": 0.1585, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.19104166328907013, + "rewards/margins": 0.42445772886276245, + "rewards/rejected": -0.6154993772506714, + "step": 2740 + }, + { + "epoch": 0.37, + "learning_rate": 3.992896479256966e-06, + "logits/chosen": -1.478150725364685, + "logits/rejected": -1.0377126932144165, + "logps/chosen": -505.3447265625, + "logps/rejected": -800.2145385742188, + "loss": 0.173, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.21070396900177002, + "rewards/margins": 0.3541814088821411, + "rewards/rejected": -0.5648853182792664, + "step": 2750 + }, + { + "epoch": 0.37, + "learning_rate": 3.983547216509254e-06, + "logits/chosen": -1.5522105693817139, + "logits/rejected": -1.0478585958480835, + "logps/chosen": -468.27099609375, + "logps/rejected": -687.7916259765625, + "loss": 0.1426, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.19997310638427734, + "rewards/margins": 0.26816970109939575, + "rewards/rejected": -0.4681428074836731, + "step": 2760 + }, + { + "epoch": 0.37, + "learning_rate": 3.974165817689758e-06, + "logits/chosen": -1.373271107673645, + "logits/rejected": -0.8424400091171265, + "logps/chosen": -415.9774475097656, + "logps/rejected": -862.8703002929688, + "loss": 0.0752, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.17877209186553955, + "rewards/margins": 0.41697627305984497, + "rewards/rejected": -0.5957483649253845, + "step": 2770 + }, + { + "epoch": 0.37, + "learning_rate": 3.964752486015001e-06, + "logits/chosen": -1.390533208847046, + "logits/rejected": -0.8571497201919556, + "logps/chosen": -443.14984130859375, + "logps/rejected": -853.4337768554688, + "loss": 0.1228, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.154323548078537, + "rewards/margins": 0.4272845685482025, + "rewards/rejected": -0.5816081166267395, + "step": 2780 + }, + { + "epoch": 0.37, + "learning_rate": 3.955307425393224e-06, + "logits/chosen": -1.6358531713485718, + "logits/rejected": -0.7802098989486694, + "logps/chosen": -558.2034912109375, + "logps/rejected": -880.1002197265625, + "loss": 0.1227, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.2329464852809906, + "rewards/margins": 0.41606202721595764, + "rewards/rejected": -0.6490085124969482, + "step": 2790 + }, + { + "epoch": 0.37, + "learning_rate": 3.945830840419966e-06, + "logits/chosen": -1.4261865615844727, + "logits/rejected": -0.8663290739059448, + "logps/chosen": -381.68060302734375, + "logps/rejected": -708.1229248046875, + "loss": 0.154, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.1556844562292099, + "rewards/margins": 0.35576796531677246, + "rewards/rejected": -0.5114524364471436, + "step": 2800 + }, + { + "epoch": 0.37, + "learning_rate": 3.936322936373641e-06, + "logits/chosen": -1.616782546043396, + "logits/rejected": -0.6785197854042053, + "logps/chosen": -467.4331970214844, + "logps/rejected": -773.0416259765625, + "loss": 0.1211, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.17097237706184387, + "rewards/margins": 0.3937646746635437, + "rewards/rejected": -0.56473708152771, + "step": 2810 + }, + { + "epoch": 0.38, + "learning_rate": 3.92678391921108e-06, + "logits/chosen": -1.3506128787994385, + "logits/rejected": -0.9120258092880249, + "logps/chosen": -515.1641235351562, + "logps/rejected": -906.06689453125, + "loss": 0.0919, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.2047976702451706, + "rewards/margins": 0.4362905025482178, + "rewards/rejected": -0.6410881876945496, + "step": 2820 + }, + { + "epoch": 0.38, + "learning_rate": 3.9172139955630774e-06, + "logits/chosen": -1.5046745538711548, + "logits/rejected": -0.8224016427993774, + "logps/chosen": -412.1595764160156, + "logps/rejected": -697.3492431640625, + "loss": 0.1774, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.18449755012989044, + "rewards/margins": 0.3409607708454132, + "rewards/rejected": -0.5254582166671753, + "step": 2830 + }, + { + "epoch": 0.38, + "learning_rate": 3.907613372729916e-06, + "logits/chosen": -1.5527722835540771, + "logits/rejected": -0.9975868463516235, + "logps/chosen": -405.668212890625, + "logps/rejected": -790.2307739257812, + "loss": 0.1042, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.16405755281448364, + "rewards/margins": 0.3927202522754669, + "rewards/rejected": -0.556777834892273, + "step": 2840 + }, + { + "epoch": 0.38, + "learning_rate": 3.897982258676867e-06, + "logits/chosen": -1.488878607749939, + "logits/rejected": -1.200114130973816, + "logps/chosen": -445.1380920410156, + "logps/rejected": -784.933349609375, + "loss": 0.1423, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.1881919950246811, + "rewards/margins": 0.3287205696105957, + "rewards/rejected": -0.5169125199317932, + "step": 2850 + }, + { + "epoch": 0.38, + "learning_rate": 3.888320862029699e-06, + "logits/chosen": -1.3110463619232178, + "logits/rejected": -1.0240827798843384, + "logps/chosen": -434.48358154296875, + "logps/rejected": -900.2415161132812, + "loss": 0.1204, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.1918242871761322, + "rewards/margins": 0.4229181408882141, + "rewards/rejected": -0.6147423982620239, + "step": 2860 + }, + { + "epoch": 0.38, + "learning_rate": 3.878629392070143e-06, + "logits/chosen": -1.4728009700775146, + "logits/rejected": -0.9224138259887695, + "logps/chosen": -559.6575927734375, + "logps/rejected": -926.2965087890625, + "loss": 0.0955, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.2580861747264862, + "rewards/margins": 0.3979208469390869, + "rewards/rejected": -0.6560070514678955, + "step": 2870 + }, + { + "epoch": 0.38, + "learning_rate": 3.868908058731376e-06, + "logits/chosen": -1.3200956583023071, + "logits/rejected": -0.7975896596908569, + "logps/chosen": -405.00048828125, + "logps/rejected": -890.6468505859375, + "loss": 0.0923, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.16780821979045868, + "rewards/margins": 0.4599390923976898, + "rewards/rejected": -0.6277473568916321, + "step": 2880 + }, + { + "epoch": 0.39, + "learning_rate": 3.859157072593459e-06, + "logits/chosen": -1.4886184930801392, + "logits/rejected": -0.9175424575805664, + "logps/chosen": -541.4322509765625, + "logps/rejected": -944.6419677734375, + "loss": 0.0796, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.23973998427391052, + "rewards/margins": 0.4599900245666504, + "rewards/rejected": -0.6997300386428833, + "step": 2890 + }, + { + "epoch": 0.39, + "learning_rate": 3.849376644878783e-06, + "logits/chosen": -1.635000467300415, + "logits/rejected": -0.8878408670425415, + "logps/chosen": -491.761474609375, + "logps/rejected": -803.8742065429688, + "loss": 0.1206, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.19202962517738342, + "rewards/margins": 0.38747844099998474, + "rewards/rejected": -0.5795080661773682, + "step": 2900 + }, + { + "epoch": 0.39, + "learning_rate": 3.839566987447492e-06, + "logits/chosen": -1.3475700616836548, + "logits/rejected": -0.7748931050300598, + "logps/chosen": -468.58056640625, + "logps/rejected": -784.2288208007812, + "loss": 0.1174, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.18681365251541138, + "rewards/margins": 0.4012163281440735, + "rewards/rejected": -0.5880299806594849, + "step": 2910 + }, + { + "epoch": 0.39, + "learning_rate": 3.829728312792895e-06, + "logits/chosen": -1.4943805932998657, + "logits/rejected": -0.8177087903022766, + "logps/chosen": -441.33258056640625, + "logps/rejected": -884.5404052734375, + "loss": 0.1021, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.18188966810703278, + "rewards/margins": 0.4683989882469177, + "rewards/rejected": -0.6502886414527893, + "step": 2920 + }, + { + "epoch": 0.39, + "learning_rate": 3.819860834036859e-06, + "logits/chosen": -1.371438980102539, + "logits/rejected": -0.903844952583313, + "logps/chosen": -394.12615966796875, + "logps/rejected": -682.78955078125, + "loss": 0.1467, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.16386035084724426, + "rewards/margins": 0.3356771469116211, + "rewards/rejected": -0.49953746795654297, + "step": 2930 + }, + { + "epoch": 0.39, + "learning_rate": 3.8099647649251984e-06, + "logits/chosen": -1.5508685111999512, + "logits/rejected": -1.134049654006958, + "logps/chosen": -474.1361389160156, + "logps/rejected": -845.4661254882812, + "loss": 0.1215, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.21748395264148712, + "rewards/margins": 0.36039185523986816, + "rewards/rejected": -0.5778758525848389, + "step": 2940 + }, + { + "epoch": 0.39, + "learning_rate": 3.8000403198230385e-06, + "logits/chosen": -1.3952841758728027, + "logits/rejected": -0.7636578679084778, + "logps/chosen": -570.806396484375, + "logps/rejected": -870.5598754882812, + "loss": 0.1219, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.22723431885242462, + "rewards/margins": 0.38328367471694946, + "rewards/rejected": -0.6105180382728577, + "step": 2950 + }, + { + "epoch": 0.39, + "learning_rate": 3.790087713710179e-06, + "logits/chosen": -1.2440921068191528, + "logits/rejected": -0.8564810752868652, + "logps/chosen": -419.251220703125, + "logps/rejected": -766.0396118164062, + "loss": 0.1609, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.2219938039779663, + "rewards/margins": 0.33859145641326904, + "rewards/rejected": -0.5605852603912354, + "step": 2960 + }, + { + "epoch": 0.4, + "learning_rate": 3.780107162176429e-06, + "logits/chosen": -1.5238640308380127, + "logits/rejected": -0.9895439147949219, + "logps/chosen": -494.74609375, + "logps/rejected": -814.00537109375, + "loss": 0.1314, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.2533033490180969, + "rewards/margins": 0.3240547180175781, + "rewards/rejected": -0.5773580074310303, + "step": 2970 + }, + { + "epoch": 0.4, + "learning_rate": 3.770098881416945e-06, + "logits/chosen": -1.3618767261505127, + "logits/rejected": -0.6415785551071167, + "logps/chosen": -565.89501953125, + "logps/rejected": -892.7698364257812, + "loss": 0.1137, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.22178450226783752, + "rewards/margins": 0.44626274704933167, + "rewards/rejected": -0.6680471897125244, + "step": 2980 + }, + { + "epoch": 0.4, + "learning_rate": 3.760063088227542e-06, + "logits/chosen": -1.2707054615020752, + "logits/rejected": -0.9608383178710938, + "logps/chosen": -476.09246826171875, + "logps/rejected": -822.6774291992188, + "loss": 0.108, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.23713941872119904, + "rewards/margins": 0.37951499223709106, + "rewards/rejected": -0.6166543960571289, + "step": 2990 + }, + { + "epoch": 0.4, + "learning_rate": 3.7500000000000005e-06, + "logits/chosen": -1.34781813621521, + "logits/rejected": -0.8298260569572449, + "logps/chosen": -511.88116455078125, + "logps/rejected": -827.2025146484375, + "loss": 0.1354, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.21970734000205994, + "rewards/margins": 0.35646185278892517, + "rewards/rejected": -0.5761691927909851, + "step": 3000 + }, + { + "epoch": 0.4, + "learning_rate": 3.739909834717356e-06, + "logits/chosen": -1.3226549625396729, + "logits/rejected": -0.7820941209793091, + "logps/chosen": -473.028564453125, + "logps/rejected": -815.1127319335938, + "loss": 0.1202, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.16715632379055023, + "rewards/margins": 0.38655251264572144, + "rewards/rejected": -0.5537087917327881, + "step": 3010 + }, + { + "epoch": 0.4, + "learning_rate": 3.7297928109491765e-06, + "logits/chosen": -1.3209315538406372, + "logits/rejected": -0.8163756132125854, + "logps/chosen": -460.5916442871094, + "logps/rejected": -885.9065551757812, + "loss": 0.1215, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.1943354308605194, + "rewards/margins": 0.4527612626552582, + "rewards/rejected": -0.6470966339111328, + "step": 3020 + }, + { + "epoch": 0.4, + "learning_rate": 3.7196491478468322e-06, + "logits/chosen": -1.1771209239959717, + "logits/rejected": -0.7845407128334045, + "logps/chosen": -478.44390869140625, + "logps/rejected": -837.09521484375, + "loss": 0.1583, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.2497968226671219, + "rewards/margins": 0.34961241483688354, + "rewards/rejected": -0.5994092226028442, + "step": 3030 + }, + { + "epoch": 0.41, + "learning_rate": 3.7094790651387414e-06, + "logits/chosen": -1.425419807434082, + "logits/rejected": -0.9499618411064148, + "logps/chosen": -563.8905029296875, + "logps/rejected": -868.3553466796875, + "loss": 0.153, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.2709125578403473, + "rewards/margins": 0.30820900201797485, + "rewards/rejected": -0.5791215300559998, + "step": 3040 + }, + { + "epoch": 0.41, + "learning_rate": 3.699282783125616e-06, + "logits/chosen": -1.3669326305389404, + "logits/rejected": -0.837415874004364, + "logps/chosen": -629.0724487304688, + "logps/rejected": -925.4384765625, + "loss": 0.1291, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.2975293695926666, + "rewards/margins": 0.33974334597587585, + "rewards/rejected": -0.6372727155685425, + "step": 3050 + }, + { + "epoch": 0.41, + "learning_rate": 3.689060522675689e-06, + "logits/chosen": -1.4641399383544922, + "logits/rejected": -0.7488492727279663, + "logps/chosen": -615.8585205078125, + "logps/rejected": -912.4896240234375, + "loss": 0.1308, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.2836320400238037, + "rewards/margins": 0.3700219392776489, + "rewards/rejected": -0.6536539793014526, + "step": 3060 + }, + { + "epoch": 0.41, + "learning_rate": 3.6788125052199264e-06, + "logits/chosen": -1.3359787464141846, + "logits/rejected": -0.7459200024604797, + "logps/chosen": -444.7274475097656, + "logps/rejected": -821.0389404296875, + "loss": 0.1039, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.18269316852092743, + "rewards/margins": 0.4336867928504944, + "rewards/rejected": -0.6163799166679382, + "step": 3070 + }, + { + "epoch": 0.41, + "learning_rate": 3.668538952747236e-06, + "logits/chosen": -1.4183568954467773, + "logits/rejected": -0.9894925355911255, + "logps/chosen": -457.7557678222656, + "logps/rejected": -861.2824096679688, + "loss": 0.1165, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.21521393954753876, + "rewards/margins": 0.3878498077392578, + "rewards/rejected": -0.603063702583313, + "step": 3080 + }, + { + "epoch": 0.41, + "learning_rate": 3.658240087799655e-06, + "logits/chosen": -1.5412737131118774, + "logits/rejected": -0.9888244867324829, + "logps/chosen": -496.530517578125, + "logps/rejected": -874.2278442382812, + "loss": 0.1376, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.2602888345718384, + "rewards/margins": 0.362166166305542, + "rewards/rejected": -0.6224549412727356, + "step": 3090 + }, + { + "epoch": 0.41, + "learning_rate": 3.6479161334675294e-06, + "logits/chosen": -1.5275121927261353, + "logits/rejected": -0.8869367837905884, + "logps/chosen": -536.9756469726562, + "logps/rejected": -823.1558837890625, + "loss": 0.1431, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.2103598564863205, + "rewards/margins": 0.3759499192237854, + "rewards/rejected": -0.5863097906112671, + "step": 3100 + }, + { + "epoch": 0.41, + "learning_rate": 3.6375673133846847e-06, + "logits/chosen": -1.3907606601715088, + "logits/rejected": -0.9182275533676147, + "logps/chosen": -517.0655517578125, + "logps/rejected": -853.0217895507812, + "loss": 0.1194, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.22291526198387146, + "rewards/margins": 0.3772997558116913, + "rewards/rejected": -0.6002150774002075, + "step": 3110 + }, + { + "epoch": 0.42, + "learning_rate": 3.627193851723577e-06, + "logits/chosen": -1.3474422693252563, + "logits/rejected": -0.7908347845077515, + "logps/chosen": -425.14105224609375, + "logps/rejected": -707.5562744140625, + "loss": 0.2323, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.1527886688709259, + "rewards/margins": 0.3132486939430237, + "rewards/rejected": -0.4660373330116272, + "step": 3120 + }, + { + "epoch": 0.42, + "learning_rate": 3.616795973190442e-06, + "logits/chosen": -1.3398116827011108, + "logits/rejected": -0.9347062110900879, + "logps/chosen": -406.9312438964844, + "logps/rejected": -708.2562255859375, + "loss": 0.1497, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.18078385293483734, + "rewards/margins": 0.30927419662475586, + "rewards/rejected": -0.490058034658432, + "step": 3130 + }, + { + "epoch": 0.42, + "learning_rate": 3.6063739030204226e-06, + "logits/chosen": -1.6142152547836304, + "logits/rejected": -0.9039406776428223, + "logps/chosen": -384.0916748046875, + "logps/rejected": -773.7716064453125, + "loss": 0.1156, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.11609621345996857, + "rewards/margins": 0.4180026054382324, + "rewards/rejected": -0.534098744392395, + "step": 3140 + }, + { + "epoch": 0.42, + "learning_rate": 3.595927866972694e-06, + "logits/chosen": -1.4731504917144775, + "logits/rejected": -0.8567525148391724, + "logps/chosen": -488.36602783203125, + "logps/rejected": -888.5875854492188, + "loss": 0.1043, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.16218402981758118, + "rewards/margins": 0.41839224100112915, + "rewards/rejected": -0.5805762410163879, + "step": 3150 + }, + { + "epoch": 0.42, + "learning_rate": 3.5854580913255706e-06, + "logits/chosen": -1.484362244606018, + "logits/rejected": -0.9274988174438477, + "logps/chosen": -472.91107177734375, + "logps/rejected": -783.4113159179688, + "loss": 0.1497, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.14970546960830688, + "rewards/margins": 0.37639063596725464, + "rewards/rejected": -0.5260960459709167, + "step": 3160 + }, + { + "epoch": 0.42, + "learning_rate": 3.574964802871607e-06, + "logits/chosen": -1.385100245475769, + "logits/rejected": -0.8206901550292969, + "logps/chosen": -449.2620544433594, + "logps/rejected": -824.8053588867188, + "loss": 0.1342, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.19664952158927917, + "rewards/margins": 0.3465280532836914, + "rewards/rejected": -0.543177604675293, + "step": 3170 + }, + { + "epoch": 0.42, + "learning_rate": 3.564448228912682e-06, + "logits/chosen": -1.433672308921814, + "logits/rejected": -0.7420425415039062, + "logps/chosen": -449.03680419921875, + "logps/rejected": -821.3928833007812, + "loss": 0.1277, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.17572662234306335, + "rewards/margins": 0.4020700454711914, + "rewards/rejected": -0.5777965784072876, + "step": 3180 + }, + { + "epoch": 0.43, + "learning_rate": 3.5539085972550786e-06, + "logits/chosen": -1.5129320621490479, + "logits/rejected": -0.6774355173110962, + "logps/chosen": -465.81524658203125, + "logps/rejected": -860.1716918945312, + "loss": 0.0563, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.15053944289684296, + "rewards/margins": 0.453474760055542, + "rewards/rejected": -0.6040140986442566, + "step": 3190 + }, + { + "epoch": 0.43, + "learning_rate": 3.543346136204545e-06, + "logits/chosen": -1.5318454504013062, + "logits/rejected": -1.0314596891403198, + "logps/chosen": -443.3661193847656, + "logps/rejected": -740.8358764648438, + "loss": 0.1382, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.18502503633499146, + "rewards/margins": 0.36333030462265015, + "rewards/rejected": -0.5483554005622864, + "step": 3200 + }, + { + "epoch": 0.43, + "learning_rate": 3.532761074561355e-06, + "logits/chosen": -1.1741279363632202, + "logits/rejected": -0.5288771986961365, + "logps/chosen": -518.4847412109375, + "logps/rejected": -838.5154418945312, + "loss": 0.1252, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.20451363921165466, + "rewards/margins": 0.39652013778686523, + "rewards/rejected": -0.6010338068008423, + "step": 3210 + }, + { + "epoch": 0.43, + "learning_rate": 3.522153641615345e-06, + "logits/chosen": -1.1148918867111206, + "logits/rejected": -0.7859822511672974, + "logps/chosen": -379.1599426269531, + "logps/rejected": -732.1954345703125, + "loss": 0.1231, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.19221094250679016, + "rewards/margins": 0.33260732889175415, + "rewards/rejected": -0.5248182415962219, + "step": 3220 + }, + { + "epoch": 0.43, + "learning_rate": 3.5115240671409534e-06, + "logits/chosen": -1.4787633419036865, + "logits/rejected": -0.8908971548080444, + "logps/chosen": -522.7233276367188, + "logps/rejected": -811.7718505859375, + "loss": 0.136, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.2209792137145996, + "rewards/margins": 0.36088380217552185, + "rewards/rejected": -0.5818630456924438, + "step": 3230 + }, + { + "epoch": 0.43, + "learning_rate": 3.5008725813922383e-06, + "logits/chosen": -1.3831818103790283, + "logits/rejected": -0.928032398223877, + "logps/chosen": -450.38995361328125, + "logps/rejected": -843.4984130859375, + "loss": 0.0915, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.2006407231092453, + "rewards/margins": 0.36500436067581177, + "rewards/rejected": -0.5656450390815735, + "step": 3240 + }, + { + "epoch": 0.43, + "learning_rate": 3.4901994150978926e-06, + "logits/chosen": -1.2624738216400146, + "logits/rejected": -1.0254709720611572, + "logps/chosen": -492.21185302734375, + "logps/rejected": -830.1780395507812, + "loss": 0.1727, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.2421186864376068, + "rewards/margins": 0.3397548794746399, + "rewards/rejected": -0.5818734765052795, + "step": 3250 + }, + { + "epoch": 0.43, + "learning_rate": 3.4795047994562463e-06, + "logits/chosen": -1.327124834060669, + "logits/rejected": -0.8216224908828735, + "logps/chosen": -557.8870849609375, + "logps/rejected": -921.6619262695312, + "loss": 0.1273, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.26846641302108765, + "rewards/margins": 0.3692953884601593, + "rewards/rejected": -0.6377617716789246, + "step": 3260 + }, + { + "epoch": 0.44, + "learning_rate": 3.4687889661302577e-06, + "logits/chosen": -1.5927109718322754, + "logits/rejected": -1.05434250831604, + "logps/chosen": -528.978271484375, + "logps/rejected": -948.4773559570312, + "loss": 0.1141, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.2590687572956085, + "rewards/margins": 0.3986712098121643, + "rewards/rejected": -0.6577399969100952, + "step": 3270 + }, + { + "epoch": 0.44, + "learning_rate": 3.458052147242494e-06, + "logits/chosen": -1.6400234699249268, + "logits/rejected": -0.8933243751525879, + "logps/chosen": -522.9338989257812, + "logps/rejected": -923.7149658203125, + "loss": 0.0918, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.19308575987815857, + "rewards/margins": 0.486158549785614, + "rewards/rejected": -0.679244339466095, + "step": 3280 + }, + { + "epoch": 0.44, + "learning_rate": 3.4472945753701038e-06, + "logits/chosen": -1.5157047510147095, + "logits/rejected": -0.8758195042610168, + "logps/chosen": -530.8990478515625, + "logps/rejected": -964.5487060546875, + "loss": 0.0872, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.21965882182121277, + "rewards/margins": 0.4682764410972595, + "rewards/rejected": -0.6879353523254395, + "step": 3290 + }, + { + "epoch": 0.44, + "learning_rate": 3.436516483539781e-06, + "logits/chosen": -1.5231119394302368, + "logits/rejected": -1.2421231269836426, + "logps/chosen": -448.62506103515625, + "logps/rejected": -1045.0579833984375, + "loss": 0.0683, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.21021994948387146, + "rewards/margins": 0.49710339307785034, + "rewards/rejected": -0.707323431968689, + "step": 3300 + }, + { + "epoch": 0.44, + "learning_rate": 3.4257181052227133e-06, + "logits/chosen": -1.4369999170303345, + "logits/rejected": -0.8599227666854858, + "logps/chosen": -442.81646728515625, + "logps/rejected": -837.7467041015625, + "loss": 0.1153, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.20834989845752716, + "rewards/margins": 0.42062148451805115, + "rewards/rejected": -0.6289713382720947, + "step": 3310 + }, + { + "epoch": 0.44, + "learning_rate": 3.4148996743295305e-06, + "logits/chosen": -1.1891943216323853, + "logits/rejected": -0.7156860828399658, + "logps/chosen": -483.78857421875, + "logps/rejected": -823.6920166015625, + "loss": 0.1533, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.22720369696617126, + "rewards/margins": 0.39558321237564087, + "rewards/rejected": -0.6227868795394897, + "step": 3320 + }, + { + "epoch": 0.44, + "learning_rate": 3.4040614252052305e-06, + "logits/chosen": -1.3941460847854614, + "logits/rejected": -0.7187548875808716, + "logps/chosen": -565.5982666015625, + "logps/rejected": -822.6068115234375, + "loss": 0.1826, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.263577938079834, + "rewards/margins": 0.32034698128700256, + "rewards/rejected": -0.5839249491691589, + "step": 3330 + }, + { + "epoch": 0.45, + "learning_rate": 3.3932035926241103e-06, + "logits/chosen": -1.2055962085723877, + "logits/rejected": -0.8065207600593567, + "logps/chosen": -595.4679565429688, + "logps/rejected": -894.6494140625, + "loss": 0.2069, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.33704596757888794, + "rewards/margins": 0.32209089398384094, + "rewards/rejected": -0.6591368317604065, + "step": 3340 + }, + { + "epoch": 0.45, + "learning_rate": 3.3823264117846722e-06, + "logits/chosen": -1.1408950090408325, + "logits/rejected": -0.7288360595703125, + "logps/chosen": -504.2162170410156, + "logps/rejected": -865.4747314453125, + "loss": 0.1382, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.2324347048997879, + "rewards/margins": 0.37575414776802063, + "rewards/rejected": -0.608188807964325, + "step": 3350 + }, + { + "epoch": 0.45, + "learning_rate": 3.3714301183045382e-06, + "logits/chosen": -1.360996127128601, + "logits/rejected": -0.8717953562736511, + "logps/chosen": -476.2850036621094, + "logps/rejected": -867.3087768554688, + "loss": 0.0808, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.2431853711605072, + "rewards/margins": 0.39439740777015686, + "rewards/rejected": -0.6375828385353088, + "step": 3360 + }, + { + "epoch": 0.45, + "learning_rate": 3.360514948215339e-06, + "logits/chosen": -1.2623645067214966, + "logits/rejected": -1.0878899097442627, + "logps/chosen": -432.14813232421875, + "logps/rejected": -887.6476440429688, + "loss": 0.083, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.20338860154151917, + "rewards/margins": 0.41923999786376953, + "rewards/rejected": -0.6226286292076111, + "step": 3370 + }, + { + "epoch": 0.45, + "learning_rate": 3.349581137957604e-06, + "logits/chosen": -1.3514466285705566, + "logits/rejected": -0.8106681704521179, + "logps/chosen": -488.593994140625, + "logps/rejected": -883.98095703125, + "loss": 0.1353, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.23182758688926697, + "rewards/margins": 0.3952936828136444, + "rewards/rejected": -0.6271212697029114, + "step": 3380 + }, + { + "epoch": 0.45, + "learning_rate": 3.338628924375638e-06, + "logits/chosen": -1.448120355606079, + "logits/rejected": -0.6248105764389038, + "logps/chosen": -492.08331298828125, + "logps/rejected": -817.7333374023438, + "loss": 0.119, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.20928683876991272, + "rewards/margins": 0.4339142441749573, + "rewards/rejected": -0.6432010531425476, + "step": 3390 + }, + { + "epoch": 0.45, + "learning_rate": 3.3276585447123957e-06, + "logits/chosen": -1.4691401720046997, + "logits/rejected": -0.9449941515922546, + "logps/chosen": -499.2208557128906, + "logps/rejected": -860.2800903320312, + "loss": 0.1704, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.22295789420604706, + "rewards/margins": 0.38977617025375366, + "rewards/rejected": -0.6127340197563171, + "step": 3400 + }, + { + "epoch": 0.45, + "learning_rate": 3.3166702366043364e-06, + "logits/chosen": -1.6984446048736572, + "logits/rejected": -1.022908329963684, + "logps/chosen": -398.6534729003906, + "logps/rejected": -742.465576171875, + "loss": 0.1171, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13367922604084015, + "rewards/margins": 0.40723562240600586, + "rewards/rejected": -0.54091477394104, + "step": 3410 + }, + { + "epoch": 0.46, + "learning_rate": 3.3056642380762783e-06, + "logits/chosen": -1.5041329860687256, + "logits/rejected": -0.7527793645858765, + "logps/chosen": -520.987060546875, + "logps/rejected": -816.6175537109375, + "loss": 0.1476, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.22410738468170166, + "rewards/margins": 0.37477830052375793, + "rewards/rejected": -0.5988856554031372, + "step": 3420 + }, + { + "epoch": 0.46, + "learning_rate": 3.294640787536245e-06, + "logits/chosen": -1.3957608938217163, + "logits/rejected": -0.8155180811882019, + "logps/chosen": -481.0667419433594, + "logps/rejected": -809.5614013671875, + "loss": 0.1274, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.18456891179084778, + "rewards/margins": 0.3828362822532654, + "rewards/rejected": -0.5674052238464355, + "step": 3430 + }, + { + "epoch": 0.46, + "learning_rate": 3.2836001237702993e-06, + "logits/chosen": -1.3207615613937378, + "logits/rejected": -0.7140559554100037, + "logps/chosen": -464.8232421875, + "logps/rejected": -858.9168090820312, + "loss": 0.1253, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.21229639649391174, + "rewards/margins": 0.41832393407821655, + "rewards/rejected": -0.6306203603744507, + "step": 3440 + }, + { + "epoch": 0.46, + "learning_rate": 3.272542485937369e-06, + "logits/chosen": -1.4193308353424072, + "logits/rejected": -0.9011315107345581, + "logps/chosen": -470.04986572265625, + "logps/rejected": -834.3380126953125, + "loss": 0.1232, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.2021806240081787, + "rewards/margins": 0.38284215331077576, + "rewards/rejected": -0.5850228071212769, + "step": 3450 + }, + { + "epoch": 0.46, + "learning_rate": 3.2614681135640696e-06, + "logits/chosen": -1.4956685304641724, + "logits/rejected": -1.192681074142456, + "logps/chosen": -351.14007568359375, + "logps/rejected": -777.5610961914062, + "loss": 0.1223, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.1393372118473053, + "rewards/margins": 0.39933890104293823, + "rewards/rejected": -0.5386761426925659, + "step": 3460 + }, + { + "epoch": 0.46, + "learning_rate": 3.2503772465395143e-06, + "logits/chosen": -1.298201084136963, + "logits/rejected": -1.021277666091919, + "logps/chosen": -355.12640380859375, + "logps/rejected": -815.8689575195312, + "loss": 0.1204, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.17074286937713623, + "rewards/margins": 0.39700907468795776, + "rewards/rejected": -0.5677520036697388, + "step": 3470 + }, + { + "epoch": 0.46, + "learning_rate": 3.2392701251101172e-06, + "logits/chosen": -1.5369417667388916, + "logits/rejected": -0.932148277759552, + "logps/chosen": -457.8705139160156, + "logps/rejected": -800.6802368164062, + "loss": 0.1309, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.17033250629901886, + "rewards/margins": 0.3913556933403015, + "rewards/rejected": -0.5616881847381592, + "step": 3480 + }, + { + "epoch": 0.47, + "learning_rate": 3.228146989874389e-06, + "logits/chosen": -1.6076122522354126, + "logits/rejected": -1.0611704587936401, + "logps/chosen": -472.25360107421875, + "logps/rejected": -867.3209228515625, + "loss": 0.1334, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.22368569672107697, + "rewards/margins": 0.3945960998535156, + "rewards/rejected": -0.6182817816734314, + "step": 3490 + }, + { + "epoch": 0.47, + "learning_rate": 3.217008081777726e-06, + "logits/chosen": -1.478785514831543, + "logits/rejected": -0.9631083607673645, + "logps/chosen": -506.24517822265625, + "logps/rejected": -775.35107421875, + "loss": 0.1945, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.24012240767478943, + "rewards/margins": 0.32278794050216675, + "rewards/rejected": -0.5629103183746338, + "step": 3500 + }, + { + "epoch": 0.47, + "learning_rate": 3.205853642107192e-06, + "logits/chosen": -1.480486512184143, + "logits/rejected": -1.117382287979126, + "logps/chosen": -494.24310302734375, + "logps/rejected": -832.8094482421875, + "loss": 0.1905, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.27982884645462036, + "rewards/margins": 0.32004794478416443, + "rewards/rejected": -0.5998767614364624, + "step": 3510 + }, + { + "epoch": 0.47, + "learning_rate": 3.1946839124862873e-06, + "logits/chosen": -1.1592427492141724, + "logits/rejected": -0.8009225726127625, + "logps/chosen": -427.3486328125, + "logps/rejected": -778.5949096679688, + "loss": 0.192, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.22615352272987366, + "rewards/margins": 0.3284720182418823, + "rewards/rejected": -0.5546255111694336, + "step": 3520 + }, + { + "epoch": 0.47, + "learning_rate": 3.183499134869721e-06, + "logits/chosen": -1.5271915197372437, + "logits/rejected": -0.893467128276825, + "logps/chosen": -505.61572265625, + "logps/rejected": -891.1272583007812, + "loss": 0.0979, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.21243330836296082, + "rewards/margins": 0.42251452803611755, + "rewards/rejected": -0.6349478960037231, + "step": 3530 + }, + { + "epoch": 0.47, + "learning_rate": 3.1722995515381644e-06, + "logits/chosen": -1.4152326583862305, + "logits/rejected": -0.8314677476882935, + "logps/chosen": -408.21337890625, + "logps/rejected": -841.8865966796875, + "loss": 0.1201, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.16699185967445374, + "rewards/margins": 0.4409754276275635, + "rewards/rejected": -0.6079672574996948, + "step": 3540 + }, + { + "epoch": 0.47, + "learning_rate": 3.1610854050930063e-06, + "logits/chosen": -1.2182379961013794, + "logits/rejected": -0.9724875688552856, + "logps/chosen": -522.8060302734375, + "logps/rejected": -934.7429809570312, + "loss": 0.1299, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.24052861332893372, + "rewards/margins": 0.3977331519126892, + "rewards/rejected": -0.6382617354393005, + "step": 3550 + }, + { + "epoch": 0.47, + "learning_rate": 3.149856938451094e-06, + "logits/chosen": -1.5642400979995728, + "logits/rejected": -1.1071844100952148, + "logps/chosen": -546.7676391601562, + "logps/rejected": -837.0343017578125, + "loss": 0.1513, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.2630346417427063, + "rewards/margins": 0.30036547780036926, + "rewards/rejected": -0.5634000897407532, + "step": 3560 + }, + { + "epoch": 0.48, + "learning_rate": 3.1386143948394764e-06, + "logits/chosen": -1.6900438070297241, + "logits/rejected": -1.0532042980194092, + "logps/chosen": -463.26873779296875, + "logps/rejected": -905.4603271484375, + "loss": 0.0941, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.1867888867855072, + "rewards/margins": 0.4671444892883301, + "rewards/rejected": -0.6539333462715149, + "step": 3570 + }, + { + "epoch": 0.48, + "learning_rate": 3.127358017790132e-06, + "logits/chosen": -1.4791187047958374, + "logits/rejected": -0.8488872647285461, + "logps/chosen": -542.0985717773438, + "logps/rejected": -987.3577270507812, + "loss": 0.1222, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.24738340079784393, + "rewards/margins": 0.47162240743637085, + "rewards/rejected": -0.719005823135376, + "step": 3580 + }, + { + "epoch": 0.48, + "learning_rate": 3.116088051134695e-06, + "logits/chosen": -1.496320128440857, + "logits/rejected": -0.7940270304679871, + "logps/chosen": -513.0885009765625, + "logps/rejected": -827.7974853515625, + "loss": 0.1197, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.2183947116136551, + "rewards/margins": 0.3765661120414734, + "rewards/rejected": -0.5949608087539673, + "step": 3590 + }, + { + "epoch": 0.48, + "learning_rate": 3.1048047389991693e-06, + "logits/chosen": -1.5090911388397217, + "logits/rejected": -0.9907795190811157, + "logps/chosen": -439.2784118652344, + "logps/rejected": -896.08203125, + "loss": 0.0892, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.19088810682296753, + "rewards/margins": 0.43076205253601074, + "rewards/rejected": -0.6216501593589783, + "step": 3600 + }, + { + "epoch": 0.48, + "learning_rate": 3.0935083257986493e-06, + "logits/chosen": -1.8225009441375732, + "logits/rejected": -1.1103214025497437, + "logps/chosen": -483.91375732421875, + "logps/rejected": -790.5393676757812, + "loss": 0.18, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.2046966850757599, + "rewards/margins": 0.34245455265045166, + "rewards/rejected": -0.5471512079238892, + "step": 3610 + }, + { + "epoch": 0.48, + "learning_rate": 3.082199056232015e-06, + "logits/chosen": -1.4404807090759277, + "logits/rejected": -0.9755549430847168, + "logps/chosen": -555.2987670898438, + "logps/rejected": -895.1951293945312, + "loss": 0.1219, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.27383953332901, + "rewards/margins": 0.34614747762680054, + "rewards/rejected": -0.6199870109558105, + "step": 3620 + }, + { + "epoch": 0.48, + "learning_rate": 3.0708771752766397e-06, + "logits/chosen": -1.5266478061676025, + "logits/rejected": -0.9134511947631836, + "logps/chosen": -450.92852783203125, + "logps/rejected": -807.119384765625, + "loss": 0.1357, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.17292854189872742, + "rewards/margins": 0.3673229515552521, + "rewards/rejected": -0.5402514338493347, + "step": 3630 + }, + { + "epoch": 0.49, + "learning_rate": 3.059542928183079e-06, + "logits/chosen": -1.5561821460723877, + "logits/rejected": -1.0397491455078125, + "logps/chosen": -460.45941162109375, + "logps/rejected": -793.2342529296875, + "loss": 0.1322, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.1769510954618454, + "rewards/margins": 0.3535122573375702, + "rewards/rejected": -0.5304633378982544, + "step": 3640 + }, + { + "epoch": 0.49, + "learning_rate": 3.0481965604697582e-06, + "logits/chosen": -1.623671531677246, + "logits/rejected": -0.8607115745544434, + "logps/chosen": -446.0001525878906, + "logps/rejected": -781.194091796875, + "loss": 0.1402, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.17367401719093323, + "rewards/margins": 0.3763401508331299, + "rewards/rejected": -0.5500141382217407, + "step": 3650 + }, + { + "epoch": 0.49, + "learning_rate": 3.0368383179176584e-06, + "logits/chosen": -1.7052555084228516, + "logits/rejected": -1.061753273010254, + "logps/chosen": -525.1064453125, + "logps/rejected": -799.73486328125, + "loss": 0.1289, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.17319978773593903, + "rewards/margins": 0.35249972343444824, + "rewards/rejected": -0.5256994962692261, + "step": 3660 + }, + { + "epoch": 0.49, + "learning_rate": 3.025468446564985e-06, + "logits/chosen": -1.3997642993927002, + "logits/rejected": -1.0487319231033325, + "logps/chosen": -529.3223876953125, + "logps/rejected": -924.0462646484375, + "loss": 0.0795, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.23997943103313446, + "rewards/margins": 0.42228931188583374, + "rewards/rejected": -0.6622687578201294, + "step": 3670 + }, + { + "epoch": 0.49, + "learning_rate": 3.0140871927018466e-06, + "logits/chosen": -1.5465452671051025, + "logits/rejected": -0.8040558695793152, + "logps/chosen": -551.8374633789062, + "logps/rejected": -901.1784057617188, + "loss": 0.1064, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.21528461575508118, + "rewards/margins": 0.4289511740207672, + "rewards/rejected": -0.6442357301712036, + "step": 3680 + }, + { + "epoch": 0.49, + "learning_rate": 3.002694802864912e-06, + "logits/chosen": -1.336032509803772, + "logits/rejected": -0.9531657099723816, + "logps/chosen": -409.7906188964844, + "logps/rejected": -796.7199096679688, + "loss": 0.1285, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.19792523980140686, + "rewards/margins": 0.40255865454673767, + "rewards/rejected": -0.6004839539527893, + "step": 3690 + }, + { + "epoch": 0.49, + "learning_rate": 2.9912915238320755e-06, + "logits/chosen": -1.6613883972167969, + "logits/rejected": -0.9797622561454773, + "logps/chosen": -476.36474609375, + "logps/rejected": -673.6729736328125, + "loss": 0.1785, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.1722622811794281, + "rewards/margins": 0.3274669051170349, + "rewards/rejected": -0.4997292160987854, + "step": 3700 + }, + { + "epoch": 0.49, + "learning_rate": 2.9798776026171087e-06, + "logits/chosen": -1.4321849346160889, + "logits/rejected": -0.8425230979919434, + "logps/chosen": -474.7699279785156, + "logps/rejected": -821.5621337890625, + "loss": 0.1192, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1794072985649109, + "rewards/margins": 0.40213608741760254, + "rewards/rejected": -0.5815433859825134, + "step": 3710 + }, + { + "epoch": 0.5, + "learning_rate": 2.9684532864643123e-06, + "logits/chosen": -1.423341989517212, + "logits/rejected": -0.8701097369194031, + "logps/chosen": -482.8060607910156, + "logps/rejected": -920.9959106445312, + "loss": 0.1074, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.191407710313797, + "rewards/margins": 0.44904837012290955, + "rewards/rejected": -0.6404560804367065, + "step": 3720 + }, + { + "epoch": 0.5, + "learning_rate": 2.957018822843154e-06, + "logits/chosen": -1.5194377899169922, + "logits/rejected": -0.7663905620574951, + "logps/chosen": -465.7847595214844, + "logps/rejected": -725.7530517578125, + "loss": 0.1096, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.1776033490896225, + "rewards/margins": 0.35331064462661743, + "rewards/rejected": -0.5309139490127563, + "step": 3730 + }, + { + "epoch": 0.5, + "learning_rate": 2.945574459442917e-06, + "logits/chosen": -1.799757957458496, + "logits/rejected": -0.9618920087814331, + "logps/chosen": -517.2938842773438, + "logps/rejected": -848.6304931640625, + "loss": 0.1334, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.17708583176136017, + "rewards/margins": 0.3980599045753479, + "rewards/rejected": -0.5751457214355469, + "step": 3740 + }, + { + "epoch": 0.5, + "learning_rate": 2.9341204441673267e-06, + "logits/chosen": -1.3543765544891357, + "logits/rejected": -0.9924432635307312, + "logps/chosen": -404.3013610839844, + "logps/rejected": -843.3447265625, + "loss": 0.079, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.1664157211780548, + "rewards/margins": 0.4492465555667877, + "rewards/rejected": -0.6156622171401978, + "step": 3750 + }, + { + "epoch": 0.5, + "learning_rate": 2.922657025129185e-06, + "logits/chosen": -1.5894160270690918, + "logits/rejected": -0.9104646444320679, + "logps/chosen": -456.28131103515625, + "logps/rejected": -912.9832763671875, + "loss": 0.0968, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.19846588373184204, + "rewards/margins": 0.45922431349754333, + "rewards/rejected": -0.6576902270317078, + "step": 3760 + }, + { + "epoch": 0.5, + "learning_rate": 2.9111844506449973e-06, + "logits/chosen": -1.4002254009246826, + "logits/rejected": -1.0213944911956787, + "logps/chosen": -473.43035888671875, + "logps/rejected": -841.4640502929688, + "loss": 0.1599, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.23086082935333252, + "rewards/margins": 0.3589969277381897, + "rewards/rejected": -0.5898576974868774, + "step": 3770 + }, + { + "epoch": 0.5, + "learning_rate": 2.8997029692295875e-06, + "logits/chosen": -1.4831597805023193, + "logits/rejected": -0.8599148988723755, + "logps/chosen": -390.19244384765625, + "logps/rejected": -815.3306884765625, + "loss": 0.1172, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.16593720018863678, + "rewards/margins": 0.4113589823246002, + "rewards/rejected": -0.5772961378097534, + "step": 3780 + }, + { + "epoch": 0.51, + "learning_rate": 2.888212829590719e-06, + "logits/chosen": -1.4598033428192139, + "logits/rejected": -0.924828052520752, + "logps/chosen": -414.89056396484375, + "logps/rejected": -792.7503662109375, + "loss": 0.1386, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.1876627504825592, + "rewards/margins": 0.38501837849617004, + "rewards/rejected": -0.5726811289787292, + "step": 3790 + }, + { + "epoch": 0.51, + "learning_rate": 2.876714280623708e-06, + "logits/chosen": -1.418028473854065, + "logits/rejected": -0.7986099123954773, + "logps/chosen": -550.6697998046875, + "logps/rejected": -891.5902099609375, + "loss": 0.153, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.2176857888698578, + "rewards/margins": 0.4177919328212738, + "rewards/rejected": -0.6354777812957764, + "step": 3800 + }, + { + "epoch": 0.51, + "learning_rate": 2.8652075714060296e-06, + "logits/chosen": -1.2953003644943237, + "logits/rejected": -0.7636483907699585, + "logps/chosen": -543.0831909179688, + "logps/rejected": -802.9742431640625, + "loss": 0.1486, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.3118182122707367, + "rewards/margins": 0.3155859410762787, + "rewards/rejected": -0.6274041533470154, + "step": 3810 + }, + { + "epoch": 0.51, + "learning_rate": 2.8536929511919227e-06, + "logits/chosen": -1.270843505859375, + "logits/rejected": -0.6340192556381226, + "logps/chosen": -525.5787963867188, + "logps/rejected": -820.7928466796875, + "loss": 0.1023, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.26634496450424194, + "rewards/margins": 0.3647981584072113, + "rewards/rejected": -0.6311432123184204, + "step": 3820 + }, + { + "epoch": 0.51, + "learning_rate": 2.842170669406993e-06, + "logits/chosen": -1.2666388750076294, + "logits/rejected": -0.5316375494003296, + "logps/chosen": -518.71044921875, + "logps/rejected": -888.7023315429688, + "loss": 0.0905, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.21681174635887146, + "rewards/margins": 0.4333477020263672, + "rewards/rejected": -0.6501595377922058, + "step": 3830 + }, + { + "epoch": 0.51, + "learning_rate": 2.8306409756428067e-06, + "logits/chosen": -1.432254433631897, + "logits/rejected": -0.826429545879364, + "logps/chosen": -476.503662109375, + "logps/rejected": -847.1607666015625, + "loss": 0.0783, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.2242472618818283, + "rewards/margins": 0.39870744943618774, + "rewards/rejected": -0.6229546666145325, + "step": 3840 + }, + { + "epoch": 0.51, + "learning_rate": 2.8191041196514874e-06, + "logits/chosen": -1.3423511981964111, + "logits/rejected": -0.8700442314147949, + "logps/chosen": -497.36370849609375, + "logps/rejected": -851.2994995117188, + "loss": 0.1482, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.26128625869750977, + "rewards/margins": 0.33478981256484985, + "rewards/rejected": -0.5960760712623596, + "step": 3850 + }, + { + "epoch": 0.51, + "learning_rate": 2.807560351340302e-06, + "logits/chosen": -1.5188156366348267, + "logits/rejected": -0.9726465940475464, + "logps/chosen": -368.9046325683594, + "logps/rejected": -713.8798828125, + "loss": 0.1383, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.15176090598106384, + "rewards/margins": 0.3572022020816803, + "rewards/rejected": -0.5089630484580994, + "step": 3860 + }, + { + "epoch": 0.52, + "learning_rate": 2.7960099207662535e-06, + "logits/chosen": -1.2664258480072021, + "logits/rejected": -0.7739121317863464, + "logps/chosen": -359.26123046875, + "logps/rejected": -743.7399291992188, + "loss": 0.1523, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.17123065888881683, + "rewards/margins": 0.3718946576118469, + "rewards/rejected": -0.5431252717971802, + "step": 3870 + }, + { + "epoch": 0.52, + "learning_rate": 2.7844530781306544e-06, + "logits/chosen": -1.3709384202957153, + "logits/rejected": -0.8558367490768433, + "logps/chosen": -428.074951171875, + "logps/rejected": -773.4097290039062, + "loss": 0.1323, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.1847999542951584, + "rewards/margins": 0.34443315863609314, + "rewards/rejected": -0.5292330980300903, + "step": 3880 + }, + { + "epoch": 0.52, + "learning_rate": 2.77289007377372e-06, + "logits/chosen": -1.3629367351531982, + "logits/rejected": -0.6934945583343506, + "logps/chosen": -459.5267639160156, + "logps/rejected": -874.0427856445312, + "loss": 0.1017, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.18202826380729675, + "rewards/margins": 0.4448707103729248, + "rewards/rejected": -0.626899003982544, + "step": 3890 + }, + { + "epoch": 0.52, + "learning_rate": 2.761321158169134e-06, + "logits/chosen": -1.4932496547698975, + "logits/rejected": -0.7716339826583862, + "logps/chosen": -508.470703125, + "logps/rejected": -864.6212158203125, + "loss": 0.1102, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.2065424621105194, + "rewards/margins": 0.4489240050315857, + "rewards/rejected": -0.6554665565490723, + "step": 3900 + }, + { + "epoch": 0.52, + "learning_rate": 2.749746581918629e-06, + "logits/chosen": -1.4111101627349854, + "logits/rejected": -0.871496856212616, + "logps/chosen": -451.28240966796875, + "logps/rejected": -913.0838012695312, + "loss": 0.0692, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.16033974289894104, + "rewards/margins": 0.4733968675136566, + "rewards/rejected": -0.6337365508079529, + "step": 3910 + }, + { + "epoch": 0.52, + "learning_rate": 2.738166595746554e-06, + "logits/chosen": -1.5585377216339111, + "logits/rejected": -0.8070343136787415, + "logps/chosen": -532.346435546875, + "logps/rejected": -916.8145751953125, + "loss": 0.1155, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.16915038228034973, + "rewards/margins": 0.45431309938430786, + "rewards/rejected": -0.62346351146698, + "step": 3920 + }, + { + "epoch": 0.52, + "learning_rate": 2.726581450494451e-06, + "logits/chosen": -1.497727632522583, + "logits/rejected": -0.9493281245231628, + "logps/chosen": -453.8994140625, + "logps/rejected": -849.1500244140625, + "loss": 0.1153, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.1854790449142456, + "rewards/margins": 0.4318001866340637, + "rewards/rejected": -0.6172792315483093, + "step": 3930 + }, + { + "epoch": 0.53, + "learning_rate": 2.7149913971156105e-06, + "logits/chosen": -1.0641905069351196, + "logits/rejected": -0.7125247716903687, + "logps/chosen": -440.34698486328125, + "logps/rejected": -811.2091674804688, + "loss": 0.118, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.19736629724502563, + "rewards/margins": 0.39964836835861206, + "rewards/rejected": -0.5970146059989929, + "step": 3940 + }, + { + "epoch": 0.53, + "learning_rate": 2.703396686669646e-06, + "logits/chosen": -1.396917700767517, + "logits/rejected": -1.0328669548034668, + "logps/chosen": -398.5068359375, + "logps/rejected": -766.6187744140625, + "loss": 0.1472, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.1600363403558731, + "rewards/margins": 0.3699353039264679, + "rewards/rejected": -0.5299716591835022, + "step": 3950 + }, + { + "epoch": 0.53, + "learning_rate": 2.6917975703170466e-06, + "logits/chosen": -1.546156406402588, + "logits/rejected": -0.8876574635505676, + "logps/chosen": -481.607666015625, + "logps/rejected": -790.7122192382812, + "loss": 0.1302, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.20209841430187225, + "rewards/margins": 0.36280348896980286, + "rewards/rejected": -0.5649019479751587, + "step": 3960 + }, + { + "epoch": 0.53, + "learning_rate": 2.6801942993137435e-06, + "logits/chosen": -1.3815232515335083, + "logits/rejected": -0.9472028613090515, + "logps/chosen": -370.59197998046875, + "logps/rejected": -752.8310546875, + "loss": 0.1158, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.1438537836074829, + "rewards/margins": 0.39828285574913025, + "rewards/rejected": -0.542136549949646, + "step": 3970 + }, + { + "epoch": 0.53, + "learning_rate": 2.668587125005663e-06, + "logits/chosen": -1.4644068479537964, + "logits/rejected": -0.8621865510940552, + "logps/chosen": -443.9444885253906, + "logps/rejected": -771.3038330078125, + "loss": 0.138, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.16472499072551727, + "rewards/margins": 0.38489940762519836, + "rewards/rejected": -0.5496243834495544, + "step": 3980 + }, + { + "epoch": 0.53, + "learning_rate": 2.6569762988232838e-06, + "logits/chosen": -1.2870821952819824, + "logits/rejected": -0.9376013875007629, + "logps/chosen": -398.32415771484375, + "logps/rejected": -729.791748046875, + "loss": 0.1378, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.18426913022994995, + "rewards/margins": 0.3460938334465027, + "rewards/rejected": -0.5303629040718079, + "step": 3990 + }, + { + "epoch": 0.53, + "learning_rate": 2.6453620722761897e-06, + "logits/chosen": -1.534701943397522, + "logits/rejected": -0.8690187335014343, + "logps/chosen": -446.68365478515625, + "logps/rejected": -857.0474853515625, + "loss": 0.1392, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.20550382137298584, + "rewards/margins": 0.41372913122177124, + "rewards/rejected": -0.6192329525947571, + "step": 4000 + }, + { + "epoch": 0.53, + "learning_rate": 2.6337446969476234e-06, + "logits/chosen": -1.697040319442749, + "logits/rejected": -1.0818397998809814, + "logps/chosen": -499.67626953125, + "logps/rejected": -910.4700927734375, + "loss": 0.1482, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.2038416862487793, + "rewards/margins": 0.41365185379981995, + "rewards/rejected": -0.6174935698509216, + "step": 4010 + }, + { + "epoch": 0.54, + "learning_rate": 2.6221244244890336e-06, + "logits/chosen": -1.2337590456008911, + "logits/rejected": -0.9159961938858032, + "logps/chosen": -573.4525146484375, + "logps/rejected": -832.6463012695312, + "loss": 0.1838, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.2931188642978668, + "rewards/margins": 0.27465683221817017, + "rewards/rejected": -0.5677756071090698, + "step": 4020 + }, + { + "epoch": 0.54, + "learning_rate": 2.6105015066146266e-06, + "logits/chosen": -1.4252393245697021, + "logits/rejected": -0.9289156198501587, + "logps/chosen": -476.4798889160156, + "logps/rejected": -786.7811889648438, + "loss": 0.1752, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.19843436777591705, + "rewards/margins": 0.3141877055168152, + "rewards/rejected": -0.512622058391571, + "step": 4030 + }, + { + "epoch": 0.54, + "learning_rate": 2.5988761950959133e-06, + "logits/chosen": -1.4496116638183594, + "logits/rejected": -1.0391935110092163, + "logps/chosen": -441.1336975097656, + "logps/rejected": -721.164794921875, + "loss": 0.179, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.22740332782268524, + "rewards/margins": 0.2868295907974243, + "rewards/rejected": -0.5142329931259155, + "step": 4040 + }, + { + "epoch": 0.54, + "learning_rate": 2.587248741756253e-06, + "logits/chosen": -1.5186278820037842, + "logits/rejected": -0.9992215037345886, + "logps/chosen": -456.5079040527344, + "logps/rejected": -799.9073486328125, + "loss": 0.1293, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.18669745326042175, + "rewards/margins": 0.3633093535900116, + "rewards/rejected": -0.5500068664550781, + "step": 4050 + }, + { + "epoch": 0.54, + "learning_rate": 2.575619398465402e-06, + "logits/chosen": -1.3673820495605469, + "logits/rejected": -0.8725525140762329, + "logps/chosen": -481.6690979003906, + "logps/rejected": -868.7838134765625, + "loss": 0.0755, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17358894646167755, + "rewards/margins": 0.4075016975402832, + "rewards/rejected": -0.5810906291007996, + "step": 4060 + }, + { + "epoch": 0.54, + "learning_rate": 2.563988417134056e-06, + "logits/chosen": -1.3905086517333984, + "logits/rejected": -0.7997684478759766, + "logps/chosen": -513.9821166992188, + "logps/rejected": -743.789794921875, + "loss": 0.1757, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.1909521520137787, + "rewards/margins": 0.2950867712497711, + "rewards/rejected": -0.4860389828681946, + "step": 4070 + }, + { + "epoch": 0.54, + "learning_rate": 2.5523560497083927e-06, + "logits/chosen": -1.5381324291229248, + "logits/rejected": -1.2439024448394775, + "logps/chosen": -407.6680603027344, + "logps/rejected": -733.21337890625, + "loss": 0.158, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.16457518935203552, + "rewards/margins": 0.3094017505645752, + "rewards/rejected": -0.4739769399166107, + "step": 4080 + }, + { + "epoch": 0.55, + "learning_rate": 2.5407225481646146e-06, + "logits/chosen": -1.261339545249939, + "logits/rejected": -0.8529649972915649, + "logps/chosen": -363.42462158203125, + "logps/rejected": -758.0584716796875, + "loss": 0.1897, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.16312359273433685, + "rewards/margins": 0.36931270360946655, + "rewards/rejected": -0.5324362516403198, + "step": 4090 + }, + { + "epoch": 0.55, + "learning_rate": 2.5290881645034932e-06, + "logits/chosen": -1.5052446126937866, + "logits/rejected": -1.0409244298934937, + "logps/chosen": -478.3084411621094, + "logps/rejected": -739.9015502929688, + "loss": 0.1518, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.20306257903575897, + "rewards/margins": 0.3137751519680023, + "rewards/rejected": -0.5168377161026001, + "step": 4100 + }, + { + "epoch": 0.55, + "learning_rate": 2.517453150744904e-06, + "logits/chosen": -1.4215971231460571, + "logits/rejected": -0.9293550252914429, + "logps/chosen": -375.1071472167969, + "logps/rejected": -751.5859375, + "loss": 0.1263, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.1632181853055954, + "rewards/margins": 0.36968666315078735, + "rewards/rejected": -0.5329049229621887, + "step": 4110 + }, + { + "epoch": 0.55, + "learning_rate": 2.5058177589223766e-06, + "logits/chosen": -1.3039724826812744, + "logits/rejected": -0.7687618732452393, + "logps/chosen": -522.455810546875, + "logps/rejected": -841.9293212890625, + "loss": 0.1155, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.2402358055114746, + "rewards/margins": 0.35761570930480957, + "rewards/rejected": -0.597851574420929, + "step": 4120 + }, + { + "epoch": 0.55, + "learning_rate": 2.4941822410776247e-06, + "logits/chosen": -1.4730857610702515, + "logits/rejected": -0.8496094942092896, + "logps/chosen": -429.6819763183594, + "logps/rejected": -800.4998779296875, + "loss": 0.1311, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.16971971094608307, + "rewards/margins": 0.39771103858947754, + "rewards/rejected": -0.5674307346343994, + "step": 4130 + }, + { + "epoch": 0.55, + "learning_rate": 2.482546849255096e-06, + "logits/chosen": -1.4145101308822632, + "logits/rejected": -0.8954922556877136, + "logps/chosen": -546.86962890625, + "logps/rejected": -891.2218627929688, + "loss": 0.1549, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.229902982711792, + "rewards/margins": 0.3890032172203064, + "rewards/rejected": -0.6189061403274536, + "step": 4140 + }, + { + "epoch": 0.55, + "learning_rate": 2.470911835496508e-06, + "logits/chosen": -1.2700661420822144, + "logits/rejected": -0.9738311767578125, + "logps/chosen": -461.74658203125, + "logps/rejected": -822.8218994140625, + "loss": 0.1273, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.1996997594833374, + "rewards/margins": 0.3569287657737732, + "rewards/rejected": -0.5566284656524658, + "step": 4150 + }, + { + "epoch": 0.55, + "learning_rate": 2.4592774518353858e-06, + "logits/chosen": -1.367222547531128, + "logits/rejected": -0.9692174196243286, + "logps/chosen": -436.0655822753906, + "logps/rejected": -840.5070190429688, + "loss": 0.1205, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.21375672519207, + "rewards/margins": 0.4220011234283447, + "rewards/rejected": -0.6357578039169312, + "step": 4160 + }, + { + "epoch": 0.56, + "learning_rate": 2.447643950291608e-06, + "logits/chosen": -1.2921645641326904, + "logits/rejected": -0.7501717805862427, + "logps/chosen": -484.4534606933594, + "logps/rejected": -823.6820068359375, + "loss": 0.1049, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.1951848715543747, + "rewards/margins": 0.39403340220451355, + "rewards/rejected": -0.589218258857727, + "step": 4170 + }, + { + "epoch": 0.56, + "learning_rate": 2.436011582865945e-06, + "logits/chosen": -1.4127392768859863, + "logits/rejected": -0.9878486394882202, + "logps/chosen": -420.36163330078125, + "logps/rejected": -847.9583740234375, + "loss": 0.1344, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.1866758018732071, + "rewards/margins": 0.38426584005355835, + "rewards/rejected": -0.5709416270256042, + "step": 4180 + }, + { + "epoch": 0.56, + "learning_rate": 2.4243806015345988e-06, + "logits/chosen": -1.2964539527893066, + "logits/rejected": -0.8136736750602722, + "logps/chosen": -385.2383117675781, + "logps/rejected": -769.1781005859375, + "loss": 0.0946, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.16683003306388855, + "rewards/margins": 0.4348595142364502, + "rewards/rejected": -0.6016895174980164, + "step": 4190 + }, + { + "epoch": 0.56, + "learning_rate": 2.4127512582437486e-06, + "logits/chosen": -1.497622013092041, + "logits/rejected": -0.8333326578140259, + "logps/chosen": -465.08026123046875, + "logps/rejected": -788.7073974609375, + "loss": 0.0977, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14578023552894592, + "rewards/margins": 0.41077151894569397, + "rewards/rejected": -0.5565518140792847, + "step": 4200 + }, + { + "epoch": 0.56, + "learning_rate": 2.4011238049040875e-06, + "logits/chosen": -1.2194325923919678, + "logits/rejected": -0.9886236190795898, + "logps/chosen": -443.2428283691406, + "logps/rejected": -905.9352416992188, + "loss": 0.1673, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.22071197628974915, + "rewards/margins": 0.39461037516593933, + "rewards/rejected": -0.6153223514556885, + "step": 4210 + }, + { + "epoch": 0.56, + "learning_rate": 2.3894984933853734e-06, + "logits/chosen": -1.3924311399459839, + "logits/rejected": -0.9971855878829956, + "logps/chosen": -450.23785400390625, + "logps/rejected": -876.2208251953125, + "loss": 0.0855, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.19274960458278656, + "rewards/margins": 0.4028767943382263, + "rewards/rejected": -0.5956264138221741, + "step": 4220 + }, + { + "epoch": 0.56, + "learning_rate": 2.377875575510967e-06, + "logits/chosen": -1.389924168586731, + "logits/rejected": -0.8687151074409485, + "logps/chosen": -508.93048095703125, + "logps/rejected": -852.8839721679688, + "loss": 0.1456, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.2281205952167511, + "rewards/margins": 0.3864901065826416, + "rewards/rejected": -0.6146107316017151, + "step": 4230 + }, + { + "epoch": 0.57, + "learning_rate": 2.366255303052377e-06, + "logits/chosen": -1.2372336387634277, + "logits/rejected": -0.7575851678848267, + "logps/chosen": -546.502197265625, + "logps/rejected": -838.81640625, + "loss": 0.191, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.2502215504646301, + "rewards/margins": 0.33994418382644653, + "rewards/rejected": -0.5901657342910767, + "step": 4240 + }, + { + "epoch": 0.57, + "learning_rate": 2.3546379277238107e-06, + "logits/chosen": -1.2474725246429443, + "logits/rejected": -0.8925794363021851, + "logps/chosen": -535.6268310546875, + "logps/rejected": -912.724609375, + "loss": 0.1311, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.2528509497642517, + "rewards/margins": 0.38632652163505554, + "rewards/rejected": -0.6391774415969849, + "step": 4250 + }, + { + "epoch": 0.57, + "learning_rate": 2.3430237011767166e-06, + "logits/chosen": -1.0119497776031494, + "logits/rejected": -0.8262157440185547, + "logps/chosen": -419.077392578125, + "logps/rejected": -869.7803955078125, + "loss": 0.1629, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.2351589947938919, + "rewards/margins": 0.36828702688217163, + "rewards/rejected": -0.6034461259841919, + "step": 4260 + }, + { + "epoch": 0.57, + "learning_rate": 2.3314128749943376e-06, + "logits/chosen": -1.3086574077606201, + "logits/rejected": -0.7333884239196777, + "logps/chosen": -440.7137145996094, + "logps/rejected": -882.5616455078125, + "loss": 0.1, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.16451126337051392, + "rewards/margins": 0.46316853165626526, + "rewards/rejected": -0.627679705619812, + "step": 4270 + }, + { + "epoch": 0.57, + "learning_rate": 2.319805700686257e-06, + "logits/chosen": -1.4417178630828857, + "logits/rejected": -0.9919298887252808, + "logps/chosen": -450.36883544921875, + "logps/rejected": -760.4411010742188, + "loss": 0.1301, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.20442244410514832, + "rewards/margins": 0.3279123604297638, + "rewards/rejected": -0.5323348045349121, + "step": 4280 + }, + { + "epoch": 0.57, + "learning_rate": 2.3082024296829538e-06, + "logits/chosen": -1.46039879322052, + "logits/rejected": -0.9283072352409363, + "logps/chosen": -419.25714111328125, + "logps/rejected": -750.0972290039062, + "loss": 0.1237, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.15519830584526062, + "rewards/margins": 0.3568214178085327, + "rewards/rejected": -0.5120197534561157, + "step": 4290 + }, + { + "epoch": 0.57, + "learning_rate": 2.296603313330355e-06, + "logits/chosen": -1.3774569034576416, + "logits/rejected": -0.7986065149307251, + "logps/chosen": -502.2596740722656, + "logps/rejected": -776.649658203125, + "loss": 0.1709, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.24051257967948914, + "rewards/margins": 0.33478790521621704, + "rewards/rejected": -0.5753005146980286, + "step": 4300 + }, + { + "epoch": 0.57, + "learning_rate": 2.2850086028843894e-06, + "logits/chosen": -1.1812444925308228, + "logits/rejected": -0.9695785641670227, + "logps/chosen": -589.576416015625, + "logps/rejected": -1001.6077880859375, + "loss": 0.1276, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.2969650626182556, + "rewards/margins": 0.3733198344707489, + "rewards/rejected": -0.6702848672866821, + "step": 4310 + }, + { + "epoch": 0.58, + "learning_rate": 2.2734185495055503e-06, + "logits/chosen": -1.3204107284545898, + "logits/rejected": -0.750912606716156, + "logps/chosen": -478.04534912109375, + "logps/rejected": -769.1867065429688, + "loss": 0.1379, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.18711444735527039, + "rewards/margins": 0.33850574493408203, + "rewards/rejected": -0.52562016248703, + "step": 4320 + }, + { + "epoch": 0.58, + "learning_rate": 2.2618334042534464e-06, + "logits/chosen": -1.3586585521697998, + "logits/rejected": -0.8808444142341614, + "logps/chosen": -494.4283142089844, + "logps/rejected": -857.0852661132812, + "loss": 0.117, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.20293231308460236, + "rewards/margins": 0.40129345655441284, + "rewards/rejected": -0.6042258143424988, + "step": 4330 + }, + { + "epoch": 0.58, + "learning_rate": 2.250253418081373e-06, + "logits/chosen": -1.2095056772232056, + "logits/rejected": -0.7229364514350891, + "logps/chosen": -462.7508850097656, + "logps/rejected": -860.0739135742188, + "loss": 0.1189, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1977497637271881, + "rewards/margins": 0.42359238862991333, + "rewards/rejected": -0.6213420629501343, + "step": 4340 + }, + { + "epoch": 0.58, + "learning_rate": 2.238678841830867e-06, + "logits/chosen": -1.38365638256073, + "logits/rejected": -0.6831182837486267, + "logps/chosen": -512.0778198242188, + "logps/rejected": -776.4503173828125, + "loss": 0.1178, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.20228242874145508, + "rewards/margins": 0.35464924573898315, + "rewards/rejected": -0.5569316744804382, + "step": 4350 + }, + { + "epoch": 0.58, + "learning_rate": 2.22710992622628e-06, + "logits/chosen": -1.2855836153030396, + "logits/rejected": -0.954262912273407, + "logps/chosen": -506.0787048339844, + "logps/rejected": -827.5216064453125, + "loss": 0.1135, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.22598835825920105, + "rewards/margins": 0.3631274998188019, + "rewards/rejected": -0.5891157984733582, + "step": 4360 + }, + { + "epoch": 0.58, + "learning_rate": 2.2155469218693464e-06, + "logits/chosen": -1.3684406280517578, + "logits/rejected": -0.729507327079773, + "logps/chosen": -547.8463134765625, + "logps/rejected": -814.87939453125, + "loss": 0.1016, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.20620174705982208, + "rewards/margins": 0.38247913122177124, + "rewards/rejected": -0.5886809229850769, + "step": 4370 + }, + { + "epoch": 0.58, + "learning_rate": 2.2039900792337477e-06, + "logits/chosen": -1.4247252941131592, + "logits/rejected": -0.8577558398246765, + "logps/chosen": -427.704833984375, + "logps/rejected": -700.4971923828125, + "loss": 0.1349, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.20636749267578125, + "rewards/margins": 0.31686440110206604, + "rewards/rejected": -0.5232318639755249, + "step": 4380 + }, + { + "epoch": 0.59, + "learning_rate": 2.192439648659699e-06, + "logits/chosen": -1.223595380783081, + "logits/rejected": -0.8186568021774292, + "logps/chosen": -466.29913330078125, + "logps/rejected": -889.1475830078125, + "loss": 0.0745, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.19520318508148193, + "rewards/margins": 0.4740225374698639, + "rewards/rejected": -0.6692256331443787, + "step": 4390 + }, + { + "epoch": 0.59, + "learning_rate": 2.1808958803485134e-06, + "logits/chosen": -1.2180200815200806, + "logits/rejected": -0.7487155199050903, + "logps/chosen": -400.9490966796875, + "logps/rejected": -729.8300170898438, + "loss": 0.1399, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.17819716036319733, + "rewards/margins": 0.3473852276802063, + "rewards/rejected": -0.5255824327468872, + "step": 4400 + }, + { + "epoch": 0.59, + "learning_rate": 2.1693590243571937e-06, + "logits/chosen": -1.048194169998169, + "logits/rejected": -0.563649594783783, + "logps/chosen": -494.73480224609375, + "logps/rejected": -786.9961547851562, + "loss": 0.23, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.2585517466068268, + "rewards/margins": 0.3485848009586334, + "rewards/rejected": -0.6071365475654602, + "step": 4410 + }, + { + "epoch": 0.59, + "learning_rate": 2.157829330593008e-06, + "logits/chosen": -1.3244564533233643, + "logits/rejected": -0.7698614597320557, + "logps/chosen": -516.279541015625, + "logps/rejected": -893.1917114257812, + "loss": 0.1343, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.2608487904071808, + "rewards/margins": 0.39661940932273865, + "rewards/rejected": -0.6574681997299194, + "step": 4420 + }, + { + "epoch": 0.59, + "learning_rate": 2.1463070488080777e-06, + "logits/chosen": -1.1845453977584839, + "logits/rejected": -0.6892284154891968, + "logps/chosen": -483.90234375, + "logps/rejected": -813.65380859375, + "loss": 0.1597, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.22065512835979462, + "rewards/margins": 0.3821337819099426, + "rewards/rejected": -0.6027888655662537, + "step": 4430 + }, + { + "epoch": 0.59, + "learning_rate": 2.134792428593971e-06, + "logits/chosen": -1.0602244138717651, + "logits/rejected": -0.5278804898262024, + "logps/chosen": -568.511474609375, + "logps/rejected": -895.26171875, + "loss": 0.1174, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.28543621301651, + "rewards/margins": 0.3837021291255951, + "rewards/rejected": -0.6691383719444275, + "step": 4440 + }, + { + "epoch": 0.59, + "learning_rate": 2.1232857193762923e-06, + "logits/chosen": -1.5215568542480469, + "logits/rejected": -0.8021551966667175, + "logps/chosen": -510.27978515625, + "logps/rejected": -886.8944091796875, + "loss": 0.127, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.2042963057756424, + "rewards/margins": 0.4145194888114929, + "rewards/rejected": -0.6188157796859741, + "step": 4450 + }, + { + "epoch": 0.59, + "learning_rate": 2.1117871704092818e-06, + "logits/chosen": -1.3588248491287231, + "logits/rejected": -0.8943771123886108, + "logps/chosen": -516.7095947265625, + "logps/rejected": -807.9676513671875, + "loss": 0.1561, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.2409473955631256, + "rewards/margins": 0.33728843927383423, + "rewards/rejected": -0.5782358050346375, + "step": 4460 + }, + { + "epoch": 0.6, + "learning_rate": 2.1002970307704134e-06, + "logits/chosen": -1.5449442863464355, + "logits/rejected": -0.8462098240852356, + "logps/chosen": -463.8545837402344, + "logps/rejected": -761.9031982421875, + "loss": 0.1171, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.17875051498413086, + "rewards/margins": 0.39860549569129944, + "rewards/rejected": -0.5773560404777527, + "step": 4470 + }, + { + "epoch": 0.6, + "learning_rate": 2.0888155493550027e-06, + "logits/chosen": -1.2901697158813477, + "logits/rejected": -0.7369778156280518, + "logps/chosen": -500.56597900390625, + "logps/rejected": -905.2454223632812, + "loss": 0.1076, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.2221985161304474, + "rewards/margins": 0.390224426984787, + "rewards/rejected": -0.6124228835105896, + "step": 4480 + }, + { + "epoch": 0.6, + "learning_rate": 2.0773429748708153e-06, + "logits/chosen": -1.1163994073867798, + "logits/rejected": -0.7667258977890015, + "logps/chosen": -470.02362060546875, + "logps/rejected": -905.4534912109375, + "loss": 0.1087, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.21903569996356964, + "rewards/margins": 0.4150877892971039, + "rewards/rejected": -0.6341235637664795, + "step": 4490 + }, + { + "epoch": 0.6, + "learning_rate": 2.0658795558326745e-06, + "logits/chosen": -1.0330345630645752, + "logits/rejected": -0.8973774909973145, + "logps/chosen": -397.1127624511719, + "logps/rejected": -838.93994140625, + "loss": 0.1465, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.24607495963573456, + "rewards/margins": 0.3894239664077759, + "rewards/rejected": -0.6354988813400269, + "step": 4500 + }, + { + "epoch": 0.6, + "learning_rate": 2.0544255405570843e-06, + "logits/chosen": -1.3431646823883057, + "logits/rejected": -0.8504700660705566, + "logps/chosen": -544.9601440429688, + "logps/rejected": -852.1673583984375, + "loss": 0.1146, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.2619132399559021, + "rewards/margins": 0.3727183938026428, + "rewards/rejected": -0.6346316337585449, + "step": 4510 + }, + { + "epoch": 0.6, + "learning_rate": 2.0429811771568468e-06, + "logits/chosen": -1.120411992073059, + "logits/rejected": -0.5610482096672058, + "logps/chosen": -440.0519104003906, + "logps/rejected": -734.9476318359375, + "loss": 0.1452, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.2120869904756546, + "rewards/margins": 0.338589608669281, + "rewards/rejected": -0.5506765246391296, + "step": 4520 + }, + { + "epoch": 0.6, + "learning_rate": 2.031546713535688e-06, + "logits/chosen": -1.2549145221710205, + "logits/rejected": -0.6215404272079468, + "logps/chosen": -570.1345825195312, + "logps/rejected": -889.5558471679688, + "loss": 0.1513, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.2643107771873474, + "rewards/margins": 0.3893239498138428, + "rewards/rejected": -0.6536347270011902, + "step": 4530 + }, + { + "epoch": 0.61, + "learning_rate": 2.0201223973828917e-06, + "logits/chosen": -1.3017076253890991, + "logits/rejected": -0.9707794189453125, + "logps/chosen": -416.0333557128906, + "logps/rejected": -801.4716796875, + "loss": 0.1628, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.20128902792930603, + "rewards/margins": 0.36991968750953674, + "rewards/rejected": -0.571208655834198, + "step": 4540 + }, + { + "epoch": 0.61, + "learning_rate": 2.0087084761679245e-06, + "logits/chosen": -1.2727489471435547, + "logits/rejected": -0.5715342164039612, + "logps/chosen": -480.99822998046875, + "logps/rejected": -861.1604614257812, + "loss": 0.1005, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.19247964024543762, + "rewards/margins": 0.43824878334999084, + "rewards/rejected": -0.6307284235954285, + "step": 4550 + }, + { + "epoch": 0.61, + "learning_rate": 1.997305197135089e-06, + "logits/chosen": -1.3220107555389404, + "logits/rejected": -0.6351078748703003, + "logps/chosen": -524.84716796875, + "logps/rejected": -857.89404296875, + "loss": 0.1023, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.23643498122692108, + "rewards/margins": 0.38632455468177795, + "rewards/rejected": -0.6227595210075378, + "step": 4560 + }, + { + "epoch": 0.61, + "learning_rate": 1.985912807298154e-06, + "logits/chosen": -1.4880512952804565, + "logits/rejected": -0.8141192197799683, + "logps/chosen": -608.7986450195312, + "logps/rejected": -985.9041748046875, + "loss": 0.1401, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.2824017107486725, + "rewards/margins": 0.3986584544181824, + "rewards/rejected": -0.6810601353645325, + "step": 4570 + }, + { + "epoch": 0.61, + "learning_rate": 1.9745315534350157e-06, + "logits/chosen": -1.3150464296340942, + "logits/rejected": -0.8679197430610657, + "logps/chosen": -486.95196533203125, + "logps/rejected": -788.3603515625, + "loss": 0.1861, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.2451094686985016, + "rewards/margins": 0.332103431224823, + "rewards/rejected": -0.577212929725647, + "step": 4580 + }, + { + "epoch": 0.61, + "learning_rate": 1.963161682082342e-06, + "logits/chosen": -1.1732512712478638, + "logits/rejected": -0.8104772567749023, + "logps/chosen": -465.8495178222656, + "logps/rejected": -956.5872192382812, + "loss": 0.1354, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.2538664937019348, + "rewards/margins": 0.42716091871261597, + "rewards/rejected": -0.6810274720191956, + "step": 4590 + }, + { + "epoch": 0.61, + "learning_rate": 1.9518034395302413e-06, + "logits/chosen": -1.0655146837234497, + "logits/rejected": -0.7662220597267151, + "logps/chosen": -423.4452209472656, + "logps/rejected": -801.560546875, + "loss": 0.1413, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.19744530320167542, + "rewards/margins": 0.366693377494812, + "rewards/rejected": -0.5641387104988098, + "step": 4600 + }, + { + "epoch": 0.61, + "learning_rate": 1.940457071816922e-06, + "logits/chosen": -1.2536901235580444, + "logits/rejected": -0.6289219856262207, + "logps/chosen": -515.1942138671875, + "logps/rejected": -915.4596557617188, + "loss": 0.064, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.19910120964050293, + "rewards/margins": 0.49133628606796265, + "rewards/rejected": -0.6904375553131104, + "step": 4610 + }, + { + "epoch": 0.62, + "learning_rate": 1.9291228247233607e-06, + "logits/chosen": -1.217125654220581, + "logits/rejected": -0.5908175110816956, + "logps/chosen": -514.2653198242188, + "logps/rejected": -868.1808471679688, + "loss": 0.1088, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.20080474019050598, + "rewards/margins": 0.41219455003738403, + "rewards/rejected": -0.6129993200302124, + "step": 4620 + }, + { + "epoch": 0.62, + "learning_rate": 1.9178009437679855e-06, + "logits/chosen": -1.2478214502334595, + "logits/rejected": -0.9087751507759094, + "logps/chosen": -518.7520141601562, + "logps/rejected": -933.6763916015625, + "loss": 0.135, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.22716772556304932, + "rewards/margins": 0.3981955647468567, + "rewards/rejected": -0.6253632307052612, + "step": 4630 + }, + { + "epoch": 0.62, + "learning_rate": 1.9064916742013515e-06, + "logits/chosen": -1.1873037815093994, + "logits/rejected": -0.5726872086524963, + "logps/chosen": -490.1280212402344, + "logps/rejected": -839.7019653320312, + "loss": 0.123, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.21053406596183777, + "rewards/margins": 0.3976525366306305, + "rewards/rejected": -0.6081866025924683, + "step": 4640 + }, + { + "epoch": 0.62, + "learning_rate": 1.895195261000831e-06, + "logits/chosen": -1.1191716194152832, + "logits/rejected": -1.1251859664916992, + "logps/chosen": -393.98504638671875, + "logps/rejected": -694.9435424804688, + "loss": 0.1647, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.23374716937541962, + "rewards/margins": 0.2719922959804535, + "rewards/rejected": -0.5057394504547119, + "step": 4650 + }, + { + "epoch": 0.62, + "learning_rate": 1.883911948865306e-06, + "logits/chosen": -1.1896827220916748, + "logits/rejected": -0.735011100769043, + "logps/chosen": -502.1512756347656, + "logps/rejected": -754.8998413085938, + "loss": 0.1682, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.21102912724018097, + "rewards/margins": 0.3427314758300781, + "rewards/rejected": -0.5537605881690979, + "step": 4660 + }, + { + "epoch": 0.62, + "learning_rate": 1.872641982209868e-06, + "logits/chosen": -1.2366998195648193, + "logits/rejected": -0.6492923498153687, + "logps/chosen": -550.7481689453125, + "logps/rejected": -848.9842529296875, + "loss": 0.1283, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.24898889660835266, + "rewards/margins": 0.3708731234073639, + "rewards/rejected": -0.6198620200157166, + "step": 4670 + }, + { + "epoch": 0.62, + "learning_rate": 1.8613856051605242e-06, + "logits/chosen": -1.2035579681396484, + "logits/rejected": -0.6728922128677368, + "logps/chosen": -477.9755859375, + "logps/rejected": -786.3551025390625, + "loss": 0.1105, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.19650466740131378, + "rewards/margins": 0.3582767844200134, + "rewards/rejected": -0.554781436920166, + "step": 4680 + }, + { + "epoch": 0.63, + "learning_rate": 1.850143061548907e-06, + "logits/chosen": -1.2262418270111084, + "logits/rejected": -0.9310606122016907, + "logps/chosen": -471.4136657714844, + "logps/rejected": -890.5929565429688, + "loss": 0.1287, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.2113111913204193, + "rewards/margins": 0.4012996554374695, + "rewards/rejected": -0.612610936164856, + "step": 4690 + }, + { + "epoch": 0.63, + "learning_rate": 1.8389145949069953e-06, + "logits/chosen": -1.2006455659866333, + "logits/rejected": -0.9293171167373657, + "logps/chosen": -377.11248779296875, + "logps/rejected": -778.6986083984375, + "loss": 0.1215, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.16506408154964447, + "rewards/margins": 0.37131091952323914, + "rewards/rejected": -0.5363749861717224, + "step": 4700 + }, + { + "epoch": 0.63, + "learning_rate": 1.827700448461836e-06, + "logits/chosen": -1.4228752851486206, + "logits/rejected": -1.103596568107605, + "logps/chosen": -459.54412841796875, + "logps/rejected": -877.4918823242188, + "loss": 0.1699, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.2065921276807785, + "rewards/margins": 0.36977314949035645, + "rewards/rejected": -0.5763653516769409, + "step": 4710 + }, + { + "epoch": 0.63, + "learning_rate": 1.816500865130279e-06, + "logits/chosen": -1.1741989850997925, + "logits/rejected": -0.665132999420166, + "logps/chosen": -459.66094970703125, + "logps/rejected": -793.422607421875, + "loss": 0.125, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.19809575378894806, + "rewards/margins": 0.3923488259315491, + "rewards/rejected": -0.5904445648193359, + "step": 4720 + }, + { + "epoch": 0.63, + "learning_rate": 1.8053160875137137e-06, + "logits/chosen": -1.3960039615631104, + "logits/rejected": -0.9243966341018677, + "logps/chosen": -489.2864685058594, + "logps/rejected": -757.47998046875, + "loss": 0.206, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.23237042129039764, + "rewards/margins": 0.3222199082374573, + "rewards/rejected": -0.5545903444290161, + "step": 4730 + }, + { + "epoch": 0.63, + "learning_rate": 1.7941463578928088e-06, + "logits/chosen": -1.2899222373962402, + "logits/rejected": -0.9475260972976685, + "logps/chosen": -479.715087890625, + "logps/rejected": -835.7548828125, + "loss": 0.1082, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.22872710227966309, + "rewards/margins": 0.37602663040161133, + "rewards/rejected": -0.6047536730766296, + "step": 4740 + }, + { + "epoch": 0.63, + "learning_rate": 1.7829919182222752e-06, + "logits/chosen": -1.4998255968093872, + "logits/rejected": -1.0836795568466187, + "logps/chosen": -425.07470703125, + "logps/rejected": -870.9599609375, + "loss": 0.0733, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.16609852015972137, + "rewards/margins": 0.43407559394836426, + "rewards/rejected": -0.6001741290092468, + "step": 4750 + }, + { + "epoch": 0.63, + "learning_rate": 1.7718530101256115e-06, + "logits/chosen": -1.3878366947174072, + "logits/rejected": -0.692621111869812, + "logps/chosen": -577.058349609375, + "logps/rejected": -932.189453125, + "loss": 0.126, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.23522964119911194, + "rewards/margins": 0.3958871364593506, + "rewards/rejected": -0.6311167478561401, + "step": 4760 + }, + { + "epoch": 0.64, + "learning_rate": 1.7607298748898844e-06, + "logits/chosen": -1.5390503406524658, + "logits/rejected": -0.9261069297790527, + "logps/chosen": -451.36297607421875, + "logps/rejected": -787.5709228515625, + "loss": 0.1081, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.16683024168014526, + "rewards/margins": 0.412998765707016, + "rewards/rejected": -0.5798289775848389, + "step": 4770 + }, + { + "epoch": 0.64, + "learning_rate": 1.7496227534604859e-06, + "logits/chosen": -1.3134208917617798, + "logits/rejected": -0.7826110124588013, + "logps/chosen": -433.45196533203125, + "logps/rejected": -854.2193603515625, + "loss": 0.1041, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.1616908758878708, + "rewards/margins": 0.4370441436767578, + "rewards/rejected": -0.5987350344657898, + "step": 4780 + }, + { + "epoch": 0.64, + "learning_rate": 1.7385318864359304e-06, + "logits/chosen": -1.5158641338348389, + "logits/rejected": -0.883456826210022, + "logps/chosen": -431.32720947265625, + "logps/rejected": -782.7167358398438, + "loss": 0.1139, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14380015432834625, + "rewards/margins": 0.3827371895313263, + "rewards/rejected": -0.526537299156189, + "step": 4790 + }, + { + "epoch": 0.64, + "learning_rate": 1.7274575140626318e-06, + "logits/chosen": -1.2721059322357178, + "logits/rejected": -1.0025657415390015, + "logps/chosen": -487.8551330566406, + "logps/rejected": -818.991943359375, + "loss": 0.1359, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.19590890407562256, + "rewards/margins": 0.3537582457065582, + "rewards/rejected": -0.5496671795845032, + "step": 4800 + }, + { + "epoch": 0.64, + "learning_rate": 1.7163998762297013e-06, + "logits/chosen": -1.4880658388137817, + "logits/rejected": -1.024325966835022, + "logps/chosen": -494.30218505859375, + "logps/rejected": -823.3046875, + "loss": 0.111, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.24541957676410675, + "rewards/margins": 0.3485862910747528, + "rewards/rejected": -0.5940058827400208, + "step": 4810 + }, + { + "epoch": 0.64, + "learning_rate": 1.7053592124637557e-06, + "logits/chosen": -1.3536102771759033, + "logits/rejected": -0.9674237370491028, + "logps/chosen": -489.28485107421875, + "logps/rejected": -835.2489013671875, + "loss": 0.1225, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.20638641715049744, + "rewards/margins": 0.3501652777194977, + "rewards/rejected": -0.5565517544746399, + "step": 4820 + }, + { + "epoch": 0.64, + "learning_rate": 1.6943357619237227e-06, + "logits/chosen": -1.513027310371399, + "logits/rejected": -0.980889618396759, + "logps/chosen": -482.6014709472656, + "logps/rejected": -868.0419921875, + "loss": 0.1163, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.1919030249118805, + "rewards/margins": 0.41016140580177307, + "rewards/rejected": -0.6020644307136536, + "step": 4830 + }, + { + "epoch": 0.65, + "learning_rate": 1.6833297633956647e-06, + "logits/chosen": -1.4574648141860962, + "logits/rejected": -0.8266223073005676, + "logps/chosen": -510.8272399902344, + "logps/rejected": -938.8359375, + "loss": 0.1105, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.22904086112976074, + "rewards/margins": 0.41577833890914917, + "rewards/rejected": -0.6448192596435547, + "step": 4840 + }, + { + "epoch": 0.65, + "learning_rate": 1.6723414552876052e-06, + "logits/chosen": -1.4703316688537598, + "logits/rejected": -1.021376371383667, + "logps/chosen": -444.19305419921875, + "logps/rejected": -775.9967041015625, + "loss": 0.1324, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.18805596232414246, + "rewards/margins": 0.34675851464271545, + "rewards/rejected": -0.5348144769668579, + "step": 4850 + }, + { + "epoch": 0.65, + "learning_rate": 1.661371075624363e-06, + "logits/chosen": -1.4893542528152466, + "logits/rejected": -0.9865673780441284, + "logps/chosen": -449.78662109375, + "logps/rejected": -830.7879638671875, + "loss": 0.1196, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.19270475208759308, + "rewards/margins": 0.38258641958236694, + "rewards/rejected": -0.575291097164154, + "step": 4860 + }, + { + "epoch": 0.65, + "learning_rate": 1.6504188620423977e-06, + "logits/chosen": -1.3864202499389648, + "logits/rejected": -0.7918799519538879, + "logps/chosen": -403.9468078613281, + "logps/rejected": -640.9031982421875, + "loss": 0.1656, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.14833372831344604, + "rewards/margins": 0.30061405897140503, + "rewards/rejected": -0.44894781708717346, + "step": 4870 + }, + { + "epoch": 0.65, + "learning_rate": 1.6394850517846621e-06, + "logits/chosen": -1.5351966619491577, + "logits/rejected": -0.7196094393730164, + "logps/chosen": -473.9374084472656, + "logps/rejected": -769.9342041015625, + "loss": 0.0773, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.18467064201831818, + "rewards/margins": 0.4058653712272644, + "rewards/rejected": -0.5905359983444214, + "step": 4880 + }, + { + "epoch": 0.65, + "learning_rate": 1.6285698816954626e-06, + "logits/chosen": -1.4006057977676392, + "logits/rejected": -0.8048819303512573, + "logps/chosen": -499.81365966796875, + "logps/rejected": -856.2640380859375, + "loss": 0.0872, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.20508036017417908, + "rewards/margins": 0.41568273305892944, + "rewards/rejected": -0.6207630038261414, + "step": 4890 + }, + { + "epoch": 0.65, + "learning_rate": 1.6176735882153284e-06, + "logits/chosen": -1.6421102285385132, + "logits/rejected": -1.0298190116882324, + "logps/chosen": -520.8504028320312, + "logps/rejected": -850.0789184570312, + "loss": 0.1412, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.2191186249256134, + "rewards/margins": 0.3911200165748596, + "rewards/rejected": -0.6102386713027954, + "step": 4900 + }, + { + "epoch": 0.65, + "learning_rate": 1.6067964073758901e-06, + "logits/chosen": -1.4560716152191162, + "logits/rejected": -0.935789942741394, + "logps/chosen": -456.36334228515625, + "logps/rejected": -800.3477783203125, + "loss": 0.1096, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.21652796864509583, + "rewards/margins": 0.3556264340877533, + "rewards/rejected": -0.5721544027328491, + "step": 4910 + }, + { + "epoch": 0.66, + "learning_rate": 1.5959385747947697e-06, + "logits/chosen": -1.4257352352142334, + "logits/rejected": -0.9093335270881653, + "logps/chosen": -542.5935668945312, + "logps/rejected": -819.0216674804688, + "loss": 0.1967, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.2606956362724304, + "rewards/margins": 0.33398741483688354, + "rewards/rejected": -0.594683051109314, + "step": 4920 + }, + { + "epoch": 0.66, + "learning_rate": 1.5851003256704697e-06, + "logits/chosen": -1.327136754989624, + "logits/rejected": -1.1365773677825928, + "logps/chosen": -542.2097778320312, + "logps/rejected": -977.5718994140625, + "loss": 0.0953, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.2794331908226013, + "rewards/margins": 0.3725283145904541, + "rewards/rejected": -0.6519615054130554, + "step": 4930 + }, + { + "epoch": 0.66, + "learning_rate": 1.5742818947772875e-06, + "logits/chosen": -1.4624805450439453, + "logits/rejected": -0.7566857933998108, + "logps/chosen": -510.5843200683594, + "logps/rejected": -880.57861328125, + "loss": 0.1229, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.20490141212940216, + "rewards/margins": 0.42348456382751465, + "rewards/rejected": -0.6283859610557556, + "step": 4940 + }, + { + "epoch": 0.66, + "learning_rate": 1.56348351646022e-06, + "logits/chosen": -1.6112909317016602, + "logits/rejected": -0.8507854342460632, + "logps/chosen": -470.6692810058594, + "logps/rejected": -842.4249267578125, + "loss": 0.1195, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.19470658898353577, + "rewards/margins": 0.410107284784317, + "rewards/rejected": -0.6048139333724976, + "step": 4950 + }, + { + "epoch": 0.66, + "learning_rate": 1.552705424629898e-06, + "logits/chosen": -1.4331148862838745, + "logits/rejected": -0.9108486175537109, + "logps/chosen": -457.1399841308594, + "logps/rejected": -811.8322143554688, + "loss": 0.119, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.20481880009174347, + "rewards/margins": 0.3736223578453064, + "rewards/rejected": -0.5784412026405334, + "step": 4960 + }, + { + "epoch": 0.66, + "learning_rate": 1.5419478527575068e-06, + "logits/chosen": -1.3583507537841797, + "logits/rejected": -0.9901837110519409, + "logps/chosen": -456.38592529296875, + "logps/rejected": -837.0637817382812, + "loss": 0.0921, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.23552127182483673, + "rewards/margins": 0.38225287199020386, + "rewards/rejected": -0.6177741289138794, + "step": 4970 + }, + { + "epoch": 0.66, + "learning_rate": 1.5312110338697427e-06, + "logits/chosen": -1.4094290733337402, + "logits/rejected": -0.9213676452636719, + "logps/chosen": -499.94049072265625, + "logps/rejected": -883.0730590820312, + "loss": 0.0928, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.20382937788963318, + "rewards/margins": 0.43424925208091736, + "rewards/rejected": -0.6380786299705505, + "step": 4980 + }, + { + "epoch": 0.67, + "learning_rate": 1.520495200543754e-06, + "logits/chosen": -1.2632607221603394, + "logits/rejected": -1.0204585790634155, + "logps/chosen": -477.03753662109375, + "logps/rejected": -916.0748291015625, + "loss": 0.1045, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.21876640617847443, + "rewards/margins": 0.3910064995288849, + "rewards/rejected": -0.6097728610038757, + "step": 4990 + }, + { + "epoch": 0.67, + "learning_rate": 1.509800584902108e-06, + "logits/chosen": -1.5550910234451294, + "logits/rejected": -0.9333616495132446, + "logps/chosen": -517.1362915039062, + "logps/rejected": -892.5686645507812, + "loss": 0.126, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.22209139168262482, + "rewards/margins": 0.4143398404121399, + "rewards/rejected": -0.6364312171936035, + "step": 5000 + }, + { + "epoch": 0.67, + "learning_rate": 1.4991274186077632e-06, + "logits/chosen": -1.3997657299041748, + "logits/rejected": -0.8548883199691772, + "logps/chosen": -460.355224609375, + "logps/rejected": -829.3341674804688, + "loss": 0.0926, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.19663289189338684, + "rewards/margins": 0.3977494239807129, + "rewards/rejected": -0.5943823456764221, + "step": 5010 + }, + { + "epoch": 0.67, + "learning_rate": 1.4884759328590476e-06, + "logits/chosen": -1.4501681327819824, + "logits/rejected": -1.0469526052474976, + "logps/chosen": -440.510986328125, + "logps/rejected": -780.1736450195312, + "loss": 0.2179, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.19426096975803375, + "rewards/margins": 0.3323975205421448, + "rewards/rejected": -0.5266584753990173, + "step": 5020 + }, + { + "epoch": 0.67, + "learning_rate": 1.4778463583846553e-06, + "logits/chosen": -1.473638892173767, + "logits/rejected": -0.9495469331741333, + "logps/chosen": -526.7481689453125, + "logps/rejected": -855.7936401367188, + "loss": 0.1462, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.2372969686985016, + "rewards/margins": 0.3921995759010315, + "rewards/rejected": -0.6294964551925659, + "step": 5030 + }, + { + "epoch": 0.67, + "learning_rate": 1.467238925438646e-06, + "logits/chosen": -1.5571715831756592, + "logits/rejected": -0.7043315172195435, + "logps/chosen": -527.9861450195312, + "logps/rejected": -847.37353515625, + "loss": 0.1089, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.22108444571495056, + "rewards/margins": 0.369198203086853, + "rewards/rejected": -0.5902826189994812, + "step": 5040 + }, + { + "epoch": 0.67, + "learning_rate": 1.4566538637954556e-06, + "logits/chosen": -1.6278702020645142, + "logits/rejected": -1.074779748916626, + "logps/chosen": -430.89447021484375, + "logps/rejected": -711.3259887695312, + "loss": 0.1342, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.16959848999977112, + "rewards/margins": 0.35060760378837585, + "rewards/rejected": -0.520206093788147, + "step": 5050 + }, + { + "epoch": 0.67, + "learning_rate": 1.446091402744923e-06, + "logits/chosen": -1.4020555019378662, + "logits/rejected": -0.918738067150116, + "logps/chosen": -445.22186279296875, + "logps/rejected": -847.0481567382812, + "loss": 0.1695, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.20678460597991943, + "rewards/margins": 0.39482367038726807, + "rewards/rejected": -0.6016082167625427, + "step": 5060 + }, + { + "epoch": 0.68, + "learning_rate": 1.4355517710873184e-06, + "logits/chosen": -1.3787460327148438, + "logits/rejected": -1.0521621704101562, + "logps/chosen": -427.92388916015625, + "logps/rejected": -887.6427612304688, + "loss": 0.05, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.2269415408372879, + "rewards/margins": 0.4265185296535492, + "rewards/rejected": -0.6534601449966431, + "step": 5070 + }, + { + "epoch": 0.68, + "learning_rate": 1.4250351971283937e-06, + "logits/chosen": -1.522359848022461, + "logits/rejected": -1.0820255279541016, + "logps/chosen": -524.0896606445312, + "logps/rejected": -844.29052734375, + "loss": 0.1148, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.27214130759239197, + "rewards/margins": 0.37310561537742615, + "rewards/rejected": -0.6452468633651733, + "step": 5080 + }, + { + "epoch": 0.68, + "learning_rate": 1.41454190867443e-06, + "logits/chosen": -1.4404693841934204, + "logits/rejected": -1.0126572847366333, + "logps/chosen": -495.394775390625, + "logps/rejected": -934.8951416015625, + "loss": 0.0975, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.21723541617393494, + "rewards/margins": 0.41770267486572266, + "rewards/rejected": -0.63493812084198, + "step": 5090 + }, + { + "epoch": 0.68, + "learning_rate": 1.4040721330273063e-06, + "logits/chosen": -1.4913583993911743, + "logits/rejected": -0.9799544215202332, + "logps/chosen": -535.7487182617188, + "logps/rejected": -877.2818603515625, + "loss": 0.0793, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.24695873260498047, + "rewards/margins": 0.3976723849773407, + "rewards/rejected": -0.6446312069892883, + "step": 5100 + }, + { + "epoch": 0.68, + "learning_rate": 1.3936260969795778e-06, + "logits/chosen": -1.2997585535049438, + "logits/rejected": -0.9756921529769897, + "logps/chosen": -443.191162109375, + "logps/rejected": -821.5947265625, + "loss": 0.1377, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.21109957993030548, + "rewards/margins": 0.3828147053718567, + "rewards/rejected": -0.593914270401001, + "step": 5110 + }, + { + "epoch": 0.68, + "learning_rate": 1.3832040268095589e-06, + "logits/chosen": -1.3984973430633545, + "logits/rejected": -0.8584139943122864, + "logps/chosen": -571.9281005859375, + "logps/rejected": -915.3826904296875, + "loss": 0.0942, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.2879997491836548, + "rewards/margins": 0.3574121594429016, + "rewards/rejected": -0.6454118490219116, + "step": 5120 + }, + { + "epoch": 0.68, + "learning_rate": 1.3728061482764238e-06, + "logits/chosen": -1.660528540611267, + "logits/rejected": -1.1849465370178223, + "logps/chosen": -456.5572814941406, + "logps/rejected": -729.951904296875, + "loss": 0.1009, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.19059523940086365, + "rewards/margins": 0.32504329085350037, + "rewards/rejected": -0.515638530254364, + "step": 5130 + }, + { + "epoch": 0.69, + "learning_rate": 1.362432686615316e-06, + "logits/chosen": -1.5981024503707886, + "logits/rejected": -1.0829527378082275, + "logps/chosen": -470.1607360839844, + "logps/rejected": -828.2596435546875, + "loss": 0.1321, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.21122093498706818, + "rewards/margins": 0.40773335099220276, + "rewards/rejected": -0.6189543008804321, + "step": 5140 + }, + { + "epoch": 0.69, + "learning_rate": 1.3520838665324704e-06, + "logits/chosen": -1.6276485919952393, + "logits/rejected": -1.209346055984497, + "logps/chosen": -420.7687072753906, + "logps/rejected": -750.0629272460938, + "loss": 0.1565, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.1963231861591339, + "rewards/margins": 0.3372722864151001, + "rewards/rejected": -0.5335954427719116, + "step": 5150 + }, + { + "epoch": 0.69, + "learning_rate": 1.3417599122003464e-06, + "logits/chosen": -1.6005207300186157, + "logits/rejected": -1.080200433731079, + "logps/chosen": -449.393310546875, + "logps/rejected": -822.0933837890625, + "loss": 0.1065, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.1929277628660202, + "rewards/margins": 0.4021962583065033, + "rewards/rejected": -0.5951240658760071, + "step": 5160 + }, + { + "epoch": 0.69, + "learning_rate": 1.3314610472527645e-06, + "logits/chosen": -1.6019260883331299, + "logits/rejected": -0.8302914500236511, + "logps/chosen": -595.9198608398438, + "logps/rejected": -919.8703002929688, + "loss": 0.138, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.25709301233291626, + "rewards/margins": 0.39729732275009155, + "rewards/rejected": -0.654390275478363, + "step": 5170 + }, + { + "epoch": 0.69, + "learning_rate": 1.3211874947800747e-06, + "logits/chosen": -1.7015514373779297, + "logits/rejected": -1.154908537864685, + "logps/chosen": -480.40728759765625, + "logps/rejected": -860.6213989257812, + "loss": 0.1151, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.18247373402118683, + "rewards/margins": 0.40979132056236267, + "rewards/rejected": -0.5922650694847107, + "step": 5180 + }, + { + "epoch": 0.69, + "learning_rate": 1.3109394773243117e-06, + "logits/chosen": -1.5750248432159424, + "logits/rejected": -0.8017935752868652, + "logps/chosen": -551.1357421875, + "logps/rejected": -933.3966674804688, + "loss": 0.1147, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.23447374999523163, + "rewards/margins": 0.4425485134124756, + "rewards/rejected": -0.6770222187042236, + "step": 5190 + }, + { + "epoch": 0.69, + "learning_rate": 1.3007172168743854e-06, + "logits/chosen": -1.486428141593933, + "logits/rejected": -1.04713773727417, + "logps/chosen": -513.3190307617188, + "logps/rejected": -883.0814208984375, + "loss": 0.104, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.2615116238594055, + "rewards/margins": 0.36889469623565674, + "rewards/rejected": -0.6304062604904175, + "step": 5200 + }, + { + "epoch": 0.69, + "learning_rate": 1.2905209348612596e-06, + "logits/chosen": -1.4805870056152344, + "logits/rejected": -0.8896188735961914, + "logps/chosen": -512.0989990234375, + "logps/rejected": -872.7048950195312, + "loss": 0.1018, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.17740336060523987, + "rewards/margins": 0.42626601457595825, + "rewards/rejected": -0.6036693453788757, + "step": 5210 + }, + { + "epoch": 0.7, + "learning_rate": 1.280350852153168e-06, + "logits/chosen": -1.410609245300293, + "logits/rejected": -1.0304107666015625, + "logps/chosen": -523.1416625976562, + "logps/rejected": -867.2039184570312, + "loss": 0.1127, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.1862618625164032, + "rewards/margins": 0.40114229917526245, + "rewards/rejected": -0.587404191493988, + "step": 5220 + }, + { + "epoch": 0.7, + "learning_rate": 1.2702071890508235e-06, + "logits/chosen": -1.3426989316940308, + "logits/rejected": -1.0067704916000366, + "logps/chosen": -518.0101318359375, + "logps/rejected": -770.6543579101562, + "loss": 0.1914, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.23667840659618378, + "rewards/margins": 0.2956293523311615, + "rewards/rejected": -0.5323077440261841, + "step": 5230 + }, + { + "epoch": 0.7, + "learning_rate": 1.260090165282645e-06, + "logits/chosen": -1.3418034315109253, + "logits/rejected": -0.6315348148345947, + "logps/chosen": -537.9312744140625, + "logps/rejected": -858.8912353515625, + "loss": 0.0913, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.26209238171577454, + "rewards/margins": 0.3787650465965271, + "rewards/rejected": -0.6408575177192688, + "step": 5240 + }, + { + "epoch": 0.7, + "learning_rate": 1.2500000000000007e-06, + "logits/chosen": -1.4278695583343506, + "logits/rejected": -0.9040371775627136, + "logps/chosen": -548.1932373046875, + "logps/rejected": -871.0941162109375, + "loss": 0.1098, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.26213568449020386, + "rewards/margins": 0.40073925256729126, + "rewards/rejected": -0.6628749370574951, + "step": 5250 + }, + { + "epoch": 0.7, + "learning_rate": 1.2399369117724582e-06, + "logits/chosen": -1.5840961933135986, + "logits/rejected": -0.7718688249588013, + "logps/chosen": -483.53863525390625, + "logps/rejected": -894.3240356445312, + "loss": 0.0678, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.18216930329799652, + "rewards/margins": 0.45045241713523865, + "rewards/rejected": -0.632621705532074, + "step": 5260 + }, + { + "epoch": 0.7, + "learning_rate": 1.2299011185830557e-06, + "logits/chosen": -1.483337640762329, + "logits/rejected": -1.0077427625656128, + "logps/chosen": -470.9730529785156, + "logps/rejected": -837.7032470703125, + "loss": 0.116, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.22441604733467102, + "rewards/margins": 0.3804159164428711, + "rewards/rejected": -0.6048319935798645, + "step": 5270 + }, + { + "epoch": 0.7, + "learning_rate": 1.2198928378235717e-06, + "logits/chosen": -1.6518104076385498, + "logits/rejected": -0.8416474461555481, + "logps/chosen": -490.2295837402344, + "logps/rejected": -727.6588134765625, + "loss": 0.16, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.21518249809741974, + "rewards/margins": 0.32688936591148376, + "rewards/rejected": -0.5420718789100647, + "step": 5280 + }, + { + "epoch": 0.71, + "learning_rate": 1.2099122862898214e-06, + "logits/chosen": -1.5851049423217773, + "logits/rejected": -0.9384450912475586, + "logps/chosen": -555.9940185546875, + "logps/rejected": -929.8065185546875, + "loss": 0.0947, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.1925552785396576, + "rewards/margins": 0.4194509983062744, + "rewards/rejected": -0.6120061874389648, + "step": 5290 + }, + { + "epoch": 0.71, + "learning_rate": 1.1999596801769617e-06, + "logits/chosen": -1.3924330472946167, + "logits/rejected": -0.8665302991867065, + "logps/chosen": -516.6383666992188, + "logps/rejected": -852.7140502929688, + "loss": 0.1274, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.214178204536438, + "rewards/margins": 0.37730246782302856, + "rewards/rejected": -0.5914807319641113, + "step": 5300 + }, + { + "epoch": 0.71, + "learning_rate": 1.1900352350748026e-06, + "logits/chosen": -1.4407775402069092, + "logits/rejected": -0.857082724571228, + "logps/chosen": -558.5343017578125, + "logps/rejected": -859.2302856445312, + "loss": 0.1271, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.18870045244693756, + "rewards/margins": 0.4341113567352295, + "rewards/rejected": -0.6228117942810059, + "step": 5310 + }, + { + "epoch": 0.71, + "learning_rate": 1.1801391659631423e-06, + "logits/chosen": -1.574696660041809, + "logits/rejected": -1.0441734790802002, + "logps/chosen": -433.48211669921875, + "logps/rejected": -792.7039794921875, + "loss": 0.1499, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.2097112238407135, + "rewards/margins": 0.3791065812110901, + "rewards/rejected": -0.5888177752494812, + "step": 5320 + }, + { + "epoch": 0.71, + "learning_rate": 1.170271687207106e-06, + "logits/chosen": -1.5611820220947266, + "logits/rejected": -0.946784496307373, + "logps/chosen": -499.36419677734375, + "logps/rejected": -817.3825073242188, + "loss": 0.1409, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.20473620295524597, + "rewards/margins": 0.3804923892021179, + "rewards/rejected": -0.5852286219596863, + "step": 5330 + }, + { + "epoch": 0.71, + "learning_rate": 1.160433012552508e-06, + "logits/chosen": -1.5461738109588623, + "logits/rejected": -0.9032590985298157, + "logps/chosen": -444.84552001953125, + "logps/rejected": -742.2452392578125, + "loss": 0.1085, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.18285486102104187, + "rewards/margins": 0.3640900254249573, + "rewards/rejected": -0.5469449162483215, + "step": 5340 + }, + { + "epoch": 0.71, + "learning_rate": 1.1506233551212186e-06, + "logits/chosen": -1.4107246398925781, + "logits/rejected": -0.6989453434944153, + "logps/chosen": -573.1099853515625, + "logps/rejected": -791.4630737304688, + "loss": 0.1497, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.23020067811012268, + "rewards/margins": 0.3681562840938568, + "rewards/rejected": -0.5983569025993347, + "step": 5350 + }, + { + "epoch": 0.71, + "learning_rate": 1.1408429274065418e-06, + "logits/chosen": -1.3513542413711548, + "logits/rejected": -0.8181467056274414, + "logps/chosen": -462.7870178222656, + "logps/rejected": -814.8450927734375, + "loss": 0.1171, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.1746109426021576, + "rewards/margins": 0.4220661520957947, + "rewards/rejected": -0.5966770648956299, + "step": 5360 + }, + { + "epoch": 0.72, + "learning_rate": 1.1310919412686248e-06, + "logits/chosen": -1.6254085302352905, + "logits/rejected": -0.8846661448478699, + "logps/chosen": -421.10516357421875, + "logps/rejected": -821.2198486328125, + "loss": 0.098, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.19411344826221466, + "rewards/margins": 0.4121677875518799, + "rewards/rejected": -0.6062811613082886, + "step": 5370 + }, + { + "epoch": 0.72, + "learning_rate": 1.1213706079298566e-06, + "logits/chosen": -1.5590204000473022, + "logits/rejected": -0.9682413935661316, + "logps/chosen": -486.61163330078125, + "logps/rejected": -739.8365478515625, + "loss": 0.178, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.19468192756175995, + "rewards/margins": 0.33544445037841797, + "rewards/rejected": -0.5301263928413391, + "step": 5380 + }, + { + "epoch": 0.72, + "learning_rate": 1.1116791379703032e-06, + "logits/chosen": -1.5661708116531372, + "logits/rejected": -0.9602205157279968, + "logps/chosen": -535.9977416992188, + "logps/rejected": -872.9022216796875, + "loss": 0.1211, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.19395512342453003, + "rewards/margins": 0.4040953516960144, + "rewards/rejected": -0.5980504751205444, + "step": 5390 + }, + { + "epoch": 0.72, + "learning_rate": 1.1020177413231334e-06, + "logits/chosen": -1.4590649604797363, + "logits/rejected": -0.8140028119087219, + "logps/chosen": -472.7320861816406, + "logps/rejected": -786.4627685546875, + "loss": 0.0836, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.18391984701156616, + "rewards/margins": 0.4102323651313782, + "rewards/rejected": -0.5941521525382996, + "step": 5400 + }, + { + "epoch": 0.72, + "learning_rate": 1.0923866272700845e-06, + "logits/chosen": -1.2792116403579712, + "logits/rejected": -1.075791597366333, + "logps/chosen": -443.6817932128906, + "logps/rejected": -858.3514404296875, + "loss": 0.1238, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.24240417778491974, + "rewards/margins": 0.37138718366622925, + "rewards/rejected": -0.6137913465499878, + "step": 5410 + }, + { + "epoch": 0.72, + "learning_rate": 1.0827860044369226e-06, + "logits/chosen": -1.5652382373809814, + "logits/rejected": -1.2125917673110962, + "logps/chosen": -389.00341796875, + "logps/rejected": -668.3732299804688, + "loss": 0.1302, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.18405522406101227, + "rewards/margins": 0.2935299277305603, + "rewards/rejected": -0.47758516669273376, + "step": 5420 + }, + { + "epoch": 0.72, + "learning_rate": 1.073216080788921e-06, + "logits/chosen": -1.6024821996688843, + "logits/rejected": -0.9295442700386047, + "logps/chosen": -467.8318786621094, + "logps/rejected": -864.23974609375, + "loss": 0.0997, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.14879059791564941, + "rewards/margins": 0.4352756142616272, + "rewards/rejected": -0.5840662717819214, + "step": 5430 + }, + { + "epoch": 0.73, + "learning_rate": 1.06367706362636e-06, + "logits/chosen": -1.5812941789627075, + "logits/rejected": -1.0601966381072998, + "logps/chosen": -469.12841796875, + "logps/rejected": -844.4415893554688, + "loss": 0.1249, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.173747256398201, + "rewards/margins": 0.39489811658859253, + "rewards/rejected": -0.5686453580856323, + "step": 5440 + }, + { + "epoch": 0.73, + "learning_rate": 1.0541691595800338e-06, + "logits/chosen": -1.6072155237197876, + "logits/rejected": -1.0490585565567017, + "logps/chosen": -494.46875, + "logps/rejected": -798.1419677734375, + "loss": 0.1041, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.2061028927564621, + "rewards/margins": 0.3626430332660675, + "rewards/rejected": -0.5687459111213684, + "step": 5450 + }, + { + "epoch": 0.73, + "learning_rate": 1.0446925746067768e-06, + "logits/chosen": -1.5910913944244385, + "logits/rejected": -0.9691600799560547, + "logps/chosen": -541.7664184570312, + "logps/rejected": -948.0553588867188, + "loss": 0.0943, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.19300617277622223, + "rewards/margins": 0.4319871962070465, + "rewards/rejected": -0.6249933838844299, + "step": 5460 + }, + { + "epoch": 0.73, + "learning_rate": 1.0352475139849993e-06, + "logits/chosen": -1.5177781581878662, + "logits/rejected": -1.020815134048462, + "logps/chosen": -509.0782775878906, + "logps/rejected": -837.248046875, + "loss": 0.1226, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.20897535979747772, + "rewards/margins": 0.39487597346305847, + "rewards/rejected": -0.6038513779640198, + "step": 5470 + }, + { + "epoch": 0.73, + "learning_rate": 1.0258341823102418e-06, + "logits/chosen": -1.4822075366973877, + "logits/rejected": -0.8729708790779114, + "logps/chosen": -532.9627685546875, + "logps/rejected": -851.3942260742188, + "loss": 0.1571, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.23945197463035583, + "rewards/margins": 0.39208975434303284, + "rewards/rejected": -0.6315417885780334, + "step": 5480 + }, + { + "epoch": 0.73, + "learning_rate": 1.0164527834907468e-06, + "logits/chosen": -1.3401305675506592, + "logits/rejected": -1.0187923908233643, + "logps/chosen": -385.40570068359375, + "logps/rejected": -800.2244262695312, + "loss": 0.1085, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.20979323983192444, + "rewards/margins": 0.3925233781337738, + "rewards/rejected": -0.602316677570343, + "step": 5490 + }, + { + "epoch": 0.73, + "learning_rate": 1.0071035207430352e-06, + "logits/chosen": -1.360961675643921, + "logits/rejected": -0.7782384157180786, + "logps/chosen": -457.4180603027344, + "logps/rejected": -876.7483520507812, + "loss": 0.1563, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.23960646986961365, + "rewards/margins": 0.390997052192688, + "rewards/rejected": -0.630603551864624, + "step": 5500 + }, + { + "epoch": 0.73, + "learning_rate": 9.977865965875091e-07, + "logits/chosen": -1.8332687616348267, + "logits/rejected": -1.1459457874298096, + "logps/chosen": -512.2830810546875, + "logps/rejected": -801.5711669921875, + "loss": 0.1433, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.22823330760002136, + "rewards/margins": 0.3319735825061798, + "rewards/rejected": -0.5602068901062012, + "step": 5510 + }, + { + "epoch": 0.74, + "learning_rate": 9.88502212844063e-07, + "logits/chosen": -1.341430902481079, + "logits/rejected": -0.9758291244506836, + "logps/chosen": -451.395751953125, + "logps/rejected": -721.2365112304688, + "loss": 0.1819, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.24484524130821228, + "rewards/margins": 0.29828450083732605, + "rewards/rejected": -0.5431298017501831, + "step": 5520 + }, + { + "epoch": 0.74, + "learning_rate": 9.792505706277136e-07, + "logits/chosen": -1.4987294673919678, + "logits/rejected": -0.7679599523544312, + "logps/chosen": -491.7013244628906, + "logps/rejected": -751.9393310546875, + "loss": 0.1781, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.2140645980834961, + "rewards/margins": 0.3486694395542145, + "rewards/rejected": -0.5627340078353882, + "step": 5530 + }, + { + "epoch": 0.74, + "learning_rate": 9.700318703442437e-07, + "logits/chosen": -1.4482524394989014, + "logits/rejected": -0.9931025505065918, + "logps/chosen": -497.1576232910156, + "logps/rejected": -871.8629150390625, + "loss": 0.0972, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.2180767059326172, + "rewards/margins": 0.40633684396743774, + "rewards/rejected": -0.6244135499000549, + "step": 5540 + }, + { + "epoch": 0.74, + "learning_rate": 9.608463116858544e-07, + "logits/chosen": -1.70029616355896, + "logits/rejected": -1.0156257152557373, + "logps/chosen": -471.2583923339844, + "logps/rejected": -860.1076049804688, + "loss": 0.0949, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.18707410991191864, + "rewards/margins": 0.43171149492263794, + "rewards/rejected": -0.618785560131073, + "step": 5550 + }, + { + "epoch": 0.74, + "learning_rate": 9.516940936268504e-07, + "logits/chosen": -1.4975395202636719, + "logits/rejected": -1.0617475509643555, + "logps/chosen": -485.7090759277344, + "logps/rejected": -802.9081420898438, + "loss": 0.1614, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.21952207386493683, + "rewards/margins": 0.34664005041122437, + "rewards/rejected": -0.566162109375, + "step": 5560 + }, + { + "epoch": 0.74, + "learning_rate": 9.4257541441932e-07, + "logits/chosen": -1.3621257543563843, + "logits/rejected": -0.8228029012680054, + "logps/chosen": -453.7808532714844, + "logps/rejected": -819.4066162109375, + "loss": 0.1126, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.16346648335456848, + "rewards/margins": 0.42321619391441345, + "rewards/rejected": -0.5866826772689819, + "step": 5570 + }, + { + "epoch": 0.74, + "learning_rate": 9.334904715888496e-07, + "logits/chosen": -1.4563817977905273, + "logits/rejected": -1.0051524639129639, + "logps/chosen": -491.6202697753906, + "logps/rejected": -865.3167724609375, + "loss": 0.1398, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.24924297630786896, + "rewards/margins": 0.35569971799850464, + "rewards/rejected": -0.6049426794052124, + "step": 5580 + }, + { + "epoch": 0.75, + "learning_rate": 9.244394619302338e-07, + "logits/chosen": -1.4319621324539185, + "logits/rejected": -0.9194442629814148, + "logps/chosen": -470.22174072265625, + "logps/rejected": -769.84326171875, + "loss": 0.184, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1991579532623291, + "rewards/margins": 0.3460138738155365, + "rewards/rejected": -0.545171856880188, + "step": 5590 + }, + { + "epoch": 0.75, + "learning_rate": 9.154225815032242e-07, + "logits/chosen": -1.3754334449768066, + "logits/rejected": -0.8846775889396667, + "logps/chosen": -550.1680908203125, + "logps/rejected": -831.501953125, + "loss": 0.1243, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.26152288913726807, + "rewards/margins": 0.35007160902023315, + "rewards/rejected": -0.6115944981575012, + "step": 5600 + }, + { + "epoch": 0.75, + "learning_rate": 9.064400256282757e-07, + "logits/chosen": -1.5064082145690918, + "logits/rejected": -0.9161268472671509, + "logps/chosen": -398.8093566894531, + "logps/rejected": -654.99072265625, + "loss": 0.1648, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.1780550479888916, + "rewards/margins": 0.298201322555542, + "rewards/rejected": -0.4762563109397888, + "step": 5610 + }, + { + "epoch": 0.75, + "learning_rate": 8.974919888823164e-07, + "logits/chosen": -1.4121335744857788, + "logits/rejected": -0.794890820980072, + "logps/chosen": -460.91998291015625, + "logps/rejected": -876.2742309570312, + "loss": 0.1196, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.1762869507074356, + "rewards/margins": 0.45224347710609436, + "rewards/rejected": -0.6285303831100464, + "step": 5620 + }, + { + "epoch": 0.75, + "learning_rate": 8.885786650945333e-07, + "logits/chosen": -1.3726732730865479, + "logits/rejected": -0.8681763410568237, + "logps/chosen": -510.92529296875, + "logps/rejected": -892.0538940429688, + "loss": 0.1079, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.23853686451911926, + "rewards/margins": 0.363789826631546, + "rewards/rejected": -0.6023266911506653, + "step": 5630 + }, + { + "epoch": 0.75, + "learning_rate": 8.797002473421729e-07, + "logits/chosen": -1.521254301071167, + "logits/rejected": -1.0100994110107422, + "logps/chosen": -459.5888671875, + "logps/rejected": -865.0677490234375, + "loss": 0.1113, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.22328796982765198, + "rewards/margins": 0.39054790139198303, + "rewards/rejected": -0.6138359308242798, + "step": 5640 + }, + { + "epoch": 0.75, + "learning_rate": 8.708569279463622e-07, + "logits/chosen": -1.2005363702774048, + "logits/rejected": -0.7217308282852173, + "logps/chosen": -518.8699340820312, + "logps/rejected": -975.6614990234375, + "loss": 0.1145, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.27335843443870544, + "rewards/margins": 0.45107731223106384, + "rewards/rejected": -0.7244357466697693, + "step": 5650 + }, + { + "epoch": 0.75, + "learning_rate": 8.620488984679378e-07, + "logits/chosen": -1.4894965887069702, + "logits/rejected": -0.9523374438285828, + "logps/chosen": -542.3692626953125, + "logps/rejected": -755.6817626953125, + "loss": 0.1605, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.2720409035682678, + "rewards/margins": 0.3039925694465637, + "rewards/rejected": -0.5760334730148315, + "step": 5660 + }, + { + "epoch": 0.76, + "learning_rate": 8.532763497032987e-07, + "logits/chosen": -1.4424383640289307, + "logits/rejected": -0.8664541244506836, + "logps/chosen": -482.08428955078125, + "logps/rejected": -854.2926025390625, + "loss": 0.0746, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.2148595154285431, + "rewards/margins": 0.4492289125919342, + "rewards/rejected": -0.6640883684158325, + "step": 5670 + }, + { + "epoch": 0.76, + "learning_rate": 8.445394716802754e-07, + "logits/chosen": -1.4953354597091675, + "logits/rejected": -1.0513901710510254, + "logps/chosen": -495.39727783203125, + "logps/rejected": -813.083984375, + "loss": 0.0884, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.23665395379066467, + "rewards/margins": 0.3620506525039673, + "rewards/rejected": -0.5987046957015991, + "step": 5680 + }, + { + "epoch": 0.76, + "learning_rate": 8.35838453654009e-07, + "logits/chosen": -1.5572940111160278, + "logits/rejected": -0.9097954034805298, + "logps/chosen": -520.28125, + "logps/rejected": -940.6891479492188, + "loss": 0.1427, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.19597239792346954, + "rewards/margins": 0.4340503215789795, + "rewards/rejected": -0.6300228238105774, + "step": 5690 + }, + { + "epoch": 0.76, + "learning_rate": 8.271734841028553e-07, + "logits/chosen": -1.45353364944458, + "logits/rejected": -1.0082950592041016, + "logps/chosen": -492.7423400878906, + "logps/rejected": -816.0848999023438, + "loss": 0.163, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.22472481429576874, + "rewards/margins": 0.35925525426864624, + "rewards/rejected": -0.583980143070221, + "step": 5700 + }, + { + "epoch": 0.76, + "learning_rate": 8.185447507243e-07, + "logits/chosen": -1.373942255973816, + "logits/rejected": -0.5300694704055786, + "logps/chosen": -515.666748046875, + "logps/rejected": -820.1891479492188, + "loss": 0.1535, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.21491403877735138, + "rewards/margins": 0.4033784866333008, + "rewards/rejected": -0.6182926297187805, + "step": 5710 + }, + { + "epoch": 0.76, + "learning_rate": 8.099524404308948e-07, + "logits/chosen": -1.2152715921401978, + "logits/rejected": -0.8097234964370728, + "logps/chosen": -469.317138671875, + "logps/rejected": -758.361328125, + "loss": 0.1487, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.25387001037597656, + "rewards/margins": 0.318764865398407, + "rewards/rejected": -0.5726348757743835, + "step": 5720 + }, + { + "epoch": 0.76, + "learning_rate": 8.013967393462094e-07, + "logits/chosen": -1.3372713327407837, + "logits/rejected": -0.9042348861694336, + "logps/chosen": -486.88983154296875, + "logps/rejected": -775.0665893554688, + "loss": 0.133, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.24530085921287537, + "rewards/margins": 0.3258090615272522, + "rewards/rejected": -0.5711098909378052, + "step": 5730 + }, + { + "epoch": 0.77, + "learning_rate": 7.928778328007918e-07, + "logits/chosen": -1.3288884162902832, + "logits/rejected": -0.7175564765930176, + "logps/chosen": -550.4727172851562, + "logps/rejected": -796.298583984375, + "loss": 0.1368, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.23931817710399628, + "rewards/margins": 0.3275078237056732, + "rewards/rejected": -0.5668259859085083, + "step": 5740 + }, + { + "epoch": 0.77, + "learning_rate": 7.843959053281663e-07, + "logits/chosen": -1.5332810878753662, + "logits/rejected": -0.8374568223953247, + "logps/chosen": -508.918212890625, + "logps/rejected": -825.8583984375, + "loss": 0.1465, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.20773963630199432, + "rewards/margins": 0.3811188340187073, + "rewards/rejected": -0.5888584852218628, + "step": 5750 + }, + { + "epoch": 0.77, + "learning_rate": 7.759511406608255e-07, + "logits/chosen": -1.5883281230926514, + "logits/rejected": -0.9644662737846375, + "logps/chosen": -477.9900817871094, + "logps/rejected": -839.3572387695312, + "loss": 0.0832, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.2028050422668457, + "rewards/margins": 0.40947359800338745, + "rewards/rejected": -0.6122786998748779, + "step": 5760 + }, + { + "epoch": 0.77, + "learning_rate": 7.675437217262571e-07, + "logits/chosen": -1.27748703956604, + "logits/rejected": -0.7101573944091797, + "logps/chosen": -469.8975524902344, + "logps/rejected": -739.5403442382812, + "loss": 0.183, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.22256311774253845, + "rewards/margins": 0.3478880524635315, + "rewards/rejected": -0.5704511404037476, + "step": 5770 + }, + { + "epoch": 0.77, + "learning_rate": 7.591738306429769e-07, + "logits/chosen": -1.1995785236358643, + "logits/rejected": -0.8539302945137024, + "logps/chosen": -464.575927734375, + "logps/rejected": -806.1849365234375, + "loss": 0.1375, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.21260254085063934, + "rewards/margins": 0.3813617527484894, + "rewards/rejected": -0.5939642786979675, + "step": 5780 + }, + { + "epoch": 0.77, + "learning_rate": 7.508416487165862e-07, + "logits/chosen": -1.2585513591766357, + "logits/rejected": -0.7907862663269043, + "logps/chosen": -524.2095947265625, + "logps/rejected": -848.9905395507812, + "loss": 0.1235, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.3004508912563324, + "rewards/margins": 0.35597458481788635, + "rewards/rejected": -0.656425416469574, + "step": 5790 + }, + { + "epoch": 0.77, + "learning_rate": 7.425473564358457e-07, + "logits/chosen": -1.5966112613677979, + "logits/rejected": -0.937767505645752, + "logps/chosen": -514.3389892578125, + "logps/rejected": -877.3151245117188, + "loss": 0.1471, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.2573555111885071, + "rewards/margins": 0.4029225707054138, + "rewards/rejected": -0.6602780818939209, + "step": 5800 + }, + { + "epoch": 0.77, + "learning_rate": 7.342911334687619e-07, + "logits/chosen": -1.4570611715316772, + "logits/rejected": -0.973736584186554, + "logps/chosen": -409.6090393066406, + "logps/rejected": -803.8706665039062, + "loss": 0.1114, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.1908612698316574, + "rewards/margins": 0.3969055116176605, + "rewards/rejected": -0.5877667665481567, + "step": 5810 + }, + { + "epoch": 0.78, + "learning_rate": 7.260731586586983e-07, + "logits/chosen": -1.5033175945281982, + "logits/rejected": -1.153529405593872, + "logps/chosen": -471.53924560546875, + "logps/rejected": -780.7069091796875, + "loss": 0.179, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.24635379016399384, + "rewards/margins": 0.29246678948402405, + "rewards/rejected": -0.5388205051422119, + "step": 5820 + }, + { + "epoch": 0.78, + "learning_rate": 7.178936100204994e-07, + "logits/chosen": -1.6408525705337524, + "logits/rejected": -1.0268441438674927, + "logps/chosen": -499.1380920410156, + "logps/rejected": -923.5458984375, + "loss": 0.0953, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.19846662878990173, + "rewards/margins": 0.46603909134864807, + "rewards/rejected": -0.664505660533905, + "step": 5830 + }, + { + "epoch": 0.78, + "learning_rate": 7.097526647366379e-07, + "logits/chosen": -1.351193904876709, + "logits/rejected": -0.8164033889770508, + "logps/chosen": -527.61572265625, + "logps/rejected": -889.2008666992188, + "loss": 0.1224, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.257089227437973, + "rewards/margins": 0.3886021077632904, + "rewards/rejected": -0.6456912755966187, + "step": 5840 + }, + { + "epoch": 0.78, + "learning_rate": 7.016504991533727e-07, + "logits/chosen": -1.2645586729049683, + "logits/rejected": -0.9257951974868774, + "logps/chosen": -491.51922607421875, + "logps/rejected": -855.71875, + "loss": 0.1333, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.25636953115463257, + "rewards/margins": 0.356289803981781, + "rewards/rejected": -0.6126593351364136, + "step": 5850 + }, + { + "epoch": 0.78, + "learning_rate": 6.935872887769299e-07, + "logits/chosen": -0.994711697101593, + "logits/rejected": -0.8577106595039368, + "logps/chosen": -418.3548889160156, + "logps/rejected": -819.3732299804688, + "loss": 0.1633, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.20737271010875702, + "rewards/margins": 0.3346274197101593, + "rewards/rejected": -0.5420001745223999, + "step": 5860 + }, + { + "epoch": 0.78, + "learning_rate": 6.855632082697045e-07, + "logits/chosen": -1.4238831996917725, + "logits/rejected": -0.7512753009796143, + "logps/chosen": -548.7970581054688, + "logps/rejected": -825.9925537109375, + "loss": 0.1311, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.2298368215560913, + "rewards/margins": 0.3555445373058319, + "rewards/rejected": -0.5853813886642456, + "step": 5870 + }, + { + "epoch": 0.78, + "learning_rate": 6.775784314464717e-07, + "logits/chosen": -1.6078815460205078, + "logits/rejected": -0.993118166923523, + "logps/chosen": -545.4841918945312, + "logps/rejected": -895.5103759765625, + "loss": 0.1547, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.2508589029312134, + "rewards/margins": 0.3634670674800873, + "rewards/rejected": -0.6143259406089783, + "step": 5880 + }, + { + "epoch": 0.79, + "learning_rate": 6.696331312706245e-07, + "logits/chosen": -1.5820848941802979, + "logits/rejected": -0.9387833476066589, + "logps/chosen": -587.0062255859375, + "logps/rejected": -810.4898681640625, + "loss": 0.1313, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.2731969952583313, + "rewards/margins": 0.3355793356895447, + "rewards/rejected": -0.608776330947876, + "step": 5890 + }, + { + "epoch": 0.79, + "learning_rate": 6.617274798504286e-07, + "logits/chosen": -1.398246169090271, + "logits/rejected": -1.0464344024658203, + "logps/chosen": -441.2962341308594, + "logps/rejected": -797.6106567382812, + "loss": 0.1301, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.2251112014055252, + "rewards/margins": 0.35472145676612854, + "rewards/rejected": -0.5798326134681702, + "step": 5900 + }, + { + "epoch": 0.79, + "learning_rate": 6.538616484352902e-07, + "logits/chosen": -1.4681236743927002, + "logits/rejected": -1.1271047592163086, + "logps/chosen": -479.44580078125, + "logps/rejected": -835.37744140625, + "loss": 0.1398, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.25705060362815857, + "rewards/margins": 0.36807265877723694, + "rewards/rejected": -0.6251233220100403, + "step": 5910 + }, + { + "epoch": 0.79, + "learning_rate": 6.460358074120518e-07, + "logits/chosen": -1.3405694961547852, + "logits/rejected": -0.8227552175521851, + "logps/chosen": -527.5181884765625, + "logps/rejected": -772.7880249023438, + "loss": 0.1881, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.2875741124153137, + "rewards/margins": 0.3079223036766052, + "rewards/rejected": -0.5954964756965637, + "step": 5920 + }, + { + "epoch": 0.79, + "learning_rate": 6.382501263012936e-07, + "logits/chosen": -1.526971459388733, + "logits/rejected": -0.8887739181518555, + "logps/chosen": -518.7437744140625, + "logps/rejected": -957.9097900390625, + "loss": 0.094, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.25182047486305237, + "rewards/margins": 0.44587215781211853, + "rewards/rejected": -0.6976926326751709, + "step": 5930 + }, + { + "epoch": 0.79, + "learning_rate": 6.305047737536707e-07, + "logits/chosen": -1.3693434000015259, + "logits/rejected": -0.8970683813095093, + "logps/chosen": -503.234130859375, + "logps/rejected": -836.6292724609375, + "loss": 0.1333, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.22358956933021545, + "rewards/margins": 0.36614367365837097, + "rewards/rejected": -0.5897333025932312, + "step": 5940 + }, + { + "epoch": 0.79, + "learning_rate": 6.227999175462521e-07, + "logits/chosen": -1.4342982769012451, + "logits/rejected": -0.8154445886611938, + "logps/chosen": -522.7168579101562, + "logps/rejected": -844.3760986328125, + "loss": 0.1606, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.26568546891212463, + "rewards/margins": 0.35932299494743347, + "rewards/rejected": -0.6250084638595581, + "step": 5950 + }, + { + "epoch": 0.79, + "learning_rate": 6.151357245788917e-07, + "logits/chosen": -1.4272327423095703, + "logits/rejected": -0.9210633039474487, + "logps/chosen": -579.3690185546875, + "logps/rejected": -838.6309814453125, + "loss": 0.1929, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.2933214008808136, + "rewards/margins": 0.3416084051132202, + "rewards/rejected": -0.6349297761917114, + "step": 5960 + }, + { + "epoch": 0.8, + "learning_rate": 6.075123608706093e-07, + "logits/chosen": -1.4912382364273071, + "logits/rejected": -0.6635556221008301, + "logps/chosen": -514.3550415039062, + "logps/rejected": -756.4337158203125, + "loss": 0.1437, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.2315598428249359, + "rewards/margins": 0.3265839219093323, + "rewards/rejected": -0.5581437349319458, + "step": 5970 + }, + { + "epoch": 0.8, + "learning_rate": 5.999299915559956e-07, + "logits/chosen": -1.5643749237060547, + "logits/rejected": -1.040816068649292, + "logps/chosen": -516.1356201171875, + "logps/rejected": -852.8410034179688, + "loss": 0.1228, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.23064880073070526, + "rewards/margins": 0.37677282094955444, + "rewards/rejected": -0.6074216365814209, + "step": 5980 + }, + { + "epoch": 0.8, + "learning_rate": 5.923887808816373e-07, + "logits/chosen": -1.2064945697784424, + "logits/rejected": -0.849543571472168, + "logps/chosen": -448.8682556152344, + "logps/rejected": -885.9103393554688, + "loss": 0.1258, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.24305430054664612, + "rewards/margins": 0.4217645227909088, + "rewards/rejected": -0.6648188233375549, + "step": 5990 + }, + { + "epoch": 0.8, + "learning_rate": 5.848888922025553e-07, + "logits/chosen": -1.6217724084854126, + "logits/rejected": -0.8952631950378418, + "logps/chosen": -597.3336181640625, + "logps/rejected": -819.6091918945312, + "loss": 0.1111, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.2815231680870056, + "rewards/margins": 0.3476121127605438, + "rewards/rejected": -0.6291353702545166, + "step": 6000 + }, + { + "epoch": 0.8, + "learning_rate": 5.774304879786688e-07, + "logits/chosen": -1.5052062273025513, + "logits/rejected": -1.112230658531189, + "logps/chosen": -436.70819091796875, + "logps/rejected": -769.6760864257812, + "loss": 0.1179, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.222015380859375, + "rewards/margins": 0.36521318554878235, + "rewards/rejected": -0.5872284770011902, + "step": 6010 + }, + { + "epoch": 0.8, + "learning_rate": 5.700137297712749e-07, + "logits/chosen": -1.4654874801635742, + "logits/rejected": -0.9464197158813477, + "logps/chosen": -471.94549560546875, + "logps/rejected": -863.0076904296875, + "loss": 0.1133, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.22802074253559113, + "rewards/margins": 0.39466968178749084, + "rewards/rejected": -0.6226904392242432, + "step": 6020 + }, + { + "epoch": 0.8, + "learning_rate": 5.626387782395512e-07, + "logits/chosen": -1.281158685684204, + "logits/rejected": -0.968732476234436, + "logps/chosen": -498.4610900878906, + "logps/rejected": -894.5953369140625, + "loss": 0.1437, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.26076218485832214, + "rewards/margins": 0.3403322994709015, + "rewards/rejected": -0.6010944843292236, + "step": 6030 + }, + { + "epoch": 0.81, + "learning_rate": 5.553057931370729e-07, + "logits/chosen": -1.4274104833602905, + "logits/rejected": -0.8784977793693542, + "logps/chosen": -604.6317138671875, + "logps/rejected": -912.6419067382812, + "loss": 0.1183, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.25740376114845276, + "rewards/margins": 0.41545647382736206, + "rewards/rejected": -0.6728602647781372, + "step": 6040 + }, + { + "epoch": 0.81, + "learning_rate": 5.48014933308352e-07, + "logits/chosen": -1.431522011756897, + "logits/rejected": -0.7600606679916382, + "logps/chosen": -506.6710510253906, + "logps/rejected": -824.3439331054688, + "loss": 0.1321, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.22056278586387634, + "rewards/margins": 0.3959851861000061, + "rewards/rejected": -0.6165480017662048, + "step": 6050 + }, + { + "epoch": 0.81, + "learning_rate": 5.407663566854008e-07, + "logits/chosen": -1.5452665090560913, + "logits/rejected": -0.8012416958808899, + "logps/chosen": -544.3170776367188, + "logps/rejected": -885.4869384765625, + "loss": 0.0834, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.2349054366350174, + "rewards/margins": 0.41422995924949646, + "rewards/rejected": -0.649135410785675, + "step": 6060 + }, + { + "epoch": 0.81, + "learning_rate": 5.335602202843054e-07, + "logits/chosen": -1.3676344156265259, + "logits/rejected": -0.7313020825386047, + "logps/chosen": -552.454833984375, + "logps/rejected": -818.5693359375, + "loss": 0.1449, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.2200872004032135, + "rewards/margins": 0.38154327869415283, + "rewards/rejected": -0.601630449295044, + "step": 6070 + }, + { + "epoch": 0.81, + "learning_rate": 5.263966802018275e-07, + "logits/chosen": -1.5200622081756592, + "logits/rejected": -0.8617421388626099, + "logps/chosen": -531.3231201171875, + "logps/rejected": -831.9881591796875, + "loss": 0.1326, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.25307679176330566, + "rewards/margins": 0.3641541004180908, + "rewards/rejected": -0.6172308921813965, + "step": 6080 + }, + { + "epoch": 0.81, + "learning_rate": 5.192758916120236e-07, + "logits/chosen": -1.6207752227783203, + "logits/rejected": -0.8784235715866089, + "logps/chosen": -567.6187744140625, + "logps/rejected": -901.7517700195312, + "loss": 0.0983, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.2562420964241028, + "rewards/margins": 0.4305727481842041, + "rewards/rejected": -0.6868148446083069, + "step": 6090 + }, + { + "epoch": 0.81, + "learning_rate": 5.121980087628802e-07, + "logits/chosen": -1.4444869756698608, + "logits/rejected": -1.1552057266235352, + "logps/chosen": -447.08575439453125, + "logps/rejected": -835.5751953125, + "loss": 0.121, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.2040017545223236, + "rewards/margins": 0.3553166687488556, + "rewards/rejected": -0.5593183636665344, + "step": 6100 + }, + { + "epoch": 0.81, + "learning_rate": 5.051631849729785e-07, + "logits/chosen": -1.4898678064346313, + "logits/rejected": -0.743249237537384, + "logps/chosen": -512.14697265625, + "logps/rejected": -751.2730712890625, + "loss": 0.1107, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.2078522890806198, + "rewards/margins": 0.36741724610328674, + "rewards/rejected": -0.5752695798873901, + "step": 6110 + }, + { + "epoch": 0.82, + "learning_rate": 4.981715726281666e-07, + "logits/chosen": -1.5638794898986816, + "logits/rejected": -0.8530174493789673, + "logps/chosen": -569.1683349609375, + "logps/rejected": -859.6868286132812, + "loss": 0.1614, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.2818874418735504, + "rewards/margins": 0.33524757623672485, + "rewards/rejected": -0.6171349883079529, + "step": 6120 + }, + { + "epoch": 0.82, + "learning_rate": 4.912233231782623e-07, + "logits/chosen": -1.4559434652328491, + "logits/rejected": -0.9756882786750793, + "logps/chosen": -414.2666931152344, + "logps/rejected": -723.321044921875, + "loss": 0.1484, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.17090296745300293, + "rewards/margins": 0.33516639471054077, + "rewards/rejected": -0.5060693621635437, + "step": 6130 + }, + { + "epoch": 0.82, + "learning_rate": 4.843185871337722e-07, + "logits/chosen": -1.5099248886108398, + "logits/rejected": -0.9456412196159363, + "logps/chosen": -560.4591064453125, + "logps/rejected": -834.3416137695312, + "loss": 0.172, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.24070127308368683, + "rewards/margins": 0.34346550703048706, + "rewards/rejected": -0.5841667652130127, + "step": 6140 + }, + { + "epoch": 0.82, + "learning_rate": 4.774575140626317e-07, + "logits/chosen": -1.7518455982208252, + "logits/rejected": -1.114401936531067, + "logps/chosen": -525.9067993164062, + "logps/rejected": -792.6747436523438, + "loss": 0.1686, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.23079843819141388, + "rewards/margins": 0.34348228573799133, + "rewards/rejected": -0.5742807388305664, + "step": 6150 + }, + { + "epoch": 0.82, + "learning_rate": 4.706402525869633e-07, + "logits/chosen": -1.432835340499878, + "logits/rejected": -0.8895187377929688, + "logps/chosen": -462.48699951171875, + "logps/rejected": -899.4519653320312, + "loss": 0.0915, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.18733736872673035, + "rewards/margins": 0.42375391721725464, + "rewards/rejected": -0.6110912561416626, + "step": 6160 + }, + { + "epoch": 0.82, + "learning_rate": 4.638669503798579e-07, + "logits/chosen": -1.5865939855575562, + "logits/rejected": -0.840206503868103, + "logps/chosen": -533.1041870117188, + "logps/rejected": -855.7658081054688, + "loss": 0.0956, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.21983318030834198, + "rewards/margins": 0.4039926528930664, + "rewards/rejected": -0.6238259077072144, + "step": 6170 + }, + { + "epoch": 0.82, + "learning_rate": 4.5713775416217884e-07, + "logits/chosen": -1.4522035121917725, + "logits/rejected": -1.3330981731414795, + "logps/chosen": -482.1822204589844, + "logps/rejected": -931.4268798828125, + "loss": 0.1176, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.2349972426891327, + "rewards/margins": 0.3949509561061859, + "rewards/rejected": -0.6299481987953186, + "step": 6180 + }, + { + "epoch": 0.83, + "learning_rate": 4.5045280969937847e-07, + "logits/chosen": -1.4791220426559448, + "logits/rejected": -0.9238311052322388, + "logps/chosen": -418.791748046875, + "logps/rejected": -879.8201904296875, + "loss": 0.0777, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.20514781773090363, + "rewards/margins": 0.4510798454284668, + "rewards/rejected": -0.6562276482582092, + "step": 6190 + }, + { + "epoch": 0.83, + "learning_rate": 4.438122617983442e-07, + "logits/chosen": -1.5650171041488647, + "logits/rejected": -0.9084704518318176, + "logps/chosen": -580.3602905273438, + "logps/rejected": -948.2159423828125, + "loss": 0.1289, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.23897108435630798, + "rewards/margins": 0.4406881332397461, + "rewards/rejected": -0.6796592473983765, + "step": 6200 + }, + { + "epoch": 0.83, + "learning_rate": 4.372162543042624e-07, + "logits/chosen": -1.5728212594985962, + "logits/rejected": -1.1380150318145752, + "logps/chosen": -489.7958984375, + "logps/rejected": -833.2279052734375, + "loss": 0.1187, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.20882126688957214, + "rewards/margins": 0.3733726143836975, + "rewards/rejected": -0.5821938514709473, + "step": 6210 + }, + { + "epoch": 0.83, + "learning_rate": 4.3066493009749853e-07, + "logits/chosen": -1.5403592586517334, + "logits/rejected": -0.9451869130134583, + "logps/chosen": -482.264892578125, + "logps/rejected": -828.3956298828125, + "loss": 0.1215, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.2161424160003662, + "rewards/margins": 0.414954274892807, + "rewards/rejected": -0.6310966610908508, + "step": 6220 + }, + { + "epoch": 0.83, + "learning_rate": 4.2415843109050667e-07, + "logits/chosen": -1.489720344543457, + "logits/rejected": -1.1852211952209473, + "logps/chosen": -470.1988830566406, + "logps/rejected": -860.6622924804688, + "loss": 0.1426, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.21867451071739197, + "rewards/margins": 0.3845387101173401, + "rewards/rejected": -0.6032131910324097, + "step": 6230 + }, + { + "epoch": 0.83, + "learning_rate": 4.1769689822475147e-07, + "logits/chosen": -1.3447024822235107, + "logits/rejected": -0.9154160618782043, + "logps/chosen": -440.44879150390625, + "logps/rejected": -818.2030029296875, + "loss": 0.1167, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.19660237431526184, + "rewards/margins": 0.3885725438594818, + "rewards/rejected": -0.5851748585700989, + "step": 6240 + }, + { + "epoch": 0.83, + "learning_rate": 4.1128047146765936e-07, + "logits/chosen": -1.6222617626190186, + "logits/rejected": -1.1826813220977783, + "logps/chosen": -540.1722412109375, + "logps/rejected": -831.1741333007812, + "loss": 0.1666, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.26988035440444946, + "rewards/margins": 0.30596500635147095, + "rewards/rejected": -0.5758453607559204, + "step": 6250 + }, + { + "epoch": 0.83, + "learning_rate": 4.049092898095816e-07, + "logits/chosen": -1.3060246706008911, + "logits/rejected": -0.8426834940910339, + "logps/chosen": -546.715087890625, + "logps/rejected": -976.0635986328125, + "loss": 0.1294, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.28321272134780884, + "rewards/margins": 0.4075555205345154, + "rewards/rejected": -0.690768301486969, + "step": 6260 + }, + { + "epoch": 0.84, + "learning_rate": 3.9858349126078945e-07, + "logits/chosen": -1.6732780933380127, + "logits/rejected": -0.7740602493286133, + "logps/chosen": -577.4724731445312, + "logps/rejected": -861.3342895507812, + "loss": 0.1205, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.2359362542629242, + "rewards/margins": 0.38236093521118164, + "rewards/rejected": -0.6182972192764282, + "step": 6270 + }, + { + "epoch": 0.84, + "learning_rate": 3.9230321284847856e-07, + "logits/chosen": -1.7987782955169678, + "logits/rejected": -1.325372338294983, + "logps/chosen": -440.06610107421875, + "logps/rejected": -801.0474853515625, + "loss": 0.1238, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.21796616911888123, + "rewards/margins": 0.3503972887992859, + "rewards/rejected": -0.5683634281158447, + "step": 6280 + }, + { + "epoch": 0.84, + "learning_rate": 3.86068590613804e-07, + "logits/chosen": -1.49826979637146, + "logits/rejected": -1.148301362991333, + "logps/chosen": -485.7596130371094, + "logps/rejected": -813.1477661132812, + "loss": 0.107, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.21670837700366974, + "rewards/margins": 0.36857548356056213, + "rewards/rejected": -0.5852838754653931, + "step": 6290 + }, + { + "epoch": 0.84, + "learning_rate": 3.798797596089351e-07, + "logits/chosen": -1.738193154335022, + "logits/rejected": -1.0076912641525269, + "logps/chosen": -429.09002685546875, + "logps/rejected": -824.4563598632812, + "loss": 0.0845, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1454305797815323, + "rewards/margins": 0.4517894685268402, + "rewards/rejected": -0.5972201228141785, + "step": 6300 + }, + { + "epoch": 0.84, + "learning_rate": 3.737368538941255e-07, + "logits/chosen": -1.3455018997192383, + "logits/rejected": -0.9166312217712402, + "logps/chosen": -503.8860778808594, + "logps/rejected": -904.0072021484375, + "loss": 0.1103, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.2505475878715515, + "rewards/margins": 0.4097828269004822, + "rewards/rejected": -0.6603304147720337, + "step": 6310 + }, + { + "epoch": 0.84, + "learning_rate": 3.6764000653481263e-07, + "logits/chosen": -1.3718827962875366, + "logits/rejected": -0.9547538757324219, + "logps/chosen": -516.5181274414062, + "logps/rejected": -796.8955078125, + "loss": 0.1531, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.22998102009296417, + "rewards/margins": 0.33470654487609863, + "rewards/rejected": -0.5646876096725464, + "step": 6320 + }, + { + "epoch": 0.84, + "learning_rate": 3.615893495987335e-07, + "logits/chosen": -1.692065954208374, + "logits/rejected": -1.0852649211883545, + "logps/chosen": -397.7701416015625, + "logps/rejected": -803.3123779296875, + "loss": 0.1231, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.20279788970947266, + "rewards/margins": 0.41056856513023376, + "rewards/rejected": -0.6133664846420288, + "step": 6330 + }, + { + "epoch": 0.85, + "learning_rate": 3.555850141530659e-07, + "logits/chosen": -1.4860546588897705, + "logits/rejected": -1.1353389024734497, + "logps/chosen": -407.8889465332031, + "logps/rejected": -835.9850463867188, + "loss": 0.1187, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.1942521333694458, + "rewards/margins": 0.3862732946872711, + "rewards/rejected": -0.5805253982543945, + "step": 6340 + }, + { + "epoch": 0.85, + "learning_rate": 3.4962713026158697e-07, + "logits/chosen": -1.5381252765655518, + "logits/rejected": -0.8441897630691528, + "logps/chosen": -568.1682739257812, + "logps/rejected": -914.52099609375, + "loss": 0.0725, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.26694822311401367, + "rewards/margins": 0.4364437460899353, + "rewards/rejected": -0.703391969203949, + "step": 6350 + }, + { + "epoch": 0.85, + "learning_rate": 3.4371582698185636e-07, + "logits/chosen": -1.5919785499572754, + "logits/rejected": -1.1603013277053833, + "logps/chosen": -470.9925231933594, + "logps/rejected": -867.21240234375, + "loss": 0.1121, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.21010012924671173, + "rewards/margins": 0.3647227883338928, + "rewards/rejected": -0.5748229026794434, + "step": 6360 + }, + { + "epoch": 0.85, + "learning_rate": 3.378512323624228e-07, + "logits/chosen": -1.5560038089752197, + "logits/rejected": -0.8677865862846375, + "logps/chosen": -498.01947021484375, + "logps/rejected": -804.9054565429688, + "loss": 0.142, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.21834082901477814, + "rewards/margins": 0.35967257618904114, + "rewards/rejected": -0.5780134201049805, + "step": 6370 + }, + { + "epoch": 0.85, + "learning_rate": 3.3203347344004737e-07, + "logits/chosen": -1.524595856666565, + "logits/rejected": -1.0703728199005127, + "logps/chosen": -485.6414489746094, + "logps/rejected": -871.1435546875, + "loss": 0.0843, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.19698016345500946, + "rewards/margins": 0.4223461151123047, + "rewards/rejected": -0.6193262934684753, + "step": 6380 + }, + { + "epoch": 0.85, + "learning_rate": 3.262626762369525e-07, + "logits/chosen": -1.3607814311981201, + "logits/rejected": -1.1445564031600952, + "logps/chosen": -532.5650634765625, + "logps/rejected": -907.86376953125, + "loss": 0.1871, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.307457834482193, + "rewards/margins": 0.30070099234580994, + "rewards/rejected": -0.6081588864326477, + "step": 6390 + }, + { + "epoch": 0.85, + "learning_rate": 3.2053896575809426e-07, + "logits/chosen": -1.4279563426971436, + "logits/rejected": -0.8247480392456055, + "logps/chosen": -432.3768615722656, + "logps/rejected": -800.62841796875, + "loss": 0.1202, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1926819384098053, + "rewards/margins": 0.4079027771949768, + "rewards/rejected": -0.6005846261978149, + "step": 6400 + }, + { + "epoch": 0.85, + "learning_rate": 3.148624659884508e-07, + "logits/chosen": -1.352543592453003, + "logits/rejected": -0.9658149480819702, + "logps/chosen": -481.97955322265625, + "logps/rejected": -777.39208984375, + "loss": 0.1552, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.2425815314054489, + "rewards/margins": 0.30716800689697266, + "rewards/rejected": -0.5497495532035828, + "step": 6410 + }, + { + "epoch": 0.86, + "learning_rate": 3.092332998903416e-07, + "logits/chosen": -1.5252506732940674, + "logits/rejected": -0.8489130735397339, + "logps/chosen": -495.26458740234375, + "logps/rejected": -972.8980712890625, + "loss": 0.0648, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.20235852897167206, + "rewards/margins": 0.48106271028518677, + "rewards/rejected": -0.68342125415802, + "step": 6420 + }, + { + "epoch": 0.86, + "learning_rate": 3.0365158940075664e-07, + "logits/chosen": -1.437496542930603, + "logits/rejected": -1.0414105653762817, + "logps/chosen": -404.98583984375, + "logps/rejected": -708.0098266601562, + "loss": 0.1245, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.17960545420646667, + "rewards/margins": 0.3412194848060608, + "rewards/rejected": -0.5208249092102051, + "step": 6430 + }, + { + "epoch": 0.86, + "learning_rate": 2.981174554287239e-07, + "logits/chosen": -1.4341957569122314, + "logits/rejected": -1.1686787605285645, + "logps/chosen": -483.11883544921875, + "logps/rejected": -777.0546264648438, + "loss": 0.2065, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.24856820702552795, + "rewards/margins": 0.3038688600063324, + "rewards/rejected": -0.5524370670318604, + "step": 6440 + }, + { + "epoch": 0.86, + "learning_rate": 2.9263101785268253e-07, + "logits/chosen": -1.4215772151947021, + "logits/rejected": -0.9602577090263367, + "logps/chosen": -494.32537841796875, + "logps/rejected": -817.6704711914062, + "loss": 0.1312, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.2329108715057373, + "rewards/margins": 0.3553210198879242, + "rewards/rejected": -0.5882318615913391, + "step": 6450 + }, + { + "epoch": 0.86, + "learning_rate": 2.871923955178918e-07, + "logits/chosen": -1.4866185188293457, + "logits/rejected": -0.9591636657714844, + "logps/chosen": -471.3702697753906, + "logps/rejected": -813.4729614257812, + "loss": 0.122, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.23452866077423096, + "rewards/margins": 0.375867635011673, + "rewards/rejected": -0.6103963255882263, + "step": 6460 + }, + { + "epoch": 0.86, + "learning_rate": 2.8180170623385213e-07, + "logits/chosen": -1.8203891515731812, + "logits/rejected": -0.9665926694869995, + "logps/chosen": -494.5782775878906, + "logps/rejected": -841.2796020507812, + "loss": 0.0975, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.18856938183307648, + "rewards/margins": 0.42396125197410583, + "rewards/rejected": -0.6125305891036987, + "step": 6470 + }, + { + "epoch": 0.86, + "learning_rate": 2.764590667717562e-07, + "logits/chosen": -1.4959276914596558, + "logits/rejected": -0.9779335260391235, + "logps/chosen": -551.3182373046875, + "logps/rejected": -888.9775390625, + "loss": 0.1604, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.2843288481235504, + "rewards/margins": 0.3726845383644104, + "rewards/rejected": -0.6570132970809937, + "step": 6480 + }, + { + "epoch": 0.87, + "learning_rate": 2.7116459286195887e-07, + "logits/chosen": -1.21626877784729, + "logits/rejected": -0.9062309265136719, + "logps/chosen": -490.125732421875, + "logps/rejected": -901.1393432617188, + "loss": 0.0989, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.2371952086687088, + "rewards/margins": 0.3920780420303345, + "rewards/rejected": -0.6292732357978821, + "step": 6490 + }, + { + "epoch": 0.87, + "learning_rate": 2.6591839919146963e-07, + "logits/chosen": -1.7364749908447266, + "logits/rejected": -1.07496178150177, + "logps/chosen": -514.854736328125, + "logps/rejected": -843.8689575195312, + "loss": 0.1104, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.18669569492340088, + "rewards/margins": 0.3989318013191223, + "rewards/rejected": -0.5856274962425232, + "step": 6500 + }, + { + "epoch": 0.87, + "learning_rate": 2.6072059940146775e-07, + "logits/chosen": -1.5681116580963135, + "logits/rejected": -1.097745418548584, + "logps/chosen": -456.2774353027344, + "logps/rejected": -841.9134521484375, + "loss": 0.1424, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.21643777191638947, + "rewards/margins": 0.3762553334236145, + "rewards/rejected": -0.592693030834198, + "step": 6510 + }, + { + "epoch": 0.87, + "learning_rate": 2.555713060848433e-07, + "logits/chosen": -1.4545785188674927, + "logits/rejected": -0.9097267389297485, + "logps/chosen": -462.0049743652344, + "logps/rejected": -810.7428588867188, + "loss": 0.1178, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.20860812067985535, + "rewards/margins": 0.3714580833911896, + "rewards/rejected": -0.5800662040710449, + "step": 6520 + }, + { + "epoch": 0.87, + "learning_rate": 2.504706307837551e-07, + "logits/chosen": -1.5105178356170654, + "logits/rejected": -0.9397289156913757, + "logps/chosen": -501.52630615234375, + "logps/rejected": -918.1564331054688, + "loss": 0.0959, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.26715564727783203, + "rewards/margins": 0.4018022119998932, + "rewards/rejected": -0.6689578294754028, + "step": 6530 + }, + { + "epoch": 0.87, + "learning_rate": 2.454186839872158e-07, + "logits/chosen": -1.3464300632476807, + "logits/rejected": -0.8303709030151367, + "logps/chosen": -504.12164306640625, + "logps/rejected": -879.8346557617188, + "loss": 0.1419, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.23833408951759338, + "rewards/margins": 0.40014615654945374, + "rewards/rejected": -0.6384803056716919, + "step": 6540 + }, + { + "epoch": 0.87, + "learning_rate": 2.404155751286988e-07, + "logits/chosen": -1.4623782634735107, + "logits/rejected": -1.1166012287139893, + "logps/chosen": -529.0921630859375, + "logps/rejected": -947.2532958984375, + "loss": 0.1206, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.22969821095466614, + "rewards/margins": 0.4077332019805908, + "rewards/rejected": -0.6374315023422241, + "step": 6550 + }, + { + "epoch": 0.87, + "learning_rate": 2.3546141258376786e-07, + "logits/chosen": -1.5455429553985596, + "logits/rejected": -1.0416425466537476, + "logps/chosen": -476.44134521484375, + "logps/rejected": -872.48779296875, + "loss": 0.1126, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.23007233440876007, + "rewards/margins": 0.4278647005558014, + "rewards/rejected": -0.6579370498657227, + "step": 6560 + }, + { + "epoch": 0.88, + "learning_rate": 2.3055630366772857e-07, + "logits/chosen": -1.398733377456665, + "logits/rejected": -0.8798559904098511, + "logps/chosen": -405.2383728027344, + "logps/rejected": -711.0206909179688, + "loss": 0.1581, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.16562074422836304, + "rewards/margins": 0.34439751505851746, + "rewards/rejected": -0.5100182890892029, + "step": 6570 + }, + { + "epoch": 0.88, + "learning_rate": 2.257003546333042e-07, + "logits/chosen": -1.3985016345977783, + "logits/rejected": -0.7250006794929504, + "logps/chosen": -490.95794677734375, + "logps/rejected": -854.8848876953125, + "loss": 0.0832, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.17378300428390503, + "rewards/margins": 0.46776333451271057, + "rewards/rejected": -0.6415463089942932, + "step": 6580 + }, + { + "epoch": 0.88, + "learning_rate": 2.208936706683351e-07, + "logits/chosen": -1.235718846321106, + "logits/rejected": -0.9032597541809082, + "logps/chosen": -488.0113830566406, + "logps/rejected": -893.2078857421875, + "loss": 0.1355, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.24955153465270996, + "rewards/margins": 0.38928717374801636, + "rewards/rejected": -0.6388388276100159, + "step": 6590 + }, + { + "epoch": 0.88, + "learning_rate": 2.1613635589349756e-07, + "logits/chosen": -1.7433204650878906, + "logits/rejected": -1.158917784690857, + "logps/chosen": -535.4383544921875, + "logps/rejected": -885.5426635742188, + "loss": 0.0958, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1996287703514099, + "rewards/margins": 0.43160730600357056, + "rewards/rejected": -0.6312360763549805, + "step": 6600 + }, + { + "epoch": 0.88, + "learning_rate": 2.1142851336005244e-07, + "logits/chosen": -1.3180363178253174, + "logits/rejected": -0.960915207862854, + "logps/chosen": -549.1765747070312, + "logps/rejected": -933.6243896484375, + "loss": 0.1536, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.25747647881507874, + "rewards/margins": 0.38168302178382874, + "rewards/rejected": -0.6391595005989075, + "step": 6610 + }, + { + "epoch": 0.88, + "learning_rate": 2.0677024504760752e-07, + "logits/chosen": -1.5789819955825806, + "logits/rejected": -1.0533421039581299, + "logps/chosen": -491.7295837402344, + "logps/rejected": -767.828857421875, + "loss": 0.1244, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.2204131782054901, + "rewards/margins": 0.3404414653778076, + "rewards/rejected": -0.5608546137809753, + "step": 6620 + }, + { + "epoch": 0.88, + "learning_rate": 2.0216165186191406e-07, + "logits/chosen": -1.5069531202316284, + "logits/rejected": -0.9445350766181946, + "logps/chosen": -547.0418701171875, + "logps/rejected": -885.5628051757812, + "loss": 0.1331, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.25327160954475403, + "rewards/margins": 0.3645995259284973, + "rewards/rejected": -0.617871105670929, + "step": 6630 + }, + { + "epoch": 0.89, + "learning_rate": 1.9760283363267684e-07, + "logits/chosen": -1.5341289043426514, + "logits/rejected": -0.9979984164237976, + "logps/chosen": -540.775146484375, + "logps/rejected": -982.7828369140625, + "loss": 0.1198, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.2584487795829773, + "rewards/margins": 0.4025515615940094, + "rewards/rejected": -0.6610003709793091, + "step": 6640 + }, + { + "epoch": 0.89, + "learning_rate": 1.9309388911139427e-07, + "logits/chosen": -1.5053001642227173, + "logits/rejected": -0.818169116973877, + "logps/chosen": -558.2037963867188, + "logps/rejected": -870.5218505859375, + "loss": 0.16, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.24096794426441193, + "rewards/margins": 0.3718743324279785, + "rewards/rejected": -0.6128423810005188, + "step": 6650 + }, + { + "epoch": 0.89, + "learning_rate": 1.8863491596921745e-07, + "logits/chosen": -1.5473806858062744, + "logits/rejected": -0.8469358682632446, + "logps/chosen": -522.5948486328125, + "logps/rejected": -878.1281127929688, + "loss": 0.1199, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.1900518834590912, + "rewards/margins": 0.4157015383243561, + "rewards/rejected": -0.6057534217834473, + "step": 6660 + }, + { + "epoch": 0.89, + "learning_rate": 1.8422601079483516e-07, + "logits/chosen": -1.5866916179656982, + "logits/rejected": -0.9989362955093384, + "logps/chosen": -437.7984924316406, + "logps/rejected": -676.7769165039062, + "loss": 0.1731, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.19794021546840668, + "rewards/margins": 0.33342671394348145, + "rewards/rejected": -0.5313669443130493, + "step": 6670 + }, + { + "epoch": 0.89, + "learning_rate": 1.798672690923828e-07, + "logits/chosen": -1.2771915197372437, + "logits/rejected": -0.7800703048706055, + "logps/chosen": -459.9149475097656, + "logps/rejected": -917.9443359375, + "loss": 0.1012, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.20444636046886444, + "rewards/margins": 0.46741732954978943, + "rewards/rejected": -0.6718636751174927, + "step": 6680 + }, + { + "epoch": 0.89, + "learning_rate": 1.7555878527937164e-07, + "logits/chosen": -1.3597838878631592, + "logits/rejected": -0.8564812541007996, + "logps/chosen": -446.06414794921875, + "logps/rejected": -770.828125, + "loss": 0.1859, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.16719356179237366, + "rewards/margins": 0.36050131916999817, + "rewards/rejected": -0.527694821357727, + "step": 6690 + }, + { + "epoch": 0.89, + "learning_rate": 1.713006526846439e-07, + "logits/chosen": -1.417188048362732, + "logits/rejected": -0.9561182260513306, + "logps/chosen": -540.4723510742188, + "logps/rejected": -972.0789794921875, + "loss": 0.0978, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.2296275794506073, + "rewards/margins": 0.42581015825271606, + "rewards/rejected": -0.655437707901001, + "step": 6700 + }, + { + "epoch": 0.89, + "learning_rate": 1.6709296354635335e-07, + "logits/chosen": -1.616417646408081, + "logits/rejected": -0.9375585317611694, + "logps/chosen": -472.4241638183594, + "logps/rejected": -782.0997314453125, + "loss": 0.1564, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.2145848572254181, + "rewards/margins": 0.3725607991218567, + "rewards/rejected": -0.5871456861495972, + "step": 6710 + }, + { + "epoch": 0.9, + "learning_rate": 1.629358090099639e-07, + "logits/chosen": -1.5024985074996948, + "logits/rejected": -0.7005435824394226, + "logps/chosen": -540.7516479492188, + "logps/rejected": -905.38525390625, + "loss": 0.0716, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.21200323104858398, + "rewards/margins": 0.445390522480011, + "rewards/rejected": -0.6573936939239502, + "step": 6720 + }, + { + "epoch": 0.9, + "learning_rate": 1.5882927912627772e-07, + "logits/chosen": -1.2667381763458252, + "logits/rejected": -0.9572645425796509, + "logps/chosen": -498.649169921875, + "logps/rejected": -937.0203247070312, + "loss": 0.088, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.25593119859695435, + "rewards/margins": 0.42086881399154663, + "rewards/rejected": -0.676800012588501, + "step": 6730 + }, + { + "epoch": 0.9, + "learning_rate": 1.5477346284948292e-07, + "logits/chosen": -1.6087108850479126, + "logits/rejected": -0.998091995716095, + "logps/chosen": -511.945068359375, + "logps/rejected": -836.44775390625, + "loss": 0.1134, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.18577376008033752, + "rewards/margins": 0.39662545919418335, + "rewards/rejected": -0.5823992490768433, + "step": 6740 + }, + { + "epoch": 0.9, + "learning_rate": 1.507684480352292e-07, + "logits/chosen": -1.4504854679107666, + "logits/rejected": -0.9381822347640991, + "logps/chosen": -489.9617614746094, + "logps/rejected": -790.9918212890625, + "loss": 0.1524, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.2527294158935547, + "rewards/margins": 0.33207181096076965, + "rewards/rejected": -0.584801197052002, + "step": 6750 + }, + { + "epoch": 0.9, + "learning_rate": 1.4681432143872133e-07, + "logits/chosen": -1.4582185745239258, + "logits/rejected": -1.1294184923171997, + "logps/chosen": -485.6172790527344, + "logps/rejected": -890.8049926757812, + "loss": 0.1488, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.25202372670173645, + "rewards/margins": 0.3751962184906006, + "rewards/rejected": -0.6272198557853699, + "step": 6760 + }, + { + "epoch": 0.9, + "learning_rate": 1.4291116871284205e-07, + "logits/chosen": -1.5899076461791992, + "logits/rejected": -0.7880529761314392, + "logps/chosen": -546.0626831054688, + "logps/rejected": -846.9996337890625, + "loss": 0.1414, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.25682902336120605, + "rewards/margins": 0.38425880670547485, + "rewards/rejected": -0.6410877704620361, + "step": 6770 + }, + { + "epoch": 0.9, + "learning_rate": 1.3905907440629752e-07, + "logits/chosen": -1.5304118394851685, + "logits/rejected": -1.0145976543426514, + "logps/chosen": -461.466552734375, + "logps/rejected": -848.7557373046875, + "loss": 0.1288, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.18898475170135498, + "rewards/margins": 0.4102934002876282, + "rewards/rejected": -0.5992781519889832, + "step": 6780 + }, + { + "epoch": 0.91, + "learning_rate": 1.352581219617824e-07, + "logits/chosen": -1.5305770635604858, + "logits/rejected": -1.2398301362991333, + "logps/chosen": -483.97308349609375, + "logps/rejected": -866.2571411132812, + "loss": 0.1351, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.23582473397254944, + "rewards/margins": 0.36082929372787476, + "rewards/rejected": -0.5966540575027466, + "step": 6790 + }, + { + "epoch": 0.91, + "learning_rate": 1.31508393714177e-07, + "logits/chosen": -1.4465588331222534, + "logits/rejected": -0.9440323114395142, + "logps/chosen": -428.13958740234375, + "logps/rejected": -710.9305419921875, + "loss": 0.1242, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.20407938957214355, + "rewards/margins": 0.34757596254348755, + "rewards/rejected": -0.5516553521156311, + "step": 6800 + }, + { + "epoch": 0.91, + "learning_rate": 1.278099708887587e-07, + "logits/chosen": -1.4667365550994873, + "logits/rejected": -1.0703423023223877, + "logps/chosen": -468.34747314453125, + "logps/rejected": -818.04052734375, + "loss": 0.1332, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.20959529280662537, + "rewards/margins": 0.36165666580200195, + "rewards/rejected": -0.5712519884109497, + "step": 6810 + }, + { + "epoch": 0.91, + "learning_rate": 1.241629335994471e-07, + "logits/chosen": -1.7341434955596924, + "logits/rejected": -1.1056764125823975, + "logps/chosen": -533.9725341796875, + "logps/rejected": -906.0426025390625, + "loss": 0.1348, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.25106552243232727, + "rewards/margins": 0.40419626235961914, + "rewards/rejected": -0.655261754989624, + "step": 6820 + }, + { + "epoch": 0.91, + "learning_rate": 1.2056736084706588e-07, + "logits/chosen": -1.7266349792480469, + "logits/rejected": -0.8465207815170288, + "logps/chosen": -586.2349853515625, + "logps/rejected": -901.1331176757812, + "loss": 0.0947, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1905709058046341, + "rewards/margins": 0.43153637647628784, + "rewards/rejected": -0.6221072673797607, + "step": 6830 + }, + { + "epoch": 0.91, + "learning_rate": 1.1702333051763271e-07, + "logits/chosen": -1.5130211114883423, + "logits/rejected": -0.8182242512702942, + "logps/chosen": -551.0518798828125, + "logps/rejected": -881.6070556640625, + "loss": 0.1177, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.23995597660541534, + "rewards/margins": 0.41696739196777344, + "rewards/rejected": -0.6569232940673828, + "step": 6840 + }, + { + "epoch": 0.91, + "learning_rate": 1.1353091938067024e-07, + "logits/chosen": -1.4435842037200928, + "logits/rejected": -0.8062426447868347, + "logps/chosen": -483.5755920410156, + "logps/rejected": -886.0081176757812, + "loss": 0.115, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.21369609236717224, + "rewards/margins": 0.4300483763217926, + "rewards/rejected": -0.6437444090843201, + "step": 6850 + }, + { + "epoch": 0.91, + "learning_rate": 1.1009020308754587e-07, + "logits/chosen": -1.1936941146850586, + "logits/rejected": -0.9400952458381653, + "logps/chosen": -454.123046875, + "logps/rejected": -839.8204345703125, + "loss": 0.1241, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.23472794890403748, + "rewards/margins": 0.3685305714607239, + "rewards/rejected": -0.603258490562439, + "step": 6860 + }, + { + "epoch": 0.92, + "learning_rate": 1.067012561698319e-07, + "logits/chosen": -1.328115701675415, + "logits/rejected": -1.104390025138855, + "logps/chosen": -499.9703063964844, + "logps/rejected": -967.9503784179688, + "loss": 0.1244, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.2539631724357605, + "rewards/margins": 0.43037325143814087, + "rewards/rejected": -0.6843363642692566, + "step": 6870 + }, + { + "epoch": 0.92, + "learning_rate": 1.0336415203768962e-07, + "logits/chosen": -1.4963847398757935, + "logits/rejected": -0.7440763711929321, + "logps/chosen": -537.2203979492188, + "logps/rejected": -843.69677734375, + "loss": 0.095, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.26004061102867126, + "rewards/margins": 0.40331918001174927, + "rewards/rejected": -0.6633597612380981, + "step": 6880 + }, + { + "epoch": 0.92, + "learning_rate": 1.0007896297828113e-07, + "logits/chosen": -1.5366075038909912, + "logits/rejected": -0.9819987416267395, + "logps/chosen": -506.972412109375, + "logps/rejected": -927.7294921875, + "loss": 0.1008, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.19697031378746033, + "rewards/margins": 0.4403607249259949, + "rewards/rejected": -0.6373311281204224, + "step": 6890 + }, + { + "epoch": 0.92, + "learning_rate": 9.684576015420277e-08, + "logits/chosen": -1.3810759782791138, + "logits/rejected": -0.8703498840332031, + "logps/chosen": -416.44244384765625, + "logps/rejected": -860.0406494140625, + "loss": 0.1008, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.1907399594783783, + "rewards/margins": 0.4118443429470062, + "rewards/rejected": -0.6025842428207397, + "step": 6900 + }, + { + "epoch": 0.92, + "learning_rate": 9.36646136019434e-08, + "logits/chosen": -1.2979885339736938, + "logits/rejected": -0.9057960510253906, + "logps/chosen": -505.91644287109375, + "logps/rejected": -838.0546875, + "loss": 0.133, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.23805785179138184, + "rewards/margins": 0.3697962164878845, + "rewards/rejected": -0.6078540086746216, + "step": 6910 + }, + { + "epoch": 0.92, + "learning_rate": 9.053559223036746e-08, + "logits/chosen": -1.4613134860992432, + "logits/rejected": -0.976353645324707, + "logps/chosen": -489.4410095214844, + "logps/rejected": -829.4075317382812, + "loss": 0.1649, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.24528256058692932, + "rewards/margins": 0.33869272470474243, + "rewards/rejected": -0.5839753746986389, + "step": 6920 + }, + { + "epoch": 0.92, + "learning_rate": 8.745876381922147e-08, + "logits/chosen": -1.5626704692840576, + "logits/rejected": -0.9608923196792603, + "logps/chosen": -494.12286376953125, + "logps/rejected": -1001.3294677734375, + "loss": 0.0784, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.2240646630525589, + "rewards/margins": 0.47084444761276245, + "rewards/rejected": -0.6949091553688049, + "step": 6930 + }, + { + "epoch": 0.93, + "learning_rate": 8.44341950176683e-08, + "logits/chosen": -1.4550175666809082, + "logits/rejected": -1.1943072080612183, + "logps/chosen": -452.69049072265625, + "logps/rejected": -897.54150390625, + "loss": 0.1415, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.23807552456855774, + "rewards/margins": 0.3828004002571106, + "rewards/rejected": -0.620875895023346, + "step": 6940 + }, + { + "epoch": 0.93, + "learning_rate": 8.146195134284052e-08, + "logits/chosen": -1.3164992332458496, + "logits/rejected": -0.8121700286865234, + "logps/chosen": -574.8497314453125, + "logps/rejected": -928.1170654296875, + "loss": 0.1265, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.2704790532588959, + "rewards/margins": 0.3813510537147522, + "rewards/rejected": -0.6518300771713257, + "step": 6950 + }, + { + "epoch": 0.93, + "learning_rate": 7.854209717842231e-08, + "logits/chosen": -1.461186408996582, + "logits/rejected": -1.1725343465805054, + "logps/chosen": -477.5694885253906, + "logps/rejected": -870.6781005859375, + "loss": 0.1222, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.24190500378608704, + "rewards/margins": 0.35992521047592163, + "rewards/rejected": -0.601830244064331, + "step": 6960 + }, + { + "epoch": 0.93, + "learning_rate": 7.567469577325598e-08, + "logits/chosen": -1.387152910232544, + "logits/rejected": -0.9511371850967407, + "logps/chosen": -476.19635009765625, + "logps/rejected": -967.412109375, + "loss": 0.095, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.20667386054992676, + "rewards/margins": 0.4530416429042816, + "rewards/rejected": -0.659715473651886, + "step": 6970 + }, + { + "epoch": 0.93, + "learning_rate": 7.285980923996989e-08, + "logits/chosen": -1.49413001537323, + "logits/rejected": -0.7625577449798584, + "logps/chosen": -521.863037109375, + "logps/rejected": -931.6103515625, + "loss": 0.0948, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.22189244627952576, + "rewards/margins": 0.451913982629776, + "rewards/rejected": -0.6738064885139465, + "step": 6980 + }, + { + "epoch": 0.93, + "learning_rate": 7.009749855363457e-08, + "logits/chosen": -1.3591458797454834, + "logits/rejected": -0.9288158416748047, + "logps/chosen": -439.74993896484375, + "logps/rejected": -842.5948486328125, + "loss": 0.1019, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.21595802903175354, + "rewards/margins": 0.42482906579971313, + "rewards/rejected": -0.6407870650291443, + "step": 6990 + }, + { + "epoch": 0.93, + "learning_rate": 6.738782355044048e-08, + "logits/chosen": -1.5925794839859009, + "logits/rejected": -0.8554970026016235, + "logps/chosen": -559.6893310546875, + "logps/rejected": -811.5947265625, + "loss": 0.1832, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.2499537169933319, + "rewards/margins": 0.3769868314266205, + "rewards/rejected": -0.6269404888153076, + "step": 7000 + }, + { + "epoch": 0.93, + "learning_rate": 6.47308429264032e-08, + "logits/chosen": -1.5308606624603271, + "logits/rejected": -1.0868406295776367, + "logps/chosen": -365.6773681640625, + "logps/rejected": -676.6890258789062, + "loss": 0.1413, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.15961240231990814, + "rewards/margins": 0.33673742413520813, + "rewards/rejected": -0.4963498115539551, + "step": 7010 + }, + { + "epoch": 0.94, + "learning_rate": 6.212661423609184e-08, + "logits/chosen": -1.453504204750061, + "logits/rejected": -1.0217665433883667, + "logps/chosen": -449.70880126953125, + "logps/rejected": -719.7049560546875, + "loss": 0.1692, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.19372782111167908, + "rewards/margins": 0.3215334117412567, + "rewards/rejected": -0.5152612924575806, + "step": 7020 + }, + { + "epoch": 0.94, + "learning_rate": 5.957519389138106e-08, + "logits/chosen": -1.5276187658309937, + "logits/rejected": -0.8919947743415833, + "logps/chosen": -510.0636291503906, + "logps/rejected": -823.1393432617188, + "loss": 0.1469, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.2167677879333496, + "rewards/margins": 0.38943716883659363, + "rewards/rejected": -0.6062048673629761, + "step": 7030 + }, + { + "epoch": 0.94, + "learning_rate": 5.707663716023021e-08, + "logits/chosen": -1.3801076412200928, + "logits/rejected": -0.7963376045227051, + "logps/chosen": -459.43792724609375, + "logps/rejected": -859.9181518554688, + "loss": 0.0769, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.19771870970726013, + "rewards/margins": 0.44179147481918335, + "rewards/rejected": -0.6395102739334106, + "step": 7040 + }, + { + "epoch": 0.94, + "learning_rate": 5.463099816548578e-08, + "logits/chosen": -1.586732268333435, + "logits/rejected": -1.0025181770324707, + "logps/chosen": -429.3204650878906, + "logps/rejected": -764.2335205078125, + "loss": 0.1221, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.21364133059978485, + "rewards/margins": 0.3726021349430084, + "rewards/rejected": -0.5862435102462769, + "step": 7050 + }, + { + "epoch": 0.94, + "learning_rate": 5.22383298837098e-08, + "logits/chosen": -1.5779712200164795, + "logits/rejected": -1.0023791790008545, + "logps/chosen": -552.7449340820312, + "logps/rejected": -950.6932373046875, + "loss": 0.1082, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.22625701129436493, + "rewards/margins": 0.39922064542770386, + "rewards/rejected": -0.62547767162323, + "step": 7060 + }, + { + "epoch": 0.94, + "learning_rate": 4.989868414403048e-08, + "logits/chosen": -1.1448707580566406, + "logits/rejected": -0.7802811861038208, + "logps/chosen": -524.0235595703125, + "logps/rejected": -941.0114135742188, + "loss": 0.1014, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.24730829894542694, + "rewards/margins": 0.39939436316490173, + "rewards/rejected": -0.6467026472091675, + "step": 7070 + }, + { + "epoch": 0.94, + "learning_rate": 4.761211162702117e-08, + "logits/chosen": -1.6764507293701172, + "logits/rejected": -0.8461052179336548, + "logps/chosen": -512.4414672851562, + "logps/rejected": -812.2349853515625, + "loss": 0.1489, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.23166947066783905, + "rewards/margins": 0.38821929693222046, + "rewards/rejected": -0.6198887825012207, + "step": 7080 + }, + { + "epoch": 0.95, + "learning_rate": 4.537866186360207e-08, + "logits/chosen": -1.4507992267608643, + "logits/rejected": -1.1204394102096558, + "logps/chosen": -492.83172607421875, + "logps/rejected": -920.1160278320312, + "loss": 0.1301, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.22511033713817596, + "rewards/margins": 0.38674965500831604, + "rewards/rejected": -0.6118600368499756, + "step": 7090 + }, + { + "epoch": 0.95, + "learning_rate": 4.319838323396691e-08, + "logits/chosen": -1.390995740890503, + "logits/rejected": -0.9562789797782898, + "logps/chosen": -543.2960205078125, + "logps/rejected": -980.0902099609375, + "loss": 0.1237, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.29335081577301025, + "rewards/margins": 0.4251405596733093, + "rewards/rejected": -0.7184914350509644, + "step": 7100 + }, + { + "epoch": 0.95, + "learning_rate": 4.1071322966535487e-08, + "logits/chosen": -1.4655487537384033, + "logits/rejected": -1.0551692247390747, + "logps/chosen": -470.721923828125, + "logps/rejected": -880.9104614257812, + "loss": 0.1372, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.20322676002979279, + "rewards/margins": 0.4107723832130432, + "rewards/rejected": -0.6139991283416748, + "step": 7110 + }, + { + "epoch": 0.95, + "learning_rate": 3.8997527136930004e-08, + "logits/chosen": -1.5086078643798828, + "logits/rejected": -0.8975256085395813, + "logps/chosen": -532.8182373046875, + "logps/rejected": -971.3642578125, + "loss": 0.0899, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.2515001893043518, + "rewards/margins": 0.44144564867019653, + "rewards/rejected": -0.6929458379745483, + "step": 7120 + }, + { + "epoch": 0.95, + "learning_rate": 3.6977040666977546e-08, + "logits/chosen": -1.493403673171997, + "logits/rejected": -1.049023985862732, + "logps/chosen": -445.50396728515625, + "logps/rejected": -776.2445068359375, + "loss": 0.1427, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.17905649542808533, + "rewards/margins": 0.3584365248680115, + "rewards/rejected": -0.5374930500984192, + "step": 7130 + }, + { + "epoch": 0.95, + "learning_rate": 3.5009907323737826e-08, + "logits/chosen": -1.2035696506500244, + "logits/rejected": -0.8543485403060913, + "logps/chosen": -412.0126953125, + "logps/rejected": -854.7228393554688, + "loss": 0.1275, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.21920505166053772, + "rewards/margins": 0.3983645737171173, + "rewards/rejected": -0.617569625377655, + "step": 7140 + }, + { + "epoch": 0.95, + "learning_rate": 3.309616971855195e-08, + "logits/chosen": -1.5766270160675049, + "logits/rejected": -0.9550241231918335, + "logps/chosen": -462.1172790527344, + "logps/rejected": -746.0142822265625, + "loss": 0.1635, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.21523483097553253, + "rewards/margins": 0.35420528054237366, + "rewards/rejected": -0.5694400668144226, + "step": 7150 + }, + { + "epoch": 0.95, + "learning_rate": 3.1235869306123766e-08, + "logits/chosen": -1.4890674352645874, + "logits/rejected": -0.964741051197052, + "logps/chosen": -499.34759521484375, + "logps/rejected": -769.0845947265625, + "loss": 0.1513, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.20712587237358093, + "rewards/margins": 0.35121747851371765, + "rewards/rejected": -0.5583433508872986, + "step": 7160 + }, + { + "epoch": 0.96, + "learning_rate": 2.9429046383618042e-08, + "logits/chosen": -1.4300627708435059, + "logits/rejected": -0.9817788004875183, + "logps/chosen": -498.69268798828125, + "logps/rejected": -900.4196166992188, + "loss": 0.1174, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.256017804145813, + "rewards/margins": 0.41507309675216675, + "rewards/rejected": -0.6710909605026245, + "step": 7170 + }, + { + "epoch": 0.96, + "learning_rate": 2.767574008979007e-08, + "logits/chosen": -1.3318694829940796, + "logits/rejected": -0.6323962211608887, + "logps/chosen": -522.8182373046875, + "logps/rejected": -926.5597534179688, + "loss": 0.0815, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.21063697338104248, + "rewards/margins": 0.46514520049095154, + "rewards/rejected": -0.6757822036743164, + "step": 7180 + }, + { + "epoch": 0.96, + "learning_rate": 2.59759884041369e-08, + "logits/chosen": -1.3710944652557373, + "logits/rejected": -0.9549352526664734, + "logps/chosen": -547.1004028320312, + "logps/rejected": -943.3342895507812, + "loss": 0.1506, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.2388937771320343, + "rewards/margins": 0.4177270531654358, + "rewards/rejected": -0.6566208600997925, + "step": 7190 + }, + { + "epoch": 0.96, + "learning_rate": 2.4329828146074096e-08, + "logits/chosen": -1.5238111019134521, + "logits/rejected": -0.9249058961868286, + "logps/chosen": -562.8206787109375, + "logps/rejected": -878.2859497070312, + "loss": 0.0809, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.25880998373031616, + "rewards/margins": 0.38135746121406555, + "rewards/rejected": -0.6401674151420593, + "step": 7200 + }, + { + "epoch": 0.96, + "learning_rate": 2.2737294974140013e-08, + "logits/chosen": -1.4199336767196655, + "logits/rejected": -0.930211067199707, + "logps/chosen": -514.1971435546875, + "logps/rejected": -947.0245361328125, + "loss": 0.0964, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.24326589703559875, + "rewards/margins": 0.4398914873600006, + "rewards/rejected": -0.6831573843955994, + "step": 7210 + }, + { + "epoch": 0.96, + "learning_rate": 2.1198423385220822e-08, + "logits/chosen": -1.3293390274047852, + "logits/rejected": -1.005180835723877, + "logps/chosen": -457.1399841308594, + "logps/rejected": -823.82470703125, + "loss": 0.1442, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.26542821526527405, + "rewards/margins": 0.3484458923339844, + "rewards/rejected": -0.613874077796936, + "step": 7220 + }, + { + "epoch": 0.96, + "learning_rate": 1.9713246713805588e-08, + "logits/chosen": -1.344305396080017, + "logits/rejected": -0.8286038637161255, + "logps/chosen": -508.82000732421875, + "logps/rejected": -935.9703979492188, + "loss": 0.113, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.26535990834236145, + "rewards/margins": 0.40891551971435547, + "rewards/rejected": -0.6742754578590393, + "step": 7230 + }, + { + "epoch": 0.97, + "learning_rate": 1.82817971312621e-08, + "logits/chosen": -1.4602086544036865, + "logits/rejected": -1.0767791271209717, + "logps/chosen": -545.56787109375, + "logps/rejected": -914.0446166992188, + "loss": 0.139, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.23958563804626465, + "rewards/margins": 0.3950881361961365, + "rewards/rejected": -0.6346737146377563, + "step": 7240 + }, + { + "epoch": 0.97, + "learning_rate": 1.6904105645142443e-08, + "logits/chosen": -1.5792304277420044, + "logits/rejected": -0.8432053327560425, + "logps/chosen": -544.8038330078125, + "logps/rejected": -911.0470581054688, + "loss": 0.104, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.21954479813575745, + "rewards/margins": 0.4037550985813141, + "rewards/rejected": -0.6232999563217163, + "step": 7250 + }, + { + "epoch": 0.97, + "learning_rate": 1.5580202098509078e-08, + "logits/chosen": -1.454495906829834, + "logits/rejected": -1.0603179931640625, + "logps/chosen": -511.02783203125, + "logps/rejected": -814.5255126953125, + "loss": 0.1394, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.2312597781419754, + "rewards/margins": 0.3558243215084076, + "rewards/rejected": -0.587084174156189, + "step": 7260 + }, + { + "epoch": 0.97, + "learning_rate": 1.4310115169289263e-08, + "logits/chosen": -1.4957376718521118, + "logits/rejected": -0.9885585904121399, + "logps/chosen": -570.3341064453125, + "logps/rejected": -943.06005859375, + "loss": 0.1166, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.24677403271198273, + "rewards/margins": 0.3916296362876892, + "rewards/rejected": -0.6384036540985107, + "step": 7270 + }, + { + "epoch": 0.97, + "learning_rate": 1.3093872369654148e-08, + "logits/chosen": -1.4875476360321045, + "logits/rejected": -0.9839875102043152, + "logps/chosen": -489.46600341796875, + "logps/rejected": -848.6428833007812, + "loss": 0.1694, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.23207266628742218, + "rewards/margins": 0.35699766874313354, + "rewards/rejected": -0.5890703201293945, + "step": 7280 + }, + { + "epoch": 0.97, + "learning_rate": 1.193150004542204e-08, + "logits/chosen": -1.609070062637329, + "logits/rejected": -0.6910431385040283, + "logps/chosen": -544.7098999023438, + "logps/rejected": -837.7176513671875, + "loss": 0.1248, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.18862029910087585, + "rewards/margins": 0.427751362323761, + "rewards/rejected": -0.6163716316223145, + "step": 7290 + }, + { + "epoch": 0.97, + "learning_rate": 1.0823023375489128e-08, + "logits/chosen": -1.5001671314239502, + "logits/rejected": -1.0567773580551147, + "logps/chosen": -374.1795349121094, + "logps/rejected": -784.8349609375, + "loss": 0.1002, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.16520938277244568, + "rewards/margins": 0.4134485125541687, + "rewards/rejected": -0.5786579847335815, + "step": 7300 + }, + { + "epoch": 0.97, + "learning_rate": 9.76846637128187e-09, + "logits/chosen": -1.4122244119644165, + "logits/rejected": -1.0274155139923096, + "logps/chosen": -451.5098571777344, + "logps/rejected": -796.8303833007812, + "loss": 0.1209, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.2220574915409088, + "rewards/margins": 0.363582968711853, + "rewards/rejected": -0.5856404304504395, + "step": 7310 + }, + { + "epoch": 0.98, + "learning_rate": 8.767851876239075e-09, + "logits/chosen": -1.361480951309204, + "logits/rejected": -1.0346735715866089, + "logps/chosen": -491.0078125, + "logps/rejected": -834.4171752929688, + "loss": 0.1703, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.2649536728858948, + "rewards/margins": 0.34156960248947144, + "rewards/rejected": -0.6065232753753662, + "step": 7320 + }, + { + "epoch": 0.98, + "learning_rate": 7.821201565316184e-09, + "logits/chosen": -1.4470902681350708, + "logits/rejected": -0.6657289862632751, + "logps/chosen": -558.2814331054688, + "logps/rejected": -830.0985107421875, + "loss": 0.1877, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.23699286580085754, + "rewards/margins": 0.38297975063323975, + "rewards/rejected": -0.6199725866317749, + "step": 7330 + }, + { + "epoch": 0.98, + "learning_rate": 6.9285359445145366e-09, + "logits/chosen": -1.2579832077026367, + "logits/rejected": -0.8175121545791626, + "logps/chosen": -511.2633361816406, + "logps/rejected": -880.8670654296875, + "loss": 0.0764, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.23451602458953857, + "rewards/margins": 0.411950021982193, + "rewards/rejected": -0.6464659571647644, + "step": 7340 + }, + { + "epoch": 0.98, + "learning_rate": 6.089874350439507e-09, + "logits/chosen": -1.3694077730178833, + "logits/rejected": -0.9959748983383179, + "logps/chosen": -499.08477783203125, + "logps/rejected": -932.9157104492188, + "loss": 0.1181, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.22797970473766327, + "rewards/margins": 0.44914698600769043, + "rewards/rejected": -0.6771267056465149, + "step": 7350 + }, + { + "epoch": 0.98, + "learning_rate": 5.305234949880001e-09, + "logits/chosen": -1.6984233856201172, + "logits/rejected": -1.1672899723052979, + "logps/chosen": -479.700927734375, + "logps/rejected": -907.1622924804688, + "loss": 0.1115, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.20574238896369934, + "rewards/margins": 0.4537379741668701, + "rewards/rejected": -0.6594803333282471, + "step": 7360 + }, + { + "epoch": 0.98, + "learning_rate": 4.57463473941544e-09, + "logits/chosen": -1.5077688694000244, + "logits/rejected": -0.8707934617996216, + "logps/chosen": -503.54412841796875, + "logps/rejected": -910.8088989257812, + "loss": 0.1117, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.2274107038974762, + "rewards/margins": 0.4351657032966614, + "rewards/rejected": -0.6625763773918152, + "step": 7370 + }, + { + "epoch": 0.98, + "learning_rate": 3.8980895450474455e-09, + "logits/chosen": -1.5453494787216187, + "logits/rejected": -0.8584077954292297, + "logps/chosen": -512.6802368164062, + "logps/rejected": -945.4680786132812, + "loss": 0.1014, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.20711109042167664, + "rewards/margins": 0.4724665582180023, + "rewards/rejected": -0.6795775294303894, + "step": 7380 + }, + { + "epoch": 0.99, + "learning_rate": 3.275614021857609e-09, + "logits/chosen": -1.203957200050354, + "logits/rejected": -0.789203405380249, + "logps/chosen": -470.5957946777344, + "logps/rejected": -899.1009521484375, + "loss": 0.1503, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.24380306899547577, + "rewards/margins": 0.42183107137680054, + "rewards/rejected": -0.6656340956687927, + "step": 7390 + }, + { + "epoch": 0.99, + "learning_rate": 2.7072216536885855e-09, + "logits/chosen": -1.386768102645874, + "logits/rejected": -0.9607473611831665, + "logps/chosen": -449.33380126953125, + "logps/rejected": -835.1878662109375, + "loss": 0.114, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.1930471509695053, + "rewards/margins": 0.3860008716583252, + "rewards/rejected": -0.5790480375289917, + "step": 7400 + }, + { + "epoch": 0.99, + "learning_rate": 2.192924752854042e-09, + "logits/chosen": -1.329189658164978, + "logits/rejected": -0.7284063100814819, + "logps/chosen": -495.65350341796875, + "logps/rejected": -949.3099365234375, + "loss": 0.1467, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.2535052001476288, + "rewards/margins": 0.43782129883766174, + "rewards/rejected": -0.6913265585899353, + "step": 7410 + }, + { + "epoch": 0.99, + "learning_rate": 1.7327344598702667e-09, + "logits/chosen": -1.5910580158233643, + "logits/rejected": -0.756806492805481, + "logps/chosen": -580.9312744140625, + "logps/rejected": -942.6422119140625, + "loss": 0.0622, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.2450539618730545, + "rewards/margins": 0.4502308964729309, + "rewards/rejected": -0.6952848434448242, + "step": 7420 + }, + { + "epoch": 0.99, + "learning_rate": 1.3266607432155243e-09, + "logits/chosen": -1.3756208419799805, + "logits/rejected": -0.8615263104438782, + "logps/chosen": -461.8587341308594, + "logps/rejected": -778.1923828125, + "loss": 0.1286, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.21157710254192352, + "rewards/margins": 0.33319228887557983, + "rewards/rejected": -0.5447694063186646, + "step": 7430 + }, + { + "epoch": 0.99, + "learning_rate": 9.747123991141193e-10, + "logits/chosen": -1.3460959196090698, + "logits/rejected": -0.6247268915176392, + "logps/chosen": -542.8588256835938, + "logps/rejected": -933.49609375, + "loss": 0.0912, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.24301931262016296, + "rewards/margins": 0.43395286798477173, + "rewards/rejected": -0.6769722104072571, + "step": 7440 + }, + { + "epoch": 0.99, + "learning_rate": 6.768970513457151e-10, + "logits/chosen": -1.4757121801376343, + "logits/rejected": -1.0189615488052368, + "logps/chosen": -477.2865295410156, + "logps/rejected": -878.6105346679688, + "loss": 0.1054, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.20012429356575012, + "rewards/margins": 0.4070053696632385, + "rewards/rejected": -0.607129693031311, + "step": 7450 + }, + { + "epoch": 0.99, + "learning_rate": 4.332211510807427e-10, + "logits/chosen": -1.508866548538208, + "logits/rejected": -1.2509024143218994, + "logps/chosen": -491.2704162597656, + "logps/rejected": -841.8795776367188, + "loss": 0.1706, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.2753935754299164, + "rewards/margins": 0.3065844178199768, + "rewards/rejected": -0.5819779634475708, + "step": 7460 + }, + { + "epoch": 1.0, + "learning_rate": 2.43689976739403e-10, + "logits/chosen": -1.3344948291778564, + "logits/rejected": -1.0201300382614136, + "logps/chosen": -427.71990966796875, + "logps/rejected": -858.7521362304688, + "loss": 0.1427, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.1936722844839096, + "rewards/margins": 0.4351615011692047, + "rewards/rejected": -0.6288337707519531, + "step": 7470 + }, + { + "epoch": 1.0, + "learning_rate": 1.0830763387897902e-10, + "logits/chosen": -1.5639954805374146, + "logits/rejected": -1.0077507495880127, + "logps/chosen": -418.7438049316406, + "logps/rejected": -676.6524047851562, + "loss": 0.1722, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1520642638206482, + "rewards/margins": 0.3314986228942871, + "rewards/rejected": -0.4835628569126129, + "step": 7480 + }, + { + "epoch": 1.0, + "learning_rate": 2.7077055103075233e-11, + "logits/chosen": -1.5404798984527588, + "logits/rejected": -1.0517535209655762, + "logps/chosen": -540.0816650390625, + "logps/rejected": -884.8899536132812, + "loss": 0.0985, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.2020460069179535, + "rewards/margins": 0.3753736615180969, + "rewards/rejected": -0.5774196982383728, + "step": 7490 + }, + { + "epoch": 1.0, + "learning_rate": 0.0, + "logits/chosen": -1.3304396867752075, + "logits/rejected": -0.7842223048210144, + "logps/chosen": -452.8385314941406, + "logps/rejected": -821.3699340820312, + "loss": 0.1491, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.18334725499153137, + "rewards/margins": 0.4060141444206238, + "rewards/rejected": -0.5893615484237671, + "step": 7500 + }, + { + "epoch": 1.0, + "step": 7500, + "total_flos": 0.0, + "train_loss": 0.134382330707709, + "train_runtime": 31432.8555, + "train_samples_per_second": 0.954, + "train_steps_per_second": 0.239 + } + ], + "logging_steps": 10, + "max_steps": 7500, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 100, + "total_flos": 0.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +}