{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.9781818181818185, "eval_steps": 100, "global_step": 822, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 6.024096385542168e-09, "logits/generated": 3.8738255500793457, "logits/real": 3.7310919761657715, "logps/generated": -273.69012451171875, "logps/real": -2095.990478515625, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/generated": 0.0, "rewards/margins": 0.0, "rewards/real": 0.0, "step": 1 }, { "epoch": 0.07, "learning_rate": 6.024096385542168e-08, "logits/generated": 4.126138210296631, "logits/real": 3.344090700149536, "logps/generated": -323.2333984375, "logps/real": -1827.9246826171875, "loss": 0.6567, "rewards/accuracies": 0.6666666865348816, "rewards/generated": -0.010428529232740402, "rewards/margins": 0.10560464859008789, "rewards/real": 0.09517612308263779, "step": 10 }, { "epoch": 0.15, "learning_rate": 1.2048192771084337e-07, "logits/generated": 4.006103038787842, "logits/real": 3.526198625564575, "logps/generated": -331.4546813964844, "logps/real": -1660.641845703125, "loss": 0.3555, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -0.22063815593719482, "rewards/margins": 1.044826626777649, "rewards/real": 0.8241884112358093, "step": 20 }, { "epoch": 0.22, "learning_rate": 1.8072289156626505e-07, "logits/generated": 3.7952888011932373, "logits/real": 3.5554428100585938, "logps/generated": -326.00189208984375, "logps/real": -1852.1021728515625, "loss": 0.1915, "rewards/accuracies": 0.987500011920929, "rewards/generated": -0.43949466943740845, "rewards/margins": 2.4316487312316895, "rewards/real": 1.9921543598175049, "step": 30 }, { "epoch": 0.29, "learning_rate": 2.4096385542168674e-07, "logits/generated": 4.1279377937316895, "logits/real": 3.288907289505005, "logps/generated": -308.86260986328125, "logps/real": -1874.0267333984375, "loss": 0.1164, "rewards/accuracies": 1.0, "rewards/generated": -0.8577774167060852, "rewards/margins": 3.3216171264648438, "rewards/real": 2.463839530944824, "step": 40 }, { "epoch": 0.36, "learning_rate": 3.0120481927710845e-07, "logits/generated": 3.6804466247558594, "logits/real": 3.5043816566467285, "logps/generated": -304.4783630371094, "logps/real": -1555.8642578125, "loss": 0.077, "rewards/accuracies": 1.0, "rewards/generated": -1.176033616065979, "rewards/margins": 3.921290636062622, "rewards/real": 2.7452569007873535, "step": 50 }, { "epoch": 0.44, "learning_rate": 3.614457831325301e-07, "logits/generated": 3.985076904296875, "logits/real": 3.3642334938049316, "logps/generated": -345.8163757324219, "logps/real": -1842.0394287109375, "loss": 0.0535, "rewards/accuracies": 1.0, "rewards/generated": -1.6643917560577393, "rewards/margins": 5.579442024230957, "rewards/real": 3.915050506591797, "step": 60 }, { "epoch": 0.51, "learning_rate": 4.216867469879518e-07, "logits/generated": 3.898547410964966, "logits/real": 3.3067703247070312, "logps/generated": -300.430908203125, "logps/real": -1726.6646728515625, "loss": 0.0381, "rewards/accuracies": 1.0, "rewards/generated": -1.796960473060608, "rewards/margins": 5.964465618133545, "rewards/real": 4.167504787445068, "step": 70 }, { "epoch": 0.58, "learning_rate": 4.819277108433735e-07, "logits/generated": 4.144007682800293, "logits/real": 3.311328887939453, "logps/generated": -345.77728271484375, "logps/real": -1900.102294921875, "loss": 0.027, "rewards/accuracies": 1.0, "rewards/generated": -2.181682825088501, "rewards/margins": 7.163342475891113, "rewards/real": 4.981659889221191, "step": 80 }, { "epoch": 0.65, "learning_rate": 4.952638700947226e-07, "logits/generated": 4.0979204177856445, "logits/real": 2.9679250717163086, "logps/generated": -335.7123718261719, "logps/real": -2001.6461181640625, "loss": 0.0217, "rewards/accuracies": 1.0, "rewards/generated": -2.178663492202759, "rewards/margins": 8.057744979858398, "rewards/real": 5.879080295562744, "step": 90 }, { "epoch": 0.73, "learning_rate": 4.884979702300406e-07, "logits/generated": 4.0455756187438965, "logits/real": 3.4950740337371826, "logps/generated": -319.15447998046875, "logps/real": -1759.0865478515625, "loss": 0.0234, "rewards/accuracies": 1.0, "rewards/generated": -2.3023428916931152, "rewards/margins": 7.8619794845581055, "rewards/real": 5.559636116027832, "step": 100 }, { "epoch": 0.8, "learning_rate": 4.817320703653586e-07, "logits/generated": 3.7221503257751465, "logits/real": 3.437182903289795, "logps/generated": -372.59515380859375, "logps/real": -1791.433349609375, "loss": 0.0206, "rewards/accuracies": 1.0, "rewards/generated": -2.7497735023498535, "rewards/margins": 9.142165184020996, "rewards/real": 6.392390727996826, "step": 110 }, { "epoch": 0.87, "learning_rate": 4.7496617050067657e-07, "logits/generated": 3.810609817504883, "logits/real": 3.4206061363220215, "logps/generated": -348.82977294921875, "logps/real": -1596.9195556640625, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/generated": -2.860016345977783, "rewards/margins": 8.540342330932617, "rewards/real": 5.68032693862915, "step": 120 }, { "epoch": 0.95, "learning_rate": 4.6820027063599453e-07, "logits/generated": 3.9147820472717285, "logits/real": 3.2912585735321045, "logps/generated": -337.8192443847656, "logps/real": -1601.826904296875, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/generated": -3.173366069793701, "rewards/margins": 8.81226921081543, "rewards/real": 5.6389031410217285, "step": 130 }, { "epoch": 1.02, "learning_rate": 4.614343707713126e-07, "logits/generated": 3.896376132965088, "logits/real": 3.6063761711120605, "logps/generated": -359.6056213378906, "logps/real": -1766.2171630859375, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/generated": -3.311199188232422, "rewards/margins": 9.132051467895508, "rewards/real": 5.820852756500244, "step": 140 }, { "epoch": 1.09, "learning_rate": 4.546684709066306e-07, "logits/generated": 3.836315870285034, "logits/real": 3.3494999408721924, "logps/generated": -334.84527587890625, "logps/real": -1951.1331787109375, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/generated": -3.0987415313720703, "rewards/margins": 10.636507034301758, "rewards/real": 7.537766456604004, "step": 150 }, { "epoch": 1.16, "learning_rate": 4.4790257104194856e-07, "logits/generated": 3.8280670642852783, "logits/real": 3.497692823410034, "logps/generated": -333.99896240234375, "logps/real": -1667.4837646484375, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/generated": -3.2428054809570312, "rewards/margins": 9.726168632507324, "rewards/real": 6.483364105224609, "step": 160 }, { "epoch": 1.24, "learning_rate": 4.4113667117726656e-07, "logits/generated": 3.9641876220703125, "logits/real": 3.590871810913086, "logps/generated": -351.3685607910156, "logps/real": -1849.7330322265625, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/generated": -3.2033190727233887, "rewards/margins": 10.444951057434082, "rewards/real": 7.241631507873535, "step": 170 }, { "epoch": 1.31, "learning_rate": 4.343707713125845e-07, "logits/generated": 3.8312644958496094, "logits/real": 3.291086196899414, "logps/generated": -332.4565734863281, "logps/real": -1553.10888671875, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/generated": -3.328856945037842, "rewards/margins": 9.635026931762695, "rewards/real": 6.306169033050537, "step": 180 }, { "epoch": 1.38, "learning_rate": 4.2760487144790253e-07, "logits/generated": 3.8795554637908936, "logits/real": 3.316781997680664, "logps/generated": -342.43206787109375, "logps/real": -1534.9920654296875, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/generated": -3.282355785369873, "rewards/margins": 10.086224555969238, "rewards/real": 6.803868770599365, "step": 190 }, { "epoch": 1.45, "learning_rate": 4.208389715832206e-07, "logits/generated": 4.1092209815979, "logits/real": 3.305363416671753, "logps/generated": -348.10906982421875, "logps/real": -1568.366943359375, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/generated": -3.756807327270508, "rewards/margins": 10.589798927307129, "rewards/real": 6.832991123199463, "step": 200 }, { "epoch": 1.53, "learning_rate": 4.1407307171853855e-07, "logits/generated": 3.8207149505615234, "logits/real": 3.4637844562530518, "logps/generated": -342.99212646484375, "logps/real": -1603.469482421875, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/generated": -3.7897486686706543, "rewards/margins": 11.39747142791748, "rewards/real": 7.607722282409668, "step": 210 }, { "epoch": 1.6, "learning_rate": 4.0730717185385656e-07, "logits/generated": 3.6602706909179688, "logits/real": 3.3105037212371826, "logps/generated": -353.58953857421875, "logps/real": -1564.878662109375, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/generated": -3.876565456390381, "rewards/margins": 10.988374710083008, "rewards/real": 7.111808776855469, "step": 220 }, { "epoch": 1.67, "learning_rate": 4.005412719891745e-07, "logits/generated": 4.050937652587891, "logits/real": 3.2149081230163574, "logps/generated": -343.61700439453125, "logps/real": -1867.1146240234375, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/generated": -3.812628984451294, "rewards/margins": 12.574882507324219, "rewards/real": 8.762253761291504, "step": 230 }, { "epoch": 1.75, "learning_rate": 3.937753721244925e-07, "logits/generated": 3.8790907859802246, "logits/real": 3.244337558746338, "logps/generated": -350.46807861328125, "logps/real": -1776.2249755859375, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/generated": -3.7245185375213623, "rewards/margins": 11.542536735534668, "rewards/real": 7.818017482757568, "step": 240 }, { "epoch": 1.82, "learning_rate": 3.8700947225981053e-07, "logits/generated": 4.189554691314697, "logits/real": 3.2744312286376953, "logps/generated": -306.65423583984375, "logps/real": -1908.651611328125, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/generated": -3.4162280559539795, "rewards/margins": 12.490008354187012, "rewards/real": 9.073779106140137, "step": 250 }, { "epoch": 1.89, "learning_rate": 3.8024357239512854e-07, "logits/generated": 3.996807813644409, "logits/real": 3.3394546508789062, "logps/generated": -349.5832214355469, "logps/real": -1604.595947265625, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/generated": -4.512771129608154, "rewards/margins": 12.467561721801758, "rewards/real": 7.9547905921936035, "step": 260 }, { "epoch": 1.96, "learning_rate": 3.7347767253044655e-07, "logits/generated": 3.9711928367614746, "logits/real": 3.210707187652588, "logps/generated": -352.9242248535156, "logps/real": -1650.7523193359375, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/generated": -3.945718288421631, "rewards/margins": 12.379148483276367, "rewards/real": 8.433430671691895, "step": 270 }, { "epoch": 2.04, "learning_rate": 3.667117726657645e-07, "logits/generated": 3.9078941345214844, "logits/real": 3.3292477130889893, "logps/generated": -385.12103271484375, "logps/real": -1738.5439453125, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/generated": -4.713086128234863, "rewards/margins": 13.36457347869873, "rewards/real": 8.651487350463867, "step": 280 }, { "epoch": 2.11, "learning_rate": 3.599458728010825e-07, "logits/generated": 3.8858847618103027, "logits/real": 3.1584792137145996, "logps/generated": -364.4229431152344, "logps/real": -1803.279052734375, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/generated": -4.948676586151123, "rewards/margins": 14.149557113647461, "rewards/real": 9.200879096984863, "step": 290 }, { "epoch": 2.18, "learning_rate": 3.5317997293640053e-07, "logits/generated": 3.8670012950897217, "logits/real": 3.3043994903564453, "logps/generated": -361.4666748046875, "logps/real": -1889.6123046875, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/generated": -4.797239780426025, "rewards/margins": 14.819738388061523, "rewards/real": 10.022500991821289, "step": 300 }, { "epoch": 2.25, "learning_rate": 3.4641407307171854e-07, "logits/generated": 3.9627106189727783, "logits/real": 3.099079132080078, "logps/generated": -343.8397216796875, "logps/real": -1466.379638671875, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/generated": -4.415881633758545, "rewards/margins": 12.417741775512695, "rewards/real": 8.001859664916992, "step": 310 }, { "epoch": 2.33, "learning_rate": 3.3964817320703655e-07, "logits/generated": 3.78568959236145, "logits/real": 3.1619513034820557, "logps/generated": -341.7552795410156, "logps/real": -1698.628173828125, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/generated": -4.093456268310547, "rewards/margins": 13.626190185546875, "rewards/real": 9.532732009887695, "step": 320 }, { "epoch": 2.4, "learning_rate": 3.328822733423545e-07, "logits/generated": 3.825019121170044, "logits/real": 3.3764641284942627, "logps/generated": -378.8895263671875, "logps/real": -1597.697998046875, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/generated": -4.438040733337402, "rewards/margins": 13.366531372070312, "rewards/real": 8.928491592407227, "step": 330 }, { "epoch": 2.47, "learning_rate": 3.261163734776725e-07, "logits/generated": 3.8288943767547607, "logits/real": 3.290604829788208, "logps/generated": -348.8912048339844, "logps/real": -1745.009765625, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/generated": -4.390578746795654, "rewards/margins": 14.805871963500977, "rewards/real": 10.415292739868164, "step": 340 }, { "epoch": 2.55, "learning_rate": 3.193504736129905e-07, "logits/generated": 3.911057233810425, "logits/real": 3.30967378616333, "logps/generated": -401.079345703125, "logps/real": -1907.7503662109375, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/generated": -5.009480953216553, "rewards/margins": 15.79051685333252, "rewards/real": 10.781034469604492, "step": 350 }, { "epoch": 2.62, "learning_rate": 3.125845737483085e-07, "logits/generated": 3.992907762527466, "logits/real": 3.1416165828704834, "logps/generated": -324.5121154785156, "logps/real": -1745.8681640625, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/generated": -4.020837306976318, "rewards/margins": 13.725107192993164, "rewards/real": 9.704269409179688, "step": 360 }, { "epoch": 2.69, "learning_rate": 3.0581867388362654e-07, "logits/generated": 3.7999634742736816, "logits/real": 3.126760959625244, "logps/generated": -355.0987243652344, "logps/real": -1759.5863037109375, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/generated": -4.705051898956299, "rewards/margins": 14.536712646484375, "rewards/real": 9.831659317016602, "step": 370 }, { "epoch": 2.76, "learning_rate": 2.990527740189445e-07, "logits/generated": 3.8943915367126465, "logits/real": 3.05126690864563, "logps/generated": -329.1960144042969, "logps/real": -1854.295654296875, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/generated": -4.683255672454834, "rewards/margins": 15.590433120727539, "rewards/real": 10.907175064086914, "step": 380 }, { "epoch": 2.84, "learning_rate": 2.922868741542625e-07, "logits/generated": 3.833286762237549, "logits/real": 3.238703489303589, "logps/generated": -391.62860107421875, "logps/real": -1781.9693603515625, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/generated": -5.05405330657959, "rewards/margins": 16.43929100036621, "rewards/real": 11.385236740112305, "step": 390 }, { "epoch": 2.91, "learning_rate": 2.855209742895805e-07, "logits/generated": 3.7886595726013184, "logits/real": 3.2782788276672363, "logps/generated": -355.33367919921875, "logps/real": -1638.6527099609375, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/generated": -4.672540664672852, "rewards/margins": 14.931330680847168, "rewards/real": 10.258790969848633, "step": 400 }, { "epoch": 2.98, "learning_rate": 2.787550744248985e-07, "logits/generated": 3.645900011062622, "logits/real": 2.916073799133301, "logps/generated": -387.02630615234375, "logps/real": -1707.5318603515625, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/generated": -5.253026485443115, "rewards/margins": 15.671981811523438, "rewards/real": 10.418952941894531, "step": 410 }, { "epoch": 3.05, "learning_rate": 2.719891745602165e-07, "logits/generated": 3.629896879196167, "logits/real": 3.1881396770477295, "logps/generated": -336.713134765625, "logps/real": -1568.805908203125, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/generated": -4.606196880340576, "rewards/margins": 14.205004692077637, "rewards/real": 9.598807334899902, "step": 420 }, { "epoch": 3.13, "learning_rate": 2.652232746955345e-07, "logits/generated": 3.917677402496338, "logits/real": 3.3961739540100098, "logps/generated": -363.67803955078125, "logps/real": -1797.0133056640625, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/generated": -4.953548431396484, "rewards/margins": 16.0413761138916, "rewards/real": 11.087827682495117, "step": 430 }, { "epoch": 3.2, "learning_rate": 2.584573748308525e-07, "logits/generated": 4.0271100997924805, "logits/real": 3.431720018386841, "logps/generated": -388.35455322265625, "logps/real": -1662.0523681640625, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/generated": -5.565513610839844, "rewards/margins": 16.632692337036133, "rewards/real": 11.067178726196289, "step": 440 }, { "epoch": 3.27, "learning_rate": 2.516914749661705e-07, "logits/generated": 3.826180934906006, "logits/real": 3.1580729484558105, "logps/generated": -367.4959411621094, "logps/real": -1647.990234375, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/generated": -5.352944850921631, "rewards/margins": 16.819311141967773, "rewards/real": 11.466364860534668, "step": 450 }, { "epoch": 3.35, "learning_rate": 2.4492557510148847e-07, "logits/generated": 3.8260669708251953, "logits/real": 3.0786235332489014, "logps/generated": -394.67022705078125, "logps/real": -1674.683837890625, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/generated": -5.170815467834473, "rewards/margins": 16.448078155517578, "rewards/real": 11.277264595031738, "step": 460 }, { "epoch": 3.42, "learning_rate": 2.381596752368065e-07, "logits/generated": 3.8089356422424316, "logits/real": 2.935865879058838, "logps/generated": -359.86328125, "logps/real": -1552.1591796875, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/generated": -5.342236518859863, "rewards/margins": 15.546061515808105, "rewards/real": 10.203824996948242, "step": 470 }, { "epoch": 3.49, "learning_rate": 2.313937753721245e-07, "logits/generated": 3.622739315032959, "logits/real": 3.2677950859069824, "logps/generated": -383.79327392578125, "logps/real": -1559.5203857421875, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/generated": -5.695761680603027, "rewards/margins": 16.01279640197754, "rewards/real": 10.317033767700195, "step": 480 }, { "epoch": 3.56, "learning_rate": 2.2462787550744247e-07, "logits/generated": 3.8529486656188965, "logits/real": 3.0930233001708984, "logps/generated": -352.42803955078125, "logps/real": -1770.759033203125, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/generated": -4.963102340698242, "rewards/margins": 16.599287033081055, "rewards/real": 11.636184692382812, "step": 490 }, { "epoch": 3.64, "learning_rate": 2.1786197564276048e-07, "logits/generated": 3.8578999042510986, "logits/real": 3.097928047180176, "logps/generated": -357.9004821777344, "logps/real": -1765.2509765625, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/generated": -5.565553188323975, "rewards/margins": 17.522903442382812, "rewards/real": 11.957350730895996, "step": 500 }, { "epoch": 3.71, "learning_rate": 2.1109607577807846e-07, "logits/generated": 3.7201836109161377, "logits/real": 3.169063091278076, "logps/generated": -355.1180419921875, "logps/real": -1743.1220703125, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/generated": -4.84972620010376, "rewards/margins": 17.09893226623535, "rewards/real": 12.249204635620117, "step": 510 }, { "epoch": 3.78, "learning_rate": 2.0433017591339647e-07, "logits/generated": 3.973576068878174, "logits/real": 3.258516311645508, "logps/generated": -363.7156982421875, "logps/real": -1810.015869140625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/generated": -5.728123664855957, "rewards/margins": 18.64816665649414, "rewards/real": 12.92004108428955, "step": 520 }, { "epoch": 3.85, "learning_rate": 1.9756427604871448e-07, "logits/generated": 3.925982713699341, "logits/real": 3.269160509109497, "logps/generated": -386.85565185546875, "logps/real": -2033.0023193359375, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/generated": -5.752684593200684, "rewards/margins": 19.017629623413086, "rewards/real": 13.264944076538086, "step": 530 }, { "epoch": 3.93, "learning_rate": 1.9079837618403247e-07, "logits/generated": 3.872265338897705, "logits/real": 3.344080686569214, "logps/generated": -389.4714050292969, "logps/real": -1858.952392578125, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/generated": -6.137632846832275, "rewards/margins": 18.991987228393555, "rewards/real": 12.854354858398438, "step": 540 }, { "epoch": 4.0, "learning_rate": 1.8403247631935045e-07, "logits/generated": 3.995485782623291, "logits/real": 3.0428078174591064, "logps/generated": -366.5426940917969, "logps/real": -1872.344970703125, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/generated": -5.465807914733887, "rewards/margins": 18.97793960571289, "rewards/real": 13.512130737304688, "step": 550 }, { "epoch": 4.07, "learning_rate": 1.7726657645466846e-07, "logits/generated": 3.8400580883026123, "logits/real": 3.1361613273620605, "logps/generated": -412.7926330566406, "logps/real": -1755.785888671875, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/generated": -6.226557731628418, "rewards/margins": 18.641386032104492, "rewards/real": 12.414828300476074, "step": 560 }, { "epoch": 4.15, "learning_rate": 1.7050067658998647e-07, "logits/generated": 3.8298487663269043, "logits/real": 2.984595775604248, "logps/generated": -375.7140197753906, "logps/real": -1391.13671875, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/generated": -5.827058792114258, "rewards/margins": 16.213787078857422, "rewards/real": 10.386730194091797, "step": 570 }, { "epoch": 4.22, "learning_rate": 1.6373477672530445e-07, "logits/generated": 3.893458604812622, "logits/real": 3.2185893058776855, "logps/generated": -386.10601806640625, "logps/real": -1719.9033203125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/generated": -5.855027675628662, "rewards/margins": 18.005151748657227, "rewards/real": 12.150121688842773, "step": 580 }, { "epoch": 4.29, "learning_rate": 1.5696887686062246e-07, "logits/generated": 3.7929599285125732, "logits/real": 3.190579891204834, "logps/generated": -386.9568786621094, "logps/real": -1818.0087890625, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/generated": -6.23530387878418, "rewards/margins": 20.180156707763672, "rewards/real": 13.944854736328125, "step": 590 }, { "epoch": 4.36, "learning_rate": 1.5020297699594044e-07, "logits/generated": 3.56512188911438, "logits/real": 3.106480121612549, "logps/generated": -370.51837158203125, "logps/real": -1503.3177490234375, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/generated": -5.6500020027160645, "rewards/margins": 16.554481506347656, "rewards/real": 10.90447998046875, "step": 600 }, { "epoch": 4.44, "learning_rate": 1.4343707713125843e-07, "logits/generated": 3.7323760986328125, "logits/real": 2.943189859390259, "logps/generated": -377.02252197265625, "logps/real": -1760.864501953125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/generated": -6.176201343536377, "rewards/margins": 19.93837547302246, "rewards/real": 13.762173652648926, "step": 610 }, { "epoch": 4.51, "learning_rate": 1.3667117726657646e-07, "logits/generated": 3.663243532180786, "logits/real": 3.2114410400390625, "logps/generated": -397.3208312988281, "logps/real": -1699.3892822265625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/generated": -6.474091529846191, "rewards/margins": 19.477985382080078, "rewards/real": 13.003893852233887, "step": 620 }, { "epoch": 4.58, "learning_rate": 1.2990527740189445e-07, "logits/generated": 3.869292736053467, "logits/real": 3.1606218814849854, "logps/generated": -391.5151672363281, "logps/real": -1592.6275634765625, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/generated": -6.212581157684326, "rewards/margins": 18.19385528564453, "rewards/real": 11.981274604797363, "step": 630 }, { "epoch": 4.65, "learning_rate": 1.2313937753721245e-07, "logits/generated": 3.861110210418701, "logits/real": 3.375795841217041, "logps/generated": -401.7237243652344, "logps/real": -1806.221435546875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/generated": -6.126347541809082, "rewards/margins": 19.71944808959961, "rewards/real": 13.593098640441895, "step": 640 }, { "epoch": 4.73, "learning_rate": 1.1637347767253045e-07, "logits/generated": 3.911762237548828, "logits/real": 3.0098249912261963, "logps/generated": -386.85784912109375, "logps/real": -1795.923583984375, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/generated": -5.990913391113281, "rewards/margins": 18.843658447265625, "rewards/real": 12.852745056152344, "step": 650 }, { "epoch": 4.8, "learning_rate": 1.0960757780784843e-07, "logits/generated": 3.659184217453003, "logits/real": 3.1364493370056152, "logps/generated": -394.750732421875, "logps/real": -1495.034423828125, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/generated": -6.731655120849609, "rewards/margins": 18.13627815246582, "rewards/real": 11.404624938964844, "step": 660 }, { "epoch": 4.87, "learning_rate": 1.0284167794316643e-07, "logits/generated": 3.732351779937744, "logits/real": 3.1695330142974854, "logps/generated": -379.30029296875, "logps/real": -1640.4886474609375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/generated": -5.839709281921387, "rewards/margins": 17.96188735961914, "rewards/real": 12.122178077697754, "step": 670 }, { "epoch": 4.95, "learning_rate": 9.607577807848444e-08, "logits/generated": 4.146600723266602, "logits/real": 3.1058061122894287, "logps/generated": -391.1810607910156, "logps/real": -1852.7978515625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/generated": -6.588481903076172, "rewards/margins": 21.24244499206543, "rewards/real": 14.653963088989258, "step": 680 }, { "epoch": 5.02, "learning_rate": 8.930987821380242e-08, "logits/generated": 3.845707654953003, "logits/real": 3.167086124420166, "logps/generated": -409.24420166015625, "logps/real": -1766.3021240234375, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -6.590874671936035, "rewards/margins": 20.258441925048828, "rewards/real": 13.667569160461426, "step": 690 }, { "epoch": 5.09, "learning_rate": 8.254397834912043e-08, "logits/generated": 4.0798139572143555, "logits/real": 3.2149314880371094, "logps/generated": -413.5770568847656, "logps/real": -1552.6007080078125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/generated": -6.7278242111206055, "rewards/margins": 19.34658432006836, "rewards/real": 12.618760108947754, "step": 700 }, { "epoch": 5.16, "learning_rate": 7.577807848443843e-08, "logits/generated": 3.630070924758911, "logits/real": 3.0653679370880127, "logps/generated": -391.5611572265625, "logps/real": -1715.609130859375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/generated": -6.323145866394043, "rewards/margins": 19.680736541748047, "rewards/real": 13.35759162902832, "step": 710 }, { "epoch": 5.24, "learning_rate": 6.901217861975642e-08, "logits/generated": 3.7875144481658936, "logits/real": 2.993046760559082, "logps/generated": -369.9081115722656, "logps/real": -1673.7435302734375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/generated": -6.6384124755859375, "rewards/margins": 19.592199325561523, "rewards/real": 12.953786849975586, "step": 720 }, { "epoch": 5.31, "learning_rate": 6.224627875507442e-08, "logits/generated": 3.9582417011260986, "logits/real": 3.1155378818511963, "logps/generated": -400.16064453125, "logps/real": -1557.910888671875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/generated": -6.571238040924072, "rewards/margins": 19.208139419555664, "rewards/real": 12.63690185546875, "step": 730 }, { "epoch": 5.38, "learning_rate": 5.5480378890392424e-08, "logits/generated": 3.6610095500946045, "logits/real": 3.2329933643341064, "logps/generated": -405.45452880859375, "logps/real": -1639.8853759765625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/generated": -6.967964172363281, "rewards/margins": 20.059215545654297, "rewards/real": 13.091253280639648, "step": 740 }, { "epoch": 5.45, "learning_rate": 4.8714479025710414e-08, "logits/generated": 3.5873844623565674, "logits/real": 3.1617331504821777, "logps/generated": -407.7115478515625, "logps/real": -1555.2928466796875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/generated": -6.568167209625244, "rewards/margins": 19.3055419921875, "rewards/real": 12.737374305725098, "step": 750 }, { "epoch": 5.53, "learning_rate": 4.1948579161028417e-08, "logits/generated": 3.933115005493164, "logits/real": 3.093261241912842, "logps/generated": -443.93170166015625, "logps/real": -1752.468994140625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/generated": -7.051682949066162, "rewards/margins": 20.773927688598633, "rewards/real": 13.722244262695312, "step": 760 }, { "epoch": 5.6, "learning_rate": 3.518267929634641e-08, "logits/generated": 3.6486358642578125, "logits/real": 2.7410190105438232, "logps/generated": -392.55792236328125, "logps/real": -1540.426025390625, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/generated": -6.260336399078369, "rewards/margins": 19.024744033813477, "rewards/real": 12.764406204223633, "step": 770 }, { "epoch": 5.67, "learning_rate": 2.8416779431664412e-08, "logits/generated": 3.8739190101623535, "logits/real": 3.2343928813934326, "logps/generated": -385.4478454589844, "logps/real": -1756.2265625, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/generated": -6.667592525482178, "rewards/margins": 20.330894470214844, "rewards/real": 13.663301467895508, "step": 780 }, { "epoch": 5.75, "learning_rate": 2.165087956698241e-08, "logits/generated": 3.894277572631836, "logits/real": 3.1963775157928467, "logps/generated": -336.4626159667969, "logps/real": -1663.1304931640625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/generated": -5.930933952331543, "rewards/margins": 19.369308471679688, "rewards/real": 13.438372611999512, "step": 790 }, { "epoch": 5.82, "learning_rate": 1.4884979702300407e-08, "logits/generated": 3.5003573894500732, "logits/real": 3.1945197582244873, "logps/generated": -362.7236328125, "logps/real": -1370.6826171875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/generated": -6.438204288482666, "rewards/margins": 17.731670379638672, "rewards/real": 11.293463706970215, "step": 800 }, { "epoch": 5.89, "learning_rate": 8.119079837618403e-09, "logits/generated": 3.6622180938720703, "logits/real": 2.940600872039795, "logps/generated": -349.09100341796875, "logps/real": -1680.0224609375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/generated": -5.987884044647217, "rewards/margins": 19.526966094970703, "rewards/real": 13.539083480834961, "step": 810 }, { "epoch": 5.96, "learning_rate": 1.3531799729364005e-09, "logits/generated": 3.6205272674560547, "logits/real": 3.0839412212371826, "logps/generated": -368.858154296875, "logps/real": -1606.857666015625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/generated": -6.259243965148926, "rewards/margins": 20.033960342407227, "rewards/real": 13.774714469909668, "step": 820 }, { "epoch": 5.98, "step": 822, "total_flos": 0.0, "train_loss": 0.023352676501198516, "train_runtime": 11187.444, "train_samples_per_second": 4.72, "train_steps_per_second": 0.073 } ], "logging_steps": 10, "max_steps": 822, "num_input_tokens_seen": 0, "num_train_epochs": 6, "save_steps": 500, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }