|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9982631930527722, |
|
"eval_steps": 400, |
|
"global_step": 467, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01068804275217101, |
|
"grad_norm": 56.89954388226368, |
|
"learning_rate": 1.0638297872340425e-07, |
|
"logits/chosen": -1.0110526084899902, |
|
"logits/rejected": -0.9819751977920532, |
|
"logps/chosen": -0.27428290247917175, |
|
"logps/rejected": -0.27152150869369507, |
|
"loss": 3.074, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": -2.7428293228149414, |
|
"rewards/margins": -0.027614299207925797, |
|
"rewards/rejected": -2.7152152061462402, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.02137608550434202, |
|
"grad_norm": 41.09804027603237, |
|
"learning_rate": 2.127659574468085e-07, |
|
"logits/chosen": -1.0431499481201172, |
|
"logits/rejected": -0.9762164354324341, |
|
"logps/chosen": -0.29434844851493835, |
|
"logps/rejected": -0.29961150884628296, |
|
"loss": 3.0104, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -2.943484306335449, |
|
"rewards/margins": 0.052630893886089325, |
|
"rewards/rejected": -2.9961154460906982, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03206412825651302, |
|
"grad_norm": 54.12410975393932, |
|
"learning_rate": 3.1914893617021275e-07, |
|
"logits/chosen": -0.9705549478530884, |
|
"logits/rejected": -0.9907904863357544, |
|
"logps/chosen": -0.2644530236721039, |
|
"logps/rejected": -0.300653874874115, |
|
"loss": 2.9255, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.6445302963256836, |
|
"rewards/margins": 0.36200839281082153, |
|
"rewards/rejected": -3.0065386295318604, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.04275217100868404, |
|
"grad_norm": 70.58531376681509, |
|
"learning_rate": 4.25531914893617e-07, |
|
"logits/chosen": -0.9636842608451843, |
|
"logits/rejected": -0.9370673298835754, |
|
"logps/chosen": -0.27763131260871887, |
|
"logps/rejected": -0.29128938913345337, |
|
"loss": 2.9498, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -2.776313304901123, |
|
"rewards/margins": 0.13658040761947632, |
|
"rewards/rejected": -2.912893772125244, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.053440213760855046, |
|
"grad_norm": 56.29890733120682, |
|
"learning_rate": 5.319148936170212e-07, |
|
"logits/chosen": -1.021801233291626, |
|
"logits/rejected": -0.9922389984130859, |
|
"logps/chosen": -0.2716570198535919, |
|
"logps/rejected": -0.27810558676719666, |
|
"loss": 2.996, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -2.7165703773498535, |
|
"rewards/margins": 0.06448549777269363, |
|
"rewards/rejected": -2.7810559272766113, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.06412825651302605, |
|
"grad_norm": 43.82713664972921, |
|
"learning_rate": 6.382978723404255e-07, |
|
"logits/chosen": -0.9963313937187195, |
|
"logits/rejected": -0.9516829252243042, |
|
"logps/chosen": -0.2735390067100525, |
|
"logps/rejected": -0.2788829803466797, |
|
"loss": 2.9153, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -2.7353897094726562, |
|
"rewards/margins": 0.0534403920173645, |
|
"rewards/rejected": -2.788830041885376, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07481629926519706, |
|
"grad_norm": 70.24698865073822, |
|
"learning_rate": 7.446808510638297e-07, |
|
"logits/chosen": -1.052671194076538, |
|
"logits/rejected": -0.9770715832710266, |
|
"logps/chosen": -0.2941210865974426, |
|
"logps/rejected": -0.32225456833839417, |
|
"loss": 2.901, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -2.941210985183716, |
|
"rewards/margins": 0.2813349962234497, |
|
"rewards/rejected": -3.222545623779297, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.08550434201736808, |
|
"grad_norm": 55.30141312601411, |
|
"learning_rate": 8.51063829787234e-07, |
|
"logits/chosen": -1.0156581401824951, |
|
"logits/rejected": -0.9718509912490845, |
|
"logps/chosen": -0.28176331520080566, |
|
"logps/rejected": -0.3227621614933014, |
|
"loss": 2.9376, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.8176331520080566, |
|
"rewards/margins": 0.40998831391334534, |
|
"rewards/rejected": -3.227621555328369, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.09619238476953908, |
|
"grad_norm": 38.45710685921155, |
|
"learning_rate": 9.574468085106384e-07, |
|
"logits/chosen": -1.0545786619186401, |
|
"logits/rejected": -1.0107605457305908, |
|
"logps/chosen": -0.32836729288101196, |
|
"logps/rejected": -0.37582582235336304, |
|
"loss": 2.944, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -3.28367280960083, |
|
"rewards/margins": 0.4745853841304779, |
|
"rewards/rejected": -3.75825834274292, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.10688042752171009, |
|
"grad_norm": 127.64245160013748, |
|
"learning_rate": 9.998741174712533e-07, |
|
"logits/chosen": -1.040578842163086, |
|
"logits/rejected": -0.9890502095222473, |
|
"logps/chosen": -0.3216663599014282, |
|
"logps/rejected": -0.3576571047306061, |
|
"loss": 3.0077, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -3.216662883758545, |
|
"rewards/margins": 0.35990777611732483, |
|
"rewards/rejected": -3.576570987701416, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.11756847027388109, |
|
"grad_norm": 52.19635359720767, |
|
"learning_rate": 9.991050648838675e-07, |
|
"logits/chosen": -1.0747789144515991, |
|
"logits/rejected": -1.0378679037094116, |
|
"logps/chosen": -0.28550446033477783, |
|
"logps/rejected": -0.3416834771633148, |
|
"loss": 2.8502, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.8550446033477783, |
|
"rewards/margins": 0.5617905855178833, |
|
"rewards/rejected": -3.416834592819214, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.1282565130260521, |
|
"grad_norm": 45.58873626472617, |
|
"learning_rate": 9.97637968732563e-07, |
|
"logits/chosen": -1.0949262380599976, |
|
"logits/rejected": -1.0604766607284546, |
|
"logps/chosen": -0.3153451085090637, |
|
"logps/rejected": -0.33443158864974976, |
|
"loss": 2.8532, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -3.1534507274627686, |
|
"rewards/margins": 0.19086508452892303, |
|
"rewards/rejected": -3.344316005706787, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.13894455577822312, |
|
"grad_norm": 57.26986612669826, |
|
"learning_rate": 9.954748808839674e-07, |
|
"logits/chosen": -1.0162328481674194, |
|
"logits/rejected": -0.9866952896118164, |
|
"logps/chosen": -0.35168641805648804, |
|
"logps/rejected": -0.39575284719467163, |
|
"loss": 2.8058, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -3.516864061355591, |
|
"rewards/margins": 0.4406643509864807, |
|
"rewards/rejected": -3.9575283527374268, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.14963259853039412, |
|
"grad_norm": 36.80891193149694, |
|
"learning_rate": 9.926188266120295e-07, |
|
"logits/chosen": -1.032679796218872, |
|
"logits/rejected": -1.009883999824524, |
|
"logps/chosen": -0.34069326519966125, |
|
"logps/rejected": -0.4097404479980469, |
|
"loss": 2.8492, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -3.406932830810547, |
|
"rewards/margins": 0.690471887588501, |
|
"rewards/rejected": -4.097404479980469, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.16032064128256512, |
|
"grad_norm": 52.89939132813969, |
|
"learning_rate": 9.890738003669027e-07, |
|
"logits/chosen": -0.9840335845947266, |
|
"logits/rejected": -0.9154711961746216, |
|
"logps/chosen": -0.3410232663154602, |
|
"logps/rejected": -0.39265531301498413, |
|
"loss": 2.8212, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -3.4102330207824707, |
|
"rewards/margins": 0.5163205862045288, |
|
"rewards/rejected": -3.926553249359131, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.17100868403473615, |
|
"grad_norm": 43.121051189441694, |
|
"learning_rate": 9.848447601883433e-07, |
|
"logits/chosen": -0.9788318872451782, |
|
"logits/rejected": -0.9655908346176147, |
|
"logps/chosen": -0.3363918364048004, |
|
"logps/rejected": -0.4268081784248352, |
|
"loss": 2.7186, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -3.3639183044433594, |
|
"rewards/margins": 0.9041633605957031, |
|
"rewards/rejected": -4.2680816650390625, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.18169672678690715, |
|
"grad_norm": 54.440571510963004, |
|
"learning_rate": 9.799376207714444e-07, |
|
"logits/chosen": -1.003329873085022, |
|
"logits/rejected": -0.9817102551460266, |
|
"logps/chosen": -0.3231724500656128, |
|
"logps/rejected": -0.37237557768821716, |
|
"loss": 2.7361, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -3.231724262237549, |
|
"rewards/margins": 0.4920312464237213, |
|
"rewards/rejected": -3.7237555980682373, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.19238476953907815, |
|
"grad_norm": 68.19418423227036, |
|
"learning_rate": 9.743592451943998e-07, |
|
"logits/chosen": -1.0398868322372437, |
|
"logits/rejected": -1.0045406818389893, |
|
"logps/chosen": -0.39259663224220276, |
|
"logps/rejected": -0.46695399284362793, |
|
"loss": 2.8464, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -3.925966262817383, |
|
"rewards/margins": 0.7435733675956726, |
|
"rewards/rejected": -4.669539451599121, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.20307281229124916, |
|
"grad_norm": 48.310300313424634, |
|
"learning_rate": 9.681174353198686e-07, |
|
"logits/chosen": -1.1072306632995605, |
|
"logits/rejected": -1.0259342193603516, |
|
"logps/chosen": -0.4255724549293518, |
|
"logps/rejected": -0.46118488907814026, |
|
"loss": 2.7441, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -4.2557244300842285, |
|
"rewards/margins": 0.3561245799064636, |
|
"rewards/rejected": -4.611849308013916, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.21376085504342018, |
|
"grad_norm": 65.13984774258292, |
|
"learning_rate": 9.612209208833646e-07, |
|
"logits/chosen": -1.0061118602752686, |
|
"logits/rejected": -0.9820231199264526, |
|
"logps/chosen": -0.4049428403377533, |
|
"logps/rejected": -0.4770785868167877, |
|
"loss": 2.7252, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -4.049428462982178, |
|
"rewards/margins": 0.7213573455810547, |
|
"rewards/rejected": -4.770786285400391, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.22444889779559118, |
|
"grad_norm": 67.14694271980007, |
|
"learning_rate": 9.536793472839324e-07, |
|
"logits/chosen": -1.0186512470245361, |
|
"logits/rejected": -0.9668880701065063, |
|
"logps/chosen": -0.40378451347351074, |
|
"logps/rejected": -0.4971798360347748, |
|
"loss": 2.7255, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -4.037845134735107, |
|
"rewards/margins": 0.9339532852172852, |
|
"rewards/rejected": -4.971798419952393, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.23513694054776219, |
|
"grad_norm": 66.18323566645698, |
|
"learning_rate": 9.455032620941839e-07, |
|
"logits/chosen": -0.9757324457168579, |
|
"logits/rejected": -0.9178048372268677, |
|
"logps/chosen": -0.45775121450424194, |
|
"logps/rejected": -0.5744206309318542, |
|
"loss": 2.6384, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -4.577511787414551, |
|
"rewards/margins": 1.1666945219039917, |
|
"rewards/rejected": -5.744206428527832, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.2458249832999332, |
|
"grad_norm": 59.4672474909725, |
|
"learning_rate": 9.367041003085648e-07, |
|
"logits/chosen": -1.0457837581634521, |
|
"logits/rejected": -0.9855262041091919, |
|
"logps/chosen": -0.5011107921600342, |
|
"logps/rejected": -0.5669585466384888, |
|
"loss": 2.5014, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -5.011107444763184, |
|
"rewards/margins": 0.6584769487380981, |
|
"rewards/rejected": -5.66958475112915, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.2565130260521042, |
|
"grad_norm": 66.03731752964929, |
|
"learning_rate": 9.272941683504808e-07, |
|
"logits/chosen": -0.9971206784248352, |
|
"logits/rejected": -0.9091175198554993, |
|
"logps/chosen": -0.5187912583351135, |
|
"logps/rejected": -0.7068670392036438, |
|
"loss": 2.3834, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -5.187912464141846, |
|
"rewards/margins": 1.8807573318481445, |
|
"rewards/rejected": -7.06866979598999, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.26720106880427524, |
|
"grad_norm": 71.68229077695405, |
|
"learning_rate": 9.172866268606513e-07, |
|
"logits/chosen": -1.0721819400787354, |
|
"logits/rejected": -1.030027151107788, |
|
"logps/chosen": -0.5926575660705566, |
|
"logps/rejected": -0.6795297861099243, |
|
"loss": 2.3213, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -5.926576137542725, |
|
"rewards/margins": 0.8687216639518738, |
|
"rewards/rejected": -6.795297145843506, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.27788911155644624, |
|
"grad_norm": 64.96923591468992, |
|
"learning_rate": 9.066954722907638e-07, |
|
"logits/chosen": -1.1085387468338013, |
|
"logits/rejected": -1.1034091711044312, |
|
"logps/chosen": -0.5947206020355225, |
|
"logps/rejected": -0.8544057607650757, |
|
"loss": 2.1425, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -5.947205543518066, |
|
"rewards/margins": 2.596850872039795, |
|
"rewards/rejected": -8.54405689239502, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.28857715430861725, |
|
"grad_norm": 66.68661326155792, |
|
"learning_rate": 8.955355173281707e-07, |
|
"logits/chosen": -1.0726548433303833, |
|
"logits/rejected": -1.0262948274612427, |
|
"logps/chosen": -0.708818256855011, |
|
"logps/rejected": -0.8760965466499329, |
|
"loss": 2.0661, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -7.0881829261779785, |
|
"rewards/margins": 1.67278254032135, |
|
"rewards/rejected": -8.760965347290039, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.29926519706078825, |
|
"grad_norm": 88.39987976267922, |
|
"learning_rate": 8.838223701790055e-07, |
|
"logits/chosen": -1.144708275794983, |
|
"logits/rejected": -1.1237266063690186, |
|
"logps/chosen": -0.8580458760261536, |
|
"logps/rejected": -1.0160259008407593, |
|
"loss": 2.0786, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -8.580458641052246, |
|
"rewards/margins": 1.5798008441925049, |
|
"rewards/rejected": -10.160260200500488, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.30995323981295925, |
|
"grad_norm": 90.83612810193658, |
|
"learning_rate": 8.71572412738697e-07, |
|
"logits/chosen": -1.0413812398910522, |
|
"logits/rejected": -1.0159306526184082, |
|
"logps/chosen": -0.9142902493476868, |
|
"logps/rejected": -1.1587440967559814, |
|
"loss": 2.0446, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -9.142904281616211, |
|
"rewards/margins": 2.444538116455078, |
|
"rewards/rejected": -11.587442398071289, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.32064128256513025, |
|
"grad_norm": 84.07324057563632, |
|
"learning_rate": 8.588027776804058e-07, |
|
"logits/chosen": -1.06027352809906, |
|
"logits/rejected": -1.0439517498016357, |
|
"logps/chosen": -1.0236120223999023, |
|
"logps/rejected": -1.2801861763000488, |
|
"loss": 1.9933, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -10.236120223999023, |
|
"rewards/margins": 2.565741777420044, |
|
"rewards/rejected": -12.801861763000488, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.33132932531730125, |
|
"grad_norm": 83.2873729325751, |
|
"learning_rate": 8.455313244934324e-07, |
|
"logits/chosen": -1.0788214206695557, |
|
"logits/rejected": -1.059463620185852, |
|
"logps/chosen": -1.1024755239486694, |
|
"logps/rejected": -1.424791932106018, |
|
"loss": 2.0534, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -11.024755477905273, |
|
"rewards/margins": 3.2231624126434326, |
|
"rewards/rejected": -14.247919082641602, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.3420173680694723, |
|
"grad_norm": 91.00920515798947, |
|
"learning_rate": 8.317766145051057e-07, |
|
"logits/chosen": -1.115094780921936, |
|
"logits/rejected": -1.0990221500396729, |
|
"logps/chosen": -1.2077633142471313, |
|
"logps/rejected": -1.5987141132354736, |
|
"loss": 1.9827, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -12.077634811401367, |
|
"rewards/margins": 3.909506320953369, |
|
"rewards/rejected": -15.987141609191895, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.3527054108216433, |
|
"grad_norm": 65.16453388698586, |
|
"learning_rate": 8.175578849210894e-07, |
|
"logits/chosen": -1.1276443004608154, |
|
"logits/rejected": -1.106072187423706, |
|
"logps/chosen": -1.156090259552002, |
|
"logps/rejected": -1.5160917043685913, |
|
"loss": 1.8258, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -11.560903549194336, |
|
"rewards/margins": 3.6000137329101562, |
|
"rewards/rejected": -15.160917282104492, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.3633934535738143, |
|
"grad_norm": 89.17851329246218, |
|
"learning_rate": 8.028950219204099e-07, |
|
"logits/chosen": -1.133327841758728, |
|
"logits/rejected": -1.1123793125152588, |
|
"logps/chosen": -1.1282349824905396, |
|
"logps/rejected": -1.5152461528778076, |
|
"loss": 1.805, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -11.282350540161133, |
|
"rewards/margins": 3.8701133728027344, |
|
"rewards/rejected": -15.15246295928955, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.3740814963259853, |
|
"grad_norm": 90.91620530583262, |
|
"learning_rate": 7.878085328428368e-07, |
|
"logits/chosen": -1.1684499979019165, |
|
"logits/rejected": -1.1230958700180054, |
|
"logps/chosen": -1.199479341506958, |
|
"logps/rejected": -1.480486273765564, |
|
"loss": 1.7009, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -11.994794845581055, |
|
"rewards/margins": 2.8100688457489014, |
|
"rewards/rejected": -14.804861068725586, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.3847695390781563, |
|
"grad_norm": 85.1133679017557, |
|
"learning_rate": 7.723195175075135e-07, |
|
"logits/chosen": -1.125327706336975, |
|
"logits/rejected": -1.107810139656067, |
|
"logps/chosen": -1.1546721458435059, |
|
"logps/rejected": -1.5301127433776855, |
|
"loss": 1.5941, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -11.546720504760742, |
|
"rewards/margins": 3.7544052600860596, |
|
"rewards/rejected": -15.301126480102539, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.3954575818303273, |
|
"grad_norm": 80.80074018938376, |
|
"learning_rate": 7.564496387029531e-07, |
|
"logits/chosen": -1.1530572175979614, |
|
"logits/rejected": -1.1004054546356201, |
|
"logps/chosen": -1.1977171897888184, |
|
"logps/rejected": -1.6391410827636719, |
|
"loss": 1.6323, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -11.977171897888184, |
|
"rewards/margins": 4.414237976074219, |
|
"rewards/rejected": -16.39141273498535, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.4061456245824983, |
|
"grad_norm": 87.8130211657885, |
|
"learning_rate": 7.402210918896689e-07, |
|
"logits/chosen": -1.1491271257400513, |
|
"logits/rejected": -1.159403681755066, |
|
"logps/chosen": -1.3192346096038818, |
|
"logps/rejected": -1.8000110387802124, |
|
"loss": 1.5253, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -13.192344665527344, |
|
"rewards/margins": 4.807766437530518, |
|
"rewards/rejected": -18.000110626220703, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.4168336673346693, |
|
"grad_norm": 84.55372178403601, |
|
"learning_rate": 7.236565741578162e-07, |
|
"logits/chosen": -1.1186103820800781, |
|
"logits/rejected": -1.1020419597625732, |
|
"logps/chosen": -1.367811918258667, |
|
"logps/rejected": -1.7383407354354858, |
|
"loss": 1.6656, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -13.678120613098145, |
|
"rewards/margins": 3.705289363861084, |
|
"rewards/rejected": -17.38340950012207, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.42752171008684037, |
|
"grad_norm": 84.20111385217919, |
|
"learning_rate": 7.067792524832603e-07, |
|
"logits/chosen": -1.1123974323272705, |
|
"logits/rejected": -1.1035972833633423, |
|
"logps/chosen": -1.3991031646728516, |
|
"logps/rejected": -1.8270728588104248, |
|
"loss": 1.514, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -13.991032600402832, |
|
"rewards/margins": 4.279696464538574, |
|
"rewards/rejected": -18.27073097229004, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.43820975283901137, |
|
"grad_norm": 95.99111629240849, |
|
"learning_rate": 6.896127313264642e-07, |
|
"logits/chosen": -1.174342393875122, |
|
"logits/rejected": -1.1257025003433228, |
|
"logps/chosen": -1.463107705116272, |
|
"logps/rejected": -1.938598871231079, |
|
"loss": 1.6893, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -14.631075859069824, |
|
"rewards/margins": 4.754912853240967, |
|
"rewards/rejected": -19.385990142822266, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.44889779559118237, |
|
"grad_norm": 100.92269633403131, |
|
"learning_rate": 6.721810196195174e-07, |
|
"logits/chosen": -1.2214075326919556, |
|
"logits/rejected": -1.2106014490127563, |
|
"logps/chosen": -1.468076467514038, |
|
"logps/rejected": -1.8905394077301025, |
|
"loss": 1.573, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -14.680766105651855, |
|
"rewards/margins": 4.224628925323486, |
|
"rewards/rejected": -18.905391693115234, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.45958583834335337, |
|
"grad_norm": 98.80302089659659, |
|
"learning_rate": 6.545084971874736e-07, |
|
"logits/chosen": -1.1770942211151123, |
|
"logits/rejected": -1.161029577255249, |
|
"logps/chosen": -1.4916527271270752, |
|
"logps/rejected": -1.9983189105987549, |
|
"loss": 1.4988, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -14.916528701782227, |
|
"rewards/margins": 5.0666608810424805, |
|
"rewards/rejected": -19.98318862915039, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.47027388109552437, |
|
"grad_norm": 94.29735384618333, |
|
"learning_rate": 6.3661988065096e-07, |
|
"logits/chosen": -1.2350765466690063, |
|
"logits/rejected": -1.2198480367660522, |
|
"logps/chosen": -1.5811980962753296, |
|
"logps/rejected": -2.1048731803894043, |
|
"loss": 1.3966, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -15.811983108520508, |
|
"rewards/margins": 5.236747741699219, |
|
"rewards/rejected": -21.048730850219727, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.48096192384769537, |
|
"grad_norm": 73.78349743811617, |
|
"learning_rate": 6.185401888577487e-07, |
|
"logits/chosen": -1.2252886295318604, |
|
"logits/rejected": -1.195698618888855, |
|
"logps/chosen": -1.6302112340927124, |
|
"logps/rejected": -2.095083475112915, |
|
"loss": 1.4949, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -16.302112579345703, |
|
"rewards/margins": 4.6487226486206055, |
|
"rewards/rejected": -20.950834274291992, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.4916499665998664, |
|
"grad_norm": 72.86824044049841, |
|
"learning_rate": 6.002947078916364e-07, |
|
"logits/chosen": -1.2797940969467163, |
|
"logits/rejected": -1.2363423109054565, |
|
"logps/chosen": -1.5519064664840698, |
|
"logps/rejected": -2.0577468872070312, |
|
"loss": 1.3568, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -15.519063949584961, |
|
"rewards/margins": 5.05840539932251, |
|
"rewards/rejected": -20.577470779418945, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5023380093520374, |
|
"grad_norm": 87.56760475487422, |
|
"learning_rate": 5.819089557075688e-07, |
|
"logits/chosen": -1.3070341348648071, |
|
"logits/rejected": -1.2829915285110474, |
|
"logps/chosen": -1.5769996643066406, |
|
"logps/rejected": -2.101868152618408, |
|
"loss": 1.4121, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -15.769996643066406, |
|
"rewards/margins": 5.248685359954834, |
|
"rewards/rejected": -21.0186824798584, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.5130260521042084, |
|
"grad_norm": 97.95206336790469, |
|
"learning_rate": 5.634086464424742e-07, |
|
"logits/chosen": -1.2722411155700684, |
|
"logits/rejected": -1.2728557586669922, |
|
"logps/chosen": -1.5129470825195312, |
|
"logps/rejected": -2.0472776889801025, |
|
"loss": 1.4543, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -15.12946891784668, |
|
"rewards/margins": 5.343307018280029, |
|
"rewards/rejected": -20.4727783203125, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5237140948563794, |
|
"grad_norm": 91.68979328640371, |
|
"learning_rate": 5.448196544517167e-07, |
|
"logits/chosen": -1.3419743776321411, |
|
"logits/rejected": -1.2915674448013306, |
|
"logps/chosen": -1.5888983011245728, |
|
"logps/rejected": -2.2375972270965576, |
|
"loss": 1.3296, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -15.888982772827148, |
|
"rewards/margins": 6.486991882324219, |
|
"rewards/rejected": -22.375974655151367, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.5344021376085505, |
|
"grad_norm": 90.04751296677803, |
|
"learning_rate": 5.26167978121472e-07, |
|
"logits/chosen": -1.3019921779632568, |
|
"logits/rejected": -1.287535309791565, |
|
"logps/chosen": -1.632799744606018, |
|
"logps/rejected": -2.2480623722076416, |
|
"loss": 1.3388, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -16.3279972076416, |
|
"rewards/margins": 6.1526265144348145, |
|
"rewards/rejected": -22.48062515258789, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5450901803607214, |
|
"grad_norm": 72.22778279348458, |
|
"learning_rate": 5.074797035076318e-07, |
|
"logits/chosen": -1.3449289798736572, |
|
"logits/rejected": -1.321571707725525, |
|
"logps/chosen": -1.6949834823608398, |
|
"logps/rejected": -2.1791787147521973, |
|
"loss": 1.4266, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -16.949832916259766, |
|
"rewards/margins": 4.84195613861084, |
|
"rewards/rejected": -21.791790008544922, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.5557782231128925, |
|
"grad_norm": 82.32130834680915, |
|
"learning_rate": 4.887809678520975e-07, |
|
"logits/chosen": -1.3022774457931519, |
|
"logits/rejected": -1.2787460088729858, |
|
"logps/chosen": -1.6087223291397095, |
|
"logps/rejected": -2.112210750579834, |
|
"loss": 1.4418, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -16.087223052978516, |
|
"rewards/margins": 5.034885883331299, |
|
"rewards/rejected": -21.122108459472656, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5664662658650634, |
|
"grad_norm": 82.10988858763459, |
|
"learning_rate": 4.700979230274829e-07, |
|
"logits/chosen": -1.263526201248169, |
|
"logits/rejected": -1.2527527809143066, |
|
"logps/chosen": -1.7088844776153564, |
|
"logps/rejected": -2.2491986751556396, |
|
"loss": 1.2866, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -17.08884620666504, |
|
"rewards/margins": 5.40314245223999, |
|
"rewards/rejected": -22.491985321044922, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.5771543086172345, |
|
"grad_norm": 74.22531378642971, |
|
"learning_rate": 4.514566989613559e-07, |
|
"logits/chosen": -1.276107907295227, |
|
"logits/rejected": -1.2517645359039307, |
|
"logps/chosen": -1.5033190250396729, |
|
"logps/rejected": -2.076201915740967, |
|
"loss": 1.2308, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -15.033190727233887, |
|
"rewards/margins": 5.728825569152832, |
|
"rewards/rejected": -20.762014389038086, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.5878423513694054, |
|
"grad_norm": 66.56842436857252, |
|
"learning_rate": 4.328833670911724e-07, |
|
"logits/chosen": -1.2568957805633545, |
|
"logits/rejected": -1.2262030839920044, |
|
"logps/chosen": -1.562673568725586, |
|
"logps/rejected": -2.0573482513427734, |
|
"loss": 1.4699, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -15.626736640930176, |
|
"rewards/margins": 4.946745872497559, |
|
"rewards/rejected": -20.573482513427734, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.5985303941215765, |
|
"grad_norm": 71.08794378986906, |
|
"learning_rate": 4.144039039010124e-07, |
|
"logits/chosen": -1.3348051309585571, |
|
"logits/rejected": -1.3145146369934082, |
|
"logps/chosen": -1.6333341598510742, |
|
"logps/rejected": -2.22784686088562, |
|
"loss": 1.3131, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -16.33333969116211, |
|
"rewards/margins": 5.945128440856934, |
|
"rewards/rejected": -22.27846908569336, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6092184368737475, |
|
"grad_norm": 93.72278509593903, |
|
"learning_rate": 3.960441545911204e-07, |
|
"logits/chosen": -1.297483205795288, |
|
"logits/rejected": -1.2733924388885498, |
|
"logps/chosen": -1.650708794593811, |
|
"logps/rejected": -2.298219680786133, |
|
"loss": 1.0059, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -16.50708770751953, |
|
"rewards/margins": 6.4751081466674805, |
|
"rewards/rejected": -22.982196807861328, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.6199064796259185, |
|
"grad_norm": 76.68999166591557, |
|
"learning_rate": 3.778297969310529e-07, |
|
"logits/chosen": -1.32032310962677, |
|
"logits/rejected": -1.2831968069076538, |
|
"logps/chosen": -1.706647276878357, |
|
"logps/rejected": -2.2490804195404053, |
|
"loss": 1.3171, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -17.066471099853516, |
|
"rewards/margins": 5.424333572387695, |
|
"rewards/rejected": -22.490802764892578, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6305945223780896, |
|
"grad_norm": 87.09453084365835, |
|
"learning_rate": 3.5978630534699865e-07, |
|
"logits/chosen": -1.2636520862579346, |
|
"logits/rejected": -1.2527822256088257, |
|
"logps/chosen": -1.6968777179718018, |
|
"logps/rejected": -2.257763624191284, |
|
"loss": 1.1554, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -16.96877670288086, |
|
"rewards/margins": 5.608861446380615, |
|
"rewards/rejected": -22.57763671875, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.6412825651302605, |
|
"grad_norm": 82.76223672337171, |
|
"learning_rate": 3.4193891529348795e-07, |
|
"logits/chosen": -1.206903100013733, |
|
"logits/rejected": -1.1841256618499756, |
|
"logps/chosen": -1.7743552923202515, |
|
"logps/rejected": -2.2721633911132812, |
|
"loss": 1.5577, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -17.74355125427246, |
|
"rewards/margins": 4.978079795837402, |
|
"rewards/rejected": -22.72163200378418, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6519706078824316, |
|
"grad_norm": 78.16246422530884, |
|
"learning_rate": 3.243125879593286e-07, |
|
"logits/chosen": -1.3001482486724854, |
|
"logits/rejected": -1.2663486003875732, |
|
"logps/chosen": -1.6906330585479736, |
|
"logps/rejected": -2.1780858039855957, |
|
"loss": 1.3471, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -16.906330108642578, |
|
"rewards/margins": 4.874526023864746, |
|
"rewards/rejected": -21.78085708618164, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.6626586506346025, |
|
"grad_norm": 91.76909016032246, |
|
"learning_rate": 3.069319753571269e-07, |
|
"logits/chosen": -1.3407663106918335, |
|
"logits/rejected": -1.3254361152648926, |
|
"logps/chosen": -1.6954444646835327, |
|
"logps/rejected": -2.250342845916748, |
|
"loss": 1.3722, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -16.95444679260254, |
|
"rewards/margins": 5.548983573913574, |
|
"rewards/rejected": -22.503429412841797, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.6733466933867736, |
|
"grad_norm": 83.8485405296481, |
|
"learning_rate": 2.898213858452173e-07, |
|
"logits/chosen": -1.3287131786346436, |
|
"logits/rejected": -1.2830489873886108, |
|
"logps/chosen": -1.7108901739120483, |
|
"logps/rejected": -2.263986587524414, |
|
"loss": 1.2997, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -17.108901977539062, |
|
"rewards/margins": 5.530962944030762, |
|
"rewards/rejected": -22.63986587524414, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.6840347361389446, |
|
"grad_norm": 110.4735398683659, |
|
"learning_rate": 2.730047501302266e-07, |
|
"logits/chosen": -1.3247960805892944, |
|
"logits/rejected": -1.3241022825241089, |
|
"logps/chosen": -1.7461349964141846, |
|
"logps/rejected": -2.435873508453369, |
|
"loss": 1.1823, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -17.461349487304688, |
|
"rewards/margins": 6.8973846435546875, |
|
"rewards/rejected": -24.358732223510742, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.6947227788911156, |
|
"grad_norm": 83.494848255409, |
|
"learning_rate": 2.5650558779781635e-07, |
|
"logits/chosen": -1.332975149154663, |
|
"logits/rejected": -1.2938920259475708, |
|
"logps/chosen": -1.795236349105835, |
|
"logps/rejected": -2.5173287391662598, |
|
"loss": 1.2571, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -17.952363967895508, |
|
"rewards/margins": 7.220925331115723, |
|
"rewards/rejected": -25.173290252685547, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.7054108216432866, |
|
"grad_norm": 67.71202935932357, |
|
"learning_rate": 2.403469744184154e-07, |
|
"logits/chosen": -1.2552909851074219, |
|
"logits/rejected": -1.2208917140960693, |
|
"logps/chosen": -1.8006776571273804, |
|
"logps/rejected": -2.3320443630218506, |
|
"loss": 1.3218, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -18.006778717041016, |
|
"rewards/margins": 5.3136701583862305, |
|
"rewards/rejected": -23.32044792175293, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7160988643954576, |
|
"grad_norm": 100.05067628008052, |
|
"learning_rate": 2.2455150927394878e-07, |
|
"logits/chosen": -1.2909272909164429, |
|
"logits/rejected": -1.279365062713623, |
|
"logps/chosen": -1.7781559228897095, |
|
"logps/rejected": -2.3740553855895996, |
|
"loss": 1.2001, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -17.781558990478516, |
|
"rewards/margins": 5.9589948654174805, |
|
"rewards/rejected": -23.740550994873047, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.7267869071476286, |
|
"grad_norm": 97.4036448779786, |
|
"learning_rate": 2.0914128375069722e-07, |
|
"logits/chosen": -1.2993793487548828, |
|
"logits/rejected": -1.2727620601654053, |
|
"logps/chosen": -1.6889365911483765, |
|
"logps/rejected": -2.2718539237976074, |
|
"loss": 1.3272, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -16.889366149902344, |
|
"rewards/margins": 5.829171180725098, |
|
"rewards/rejected": -22.718538284301758, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.7374749498997996, |
|
"grad_norm": 80.17854676377132, |
|
"learning_rate": 1.9413785044249676e-07, |
|
"logits/chosen": -1.3267228603363037, |
|
"logits/rejected": -1.307962417602539, |
|
"logps/chosen": -1.7601877450942993, |
|
"logps/rejected": -2.4487035274505615, |
|
"loss": 1.268, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -17.601877212524414, |
|
"rewards/margins": 6.885159492492676, |
|
"rewards/rejected": -24.487037658691406, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.7481629926519706, |
|
"grad_norm": 93.74425146775721, |
|
"learning_rate": 1.7956219300748792e-07, |
|
"logits/chosen": -1.301977515220642, |
|
"logits/rejected": -1.3074698448181152, |
|
"logps/chosen": -1.6333377361297607, |
|
"logps/rejected": -2.2102160453796387, |
|
"loss": 1.3055, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -16.333377838134766, |
|
"rewards/margins": 5.768781661987305, |
|
"rewards/rejected": -22.102161407470703, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7588510354041417, |
|
"grad_norm": 81.82295078846326, |
|
"learning_rate": 1.6543469682057104e-07, |
|
"logits/chosen": -1.231096625328064, |
|
"logits/rejected": -1.248560905456543, |
|
"logps/chosen": -1.6608985662460327, |
|
"logps/rejected": -2.275078535079956, |
|
"loss": 1.1623, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -16.608983993530273, |
|
"rewards/margins": 6.141798496246338, |
|
"rewards/rejected": -22.750782012939453, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.7695390781563126, |
|
"grad_norm": 83.05095937518779, |
|
"learning_rate": 1.5177512046261666e-07, |
|
"logits/chosen": -1.2908953428268433, |
|
"logits/rejected": -1.2959532737731934, |
|
"logps/chosen": -1.669966459274292, |
|
"logps/rejected": -2.3578410148620605, |
|
"loss": 1.2917, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -16.69966697692871, |
|
"rewards/margins": 6.878741264343262, |
|
"rewards/rejected": -23.578407287597656, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.7802271209084837, |
|
"grad_norm": 74.89454351181128, |
|
"learning_rate": 1.3860256808630427e-07, |
|
"logits/chosen": -1.3286449909210205, |
|
"logits/rejected": -1.2749364376068115, |
|
"logps/chosen": -1.7136112451553345, |
|
"logps/rejected": -2.3888180255889893, |
|
"loss": 1.2062, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -17.136112213134766, |
|
"rewards/margins": 6.752066135406494, |
|
"rewards/rejected": -23.888179779052734, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.7909151636606546, |
|
"grad_norm": 86.53704054424027, |
|
"learning_rate": 1.2593546269723647e-07, |
|
"logits/chosen": -1.251564860343933, |
|
"logits/rejected": -1.2445452213287354, |
|
"logps/chosen": -1.689417839050293, |
|
"logps/rejected": -2.242030620574951, |
|
"loss": 1.2397, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -16.89417839050293, |
|
"rewards/margins": 5.526127338409424, |
|
"rewards/rejected": -22.420307159423828, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.8016032064128257, |
|
"grad_norm": 67.08154175834267, |
|
"learning_rate": 1.1379152038770029e-07, |
|
"logits/chosen": -1.2969611883163452, |
|
"logits/rejected": -1.3048449754714966, |
|
"logps/chosen": -1.8122236728668213, |
|
"logps/rejected": -2.4301235675811768, |
|
"loss": 1.1934, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -18.122234344482422, |
|
"rewards/margins": 6.178996562957764, |
|
"rewards/rejected": -24.301233291625977, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.8122912491649966, |
|
"grad_norm": 117.54208827116818, |
|
"learning_rate": 1.0218772555910954e-07, |
|
"logits/chosen": -1.2999813556671143, |
|
"logits/rejected": -1.2861607074737549, |
|
"logps/chosen": -1.7071046829223633, |
|
"logps/rejected": -2.316257953643799, |
|
"loss": 1.3229, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -17.071046829223633, |
|
"rewards/margins": 6.091533184051514, |
|
"rewards/rejected": -23.162578582763672, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8229792919171677, |
|
"grad_norm": 81.44656810755579, |
|
"learning_rate": 9.114030716778432e-08, |
|
"logits/chosen": -1.2965797185897827, |
|
"logits/rejected": -1.2802777290344238, |
|
"logps/chosen": -1.7108449935913086, |
|
"logps/rejected": -2.4812264442443848, |
|
"loss": 1.0893, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -17.108448028564453, |
|
"rewards/margins": 7.703813076019287, |
|
"rewards/rejected": -24.8122615814209, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.8336673346693386, |
|
"grad_norm": 77.70445335852864, |
|
"learning_rate": 8.066471602728803e-08, |
|
"logits/chosen": -1.3073532581329346, |
|
"logits/rejected": -1.298214316368103, |
|
"logps/chosen": -1.7747161388397217, |
|
"logps/rejected": -2.438636302947998, |
|
"loss": 1.243, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -17.747161865234375, |
|
"rewards/margins": 6.639204502105713, |
|
"rewards/rejected": -24.38636589050293, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.8443553774215097, |
|
"grad_norm": 83.85991252299944, |
|
"learning_rate": 7.077560319906694e-08, |
|
"logits/chosen": -1.3133912086486816, |
|
"logits/rejected": -1.2960683107376099, |
|
"logps/chosen": -1.6985034942626953, |
|
"logps/rejected": -2.307638645172119, |
|
"loss": 1.1851, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -16.985034942626953, |
|
"rewards/margins": 6.091352462768555, |
|
"rewards/rejected": -23.07638931274414, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.8550434201736807, |
|
"grad_norm": 71.13795001226813, |
|
"learning_rate": 6.148679950161672e-08, |
|
"logits/chosen": -1.3109194040298462, |
|
"logits/rejected": -1.2997634410858154, |
|
"logps/chosen": -1.7445151805877686, |
|
"logps/rejected": -2.31221604347229, |
|
"loss": 1.1882, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -17.445152282714844, |
|
"rewards/margins": 5.677010536193848, |
|
"rewards/rejected": -23.122159957885742, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8550434201736807, |
|
"eval_logits/chosen": -1.4831929206848145, |
|
"eval_logits/rejected": -1.4942151308059692, |
|
"eval_logps/chosen": -1.7345198392868042, |
|
"eval_logps/rejected": -2.3351681232452393, |
|
"eval_loss": 1.17947518825531, |
|
"eval_rewards/accuracies": 0.8434959053993225, |
|
"eval_rewards/chosen": -17.345199584960938, |
|
"eval_rewards/margins": 6.006482124328613, |
|
"eval_rewards/rejected": -23.351680755615234, |
|
"eval_runtime": 66.8816, |
|
"eval_samples_per_second": 29.32, |
|
"eval_steps_per_second": 1.839, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8657314629258517, |
|
"grad_norm": 93.04526178302538, |
|
"learning_rate": 5.2811296166831666e-08, |
|
"logits/chosen": -1.2737401723861694, |
|
"logits/rejected": -1.2966753244400024, |
|
"logps/chosen": -1.8380101919174194, |
|
"logps/rejected": -2.4519333839416504, |
|
"loss": 1.1965, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -18.380102157592773, |
|
"rewards/margins": 6.139228820800781, |
|
"rewards/rejected": -24.51933479309082, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.8764195056780227, |
|
"grad_norm": 146.600711895034, |
|
"learning_rate": 4.4761226670592066e-08, |
|
"logits/chosen": -1.29901921749115, |
|
"logits/rejected": -1.287117838859558, |
|
"logps/chosen": -1.744970679283142, |
|
"logps/rejected": -2.3512680530548096, |
|
"loss": 1.2833, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -17.44970703125, |
|
"rewards/margins": 6.062973499298096, |
|
"rewards/rejected": -23.512680053710938, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.8871075484301937, |
|
"grad_norm": 75.88048221665215, |
|
"learning_rate": 3.734784976300165e-08, |
|
"logits/chosen": -1.2868596315383911, |
|
"logits/rejected": -1.2446833848953247, |
|
"logps/chosen": -1.6970937252044678, |
|
"logps/rejected": -2.4172720909118652, |
|
"loss": 1.2743, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -16.970937728881836, |
|
"rewards/margins": 7.201780796051025, |
|
"rewards/rejected": -24.172718048095703, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.8977955911823647, |
|
"grad_norm": 89.93720385918785, |
|
"learning_rate": 3.058153372200695e-08, |
|
"logits/chosen": -1.322579026222229, |
|
"logits/rejected": -1.2816030979156494, |
|
"logps/chosen": -1.6371269226074219, |
|
"logps/rejected": -2.3092269897460938, |
|
"loss": 1.1839, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -16.371267318725586, |
|
"rewards/margins": 6.720999240875244, |
|
"rewards/rejected": -23.092267990112305, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9084836339345357, |
|
"grad_norm": 87.9502722305557, |
|
"learning_rate": 2.4471741852423233e-08, |
|
"logits/chosen": -1.320749044418335, |
|
"logits/rejected": -1.3164869546890259, |
|
"logps/chosen": -1.8138576745986938, |
|
"logps/rejected": -2.4155681133270264, |
|
"loss": 1.3872, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -18.13857650756836, |
|
"rewards/margins": 6.0171027183532715, |
|
"rewards/rejected": -24.15567970275879, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.9191716766867067, |
|
"grad_norm": 86.56225821481218, |
|
"learning_rate": 1.9027019250647036e-08, |
|
"logits/chosen": -1.2986432313919067, |
|
"logits/rejected": -1.2903183698654175, |
|
"logps/chosen": -1.7905200719833374, |
|
"logps/rejected": -2.436361789703369, |
|
"loss": 1.2511, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -17.905202865600586, |
|
"rewards/margins": 6.458415985107422, |
|
"rewards/rejected": -24.363616943359375, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9298597194388778, |
|
"grad_norm": 72.5185778330974, |
|
"learning_rate": 1.4254980853566246e-08, |
|
"logits/chosen": -1.2492395639419556, |
|
"logits/rejected": -1.2206861972808838, |
|
"logps/chosen": -1.657965064048767, |
|
"logps/rejected": -2.2997374534606934, |
|
"loss": 1.1762, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -16.57965087890625, |
|
"rewards/margins": 6.417726039886475, |
|
"rewards/rejected": -22.99737548828125, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.9405477621910487, |
|
"grad_norm": 93.87912624876707, |
|
"learning_rate": 1.016230078838226e-08, |
|
"logits/chosen": -1.2905051708221436, |
|
"logits/rejected": -1.2409080266952515, |
|
"logps/chosen": -1.7906429767608643, |
|
"logps/rejected": -2.375685453414917, |
|
"loss": 1.2152, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -17.906429290771484, |
|
"rewards/margins": 5.850423812866211, |
|
"rewards/rejected": -23.756855010986328, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.9512358049432198, |
|
"grad_norm": 68.35754825310542, |
|
"learning_rate": 6.754703038239329e-09, |
|
"logits/chosen": -1.238844633102417, |
|
"logits/rejected": -1.2277412414550781, |
|
"logps/chosen": -1.769052267074585, |
|
"logps/rejected": -2.5054378509521484, |
|
"loss": 1.1183, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -17.690523147583008, |
|
"rewards/margins": 7.363855838775635, |
|
"rewards/rejected": -25.054378509521484, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.9619238476953907, |
|
"grad_norm": 83.54543382327525, |
|
"learning_rate": 4.036953436716895e-09, |
|
"logits/chosen": -1.346346139907837, |
|
"logits/rejected": -1.3324317932128906, |
|
"logps/chosen": -1.7270358800888062, |
|
"logps/rejected": -2.3151395320892334, |
|
"loss": 1.242, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -17.27035903930664, |
|
"rewards/margins": 5.881035804748535, |
|
"rewards/rejected": -23.15139389038086, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.9726118904475618, |
|
"grad_norm": 96.76907591863952, |
|
"learning_rate": 2.0128530023804656e-09, |
|
"logits/chosen": -1.3069255352020264, |
|
"logits/rejected": -1.277972936630249, |
|
"logps/chosen": -1.7244638204574585, |
|
"logps/rejected": -2.4305450916290283, |
|
"loss": 1.1012, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -17.244640350341797, |
|
"rewards/margins": 7.060812473297119, |
|
"rewards/rejected": -24.305450439453125, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.9832999331997327, |
|
"grad_norm": 95.7750256826231, |
|
"learning_rate": 6.852326227130833e-10, |
|
"logits/chosen": -1.3161975145339966, |
|
"logits/rejected": -1.3086355924606323, |
|
"logps/chosen": -1.768693208694458, |
|
"logps/rejected": -2.425410747528076, |
|
"loss": 1.1545, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -17.686931610107422, |
|
"rewards/margins": 6.567179203033447, |
|
"rewards/rejected": -24.25411033630371, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.9939879759519038, |
|
"grad_norm": 90.4800906374845, |
|
"learning_rate": 5.594909486328348e-11, |
|
"logits/chosen": -1.2925775051116943, |
|
"logits/rejected": -1.3013789653778076, |
|
"logps/chosen": -1.8294912576675415, |
|
"logps/rejected": -2.505312204360962, |
|
"loss": 1.2602, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -18.294912338256836, |
|
"rewards/margins": 6.758206844329834, |
|
"rewards/rejected": -25.053119659423828, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.9982631930527722, |
|
"step": 467, |
|
"total_flos": 0.0, |
|
"train_loss": 1.7882541670789045, |
|
"train_runtime": 8154.3088, |
|
"train_samples_per_second": 7.343, |
|
"train_steps_per_second": 0.057 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 467, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|