|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 100, |
|
"global_step": 1065, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"dpo_losses": 0.6931471824645996, |
|
"epoch": 0.0, |
|
"grad_norm": 1.6032202352154772, |
|
"learning_rate": 4.672897196261682e-08, |
|
"logits/chosen": -3.0016818046569824, |
|
"logits/rejected": -2.8469698429107666, |
|
"logps/chosen": -650.2908325195312, |
|
"logps/rejected": -359.48583984375, |
|
"loss": 0.6931, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/margins_max": 0.0, |
|
"rewards/margins_min": 0.0, |
|
"rewards/margins_std": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"dpo_losses": 0.6927387714385986, |
|
"epoch": 0.03, |
|
"grad_norm": 14.052093588804325, |
|
"learning_rate": 4.6728971962616824e-07, |
|
"logits/chosen": -2.9367923736572266, |
|
"logits/rejected": -2.819260835647583, |
|
"logps/chosen": -254.90475463867188, |
|
"logps/rejected": -170.36068725585938, |
|
"loss": 0.6989, |
|
"positive_losses": 0.033258650451898575, |
|
"rewards/accuracies": 0.5555555820465088, |
|
"rewards/chosen": 0.0008218331495299935, |
|
"rewards/margins": 0.0008189052459783852, |
|
"rewards/margins_max": 0.0018548837397247553, |
|
"rewards/margins_min": -0.00021707323321606964, |
|
"rewards/margins_std": 0.0014650949742645025, |
|
"rewards/rejected": 2.927754849224584e-06, |
|
"step": 10 |
|
}, |
|
{ |
|
"dpo_losses": 0.6923267245292664, |
|
"epoch": 0.06, |
|
"grad_norm": 1.844492423373157, |
|
"learning_rate": 9.345794392523365e-07, |
|
"logits/chosen": -2.7079358100891113, |
|
"logits/rejected": -2.7515180110931396, |
|
"logps/chosen": -306.1308898925781, |
|
"logps/rejected": -241.56021118164062, |
|
"loss": 0.6932, |
|
"positive_losses": 0.012112426571547985, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.006594317965209484, |
|
"rewards/margins": 0.0016433143755421042, |
|
"rewards/margins_max": 0.002823440358042717, |
|
"rewards/margins_min": 0.00046318816021084785, |
|
"rewards/margins_std": 0.001668950542807579, |
|
"rewards/rejected": 0.004951003938913345, |
|
"step": 20 |
|
}, |
|
{ |
|
"dpo_losses": 0.6899991631507874, |
|
"epoch": 0.08, |
|
"grad_norm": 2.1897418931727595, |
|
"learning_rate": 1.4018691588785047e-06, |
|
"logits/chosen": -2.904411792755127, |
|
"logits/rejected": -2.816619396209717, |
|
"logps/chosen": -358.5197448730469, |
|
"logps/rejected": -251.15103149414062, |
|
"loss": 0.6896, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.02147643454372883, |
|
"rewards/margins": 0.006321606691926718, |
|
"rewards/margins_max": 0.01196110900491476, |
|
"rewards/margins_min": 0.000682103622239083, |
|
"rewards/margins_std": 0.007975460961461067, |
|
"rewards/rejected": 0.015154826454818249, |
|
"step": 30 |
|
}, |
|
{ |
|
"dpo_losses": 0.6862105131149292, |
|
"epoch": 0.11, |
|
"grad_norm": 1.7300257406359418, |
|
"learning_rate": 1.869158878504673e-06, |
|
"logits/chosen": -2.8441336154937744, |
|
"logits/rejected": -2.7715249061584473, |
|
"logps/chosen": -327.30523681640625, |
|
"logps/rejected": -313.1446228027344, |
|
"loss": 0.6864, |
|
"positive_losses": 0.02085266076028347, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.03371895104646683, |
|
"rewards/margins": 0.013976506888866425, |
|
"rewards/margins_max": 0.017818700522184372, |
|
"rewards/margins_min": 0.010134311392903328, |
|
"rewards/margins_std": 0.0054336837492883205, |
|
"rewards/rejected": 0.019742444157600403, |
|
"step": 40 |
|
}, |
|
{ |
|
"dpo_losses": 0.6820067167282104, |
|
"epoch": 0.14, |
|
"grad_norm": 9.347589322785899, |
|
"learning_rate": 2.3364485981308413e-06, |
|
"logits/chosen": -2.795854091644287, |
|
"logits/rejected": -2.720963954925537, |
|
"logps/chosen": -217.7622833251953, |
|
"logps/rejected": -171.39205932617188, |
|
"loss": 0.6779, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.0513346791267395, |
|
"rewards/margins": 0.022588390856981277, |
|
"rewards/margins_max": 0.03625725582242012, |
|
"rewards/margins_min": 0.008919527754187584, |
|
"rewards/margins_std": 0.019330691546201706, |
|
"rewards/rejected": 0.028746291995048523, |
|
"step": 50 |
|
}, |
|
{ |
|
"dpo_losses": 0.6643597483634949, |
|
"epoch": 0.17, |
|
"grad_norm": 2.37274745731943, |
|
"learning_rate": 2.8037383177570094e-06, |
|
"logits/chosen": -2.7788054943084717, |
|
"logits/rejected": -2.710609197616577, |
|
"logps/chosen": -256.30633544921875, |
|
"logps/rejected": -233.06576538085938, |
|
"loss": 0.6666, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.08375100791454315, |
|
"rewards/margins": 0.05946110561490059, |
|
"rewards/margins_max": 0.08825884014368057, |
|
"rewards/margins_min": 0.030663389712572098, |
|
"rewards/margins_std": 0.040726132690906525, |
|
"rewards/rejected": 0.024289902299642563, |
|
"step": 60 |
|
}, |
|
{ |
|
"dpo_losses": 0.6566643714904785, |
|
"epoch": 0.2, |
|
"grad_norm": 1.6634540430479345, |
|
"learning_rate": 3.2710280373831774e-06, |
|
"logits/chosen": -2.635437488555908, |
|
"logits/rejected": -2.678208351135254, |
|
"logps/chosen": -283.38287353515625, |
|
"logps/rejected": -209.6460418701172, |
|
"loss": 0.6558, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.11453696340322495, |
|
"rewards/margins": 0.07607836276292801, |
|
"rewards/margins_max": 0.13534289598464966, |
|
"rewards/margins_min": 0.01681383326649666, |
|
"rewards/margins_std": 0.08381269872188568, |
|
"rewards/rejected": 0.03845860809087753, |
|
"step": 70 |
|
}, |
|
{ |
|
"dpo_losses": 0.6309095025062561, |
|
"epoch": 0.23, |
|
"grad_norm": 1.7989959804157094, |
|
"learning_rate": 3.738317757009346e-06, |
|
"logits/chosen": -2.9159035682678223, |
|
"logits/rejected": -2.8235018253326416, |
|
"logps/chosen": -335.8651123046875, |
|
"logps/rejected": -286.46331787109375, |
|
"loss": 0.6397, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.13743606209754944, |
|
"rewards/margins": 0.1305120289325714, |
|
"rewards/margins_max": 0.18503351509571075, |
|
"rewards/margins_min": 0.07599054276943207, |
|
"rewards/margins_std": 0.07710503041744232, |
|
"rewards/rejected": 0.0069240378215909, |
|
"step": 80 |
|
}, |
|
{ |
|
"dpo_losses": 0.6185696721076965, |
|
"epoch": 0.25, |
|
"grad_norm": 9.307634759665634, |
|
"learning_rate": 4.205607476635514e-06, |
|
"logits/chosen": -2.6819961071014404, |
|
"logits/rejected": -2.7166359424591064, |
|
"logps/chosen": -211.7088623046875, |
|
"logps/rejected": -203.97885131835938, |
|
"loss": 0.6145, |
|
"positive_losses": 0.035182952880859375, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.15242353081703186, |
|
"rewards/margins": 0.16033907234668732, |
|
"rewards/margins_max": 0.24873778223991394, |
|
"rewards/margins_min": 0.07194037735462189, |
|
"rewards/margins_std": 0.12501463294029236, |
|
"rewards/rejected": -0.007915569469332695, |
|
"step": 90 |
|
}, |
|
{ |
|
"dpo_losses": 0.6138414144515991, |
|
"epoch": 0.28, |
|
"grad_norm": 2.169680467803253, |
|
"learning_rate": 4.6728971962616825e-06, |
|
"logits/chosen": -2.783569812774658, |
|
"logits/rejected": -2.812309741973877, |
|
"logps/chosen": -288.1591796875, |
|
"logps/rejected": -341.5180969238281, |
|
"loss": 0.6275, |
|
"positive_losses": 0.8350906372070312, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.1585826575756073, |
|
"rewards/margins": 0.1714317500591278, |
|
"rewards/margins_max": 0.2515793442726135, |
|
"rewards/margins_min": 0.09128417074680328, |
|
"rewards/margins_std": 0.11334581673145294, |
|
"rewards/rejected": -0.012849109247326851, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_dpo_losses": 0.6742300391197205, |
|
"eval_logits/chosen": -2.7527217864990234, |
|
"eval_logits/rejected": -2.71140456199646, |
|
"eval_logps/chosen": -276.58984375, |
|
"eval_logps/rejected": -254.9810333251953, |
|
"eval_loss": 0.8539575338363647, |
|
"eval_positive_losses": 1.6940749883651733, |
|
"eval_rewards/accuracies": 0.60317462682724, |
|
"eval_rewards/chosen": 0.08631354570388794, |
|
"eval_rewards/margins": 0.04429732263088226, |
|
"eval_rewards/margins_max": 0.21467885375022888, |
|
"eval_rewards/margins_min": -0.10308819264173508, |
|
"eval_rewards/margins_std": 0.14203837513923645, |
|
"eval_rewards/rejected": 0.042016226798295975, |
|
"eval_runtime": 285.3929, |
|
"eval_samples_per_second": 7.008, |
|
"eval_steps_per_second": 0.221, |
|
"step": 100 |
|
}, |
|
{ |
|
"dpo_losses": 0.5535503029823303, |
|
"epoch": 0.31, |
|
"grad_norm": 2.2536984881767905, |
|
"learning_rate": 4.999879018839288e-06, |
|
"logits/chosen": -2.7111623287200928, |
|
"logits/rejected": -2.6175503730773926, |
|
"logps/chosen": -252.84732055664062, |
|
"logps/rejected": -252.4491729736328, |
|
"loss": 0.5736, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2623223662376404, |
|
"rewards/margins": 0.32247892022132874, |
|
"rewards/margins_max": 0.4974708557128906, |
|
"rewards/margins_min": 0.14748699963092804, |
|
"rewards/margins_std": 0.24747595191001892, |
|
"rewards/rejected": -0.060156505554914474, |
|
"step": 110 |
|
}, |
|
{ |
|
"dpo_losses": 0.5708788633346558, |
|
"epoch": 0.34, |
|
"grad_norm": 1.8718792057149318, |
|
"learning_rate": 4.99772856836941e-06, |
|
"logits/chosen": -2.873108148574829, |
|
"logits/rejected": -2.8189544677734375, |
|
"logps/chosen": -373.77386474609375, |
|
"logps/rejected": -337.38922119140625, |
|
"loss": 0.5727, |
|
"positive_losses": 0.22691193222999573, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21501663327217102, |
|
"rewards/margins": 0.27264389395713806, |
|
"rewards/margins_max": 0.36815184354782104, |
|
"rewards/margins_min": 0.17713597416877747, |
|
"rewards/margins_std": 0.13506858050823212, |
|
"rewards/rejected": -0.057627253234386444, |
|
"step": 120 |
|
}, |
|
{ |
|
"dpo_losses": 0.5159657001495361, |
|
"epoch": 0.37, |
|
"grad_norm": 1.9587224479056975, |
|
"learning_rate": 4.992892309373227e-06, |
|
"logits/chosen": -2.7587242126464844, |
|
"logits/rejected": -2.689577341079712, |
|
"logps/chosen": -311.52978515625, |
|
"logps/rejected": -274.8511047363281, |
|
"loss": 0.5718, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.24945905804634094, |
|
"rewards/margins": 0.41450828313827515, |
|
"rewards/margins_max": 0.5637356638908386, |
|
"rewards/margins_min": 0.26528093218803406, |
|
"rewards/margins_std": 0.21103934943675995, |
|
"rewards/rejected": -0.1650492250919342, |
|
"step": 130 |
|
}, |
|
{ |
|
"dpo_losses": 0.5120642185211182, |
|
"epoch": 0.39, |
|
"grad_norm": 35.2981995380076, |
|
"learning_rate": 4.985375442281969e-06, |
|
"logits/chosen": -2.725268602371216, |
|
"logits/rejected": -2.7174267768859863, |
|
"logps/chosen": -270.7826843261719, |
|
"logps/rejected": -248.8843536376953, |
|
"loss": 0.5953, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.25090181827545166, |
|
"rewards/margins": 0.42299699783325195, |
|
"rewards/margins_max": 0.5964112877845764, |
|
"rewards/margins_min": 0.24958273768424988, |
|
"rewards/margins_std": 0.2452448159456253, |
|
"rewards/rejected": -0.1720951795578003, |
|
"step": 140 |
|
}, |
|
{ |
|
"dpo_losses": 0.4928362965583801, |
|
"epoch": 0.42, |
|
"grad_norm": 11.90624935921094, |
|
"learning_rate": 4.9751860499858175e-06, |
|
"logits/chosen": -2.72652530670166, |
|
"logits/rejected": -2.7453625202178955, |
|
"logps/chosen": -301.97021484375, |
|
"logps/rejected": -276.3653259277344, |
|
"loss": 0.5758, |
|
"positive_losses": 0.42821502685546875, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.19235308468341827, |
|
"rewards/margins": 0.47807592153549194, |
|
"rewards/margins_max": 0.6831300854682922, |
|
"rewards/margins_min": 0.2730218172073364, |
|
"rewards/margins_std": 0.2899903357028961, |
|
"rewards/rejected": -0.2857228219509125, |
|
"step": 150 |
|
}, |
|
{ |
|
"dpo_losses": 0.4960567355155945, |
|
"epoch": 0.45, |
|
"grad_norm": 15.847210447883002, |
|
"learning_rate": 4.962335089142376e-06, |
|
"logits/chosen": -2.81313157081604, |
|
"logits/rejected": -2.735961675643921, |
|
"logps/chosen": -244.3223419189453, |
|
"logps/rejected": -264.59417724609375, |
|
"loss": 0.559, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.25210094451904297, |
|
"rewards/margins": 0.46666574478149414, |
|
"rewards/margins_max": 0.6729411482810974, |
|
"rewards/margins_min": 0.26039019227027893, |
|
"rewards/margins_std": 0.2917175889015198, |
|
"rewards/rejected": -0.21456477046012878, |
|
"step": 160 |
|
}, |
|
{ |
|
"dpo_losses": 0.4443618357181549, |
|
"epoch": 0.48, |
|
"grad_norm": 2.921537165567133, |
|
"learning_rate": 4.946836378394967e-06, |
|
"logits/chosen": -2.8487417697906494, |
|
"logits/rejected": -2.7233359813690186, |
|
"logps/chosen": -293.14263916015625, |
|
"logps/rejected": -265.21044921875, |
|
"loss": 0.4792, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.32291096448898315, |
|
"rewards/margins": 0.6394142508506775, |
|
"rewards/margins_max": 0.8530386686325073, |
|
"rewards/margins_min": 0.42578983306884766, |
|
"rewards/margins_std": 0.30211058259010315, |
|
"rewards/rejected": -0.3165033161640167, |
|
"step": 170 |
|
}, |
|
{ |
|
"dpo_losses": 0.4956347942352295, |
|
"epoch": 0.51, |
|
"grad_norm": 23.196576832752985, |
|
"learning_rate": 4.928706583513441e-06, |
|
"logits/chosen": -2.7180655002593994, |
|
"logits/rejected": -2.674361228942871, |
|
"logps/chosen": -249.37704467773438, |
|
"logps/rejected": -410.07391357421875, |
|
"loss": 0.5836, |
|
"positive_losses": 1.074639916419983, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.17418113350868225, |
|
"rewards/margins": 0.47780531644821167, |
|
"rewards/margins_max": 0.6538316011428833, |
|
"rewards/margins_min": 0.3017791211605072, |
|
"rewards/margins_std": 0.24893875420093536, |
|
"rewards/rejected": -0.3036242425441742, |
|
"step": 180 |
|
}, |
|
{ |
|
"dpo_losses": 0.4040610194206238, |
|
"epoch": 0.54, |
|
"grad_norm": 2.662026262000448, |
|
"learning_rate": 4.907965199473471e-06, |
|
"logits/chosen": -2.6723411083221436, |
|
"logits/rejected": -2.5524985790252686, |
|
"logps/chosen": -320.6319274902344, |
|
"logps/rejected": -257.9935302734375, |
|
"loss": 0.5582, |
|
"positive_losses": 1.0019195079803467, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.3839383125305176, |
|
"rewards/margins": 0.7386760711669922, |
|
"rewards/margins_max": 0.9635330438613892, |
|
"rewards/margins_min": 0.51381915807724, |
|
"rewards/margins_std": 0.31799572706222534, |
|
"rewards/rejected": -0.3547378182411194, |
|
"step": 190 |
|
}, |
|
{ |
|
"dpo_losses": 0.4472725987434387, |
|
"epoch": 0.56, |
|
"grad_norm": 23.173407948282012, |
|
"learning_rate": 4.884634529493591e-06, |
|
"logits/chosen": -2.8709282875061035, |
|
"logits/rejected": -2.7968573570251465, |
|
"logps/chosen": -255.41879272460938, |
|
"logps/rejected": -237.76406860351562, |
|
"loss": 0.599, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.2910212576389313, |
|
"rewards/margins": 0.6229863166809082, |
|
"rewards/margins_max": 0.8043031692504883, |
|
"rewards/margins_min": 0.4416695535182953, |
|
"rewards/margins_std": 0.25642070174217224, |
|
"rewards/rejected": -0.3319651484489441, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_dpo_losses": 0.6560041308403015, |
|
"eval_logits/chosen": -2.7841696739196777, |
|
"eval_logits/rejected": -2.738633632659912, |
|
"eval_logps/chosen": -291.0660095214844, |
|
"eval_logps/rejected": -275.99658203125, |
|
"eval_loss": 1.9206839799880981, |
|
"eval_positive_losses": 12.58076000213623, |
|
"eval_rewards/accuracies": 0.6388888955116272, |
|
"eval_rewards/chosen": -0.05844784155488014, |
|
"eval_rewards/margins": 0.10969138890504837, |
|
"eval_rewards/margins_max": 0.4903210401535034, |
|
"eval_rewards/margins_min": -0.25554272532463074, |
|
"eval_rewards/margins_std": 0.33160677552223206, |
|
"eval_rewards/rejected": -0.1681392341852188, |
|
"eval_runtime": 284.4185, |
|
"eval_samples_per_second": 7.032, |
|
"eval_steps_per_second": 0.222, |
|
"step": 200 |
|
}, |
|
{ |
|
"dpo_losses": 0.45512253046035767, |
|
"epoch": 0.59, |
|
"grad_norm": 7.436654691984327, |
|
"learning_rate": 4.858739661052539e-06, |
|
"logits/chosen": -2.5205092430114746, |
|
"logits/rejected": -2.5804672241210938, |
|
"logps/chosen": -240.2886962890625, |
|
"logps/rejected": -298.3849182128906, |
|
"loss": 0.5267, |
|
"positive_losses": 0.4501487612724304, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.38874852657318115, |
|
"rewards/margins": 0.6847165822982788, |
|
"rewards/margins_max": 1.0578687191009521, |
|
"rewards/margins_min": 0.31156447529792786, |
|
"rewards/margins_std": 0.5277167558670044, |
|
"rewards/rejected": -0.29596805572509766, |
|
"step": 210 |
|
}, |
|
{ |
|
"dpo_losses": 0.4267025589942932, |
|
"epoch": 0.62, |
|
"grad_norm": 12.252545150739026, |
|
"learning_rate": 4.830308438912687e-06, |
|
"logits/chosen": -2.901047945022583, |
|
"logits/rejected": -2.776557207107544, |
|
"logps/chosen": -341.5310363769531, |
|
"logps/rejected": -316.5777282714844, |
|
"loss": 0.5828, |
|
"positive_losses": 1.5781867504119873, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.28965142369270325, |
|
"rewards/margins": 0.6728664040565491, |
|
"rewards/margins_max": 0.8954153060913086, |
|
"rewards/margins_min": 0.4503174424171448, |
|
"rewards/margins_std": 0.3147316873073578, |
|
"rewards/rejected": -0.38321495056152344, |
|
"step": 220 |
|
}, |
|
{ |
|
"dpo_losses": 0.42703738808631897, |
|
"epoch": 0.65, |
|
"grad_norm": 2.1766995421765545, |
|
"learning_rate": 4.799371435178544e-06, |
|
"logits/chosen": -2.821802854537964, |
|
"logits/rejected": -2.777765989303589, |
|
"logps/chosen": -321.39501953125, |
|
"logps/rejected": -376.64483642578125, |
|
"loss": 0.5028, |
|
"positive_losses": 1.304276466369629, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.30395936965942383, |
|
"rewards/margins": 0.7244865298271179, |
|
"rewards/margins_max": 1.1590335369110107, |
|
"rewards/margins_min": 0.2899397909641266, |
|
"rewards/margins_std": 0.6145419478416443, |
|
"rewards/rejected": -0.42052727937698364, |
|
"step": 230 |
|
}, |
|
{ |
|
"dpo_losses": 0.4363502860069275, |
|
"epoch": 0.68, |
|
"grad_norm": 13.650828107929078, |
|
"learning_rate": 4.765961916422575e-06, |
|
"logits/chosen": -2.7546634674072266, |
|
"logits/rejected": -2.707695722579956, |
|
"logps/chosen": -219.1737518310547, |
|
"logps/rejected": -330.49444580078125, |
|
"loss": 0.5883, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.29220908880233765, |
|
"rewards/margins": 0.6587773561477661, |
|
"rewards/margins_max": 0.8973654508590698, |
|
"rewards/margins_min": 0.42018923163414, |
|
"rewards/margins_std": 0.33741456270217896, |
|
"rewards/rejected": -0.36656829714775085, |
|
"step": 240 |
|
}, |
|
{ |
|
"dpo_losses": 0.40715283155441284, |
|
"epoch": 0.7, |
|
"grad_norm": 5.403626615804181, |
|
"learning_rate": 4.730115807913627e-06, |
|
"logits/chosen": -2.786029577255249, |
|
"logits/rejected": -2.656646490097046, |
|
"logps/chosen": -316.26605224609375, |
|
"logps/rejected": -292.4571838378906, |
|
"loss": 0.4798, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.36320579051971436, |
|
"rewards/margins": 0.7509908676147461, |
|
"rewards/margins_max": 0.927462100982666, |
|
"rewards/margins_min": 0.5745195150375366, |
|
"rewards/margins_std": 0.24956803023815155, |
|
"rewards/rejected": -0.3877849876880646, |
|
"step": 250 |
|
}, |
|
{ |
|
"dpo_losses": 0.45335307717323303, |
|
"epoch": 0.73, |
|
"grad_norm": 29.921037643309493, |
|
"learning_rate": 4.691871654986485e-06, |
|
"logits/chosen": -2.8433797359466553, |
|
"logits/rejected": -2.7910611629486084, |
|
"logps/chosen": -240.71328735351562, |
|
"logps/rejected": -260.13897705078125, |
|
"loss": 0.5549, |
|
"positive_losses": 1.8418042659759521, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.2628856301307678, |
|
"rewards/margins": 0.6381598711013794, |
|
"rewards/margins_max": 0.8531384468078613, |
|
"rewards/margins_min": 0.42318135499954224, |
|
"rewards/margins_std": 0.3040255904197693, |
|
"rewards/rejected": -0.37527427077293396, |
|
"step": 260 |
|
}, |
|
{ |
|
"dpo_losses": 0.45805755257606506, |
|
"epoch": 0.76, |
|
"grad_norm": 3.6869955202700884, |
|
"learning_rate": 4.651270581594054e-06, |
|
"logits/chosen": -2.8275113105773926, |
|
"logits/rejected": -2.726349353790283, |
|
"logps/chosen": -264.3140869140625, |
|
"logps/rejected": -256.37506103515625, |
|
"loss": 0.5553, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.38106483221054077, |
|
"rewards/margins": 0.6026363372802734, |
|
"rewards/margins_max": 0.8345470428466797, |
|
"rewards/margins_min": 0.37072569131851196, |
|
"rewards/margins_std": 0.3279712498188019, |
|
"rewards/rejected": -0.22157149016857147, |
|
"step": 270 |
|
}, |
|
{ |
|
"dpo_losses": 0.46088677644729614, |
|
"epoch": 0.79, |
|
"grad_norm": 11.384071170544201, |
|
"learning_rate": 4.6083562460867545e-06, |
|
"logits/chosen": -2.7374978065490723, |
|
"logits/rejected": -2.705930233001709, |
|
"logps/chosen": -292.6180114746094, |
|
"logps/rejected": -295.0760803222656, |
|
"loss": 0.6126, |
|
"positive_losses": 1.0429108142852783, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.2991539239883423, |
|
"rewards/margins": 0.611792802810669, |
|
"rewards/margins_max": 0.8936999440193176, |
|
"rewards/margins_min": 0.32988566160202026, |
|
"rewards/margins_std": 0.39867693185806274, |
|
"rewards/rejected": -0.3126388192176819, |
|
"step": 280 |
|
}, |
|
{ |
|
"dpo_losses": 0.5089690685272217, |
|
"epoch": 0.82, |
|
"grad_norm": 2.7173946115224865, |
|
"learning_rate": 4.563174794266684e-06, |
|
"logits/chosen": -2.875331163406372, |
|
"logits/rejected": -2.819256544113159, |
|
"logps/chosen": -263.9188232421875, |
|
"logps/rejected": -286.82647705078125, |
|
"loss": 0.593, |
|
"positive_losses": 1.377386450767517, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.2715073823928833, |
|
"rewards/margins": 0.4865007996559143, |
|
"rewards/margins_max": 0.7703573703765869, |
|
"rewards/margins_min": 0.20264430344104767, |
|
"rewards/margins_std": 0.40143370628356934, |
|
"rewards/rejected": -0.2149934470653534, |
|
"step": 290 |
|
}, |
|
{ |
|
"dpo_losses": 0.44498148560523987, |
|
"epoch": 0.85, |
|
"grad_norm": 8.551683262439843, |
|
"learning_rate": 4.5157748097670125e-06, |
|
"logits/chosen": -2.9059486389160156, |
|
"logits/rejected": -2.793186902999878, |
|
"logps/chosen": -319.2405090332031, |
|
"logps/rejected": -338.54998779296875, |
|
"loss": 0.4901, |
|
"positive_losses": 0.0022247314918786287, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.35988515615463257, |
|
"rewards/margins": 0.6180437803268433, |
|
"rewards/margins_max": 0.8190226554870605, |
|
"rewards/margins_min": 0.41706475615501404, |
|
"rewards/margins_std": 0.2842271625995636, |
|
"rewards/rejected": -0.2581585943698883, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_dpo_losses": 0.6506758332252502, |
|
"eval_logits/chosen": -2.7854835987091064, |
|
"eval_logits/rejected": -2.7329776287078857, |
|
"eval_logps/chosen": -303.7291564941406, |
|
"eval_logps/rejected": -289.5481872558594, |
|
"eval_loss": 2.8066518306732178, |
|
"eval_positive_losses": 22.214069366455078, |
|
"eval_rewards/accuracies": 0.6388888955116272, |
|
"eval_rewards/chosen": -0.18507955968379974, |
|
"eval_rewards/margins": 0.11857547610998154, |
|
"eval_rewards/margins_max": 0.47240880131721497, |
|
"eval_rewards/margins_min": -0.25752344727516174, |
|
"eval_rewards/margins_std": 0.32571399211883545, |
|
"eval_rewards/rejected": -0.3036550283432007, |
|
"eval_runtime": 284.7873, |
|
"eval_samples_per_second": 7.023, |
|
"eval_steps_per_second": 0.221, |
|
"step": 300 |
|
}, |
|
{ |
|
"dpo_losses": 0.43647676706314087, |
|
"epoch": 0.87, |
|
"grad_norm": 2.6162448381062275, |
|
"learning_rate": 4.466207261809989e-06, |
|
"logits/chosen": -2.9903199672698975, |
|
"logits/rejected": -2.7902731895446777, |
|
"logps/chosen": -293.12274169921875, |
|
"logps/rejected": -296.422119140625, |
|
"loss": 0.6852, |
|
"positive_losses": 0.944580078125, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.31041496992111206, |
|
"rewards/margins": 0.6650521755218506, |
|
"rewards/margins_max": 0.992100715637207, |
|
"rewards/margins_min": 0.338003545999527, |
|
"rewards/margins_std": 0.4625166058540344, |
|
"rewards/rejected": -0.35463717579841614, |
|
"step": 310 |
|
}, |
|
{ |
|
"dpo_losses": 0.4618608057498932, |
|
"epoch": 0.9, |
|
"grad_norm": 10.483471531606499, |
|
"learning_rate": 4.414525450399713e-06, |
|
"logits/chosen": -2.8283543586730957, |
|
"logits/rejected": -2.7349746227264404, |
|
"logps/chosen": -286.9427185058594, |
|
"logps/rejected": -262.766845703125, |
|
"loss": 0.527, |
|
"positive_losses": 0.8719180822372437, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.32706111669540405, |
|
"rewards/margins": 0.5900775790214539, |
|
"rewards/margins_max": 0.8372209668159485, |
|
"rewards/margins_min": 0.34293434023857117, |
|
"rewards/margins_std": 0.34951338171958923, |
|
"rewards/rejected": -0.2630165219306946, |
|
"step": 320 |
|
}, |
|
{ |
|
"dpo_losses": 0.40510478615760803, |
|
"epoch": 0.93, |
|
"grad_norm": 20.600609246290738, |
|
"learning_rate": 4.360784949008615e-06, |
|
"logits/chosen": -2.9669108390808105, |
|
"logits/rejected": -2.8032517433166504, |
|
"logps/chosen": -316.91192626953125, |
|
"logps/rejected": -283.3198547363281, |
|
"loss": 0.515, |
|
"positive_losses": 0.5270363092422485, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.45585203170776367, |
|
"rewards/margins": 0.8136453628540039, |
|
"rewards/margins_max": 1.1761964559555054, |
|
"rewards/margins_min": 0.45109423995018005, |
|
"rewards/margins_std": 0.512724757194519, |
|
"rewards/rejected": -0.35779333114624023, |
|
"step": 330 |
|
}, |
|
{ |
|
"dpo_losses": 0.47730112075805664, |
|
"epoch": 0.96, |
|
"grad_norm": 2.164730368336074, |
|
"learning_rate": 4.30504354481929e-06, |
|
"logits/chosen": -2.79738450050354, |
|
"logits/rejected": -2.7073657512664795, |
|
"logps/chosen": -230.3443145751953, |
|
"logps/rejected": -234.2275390625, |
|
"loss": 0.496, |
|
"positive_losses": 2.5491890907287598, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.2501987814903259, |
|
"rewards/margins": 0.5649263858795166, |
|
"rewards/margins_max": 0.820625901222229, |
|
"rewards/margins_min": 0.3092268109321594, |
|
"rewards/margins_std": 0.36161375045776367, |
|
"rewards/rejected": -0.3147276043891907, |
|
"step": 340 |
|
}, |
|
{ |
|
"dpo_losses": 0.3645946681499481, |
|
"epoch": 0.99, |
|
"grad_norm": 19.210785792660445, |
|
"learning_rate": 4.247361176585904e-06, |
|
"logits/chosen": -2.791806697845459, |
|
"logits/rejected": -2.676161289215088, |
|
"logps/chosen": -352.7079162597656, |
|
"logps/rejected": -353.04425048828125, |
|
"loss": 0.584, |
|
"positive_losses": 1.5420730113983154, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3225085139274597, |
|
"rewards/margins": 0.8690497279167175, |
|
"rewards/margins_max": 1.1524405479431152, |
|
"rewards/margins_min": 0.5856587886810303, |
|
"rewards/margins_std": 0.4007752537727356, |
|
"rewards/rejected": -0.546541154384613, |
|
"step": 350 |
|
}, |
|
{ |
|
"dpo_losses": 0.44031819701194763, |
|
"epoch": 1.01, |
|
"grad_norm": 3.4688322040336876, |
|
"learning_rate": 4.187799870182038e-06, |
|
"logits/chosen": -2.756261110305786, |
|
"logits/rejected": -2.6450822353363037, |
|
"logps/chosen": -273.16424560546875, |
|
"logps/rejected": -231.5010986328125, |
|
"loss": 0.4573, |
|
"positive_losses": 0.19403228163719177, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.35820913314819336, |
|
"rewards/margins": 0.6459983587265015, |
|
"rewards/margins_max": 0.8495124578475952, |
|
"rewards/margins_min": 0.4424843192100525, |
|
"rewards/margins_std": 0.28781232237815857, |
|
"rewards/rejected": -0.2877892851829529, |
|
"step": 360 |
|
}, |
|
{ |
|
"dpo_losses": 0.3558691143989563, |
|
"epoch": 1.04, |
|
"grad_norm": 71.7335292506231, |
|
"learning_rate": 4.1264236719042365e-06, |
|
"logits/chosen": -2.6822152137756348, |
|
"logits/rejected": -2.662559986114502, |
|
"logps/chosen": -320.59442138671875, |
|
"logps/rejected": -317.09295654296875, |
|
"loss": 0.4251, |
|
"positive_losses": 0.42721253633499146, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.45041507482528687, |
|
"rewards/margins": 0.9831393957138062, |
|
"rewards/margins_max": 1.3708398342132568, |
|
"rewards/margins_min": 0.5954390168190002, |
|
"rewards/margins_std": 0.5482910871505737, |
|
"rewards/rejected": -0.5327242612838745, |
|
"step": 370 |
|
}, |
|
{ |
|
"dpo_losses": 0.3393256664276123, |
|
"epoch": 1.07, |
|
"grad_norm": 4.337233890868313, |
|
"learning_rate": 4.063298579603001e-06, |
|
"logits/chosen": -2.7261626720428467, |
|
"logits/rejected": -2.5453438758850098, |
|
"logps/chosen": -265.1933288574219, |
|
"logps/rejected": -244.08682250976562, |
|
"loss": 0.3984, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4042983055114746, |
|
"rewards/margins": 0.999946117401123, |
|
"rewards/margins_max": 1.2659950256347656, |
|
"rewards/margins_min": 0.7338972091674805, |
|
"rewards/margins_std": 0.3762499690055847, |
|
"rewards/rejected": -0.5956477522850037, |
|
"step": 380 |
|
}, |
|
{ |
|
"dpo_losses": 0.25663647055625916, |
|
"epoch": 1.1, |
|
"grad_norm": 35.28065142871338, |
|
"learning_rate": 3.998492471715272e-06, |
|
"logits/chosen": -2.7409512996673584, |
|
"logits/rejected": -2.752206325531006, |
|
"logps/chosen": -314.38153076171875, |
|
"logps/rejected": -423.4803161621094, |
|
"loss": 0.5701, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4437999129295349, |
|
"rewards/margins": 1.3619416952133179, |
|
"rewards/margins_max": 1.7605613470077515, |
|
"rewards/margins_min": 0.9633218050003052, |
|
"rewards/margins_std": 0.5637335181236267, |
|
"rewards/rejected": -0.9181416630744934, |
|
"step": 390 |
|
}, |
|
{ |
|
"dpo_losses": 0.3513553738594055, |
|
"epoch": 1.13, |
|
"grad_norm": 4.052912034886079, |
|
"learning_rate": 3.932075034274723e-06, |
|
"logits/chosen": -2.73002552986145, |
|
"logits/rejected": -2.6879513263702393, |
|
"logps/chosen": -205.73922729492188, |
|
"logps/rejected": -290.40057373046875, |
|
"loss": 0.4414, |
|
"positive_losses": 0.4541704058647156, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3537348806858063, |
|
"rewards/margins": 0.9853051900863647, |
|
"rewards/margins_max": 1.2278480529785156, |
|
"rewards/margins_min": 0.7427625060081482, |
|
"rewards/margins_std": 0.34300726652145386, |
|
"rewards/rejected": -0.6315703988075256, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"eval_dpo_losses": 0.6385828852653503, |
|
"eval_logits/chosen": -2.71909236907959, |
|
"eval_logits/rejected": -2.670318365097046, |
|
"eval_logps/chosen": -299.07989501953125, |
|
"eval_logps/rejected": -291.5615539550781, |
|
"eval_loss": 2.6622352600097656, |
|
"eval_positive_losses": 20.927839279174805, |
|
"eval_rewards/accuracies": 0.6746031641960144, |
|
"eval_rewards/chosen": -0.13858698308467865, |
|
"eval_rewards/margins": 0.18520160019397736, |
|
"eval_rewards/margins_max": 0.6970763802528381, |
|
"eval_rewards/margins_min": -0.37488874793052673, |
|
"eval_rewards/margins_std": 0.4832788407802582, |
|
"eval_rewards/rejected": -0.323788583278656, |
|
"eval_runtime": 283.8974, |
|
"eval_samples_per_second": 7.045, |
|
"eval_steps_per_second": 0.222, |
|
"step": 400 |
|
}, |
|
{ |
|
"dpo_losses": 0.38113099336624146, |
|
"epoch": 1.15, |
|
"grad_norm": 1.9724463020625589, |
|
"learning_rate": 3.864117685978339e-06, |
|
"logits/chosen": -2.816284656524658, |
|
"logits/rejected": -2.7134735584259033, |
|
"logps/chosen": -242.77761840820312, |
|
"logps/rejected": -272.8990173339844, |
|
"loss": 0.4468, |
|
"positive_losses": 4.795651912689209, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.2276880443096161, |
|
"rewards/margins": 0.8958386182785034, |
|
"rewards/margins_max": 1.3816872835159302, |
|
"rewards/margins_min": 0.40999001264572144, |
|
"rewards/margins_std": 0.6870937943458557, |
|
"rewards/rejected": -0.6681506037712097, |
|
"step": 410 |
|
}, |
|
{ |
|
"dpo_losses": 0.33210596442222595, |
|
"epoch": 1.18, |
|
"grad_norm": 3.4285412656501766, |
|
"learning_rate": 3.794693501389861e-06, |
|
"logits/chosen": -2.8275389671325684, |
|
"logits/rejected": -2.7307045459747314, |
|
"logps/chosen": -293.709716796875, |
|
"logps/rejected": -331.89312744140625, |
|
"loss": 0.4087, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4260416030883789, |
|
"rewards/margins": 1.108737587928772, |
|
"rewards/margins_max": 1.603941559791565, |
|
"rewards/margins_min": 0.613533616065979, |
|
"rewards/margins_std": 0.7003240585327148, |
|
"rewards/rejected": -0.6826959848403931, |
|
"step": 420 |
|
}, |
|
{ |
|
"dpo_losses": 0.34472885727882385, |
|
"epoch": 1.21, |
|
"grad_norm": 2.846847523997402, |
|
"learning_rate": 3.7238771323626822e-06, |
|
"logits/chosen": -2.7846486568450928, |
|
"logits/rejected": -2.6524085998535156, |
|
"logps/chosen": -342.40692138671875, |
|
"logps/rejected": -332.17010498046875, |
|
"loss": 0.5622, |
|
"positive_losses": 4.125036239624023, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.3678087592124939, |
|
"rewards/margins": 1.078407883644104, |
|
"rewards/margins_max": 1.4884113073349, |
|
"rewards/margins_min": 0.6684045195579529, |
|
"rewards/margins_std": 0.5798323154449463, |
|
"rewards/rejected": -0.7105990648269653, |
|
"step": 430 |
|
}, |
|
{ |
|
"dpo_losses": 0.32919952273368835, |
|
"epoch": 1.24, |
|
"grad_norm": 109.42335538231772, |
|
"learning_rate": 3.651744727766676e-06, |
|
"logits/chosen": -2.7272467613220215, |
|
"logits/rejected": -2.66713285446167, |
|
"logps/chosen": -210.4514617919922, |
|
"logps/rejected": -259.1316833496094, |
|
"loss": 0.4028, |
|
"positive_losses": 0.3457130491733551, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.36259937286376953, |
|
"rewards/margins": 1.016867995262146, |
|
"rewards/margins_max": 1.3856614828109741, |
|
"rewards/margins_min": 0.6480745077133179, |
|
"rewards/margins_std": 0.5215528607368469, |
|
"rewards/rejected": -0.6542686223983765, |
|
"step": 440 |
|
}, |
|
{ |
|
"dpo_losses": 0.31118613481521606, |
|
"epoch": 1.27, |
|
"grad_norm": 38.9440234553471, |
|
"learning_rate": 3.57837385160529e-06, |
|
"logits/chosen": -2.659485340118408, |
|
"logits/rejected": -2.6188011169433594, |
|
"logps/chosen": -273.7745361328125, |
|
"logps/rejected": -349.734619140625, |
|
"loss": 0.4823, |
|
"positive_losses": 2.6830811500549316, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.3455764055252075, |
|
"rewards/margins": 1.166411280632019, |
|
"rewards/margins_max": 1.5491106510162354, |
|
"rewards/margins_min": 0.7837120890617371, |
|
"rewards/margins_std": 0.5412184596061707, |
|
"rewards/rejected": -0.8208349347114563, |
|
"step": 450 |
|
}, |
|
{ |
|
"dpo_losses": 0.24196143448352814, |
|
"epoch": 1.3, |
|
"grad_norm": 2.8256101438878116, |
|
"learning_rate": 3.503843399610941e-06, |
|
"logits/chosen": -2.6595611572265625, |
|
"logits/rejected": -2.6660475730895996, |
|
"logps/chosen": -322.4607849121094, |
|
"logps/rejected": -492.70068359375, |
|
"loss": 0.4169, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5088067650794983, |
|
"rewards/margins": 1.4272974729537964, |
|
"rewards/margins_max": 1.8303813934326172, |
|
"rewards/margins_min": 1.0242136716842651, |
|
"rewards/margins_std": 0.5700467824935913, |
|
"rewards/rejected": -0.9184908866882324, |
|
"step": 460 |
|
}, |
|
{ |
|
"dpo_losses": 0.32115817070007324, |
|
"epoch": 1.32, |
|
"grad_norm": 5.028579287398997, |
|
"learning_rate": 3.4282335144083985e-06, |
|
"logits/chosen": -2.567282199859619, |
|
"logits/rejected": -2.616426706314087, |
|
"logps/chosen": -219.5450439453125, |
|
"logps/rejected": -303.61566162109375, |
|
"loss": 0.537, |
|
"positive_losses": 2.556870937347412, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.25023719668388367, |
|
"rewards/margins": 1.0798122882843018, |
|
"rewards/margins_max": 1.3413639068603516, |
|
"rewards/margins_min": 0.8182605504989624, |
|
"rewards/margins_std": 0.36988988518714905, |
|
"rewards/rejected": -0.8295750617980957, |
|
"step": 470 |
|
}, |
|
{ |
|
"dpo_losses": 0.2937307357788086, |
|
"epoch": 1.35, |
|
"grad_norm": 70.59458692509646, |
|
"learning_rate": 3.351625499337395e-06, |
|
"logits/chosen": -2.821207284927368, |
|
"logits/rejected": -2.655557155609131, |
|
"logps/chosen": -336.3492126464844, |
|
"logps/rejected": -360.6393127441406, |
|
"loss": 0.4803, |
|
"positive_losses": 4.53096866607666, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.37952059507369995, |
|
"rewards/margins": 1.2218748331069946, |
|
"rewards/margins_max": 1.5149763822555542, |
|
"rewards/margins_min": 0.9287732243537903, |
|
"rewards/margins_std": 0.4145084023475647, |
|
"rewards/rejected": -0.8423541784286499, |
|
"step": 480 |
|
}, |
|
{ |
|
"dpo_losses": 0.38940221071243286, |
|
"epoch": 1.38, |
|
"grad_norm": 2.687569639501308, |
|
"learning_rate": 3.2741017310271056e-06, |
|
"logits/chosen": -2.6762735843658447, |
|
"logits/rejected": -2.549715280532837, |
|
"logps/chosen": -201.81640625, |
|
"logps/rejected": -277.5948791503906, |
|
"loss": 0.4423, |
|
"positive_losses": 0.7856195569038391, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2850914001464844, |
|
"rewards/margins": 0.8952637910842896, |
|
"rewards/margins_max": 1.2249016761779785, |
|
"rewards/margins_min": 0.5656259655952454, |
|
"rewards/margins_std": 0.46617835760116577, |
|
"rewards/rejected": -0.6101723909378052, |
|
"step": 490 |
|
}, |
|
{ |
|
"dpo_losses": 0.3359260559082031, |
|
"epoch": 1.41, |
|
"grad_norm": 8.068773298281624, |
|
"learning_rate": 3.195745570816532e-06, |
|
"logits/chosen": -2.582794189453125, |
|
"logits/rejected": -2.5295655727386475, |
|
"logps/chosen": -293.511962890625, |
|
"logps/rejected": -310.9229736328125, |
|
"loss": 0.4651, |
|
"positive_losses": 1.6097240447998047, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.3676465153694153, |
|
"rewards/margins": 1.0873607397079468, |
|
"rewards/margins_max": 1.359550952911377, |
|
"rewards/margins_min": 0.8151704668998718, |
|
"rewards/margins_std": 0.38493508100509644, |
|
"rewards/rejected": -0.7197142243385315, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"eval_dpo_losses": 0.6384106874465942, |
|
"eval_logits/chosen": -2.7216532230377197, |
|
"eval_logits/rejected": -2.6714365482330322, |
|
"eval_logps/chosen": -298.51165771484375, |
|
"eval_logps/rejected": -292.0330505371094, |
|
"eval_loss": 2.6646323204040527, |
|
"eval_positive_losses": 20.608970642089844, |
|
"eval_rewards/accuracies": 0.6626983880996704, |
|
"eval_rewards/chosen": -0.13290439546108246, |
|
"eval_rewards/margins": 0.1955995112657547, |
|
"eval_rewards/margins_max": 0.7628427743911743, |
|
"eval_rewards/margins_min": -0.3882632255554199, |
|
"eval_rewards/margins_std": 0.5195400714874268, |
|
"eval_rewards/rejected": -0.32850393652915955, |
|
"eval_runtime": 285.1068, |
|
"eval_samples_per_second": 7.015, |
|
"eval_steps_per_second": 0.221, |
|
"step": 500 |
|
}, |
|
{ |
|
"dpo_losses": 0.33750054240226746, |
|
"epoch": 1.44, |
|
"grad_norm": 5.205737491948956, |
|
"learning_rate": 3.116641275116018e-06, |
|
"logits/chosen": -2.409104108810425, |
|
"logits/rejected": -2.434281349182129, |
|
"logps/chosen": -200.69908142089844, |
|
"logps/rejected": -388.02001953125, |
|
"loss": 0.398, |
|
"positive_losses": 1.0130329132080078, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.26239317655563354, |
|
"rewards/margins": 1.0276142358779907, |
|
"rewards/margins_max": 1.2639634609222412, |
|
"rewards/margins_min": 0.7912648916244507, |
|
"rewards/margins_std": 0.3342483639717102, |
|
"rewards/rejected": -0.7652209997177124, |
|
"step": 510 |
|
}, |
|
{ |
|
"dpo_losses": 0.2813549041748047, |
|
"epoch": 1.46, |
|
"grad_norm": 81.57789306373847, |
|
"learning_rate": 3.0368739048062956e-06, |
|
"logits/chosen": -2.748539447784424, |
|
"logits/rejected": -2.641331672668457, |
|
"logps/chosen": -305.63671875, |
|
"logps/rejected": -331.99383544921875, |
|
"loss": 0.5374, |
|
"positive_losses": 10.878652572631836, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.30826514959335327, |
|
"rewards/margins": 1.2961227893829346, |
|
"rewards/margins_max": 1.768711805343628, |
|
"rewards/margins_min": 0.823533833026886, |
|
"rewards/margins_std": 0.6683418154716492, |
|
"rewards/rejected": -0.9878576397895813, |
|
"step": 520 |
|
}, |
|
{ |
|
"dpo_losses": 0.2712605893611908, |
|
"epoch": 1.49, |
|
"grad_norm": 75.79271498324394, |
|
"learning_rate": 2.956529233772492e-06, |
|
"logits/chosen": -2.689558744430542, |
|
"logits/rejected": -2.6852006912231445, |
|
"logps/chosen": -292.9363098144531, |
|
"logps/rejected": -357.29400634765625, |
|
"loss": 0.3968, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.4060862958431244, |
|
"rewards/margins": 1.2946475744247437, |
|
"rewards/margins_max": 1.633283019065857, |
|
"rewards/margins_min": 0.9560121297836304, |
|
"rewards/margins_std": 0.47890281677246094, |
|
"rewards/rejected": -0.8885613679885864, |
|
"step": 530 |
|
}, |
|
{ |
|
"dpo_losses": 0.27980148792266846, |
|
"epoch": 1.52, |
|
"grad_norm": 56.497845904041995, |
|
"learning_rate": 2.8756936566714317e-06, |
|
"logits/chosen": -2.7521424293518066, |
|
"logits/rejected": -2.6638569831848145, |
|
"logps/chosen": -310.28753662109375, |
|
"logps/rejected": -327.8934020996094, |
|
"loss": 0.5646, |
|
"positive_losses": 1.8035399913787842, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4047401547431946, |
|
"rewards/margins": 1.3088172674179077, |
|
"rewards/margins_max": 1.7437057495117188, |
|
"rewards/margins_min": 0.8739286661148071, |
|
"rewards/margins_std": 0.6150254011154175, |
|
"rewards/rejected": -0.9040770530700684, |
|
"step": 540 |
|
}, |
|
{ |
|
"dpo_losses": 0.30083730816841125, |
|
"epoch": 1.55, |
|
"grad_norm": 4.003119682647961, |
|
"learning_rate": 2.794454096031429e-06, |
|
"logits/chosen": -2.722224235534668, |
|
"logits/rejected": -2.6790289878845215, |
|
"logps/chosen": -281.0094299316406, |
|
"logps/rejected": -354.3661804199219, |
|
"loss": 0.387, |
|
"positive_losses": 0.4849150776863098, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3756260275840759, |
|
"rewards/margins": 1.2488583326339722, |
|
"rewards/margins_max": 1.8290369510650635, |
|
"rewards/margins_min": 0.668679416179657, |
|
"rewards/margins_std": 0.820496678352356, |
|
"rewards/rejected": -0.8732322454452515, |
|
"step": 550 |
|
}, |
|
{ |
|
"dpo_losses": 0.2974298894405365, |
|
"epoch": 1.58, |
|
"grad_norm": 4.054485926091979, |
|
"learning_rate": 2.71289790878446e-06, |
|
"logits/chosen": -2.6345105171203613, |
|
"logits/rejected": -2.6252238750457764, |
|
"logps/chosen": -266.069580078125, |
|
"logps/rejected": -428.830322265625, |
|
"loss": 0.4149, |
|
"positive_losses": 0.9539718627929688, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.36717483401298523, |
|
"rewards/margins": 1.2800363302230835, |
|
"rewards/margins_max": 1.847495675086975, |
|
"rewards/margins_min": 0.7125769257545471, |
|
"rewards/margins_std": 0.8025087118148804, |
|
"rewards/rejected": -0.9128614664077759, |
|
"step": 560 |
|
}, |
|
{ |
|
"dpo_losses": 0.2434779852628708, |
|
"epoch": 1.61, |
|
"grad_norm": 13.695790466916172, |
|
"learning_rate": 2.6311127923312156e-06, |
|
"logits/chosen": -2.7691166400909424, |
|
"logits/rejected": -2.570652723312378, |
|
"logps/chosen": -357.65362548828125, |
|
"logps/rejected": -422.05902099609375, |
|
"loss": 0.3595, |
|
"positive_losses": 1.6939789056777954, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.35474830865859985, |
|
"rewards/margins": 1.4161592721939087, |
|
"rewards/margins_max": 1.7364327907562256, |
|
"rewards/margins_min": 1.0958856344223022, |
|
"rewards/margins_std": 0.4529353678226471, |
|
"rewards/rejected": -1.061410903930664, |
|
"step": 570 |
|
}, |
|
{ |
|
"dpo_losses": 0.26757892966270447, |
|
"epoch": 1.63, |
|
"grad_norm": 45.10124515413211, |
|
"learning_rate": 2.549186690240057e-06, |
|
"logits/chosen": -2.7345547676086426, |
|
"logits/rejected": -2.6686453819274902, |
|
"logps/chosen": -254.34683227539062, |
|
"logps/rejected": -315.84857177734375, |
|
"loss": 0.5253, |
|
"positive_losses": 0.2569518983364105, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.43774762749671936, |
|
"rewards/margins": 1.3818461894989014, |
|
"rewards/margins_max": 1.815768837928772, |
|
"rewards/margins_min": 0.9479236602783203, |
|
"rewards/margins_std": 0.6136592626571655, |
|
"rewards/rejected": -0.9440986514091492, |
|
"step": 580 |
|
}, |
|
{ |
|
"dpo_losses": 0.28964871168136597, |
|
"epoch": 1.66, |
|
"grad_norm": 7.214863048583421, |
|
"learning_rate": 2.4672076976812548e-06, |
|
"logits/chosen": -2.6155965328216553, |
|
"logits/rejected": -2.465445041656494, |
|
"logps/chosen": -330.9356994628906, |
|
"logps/rejected": -382.1274719238281, |
|
"loss": 0.4009, |
|
"positive_losses": 0.20948180556297302, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.42689600586891174, |
|
"rewards/margins": 1.3260236978530884, |
|
"rewards/margins_max": 1.9126968383789062, |
|
"rewards/margins_min": 0.7393506169319153, |
|
"rewards/margins_std": 0.8296809196472168, |
|
"rewards/rejected": -0.8991276025772095, |
|
"step": 590 |
|
}, |
|
{ |
|
"dpo_losses": 0.3019997179508209, |
|
"epoch": 1.69, |
|
"grad_norm": 2.5656935168095365, |
|
"learning_rate": 2.3852639666982218e-06, |
|
"logits/chosen": -2.696664571762085, |
|
"logits/rejected": -2.6669843196868896, |
|
"logps/chosen": -210.6801300048828, |
|
"logps/rejected": -339.8411560058594, |
|
"loss": 0.5269, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4121875762939453, |
|
"rewards/margins": 1.2055537700653076, |
|
"rewards/margins_max": 1.5584628582000732, |
|
"rewards/margins_min": 0.8526442646980286, |
|
"rewards/margins_std": 0.49908918142318726, |
|
"rewards/rejected": -0.793366014957428, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"eval_dpo_losses": 0.6337167024612427, |
|
"eval_logits/chosen": -2.653167724609375, |
|
"eval_logits/rejected": -2.6025989055633545, |
|
"eval_logps/chosen": -326.8940734863281, |
|
"eval_logps/rejected": -323.9284362792969, |
|
"eval_loss": 5.016211986541748, |
|
"eval_positive_losses": 46.1312141418457, |
|
"eval_rewards/accuracies": 0.6626983880996704, |
|
"eval_rewards/chosen": -0.4167284667491913, |
|
"eval_rewards/margins": 0.2307295948266983, |
|
"eval_rewards/margins_max": 0.8626330494880676, |
|
"eval_rewards/margins_min": -0.4616139829158783, |
|
"eval_rewards/margins_std": 0.5963027477264404, |
|
"eval_rewards/rejected": -0.647458016872406, |
|
"eval_runtime": 284.3544, |
|
"eval_samples_per_second": 7.033, |
|
"eval_steps_per_second": 0.222, |
|
"step": 600 |
|
}, |
|
{ |
|
"dpo_losses": 0.3822602331638336, |
|
"epoch": 1.72, |
|
"grad_norm": 4.499258828055022, |
|
"learning_rate": 2.303443611417584e-06, |
|
"logits/chosen": -2.5053551197052, |
|
"logits/rejected": -2.452122449874878, |
|
"logps/chosen": -285.8536682128906, |
|
"logps/rejected": -345.1878662109375, |
|
"loss": 0.5838, |
|
"positive_losses": 7.787275791168213, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.20293152332305908, |
|
"rewards/margins": 0.9711725115776062, |
|
"rewards/margins_max": 1.5771162509918213, |
|
"rewards/margins_min": 0.36522871255874634, |
|
"rewards/margins_std": 0.8569338917732239, |
|
"rewards/rejected": -0.7682409286499023, |
|
"step": 610 |
|
}, |
|
{ |
|
"dpo_losses": 0.2892194390296936, |
|
"epoch": 1.75, |
|
"grad_norm": 5.081357332154091, |
|
"learning_rate": 2.2218346133000264e-06, |
|
"logits/chosen": -2.5583109855651855, |
|
"logits/rejected": -2.4557156562805176, |
|
"logps/chosen": -241.0048370361328, |
|
"logps/rejected": -288.3791809082031, |
|
"loss": 0.4921, |
|
"positive_losses": 4.499431610107422, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3376957178115845, |
|
"rewards/margins": 1.2491505146026611, |
|
"rewards/margins_max": 1.6783252954483032, |
|
"rewards/margins_min": 0.8199755549430847, |
|
"rewards/margins_std": 0.606944739818573, |
|
"rewards/rejected": -0.9114546775817871, |
|
"step": 620 |
|
}, |
|
{ |
|
"dpo_losses": 0.28637608885765076, |
|
"epoch": 1.77, |
|
"grad_norm": 31.375745057762174, |
|
"learning_rate": 2.140524726533792e-06, |
|
"logits/chosen": -2.611680030822754, |
|
"logits/rejected": -2.492157459259033, |
|
"logps/chosen": -342.9209899902344, |
|
"logps/rejected": -305.1431884765625, |
|
"loss": 0.381, |
|
"positive_losses": 1.477830171585083, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.5035167336463928, |
|
"rewards/margins": 1.3139656782150269, |
|
"rewards/margins_max": 1.746787428855896, |
|
"rewards/margins_min": 0.8811438679695129, |
|
"rewards/margins_std": 0.6121026277542114, |
|
"rewards/rejected": -0.8104490041732788, |
|
"step": 630 |
|
}, |
|
{ |
|
"dpo_losses": 0.27914196252822876, |
|
"epoch": 1.8, |
|
"grad_norm": 56.714432514737815, |
|
"learning_rate": 2.059601383672566e-06, |
|
"logits/chosen": -2.6837282180786133, |
|
"logits/rejected": -2.669649600982666, |
|
"logps/chosen": -205.0702362060547, |
|
"logps/rejected": -292.3086853027344, |
|
"loss": 0.6023, |
|
"positive_losses": 3.721278429031372, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2986445426940918, |
|
"rewards/margins": 1.2236577272415161, |
|
"rewards/margins_max": 1.4498487710952759, |
|
"rewards/margins_min": 0.9974665641784668, |
|
"rewards/margins_std": 0.31988245248794556, |
|
"rewards/rejected": -0.9250132441520691, |
|
"step": 640 |
|
}, |
|
{ |
|
"dpo_losses": 0.3276744782924652, |
|
"epoch": 1.83, |
|
"grad_norm": 108.41037124625116, |
|
"learning_rate": 1.9791516016192214e-06, |
|
"logits/chosen": -2.7006583213806152, |
|
"logits/rejected": -2.657177686691284, |
|
"logps/chosen": -219.15249633789062, |
|
"logps/rejected": -298.5721130371094, |
|
"loss": 0.3902, |
|
"positive_losses": 0.02580871619284153, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2983975410461426, |
|
"rewards/margins": 1.0909839868545532, |
|
"rewards/margins_max": 1.5546290874481201, |
|
"rewards/margins_min": 0.6273389458656311, |
|
"rewards/margins_std": 0.6556931138038635, |
|
"rewards/rejected": -0.7925864458084106, |
|
"step": 650 |
|
}, |
|
{ |
|
"dpo_losses": 0.37573254108428955, |
|
"epoch": 1.86, |
|
"grad_norm": 4.49512981087327, |
|
"learning_rate": 1.8992618880565039e-06, |
|
"logits/chosen": -2.4442310333251953, |
|
"logits/rejected": -2.430908679962158, |
|
"logps/chosen": -247.6465301513672, |
|
"logps/rejected": -270.6328125, |
|
"loss": 0.673, |
|
"positive_losses": 9.613517761230469, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.23618540167808533, |
|
"rewards/margins": 1.0627477169036865, |
|
"rewards/margins_max": 1.7770532369613647, |
|
"rewards/margins_min": 0.3484421372413635, |
|
"rewards/margins_std": 1.0101807117462158, |
|
"rewards/rejected": -0.8265622854232788, |
|
"step": 660 |
|
}, |
|
{ |
|
"dpo_losses": 0.2606434226036072, |
|
"epoch": 1.89, |
|
"grad_norm": 16.900887081181846, |
|
"learning_rate": 1.8200181484252888e-06, |
|
"logits/chosen": -2.728989601135254, |
|
"logits/rejected": -2.65732741355896, |
|
"logps/chosen": -339.34649658203125, |
|
"logps/rejected": -414.9603576660156, |
|
"loss": 0.3802, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.46093645691871643, |
|
"rewards/margins": 1.4545660018920898, |
|
"rewards/margins_max": 1.9622220993041992, |
|
"rewards/margins_min": 0.9469099044799805, |
|
"rewards/margins_std": 0.7179341316223145, |
|
"rewards/rejected": -0.9936296343803406, |
|
"step": 670 |
|
}, |
|
{ |
|
"dpo_losses": 0.22198085486888885, |
|
"epoch": 1.92, |
|
"grad_norm": 41.51526627620706, |
|
"learning_rate": 1.7415055935504234e-06, |
|
"logits/chosen": -2.705850601196289, |
|
"logits/rejected": -2.6019129753112793, |
|
"logps/chosen": -284.8177795410156, |
|
"logps/rejected": -411.76708984375, |
|
"loss": 0.4159, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.37507936358451843, |
|
"rewards/margins": 1.5223026275634766, |
|
"rewards/margins_max": 1.767469048500061, |
|
"rewards/margins_min": 1.2771363258361816, |
|
"rewards/margins_std": 0.34671759605407715, |
|
"rewards/rejected": -1.1472233533859253, |
|
"step": 680 |
|
}, |
|
{ |
|
"dpo_losses": 0.3486565351486206, |
|
"epoch": 1.94, |
|
"grad_norm": 138.4896910648948, |
|
"learning_rate": 1.6638086480134954e-06, |
|
"logits/chosen": -2.577733039855957, |
|
"logits/rejected": -2.557359218597412, |
|
"logps/chosen": -144.18289184570312, |
|
"logps/rejected": -205.9375762939453, |
|
"loss": 0.4276, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3236793875694275, |
|
"rewards/margins": 1.0879504680633545, |
|
"rewards/margins_max": 1.6922643184661865, |
|
"rewards/margins_min": 0.4836367070674896, |
|
"rewards/margins_std": 0.85462886095047, |
|
"rewards/rejected": -0.7642711400985718, |
|
"step": 690 |
|
}, |
|
{ |
|
"dpo_losses": 0.24665436148643494, |
|
"epoch": 1.97, |
|
"grad_norm": 22.11936611709013, |
|
"learning_rate": 1.5870108593710473e-06, |
|
"logits/chosen": -2.422232151031494, |
|
"logits/rejected": -2.351428508758545, |
|
"logps/chosen": -301.96270751953125, |
|
"logps/rejected": -312.5522766113281, |
|
"loss": 0.3513, |
|
"positive_losses": 0.03521118313074112, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5125211477279663, |
|
"rewards/margins": 1.4875143766403198, |
|
"rewards/margins_max": 1.8352491855621338, |
|
"rewards/margins_min": 1.139779806137085, |
|
"rewards/margins_std": 0.4917708933353424, |
|
"rewards/rejected": -0.9749932289123535, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_dpo_losses": 0.6398608684539795, |
|
"eval_logits/chosen": -2.631686210632324, |
|
"eval_logits/rejected": -2.5807785987854004, |
|
"eval_logps/chosen": -326.29583740234375, |
|
"eval_logps/rejected": -325.2173156738281, |
|
"eval_loss": 4.895449161529541, |
|
"eval_positive_losses": 45.593257904052734, |
|
"eval_rewards/accuracies": 0.6626983880996704, |
|
"eval_rewards/chosen": -0.41074639558792114, |
|
"eval_rewards/margins": 0.24960003793239594, |
|
"eval_rewards/margins_max": 0.9743701815605164, |
|
"eval_rewards/margins_min": -0.5254129767417908, |
|
"eval_rewards/margins_std": 0.6826153993606567, |
|
"eval_rewards/rejected": -0.6603464484214783, |
|
"eval_runtime": 284.0532, |
|
"eval_samples_per_second": 7.041, |
|
"eval_steps_per_second": 0.222, |
|
"step": 700 |
|
}, |
|
{ |
|
"dpo_losses": 0.32716676592826843, |
|
"epoch": 2.0, |
|
"grad_norm": 24.97077759875969, |
|
"learning_rate": 1.511194808315853e-06, |
|
"logits/chosen": -2.5247268676757812, |
|
"logits/rejected": -2.486575126647949, |
|
"logps/chosen": -229.55859375, |
|
"logps/rejected": -268.9668273925781, |
|
"loss": 0.4163, |
|
"positive_losses": 0.5168693661689758, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.28237053751945496, |
|
"rewards/margins": 1.146689772605896, |
|
"rewards/margins_max": 1.594560146331787, |
|
"rewards/margins_min": 0.6988194584846497, |
|
"rewards/margins_std": 0.6333842873573303, |
|
"rewards/rejected": -0.8643192052841187, |
|
"step": 710 |
|
}, |
|
{ |
|
"dpo_losses": 0.21982404589653015, |
|
"epoch": 2.03, |
|
"grad_norm": 1.38353688722549, |
|
"learning_rate": 1.4364420198778662e-06, |
|
"logits/chosen": -2.7155685424804688, |
|
"logits/rejected": -2.609267234802246, |
|
"logps/chosen": -343.7250061035156, |
|
"logps/rejected": -450.3816833496094, |
|
"loss": 0.3634, |
|
"positive_losses": 4.519556999206543, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.456549733877182, |
|
"rewards/margins": 1.6569738388061523, |
|
"rewards/margins_max": 2.1707637310028076, |
|
"rewards/margins_min": 1.143183946609497, |
|
"rewards/margins_std": 0.7266086935997009, |
|
"rewards/rejected": -1.200424075126648, |
|
"step": 720 |
|
}, |
|
{ |
|
"dpo_losses": 0.25513142347335815, |
|
"epoch": 2.06, |
|
"grad_norm": 3.318839287140489, |
|
"learning_rate": 1.3628328757603243e-06, |
|
"logits/chosen": -2.6959056854248047, |
|
"logits/rejected": -2.5843894481658936, |
|
"logps/chosen": -267.92010498046875, |
|
"logps/rejected": -357.7880554199219, |
|
"loss": 0.2684, |
|
"positive_losses": 0.038549043238162994, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.31957200169563293, |
|
"rewards/margins": 1.4021821022033691, |
|
"rewards/margins_max": 1.6910970211029053, |
|
"rewards/margins_min": 1.113266944885254, |
|
"rewards/margins_std": 0.4085877537727356, |
|
"rewards/rejected": -1.082610011100769, |
|
"step": 730 |
|
}, |
|
{ |
|
"dpo_losses": 0.30432650446891785, |
|
"epoch": 2.08, |
|
"grad_norm": 36.95329512381558, |
|
"learning_rate": 1.2904465279052725e-06, |
|
"logits/chosen": -2.634579658508301, |
|
"logits/rejected": -2.56650710105896, |
|
"logps/chosen": -284.7083740234375, |
|
"logps/rejected": -317.93389892578125, |
|
"loss": 0.4788, |
|
"positive_losses": 3.9446158409118652, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.22743673622608185, |
|
"rewards/margins": 1.2028728723526, |
|
"rewards/margins_max": 1.6675021648406982, |
|
"rewards/margins_min": 0.7382434606552124, |
|
"rewards/margins_std": 0.6570851802825928, |
|
"rewards/rejected": -0.9754360914230347, |
|
"step": 740 |
|
}, |
|
{ |
|
"dpo_losses": 0.2779385447502136, |
|
"epoch": 2.11, |
|
"grad_norm": 5.125527517570657, |
|
"learning_rate": 1.219360813381446e-06, |
|
"logits/chosen": -2.462111234664917, |
|
"logits/rejected": -2.498530387878418, |
|
"logps/chosen": -159.8828887939453, |
|
"logps/rejected": -236.5124053955078, |
|
"loss": 0.2882, |
|
"positive_losses": 0.18086472153663635, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3097127676010132, |
|
"rewards/margins": 1.3083655834197998, |
|
"rewards/margins_max": 1.592053771018982, |
|
"rewards/margins_min": 1.0246771574020386, |
|
"rewards/margins_std": 0.4011960029602051, |
|
"rewards/rejected": -0.9986528158187866, |
|
"step": 750 |
|
}, |
|
{ |
|
"dpo_losses": 0.22775745391845703, |
|
"epoch": 2.14, |
|
"grad_norm": 18.30294185818396, |
|
"learning_rate": 1.1496521706860392e-06, |
|
"logits/chosen": -2.651033401489258, |
|
"logits/rejected": -2.537503242492676, |
|
"logps/chosen": -291.1076354980469, |
|
"logps/rejected": -382.2750244140625, |
|
"loss": 0.3201, |
|
"positive_losses": 2.4156768321990967, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.36460763216018677, |
|
"rewards/margins": 1.5596258640289307, |
|
"rewards/margins_max": 1.959398627281189, |
|
"rewards/margins_min": 1.1598527431488037, |
|
"rewards/margins_std": 0.5653643012046814, |
|
"rewards/rejected": -1.1950181722640991, |
|
"step": 760 |
|
}, |
|
{ |
|
"dpo_losses": 0.21675769984722137, |
|
"epoch": 2.17, |
|
"grad_norm": 21.74451175593295, |
|
"learning_rate": 1.0813955575503588e-06, |
|
"logits/chosen": -2.604640483856201, |
|
"logits/rejected": -2.5890743732452393, |
|
"logps/chosen": -301.3707580566406, |
|
"logps/rejected": -381.50506591796875, |
|
"loss": 0.3818, |
|
"positive_losses": 0.353890985250473, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4118216633796692, |
|
"rewards/margins": 1.6504104137420654, |
|
"rewards/margins_max": 2.0086562633514404, |
|
"rewards/margins_min": 1.2921648025512695, |
|
"rewards/margins_std": 0.5066360235214233, |
|
"rewards/rejected": -1.2385889291763306, |
|
"step": 770 |
|
}, |
|
{ |
|
"dpo_losses": 0.24907536804676056, |
|
"epoch": 2.2, |
|
"grad_norm": 5.9813942121541706, |
|
"learning_rate": 1.0146643703377488e-06, |
|
"logits/chosen": -2.734790325164795, |
|
"logits/rejected": -2.537445306777954, |
|
"logps/chosen": -292.0768127441406, |
|
"logps/rejected": -332.3907775878906, |
|
"loss": 0.4576, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5042055249214172, |
|
"rewards/margins": 1.5501843690872192, |
|
"rewards/margins_max": 2.1402950286865234, |
|
"rewards/margins_min": 0.9600737690925598, |
|
"rewards/margins_std": 0.8345423936843872, |
|
"rewards/rejected": -1.0459789037704468, |
|
"step": 780 |
|
}, |
|
{ |
|
"dpo_losses": 0.20471492409706116, |
|
"epoch": 2.23, |
|
"grad_norm": 2.116074836272933, |
|
"learning_rate": 9.495303651204496e-07, |
|
"logits/chosen": -2.611013889312744, |
|
"logits/rejected": -2.5461339950561523, |
|
"logps/chosen": -319.31951904296875, |
|
"logps/rejected": -404.64886474609375, |
|
"loss": 0.4666, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.3257637321949005, |
|
"rewards/margins": 1.60427725315094, |
|
"rewards/margins_max": 1.9402239322662354, |
|
"rewards/margins_min": 1.2683299779891968, |
|
"rewards/margins_std": 0.4751007556915283, |
|
"rewards/rejected": -1.2785133123397827, |
|
"step": 790 |
|
}, |
|
{ |
|
"dpo_losses": 0.2155081331729889, |
|
"epoch": 2.25, |
|
"grad_norm": 12.840834237921664, |
|
"learning_rate": 8.860635805202616e-07, |
|
"logits/chosen": -2.615548610687256, |
|
"logits/rejected": -2.5271685123443604, |
|
"logps/chosen": -304.5693054199219, |
|
"logps/rejected": -362.62225341796875, |
|
"loss": 0.2795, |
|
"positive_losses": 0.01874256134033203, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4078141152858734, |
|
"rewards/margins": 1.58090341091156, |
|
"rewards/margins_max": 1.9335031509399414, |
|
"rewards/margins_min": 1.2283036708831787, |
|
"rewards/margins_std": 0.49865132570266724, |
|
"rewards/rejected": -1.1730893850326538, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"eval_dpo_losses": 0.6266348958015442, |
|
"eval_logits/chosen": -2.604722738265991, |
|
"eval_logits/rejected": -2.554541826248169, |
|
"eval_logps/chosen": -324.4103088378906, |
|
"eval_logps/rejected": -327.570556640625, |
|
"eval_loss": 4.769333839416504, |
|
"eval_positive_losses": 43.908966064453125, |
|
"eval_rewards/accuracies": 0.682539701461792, |
|
"eval_rewards/chosen": -0.3918909430503845, |
|
"eval_rewards/margins": 0.29198840260505676, |
|
"eval_rewards/margins_max": 1.0657094717025757, |
|
"eval_rewards/margins_min": -0.5265500545501709, |
|
"eval_rewards/margins_std": 0.7165747284889221, |
|
"eval_rewards/rejected": -0.6838793158531189, |
|
"eval_runtime": 284.6208, |
|
"eval_samples_per_second": 7.027, |
|
"eval_steps_per_second": 0.221, |
|
"step": 800 |
|
}, |
|
{ |
|
"dpo_losses": 0.25243309140205383, |
|
"epoch": 2.28, |
|
"grad_norm": 71.28169182787225, |
|
"learning_rate": 8.24332262395994e-07, |
|
"logits/chosen": -2.6843011379241943, |
|
"logits/rejected": -2.6510274410247803, |
|
"logps/chosen": -252.87222290039062, |
|
"logps/rejected": -349.50506591796875, |
|
"loss": 0.3457, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3779425024986267, |
|
"rewards/margins": 1.4469493627548218, |
|
"rewards/margins_max": 2.0055932998657227, |
|
"rewards/margins_min": 0.8883053660392761, |
|
"rewards/margins_std": 0.7900420427322388, |
|
"rewards/rejected": -1.0690069198608398, |
|
"step": 810 |
|
}, |
|
{ |
|
"dpo_losses": 0.25832101702690125, |
|
"epoch": 2.31, |
|
"grad_norm": 245.8246008336025, |
|
"learning_rate": 7.644027904586587e-07, |
|
"logits/chosen": -2.637300968170166, |
|
"logits/rejected": -2.5708765983581543, |
|
"logps/chosen": -227.47787475585938, |
|
"logps/rejected": -322.4635925292969, |
|
"loss": 0.5117, |
|
"positive_losses": 4.7760443687438965, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2738664150238037, |
|
"rewards/margins": 1.4220813512802124, |
|
"rewards/margins_max": 1.896773338317871, |
|
"rewards/margins_min": 0.9473894238471985, |
|
"rewards/margins_std": 0.6713159084320068, |
|
"rewards/rejected": -1.1482150554656982, |
|
"step": 820 |
|
}, |
|
{ |
|
"dpo_losses": 0.16726334393024445, |
|
"epoch": 2.34, |
|
"grad_norm": 1.7593306703555782, |
|
"learning_rate": 7.06339606893347e-07, |
|
"logits/chosen": -2.6265785694122314, |
|
"logits/rejected": -2.5026650428771973, |
|
"logps/chosen": -399.26031494140625, |
|
"logps/rejected": -387.8680419921875, |
|
"loss": 0.2112, |
|
"positive_losses": 0.06428833305835724, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5203009247779846, |
|
"rewards/margins": 1.8566944599151611, |
|
"rewards/margins_max": 2.1375911235809326, |
|
"rewards/margins_min": 1.5757976770401, |
|
"rewards/margins_std": 0.3972480893135071, |
|
"rewards/rejected": -1.3363934755325317, |
|
"step": 830 |
|
}, |
|
{ |
|
"dpo_losses": 0.2129584103822708, |
|
"epoch": 2.37, |
|
"grad_norm": 28.81303382097675, |
|
"learning_rate": 6.502051470645149e-07, |
|
"logits/chosen": -2.721235513687134, |
|
"logits/rejected": -2.5673904418945312, |
|
"logps/chosen": -341.94073486328125, |
|
"logps/rejected": -413.451171875, |
|
"loss": 0.3816, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.34823325276374817, |
|
"rewards/margins": 1.6308362483978271, |
|
"rewards/margins_max": 1.947928786277771, |
|
"rewards/margins_min": 1.313744068145752, |
|
"rewards/margins_std": 0.4484362006187439, |
|
"rewards/rejected": -1.282603144645691, |
|
"step": 840 |
|
}, |
|
{ |
|
"dpo_losses": 0.204869344830513, |
|
"epoch": 2.39, |
|
"grad_norm": 1.8754374311724713, |
|
"learning_rate": 5.960597723792194e-07, |
|
"logits/chosen": -2.610276937484741, |
|
"logits/rejected": -2.4925060272216797, |
|
"logps/chosen": -280.25665283203125, |
|
"logps/rejected": -387.3306579589844, |
|
"loss": 0.429, |
|
"positive_losses": 4.123325824737549, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3918009400367737, |
|
"rewards/margins": 1.684704065322876, |
|
"rewards/margins_max": 2.1113224029541016, |
|
"rewards/margins_min": 1.2580856084823608, |
|
"rewards/margins_std": 0.6033294796943665, |
|
"rewards/rejected": -1.2929030656814575, |
|
"step": 850 |
|
}, |
|
{ |
|
"dpo_losses": 0.18849320709705353, |
|
"epoch": 2.42, |
|
"grad_norm": 2.415129688011, |
|
"learning_rate": 5.43961705380465e-07, |
|
"logits/chosen": -2.5959548950195312, |
|
"logits/rejected": -2.5745034217834473, |
|
"logps/chosen": -274.2839660644531, |
|
"logps/rejected": -413.91650390625, |
|
"loss": 0.3573, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.4205331802368164, |
|
"rewards/margins": 1.8768619298934937, |
|
"rewards/margins_max": 2.31927490234375, |
|
"rewards/margins_min": 1.4344491958618164, |
|
"rewards/margins_std": 0.6256662011146545, |
|
"rewards/rejected": -1.4563289880752563, |
|
"step": 860 |
|
}, |
|
{ |
|
"dpo_losses": 0.24235720932483673, |
|
"epoch": 2.45, |
|
"grad_norm": 2.200547137281921, |
|
"learning_rate": 4.939669671404871e-07, |
|
"logits/chosen": -2.5770421028137207, |
|
"logits/rejected": -2.521031618118286, |
|
"logps/chosen": -251.25564575195312, |
|
"logps/rejected": -441.2269592285156, |
|
"loss": 0.4093, |
|
"positive_losses": 5.246364116668701, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2414274662733078, |
|
"rewards/margins": 1.4953609704971313, |
|
"rewards/margins_max": 1.9941730499267578, |
|
"rewards/margins_min": 0.9965487718582153, |
|
"rewards/margins_std": 0.7054268717765808, |
|
"rewards/rejected": -1.2539334297180176, |
|
"step": 870 |
|
}, |
|
{ |
|
"dpo_losses": 0.2292724847793579, |
|
"epoch": 2.48, |
|
"grad_norm": 33.76430360961392, |
|
"learning_rate": 4.461293170212644e-07, |
|
"logits/chosen": -2.6965794563293457, |
|
"logits/rejected": -2.543576717376709, |
|
"logps/chosen": -292.703125, |
|
"logps/rejected": -368.0157775878906, |
|
"loss": 0.3654, |
|
"positive_losses": 5.510960578918457, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.29951637983322144, |
|
"rewards/margins": 1.5260313749313354, |
|
"rewards/margins_max": 1.979828119277954, |
|
"rewards/margins_min": 1.0722346305847168, |
|
"rewards/margins_std": 0.6417653560638428, |
|
"rewards/rejected": -1.2265150547027588, |
|
"step": 880 |
|
}, |
|
{ |
|
"dpo_losses": 0.15858207643032074, |
|
"epoch": 2.51, |
|
"grad_norm": 5.727775081054632, |
|
"learning_rate": 4.005001948670606e-07, |
|
"logits/chosen": -2.694242238998413, |
|
"logits/rejected": -2.595343828201294, |
|
"logps/chosen": -382.9683532714844, |
|
"logps/rejected": -468.80157470703125, |
|
"loss": 0.463, |
|
"positive_losses": 0.11419792473316193, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5407828092575073, |
|
"rewards/margins": 1.968488097190857, |
|
"rewards/margins_max": 2.390479564666748, |
|
"rewards/margins_min": 1.546496868133545, |
|
"rewards/margins_std": 0.5967859029769897, |
|
"rewards/rejected": -1.4277052879333496, |
|
"step": 890 |
|
}, |
|
{ |
|
"dpo_losses": 0.18081924319267273, |
|
"epoch": 2.54, |
|
"grad_norm": 6.1887239729076455, |
|
"learning_rate": 3.571286656911377e-07, |
|
"logits/chosen": -2.6035306453704834, |
|
"logits/rejected": -2.4794845581054688, |
|
"logps/chosen": -310.08013916015625, |
|
"logps/rejected": -408.18426513671875, |
|
"loss": 0.3544, |
|
"positive_losses": 2.6008810997009277, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4102245271205902, |
|
"rewards/margins": 1.8388206958770752, |
|
"rewards/margins_max": 2.2580726146698, |
|
"rewards/margins_min": 1.4195688962936401, |
|
"rewards/margins_std": 0.5929116606712341, |
|
"rewards/rejected": -1.4285962581634521, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"eval_dpo_losses": 0.6314364075660706, |
|
"eval_logits/chosen": -2.586303472518921, |
|
"eval_logits/rejected": -2.535871744155884, |
|
"eval_logps/chosen": -332.5704345703125, |
|
"eval_logps/rejected": -335.68133544921875, |
|
"eval_loss": 5.36396598815918, |
|
"eval_positive_losses": 51.33633804321289, |
|
"eval_rewards/accuracies": 0.670634925365448, |
|
"eval_rewards/chosen": -0.47349241375923157, |
|
"eval_rewards/margins": 0.29149433970451355, |
|
"eval_rewards/margins_max": 1.078196406364441, |
|
"eval_rewards/margins_min": -0.5344981551170349, |
|
"eval_rewards/margins_std": 0.72893226146698, |
|
"eval_rewards/rejected": -0.7649868130683899, |
|
"eval_runtime": 284.4452, |
|
"eval_samples_per_second": 7.031, |
|
"eval_steps_per_second": 0.221, |
|
"step": 900 |
|
}, |
|
{ |
|
"dpo_losses": 0.20350190997123718, |
|
"epoch": 2.56, |
|
"grad_norm": 1.9747106750644823, |
|
"learning_rate": 3.1606136691612555e-07, |
|
"logits/chosen": -2.7836008071899414, |
|
"logits/rejected": -2.5904271602630615, |
|
"logps/chosen": -345.66265869140625, |
|
"logps/rejected": -354.3844299316406, |
|
"loss": 0.2637, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5323250889778137, |
|
"rewards/margins": 1.6922286748886108, |
|
"rewards/margins_max": 2.0907249450683594, |
|
"rewards/margins_min": 1.2937328815460205, |
|
"rewards/margins_std": 0.5635584592819214, |
|
"rewards/rejected": -1.159903883934021, |
|
"step": 910 |
|
}, |
|
{ |
|
"dpo_losses": 0.22471606731414795, |
|
"epoch": 2.59, |
|
"grad_norm": 11.212944207381554, |
|
"learning_rate": 2.773424582247844e-07, |
|
"logits/chosen": -2.5793869495391846, |
|
"logits/rejected": -2.4063210487365723, |
|
"logps/chosen": -291.543701171875, |
|
"logps/rejected": -320.06353759765625, |
|
"loss": 0.6166, |
|
"positive_losses": 8.062161445617676, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.38504648208618164, |
|
"rewards/margins": 1.6606292724609375, |
|
"rewards/margins_max": 2.1240899562835693, |
|
"rewards/margins_min": 1.1971690654754639, |
|
"rewards/margins_std": 0.655431866645813, |
|
"rewards/rejected": -1.275583028793335, |
|
"step": 920 |
|
}, |
|
{ |
|
"dpo_losses": 0.20711331069469452, |
|
"epoch": 2.62, |
|
"grad_norm": 190.13690585667476, |
|
"learning_rate": 2.410135740750821e-07, |
|
"logits/chosen": -2.6692299842834473, |
|
"logits/rejected": -2.5721378326416016, |
|
"logps/chosen": -300.9726257324219, |
|
"logps/rejected": -399.5599365234375, |
|
"loss": 0.4929, |
|
"positive_losses": 3.012037754058838, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.3912240266799927, |
|
"rewards/margins": 1.6907918453216553, |
|
"rewards/margins_max": 2.0557963848114014, |
|
"rewards/margins_min": 1.3257873058319092, |
|
"rewards/margins_std": 0.5161946415901184, |
|
"rewards/rejected": -1.2995678186416626, |
|
"step": 930 |
|
}, |
|
{ |
|
"dpo_losses": 0.3051915466785431, |
|
"epoch": 2.65, |
|
"grad_norm": 189.05899790144875, |
|
"learning_rate": 2.0711377893064182e-07, |
|
"logits/chosen": -2.639585256576538, |
|
"logits/rejected": -2.488219738006592, |
|
"logps/chosen": -312.65863037109375, |
|
"logps/rejected": -305.5802917480469, |
|
"loss": 0.4531, |
|
"positive_losses": 3.7967441082000732, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2920045554637909, |
|
"rewards/margins": 1.2456369400024414, |
|
"rewards/margins_max": 1.7247259616851807, |
|
"rewards/margins_min": 0.7665479183197021, |
|
"rewards/margins_std": 0.6775342226028442, |
|
"rewards/rejected": -0.9536323547363281, |
|
"step": 940 |
|
}, |
|
{ |
|
"dpo_losses": 0.2704788148403168, |
|
"epoch": 2.68, |
|
"grad_norm": 15.556722893889498, |
|
"learning_rate": 1.756795252547111e-07, |
|
"logits/chosen": -2.588268518447876, |
|
"logits/rejected": -2.501624584197998, |
|
"logps/chosen": -225.04928588867188, |
|
"logps/rejected": -314.3163757324219, |
|
"loss": 0.4599, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3465590476989746, |
|
"rewards/margins": 1.4794371128082275, |
|
"rewards/margins_max": 1.9303573369979858, |
|
"rewards/margins_min": 1.0285165309906006, |
|
"rewards/margins_std": 0.6376978158950806, |
|
"rewards/rejected": -1.1328779458999634, |
|
"step": 950 |
|
}, |
|
{ |
|
"dpo_losses": 0.23120097815990448, |
|
"epoch": 2.7, |
|
"grad_norm": 3.6975387738343986, |
|
"learning_rate": 1.4674461431281013e-07, |
|
"logits/chosen": -2.7935328483581543, |
|
"logits/rejected": -2.7169508934020996, |
|
"logps/chosen": -246.69778442382812, |
|
"logps/rejected": -358.2559509277344, |
|
"loss": 0.3766, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3893265724182129, |
|
"rewards/margins": 1.5332249402999878, |
|
"rewards/margins_max": 2.008496046066284, |
|
"rewards/margins_min": 1.0579537153244019, |
|
"rewards/margins_std": 0.6721349954605103, |
|
"rewards/rejected": -1.143898367881775, |
|
"step": 960 |
|
}, |
|
{ |
|
"dpo_losses": 0.18317696452140808, |
|
"epoch": 2.73, |
|
"grad_norm": 25.967042428441264, |
|
"learning_rate": 1.2034015982622243e-07, |
|
"logits/chosen": -2.68410587310791, |
|
"logits/rejected": -2.5668373107910156, |
|
"logps/chosen": -320.6241760253906, |
|
"logps/rejected": -454.39849853515625, |
|
"loss": 0.3194, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4021673798561096, |
|
"rewards/margins": 1.9114774465560913, |
|
"rewards/margins_max": 2.451068878173828, |
|
"rewards/margins_min": 1.3718855381011963, |
|
"rewards/margins_std": 0.763097882270813, |
|
"rewards/rejected": -1.5093098878860474, |
|
"step": 970 |
|
}, |
|
{ |
|
"dpo_losses": 0.2419268637895584, |
|
"epoch": 2.76, |
|
"grad_norm": 19.29396011638503, |
|
"learning_rate": 9.649455451539419e-08, |
|
"logits/chosen": -2.555974006652832, |
|
"logits/rejected": -2.4670310020446777, |
|
"logps/chosen": -218.39334106445312, |
|
"logps/rejected": -300.92254638671875, |
|
"loss": 0.4254, |
|
"positive_losses": 4.289657115936279, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.25288599729537964, |
|
"rewards/margins": 1.518112301826477, |
|
"rewards/margins_max": 1.9375699758529663, |
|
"rewards/margins_min": 1.098654866218567, |
|
"rewards/margins_std": 0.5932024717330933, |
|
"rewards/rejected": -1.2652263641357422, |
|
"step": 980 |
|
}, |
|
{ |
|
"dpo_losses": 0.20426790416240692, |
|
"epoch": 2.79, |
|
"grad_norm": 145.7358684722982, |
|
"learning_rate": 7.523343956923196e-08, |
|
"logits/chosen": -2.7547340393066406, |
|
"logits/rejected": -2.6413354873657227, |
|
"logps/chosen": -303.62115478515625, |
|
"logps/rejected": -412.58782958984375, |
|
"loss": 0.406, |
|
"positive_losses": 1.1116502285003662, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4799574315547943, |
|
"rewards/margins": 1.7528730630874634, |
|
"rewards/margins_max": 2.2721505165100098, |
|
"rewards/margins_min": 1.2335954904556274, |
|
"rewards/margins_std": 0.734369158744812, |
|
"rewards/rejected": -1.2729156017303467, |
|
"step": 990 |
|
}, |
|
{ |
|
"dpo_losses": 0.2937398850917816, |
|
"epoch": 2.82, |
|
"grad_norm": 147.9672419405728, |
|
"learning_rate": 5.657967707312195e-08, |
|
"logits/chosen": -2.519782543182373, |
|
"logits/rejected": -2.54045033454895, |
|
"logps/chosen": -236.8069610595703, |
|
"logps/rejected": -393.01373291015625, |
|
"loss": 0.545, |
|
"positive_losses": 6.547940254211426, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.24184127151966095, |
|
"rewards/margins": 1.3283250331878662, |
|
"rewards/margins_max": 1.8528366088867188, |
|
"rewards/margins_min": 0.8038133382797241, |
|
"rewards/margins_std": 0.7417714595794678, |
|
"rewards/rejected": -1.0864837169647217, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"eval_dpo_losses": 0.6312186121940613, |
|
"eval_logits/chosen": -2.5872504711151123, |
|
"eval_logits/rejected": -2.5366668701171875, |
|
"eval_logps/chosen": -330.9984436035156, |
|
"eval_logps/rejected": -333.99945068359375, |
|
"eval_loss": 5.222360134124756, |
|
"eval_positive_losses": 49.98057556152344, |
|
"eval_rewards/accuracies": 0.6626983880996704, |
|
"eval_rewards/chosen": -0.4577721953392029, |
|
"eval_rewards/margins": 0.29039543867111206, |
|
"eval_rewards/margins_max": 1.0717767477035522, |
|
"eval_rewards/margins_min": -0.533184289932251, |
|
"eval_rewards/margins_std": 0.724482536315918, |
|
"eval_rewards/rejected": -0.7481676340103149, |
|
"eval_runtime": 284.8086, |
|
"eval_samples_per_second": 7.022, |
|
"eval_steps_per_second": 0.221, |
|
"step": 1000 |
|
}, |
|
{ |
|
"dpo_losses": 0.2392820119857788, |
|
"epoch": 2.85, |
|
"grad_norm": 87.50201169562474, |
|
"learning_rate": 4.055332542531959e-08, |
|
"logits/chosen": -2.7165019512176514, |
|
"logits/rejected": -2.6357262134552, |
|
"logps/chosen": -229.1401824951172, |
|
"logps/rejected": -371.04571533203125, |
|
"loss": 0.5645, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.34021449089050293, |
|
"rewards/margins": 1.4760607481002808, |
|
"rewards/margins_max": 1.7028331756591797, |
|
"rewards/margins_min": 1.2492884397506714, |
|
"rewards/margins_std": 0.3207046389579773, |
|
"rewards/rejected": -1.1358463764190674, |
|
"step": 1010 |
|
}, |
|
{ |
|
"dpo_losses": 0.2636774182319641, |
|
"epoch": 2.87, |
|
"grad_norm": 9.243316710391014, |
|
"learning_rate": 2.7171617768147472e-08, |
|
"logits/chosen": -2.5805556774139404, |
|
"logits/rejected": -2.4946963787078857, |
|
"logps/chosen": -200.70706176757812, |
|
"logps/rejected": -348.9754638671875, |
|
"loss": 0.4736, |
|
"positive_losses": 4.167427062988281, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.226848766207695, |
|
"rewards/margins": 1.4039170742034912, |
|
"rewards/margins_max": 1.8958208560943604, |
|
"rewards/margins_min": 0.9120131731033325, |
|
"rewards/margins_std": 0.6956570148468018, |
|
"rewards/rejected": -1.1770681142807007, |
|
"step": 1020 |
|
}, |
|
{ |
|
"dpo_losses": 0.26111191511154175, |
|
"epoch": 2.9, |
|
"grad_norm": 3.7450554356463743, |
|
"learning_rate": 1.6448943457189616e-08, |
|
"logits/chosen": -2.5760231018066406, |
|
"logits/rejected": -2.58748197555542, |
|
"logps/chosen": -268.255126953125, |
|
"logps/rejected": -387.044921875, |
|
"loss": 0.2968, |
|
"positive_losses": 1.4128901958465576, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.34100010991096497, |
|
"rewards/margins": 1.478846788406372, |
|
"rewards/margins_max": 2.0194473266601562, |
|
"rewards/margins_min": 0.9382462501525879, |
|
"rewards/margins_std": 0.7645247578620911, |
|
"rewards/rejected": -1.1378467082977295, |
|
"step": 1030 |
|
}, |
|
{ |
|
"dpo_losses": 0.24359698593616486, |
|
"epoch": 2.93, |
|
"grad_norm": 187.10263101103095, |
|
"learning_rate": 8.39683258841123e-09, |
|
"logits/chosen": -2.5231451988220215, |
|
"logits/rejected": -2.408517360687256, |
|
"logps/chosen": -264.57916259765625, |
|
"logps/rejected": -332.7992248535156, |
|
"loss": 0.3754, |
|
"positive_losses": 0.07207755744457245, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4693472981452942, |
|
"rewards/margins": 1.5425517559051514, |
|
"rewards/margins_max": 1.9166587591171265, |
|
"rewards/margins_min": 1.1684446334838867, |
|
"rewards/margins_std": 0.52906733751297, |
|
"rewards/rejected": -1.0732043981552124, |
|
"step": 1040 |
|
}, |
|
{ |
|
"dpo_losses": 0.20071451365947723, |
|
"epoch": 2.96, |
|
"grad_norm": 3.6062297906425043, |
|
"learning_rate": 3.0239435998430376e-09, |
|
"logits/chosen": -2.645131826400757, |
|
"logits/rejected": -2.5101490020751953, |
|
"logps/chosen": -282.20855712890625, |
|
"logps/rejected": -383.17950439453125, |
|
"loss": 0.3373, |
|
"positive_losses": 1.7565370798110962, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.41964656114578247, |
|
"rewards/margins": 1.6733496189117432, |
|
"rewards/margins_max": 2.073215961456299, |
|
"rewards/margins_min": 1.2734830379486084, |
|
"rewards/margins_std": 0.5654967427253723, |
|
"rewards/rejected": -1.253702998161316, |
|
"step": 1050 |
|
}, |
|
{ |
|
"dpo_losses": 0.22902190685272217, |
|
"epoch": 2.99, |
|
"grad_norm": 3.308213249224383, |
|
"learning_rate": 3.3605396115826695e-10, |
|
"logits/chosen": -2.394101142883301, |
|
"logits/rejected": -2.4773335456848145, |
|
"logps/chosen": -165.21871948242188, |
|
"logps/rejected": -327.92352294921875, |
|
"loss": 0.3158, |
|
"positive_losses": 1.1246204376220703, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.30173832178115845, |
|
"rewards/margins": 1.4864323139190674, |
|
"rewards/margins_max": 1.822080373764038, |
|
"rewards/margins_min": 1.150783896446228, |
|
"rewards/margins_std": 0.4746781885623932, |
|
"rewards/rejected": -1.1846938133239746, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 1065, |
|
"total_flos": 0.0, |
|
"train_loss": 0.48024289137880566, |
|
"train_runtime": 8933.1726, |
|
"train_samples_per_second": 1.907, |
|
"train_steps_per_second": 0.119 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1065, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|