{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.988190836088805, "eval_steps": 50, "global_step": 1760, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "dpo_loss": 0.6931471824645996, "epoch": 0.002834199338686821, "grad_norm": 17675.585799054454, "learning_rate": 5.681818181818181e-09, "logits": -1.2867579460144043, "logps": -84.34933471679688, "loss": 169.5214, "objective": 153.4677734375, "ranking_idealized": 0.6458333134651184, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.5833333134651184, "regularize": 0.3618059456348419, "step": 1, "wo_beta": 14.83154582977295 }, { "dpo_loss": 0.6930850148200989, "epoch": 0.014170996693434105, "grad_norm": 16809.76979726276, "learning_rate": 2.8409090909090908e-08, "logits": -1.4291090965270996, "logps": -83.86122131347656, "loss": 181.7047, "objective": 168.55690002441406, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.4895833432674408, "ranking_simple": 0.4895833432674408, "regularize": 0.4036543667316437, "step": 5, "wo_beta": 16.679672241210938 }, { "dpo_loss": 0.6930658221244812, "epoch": 0.02834199338686821, "grad_norm": 18604.90219885959, "learning_rate": 5.6818181818181815e-08, "logits": -1.4008290767669678, "logps": -84.83370971679688, "loss": 177.0775, "objective": 170.34666442871094, "ranking_idealized": 0.6708333492279053, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.5791666507720947, "regularize": 0.40391480922698975, "step": 10, "wo_beta": 15.222626686096191 }, { "dpo_loss": 0.6919592618942261, "epoch": 0.042512990080302314, "grad_norm": 17866.85697228391, "learning_rate": 8.522727272727271e-08, "logits": -1.5378918647766113, "logps": -84.51753234863281, "loss": 178.9384, "objective": 187.3764190673828, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.5708333253860474, "ranking_simple": 0.5666666626930237, "regularize": 0.44199517369270325, "step": 15, "wo_beta": 15.720404624938965 }, { "dpo_loss": 0.6915046572685242, "epoch": 0.05668398677373642, "grad_norm": 17562.319543911097, "learning_rate": 1.1363636363636363e-07, "logits": -1.3619273900985718, "logps": -83.62174224853516, "loss": 185.6226, "objective": 203.74549865722656, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.48750001192092896, "regularize": 0.4415209889411926, "step": 20, "wo_beta": 16.53324317932129 }, { "dpo_loss": 0.6925450563430786, "epoch": 0.07085498346717052, "grad_norm": 16842.244030261496, "learning_rate": 1.4204545454545455e-07, "logits": -1.369999647140503, "logps": -83.69309997558594, "loss": 181.9124, "objective": 172.8611297607422, "ranking_idealized": 0.6458333134651184, "ranking_idealized_expo": 0.5583333373069763, "ranking_simple": 0.5666666626930237, "regularize": 0.4071991741657257, "step": 25, "wo_beta": 15.610980987548828 }, { "dpo_loss": 0.6898643970489502, "epoch": 0.08502598016060463, "grad_norm": 14842.574916726253, "learning_rate": 1.7045454545454543e-07, "logits": -1.432415246963501, "logps": -83.48454284667969, "loss": 181.3521, "objective": 176.5283203125, "ranking_idealized": 0.5541666746139526, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.48750001192092896, "regularize": 0.4289272427558899, "step": 30, "wo_beta": 17.00359344482422 }, { "dpo_loss": 0.6909116506576538, "epoch": 0.09919697685403873, "grad_norm": 16058.543561158533, "learning_rate": 1.9886363636363636e-07, "logits": -1.4108096361160278, "logps": -82.71344757080078, "loss": 183.8373, "objective": 173.34014892578125, "ranking_idealized": 0.6291666626930237, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5416666865348816, "regularize": 0.38034114241600037, "step": 35, "wo_beta": 16.153711318969727 }, { "dpo_loss": 0.6891058683395386, "epoch": 0.11336797354747284, "grad_norm": 17014.23191466682, "learning_rate": 2.2727272727272726e-07, "logits": -1.402835488319397, "logps": -83.338134765625, "loss": 187.3552, "objective": 182.01144409179688, "ranking_idealized": 0.5708333253860474, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5166666507720947, "regularize": 0.4058202803134918, "step": 40, "wo_beta": 14.262288093566895 }, { "dpo_loss": 0.684747040271759, "epoch": 0.12753897024090693, "grad_norm": 15346.751264548873, "learning_rate": 2.5568181818181816e-07, "logits": -1.419245958328247, "logps": -83.82090759277344, "loss": 171.4244, "objective": 183.38385009765625, "ranking_idealized": 0.5958333611488342, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.5166666507720947, "regularize": 0.4130297601222992, "step": 45, "wo_beta": 14.29751968383789 }, { "dpo_loss": 0.6823928356170654, "epoch": 0.14170996693434104, "grad_norm": 16514.084391847242, "learning_rate": 2.840909090909091e-07, "logits": -1.4350523948669434, "logps": -84.8818359375, "loss": 181.5404, "objective": 186.33828735351562, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5041666626930237, "regularize": 0.4348808526992798, "step": 50, "wo_beta": 15.604106903076172 }, { "epoch": 0.14170996693434104, "eval_dpo_loss": 0.6889749765396118, "eval_logits": -1.4233466386795044, "eval_logps": -90.91888427734375, "eval_loss": 182.35984802246094, "eval_objective": 180.32789611816406, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5263975262641907, "eval_regularize": 0.40881022810935974, "eval_runtime": 472.6615, "eval_samples_per_second": 12.25, "eval_steps_per_second": 1.022, "eval_wo_beta": 16.297378540039062, "step": 50 }, { "dpo_loss": 0.6824547052383423, "epoch": 0.15588096362777515, "grad_norm": 17699.4671939912, "learning_rate": 3.1249999999999997e-07, "logits": -1.3973591327667236, "logps": -84.62629699707031, "loss": 170.6542, "objective": 174.4287872314453, "ranking_idealized": 0.5583333373069763, "ranking_idealized_expo": 0.49166667461395264, "ranking_simple": 0.4833333194255829, "regularize": 0.3742547035217285, "step": 55, "wo_beta": 15.11441421508789 }, { "dpo_loss": 0.6806777715682983, "epoch": 0.17005196032120926, "grad_norm": 16100.449715737686, "learning_rate": 3.4090909090909085e-07, "logits": -1.329344391822815, "logps": -85.16632843017578, "loss": 174.0689, "objective": 174.0922393798828, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5291666388511658, "regularize": 0.40893226861953735, "step": 60, "wo_beta": 14.438634872436523 }, { "dpo_loss": 0.6708158254623413, "epoch": 0.18422295701464336, "grad_norm": 16302.471134333027, "learning_rate": 3.693181818181818e-07, "logits": -1.428707480430603, "logps": -82.03670501708984, "loss": 172.5426, "objective": 161.09950256347656, "ranking_idealized": 0.637499988079071, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5333333611488342, "regularize": 0.36983728408813477, "step": 65, "wo_beta": 15.067524909973145 }, { "dpo_loss": 0.6730712652206421, "epoch": 0.19839395370807747, "grad_norm": 15662.31236602018, "learning_rate": 3.977272727272727e-07, "logits": -1.4695442914962769, "logps": -84.42548370361328, "loss": 174.7341, "objective": 175.19439697265625, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5375000238418579, "regularize": 0.4013313353061676, "step": 70, "wo_beta": 15.375307083129883 }, { "dpo_loss": 0.6741575002670288, "epoch": 0.21256495040151158, "grad_norm": 18686.585950552704, "learning_rate": 4.2613636363636364e-07, "logits": -1.393960952758789, "logps": -84.16697692871094, "loss": 174.6645, "objective": 164.988525390625, "ranking_idealized": 0.6083333492279053, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5208333134651184, "regularize": 0.38378840684890747, "step": 75, "wo_beta": 15.075023651123047 }, { "dpo_loss": 0.669329822063446, "epoch": 0.22673594709494568, "grad_norm": 18561.894559157903, "learning_rate": 4.545454545454545e-07, "logits": -1.4905359745025635, "logps": -83.4140396118164, "loss": 169.0661, "objective": 177.64450073242188, "ranking_idealized": 0.6291666626930237, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.5458333492279053, "regularize": 0.4245981276035309, "step": 80, "wo_beta": 16.333538055419922 }, { "dpo_loss": 0.6659378409385681, "epoch": 0.2409069437883798, "grad_norm": 15942.37358833672, "learning_rate": 4.829545454545455e-07, "logits": -1.4715605974197388, "logps": -83.54389190673828, "loss": 171.1414, "objective": 182.98324584960938, "ranking_idealized": 0.625, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.5541666746139526, "regularize": 0.4137464463710785, "step": 85, "wo_beta": 15.189921379089355 }, { "dpo_loss": 0.6600526571273804, "epoch": 0.25507794048181387, "grad_norm": 16315.909705896804, "learning_rate": 5.113636363636363e-07, "logits": -1.571618914604187, "logps": -84.54931640625, "loss": 168.3022, "objective": 174.0519561767578, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.550000011920929, "regularize": 0.39906471967697144, "step": 90, "wo_beta": 15.517964363098145 }, { "dpo_loss": 0.6545840501785278, "epoch": 0.269248937175248, "grad_norm": 17445.518244074756, "learning_rate": 5.397727272727273e-07, "logits": -1.49222731590271, "logps": -84.54743194580078, "loss": 168.7617, "objective": 175.46524047851562, "ranking_idealized": 0.5583333373069763, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.512499988079071, "regularize": 0.4040308892726898, "step": 95, "wo_beta": 16.429697036743164 }, { "dpo_loss": 0.6560600399971008, "epoch": 0.2834199338686821, "grad_norm": 16343.369412455128, "learning_rate": 5.681818181818182e-07, "logits": -1.370269775390625, "logps": -83.43912506103516, "loss": 156.9096, "objective": 160.82919311523438, "ranking_idealized": 0.5958333611488342, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.5, "regularize": 0.3631521761417389, "step": 100, "wo_beta": 15.597589492797852 }, { "epoch": 0.2834199338686821, "eval_dpo_loss": 0.6855058073997498, "eval_logits": -1.470232367515564, "eval_logps": -91.45095825195312, "eval_loss": 181.86407470703125, "eval_objective": 180.31504821777344, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.531573474407196, "eval_regularize": 0.41007429361343384, "eval_runtime": 479.0094, "eval_samples_per_second": 12.087, "eval_steps_per_second": 1.008, "eval_wo_beta": 16.373079299926758, "step": 100 }, { "dpo_loss": 0.6687707901000977, "epoch": 0.2975909305621162, "grad_norm": 20737.972285358017, "learning_rate": 5.965909090909091e-07, "logits": -1.572224736213684, "logps": -86.08336639404297, "loss": 161.5898, "objective": 164.3712615966797, "ranking_idealized": 0.625, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.5416666865348816, "regularize": 0.3831757605075836, "step": 105, "wo_beta": 14.803333282470703 }, { "dpo_loss": 0.6561999917030334, "epoch": 0.3117619272555503, "grad_norm": 16188.43984842568, "learning_rate": 6.249999999999999e-07, "logits": -1.4707790613174438, "logps": -84.74868774414062, "loss": 158.3984, "objective": 159.52267456054688, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.5166666507720947, "regularize": 0.38105159997940063, "step": 110, "wo_beta": 15.120772361755371 }, { "dpo_loss": 0.6603504419326782, "epoch": 0.32593292394898443, "grad_norm": 16290.29619326225, "learning_rate": 6.534090909090909e-07, "logits": -1.4433757066726685, "logps": -83.40989685058594, "loss": 149.8614, "objective": 154.2146453857422, "ranking_idealized": 0.6208333373069763, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5375000238418579, "regularize": 0.3704533576965332, "step": 115, "wo_beta": 16.445148468017578 }, { "dpo_loss": 0.6463068127632141, "epoch": 0.3401039206424185, "grad_norm": 15623.51190253056, "learning_rate": 6.818181818181817e-07, "logits": -1.4353134632110596, "logps": -83.36263275146484, "loss": 156.1384, "objective": 165.0032501220703, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5458333492279053, "regularize": 0.3623816668987274, "step": 120, "wo_beta": 15.72484302520752 }, { "dpo_loss": 0.6474981904029846, "epoch": 0.35427491733585265, "grad_norm": 15992.631664901073, "learning_rate": 7.102272727272727e-07, "logits": -1.4708176851272583, "logps": -87.08245086669922, "loss": 148.8453, "objective": 139.25869750976562, "ranking_idealized": 0.5583333373069763, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.5083333253860474, "regularize": 0.3299652636051178, "step": 125, "wo_beta": 16.12550163269043 }, { "dpo_loss": 0.6404248476028442, "epoch": 0.3684459140292867, "grad_norm": 18351.934143281596, "learning_rate": 7.386363636363636e-07, "logits": -1.4490153789520264, "logps": -85.12788391113281, "loss": 156.9957, "objective": 159.24720764160156, "ranking_idealized": 0.6458333134651184, "ranking_idealized_expo": 0.5916666388511658, "ranking_simple": 0.5791666507720947, "regularize": 0.3523053526878357, "step": 130, "wo_beta": 16.6445255279541 }, { "dpo_loss": 0.6560899615287781, "epoch": 0.3826169107227208, "grad_norm": 23473.507695048622, "learning_rate": 7.670454545454545e-07, "logits": -1.4993882179260254, "logps": -85.93272399902344, "loss": 163.276, "objective": 171.45176696777344, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.5, "regularize": 0.3585022985935211, "step": 135, "wo_beta": 14.440187454223633 }, { "dpo_loss": 0.6453083753585815, "epoch": 0.39678790741615494, "grad_norm": 18800.531975208964, "learning_rate": 7.954545454545454e-07, "logits": -1.4704848527908325, "logps": -84.99346160888672, "loss": 140.3663, "objective": 156.8263702392578, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.5583333373069763, "regularize": 0.35928240418434143, "step": 140, "wo_beta": 14.692411422729492 }, { "dpo_loss": 0.6320348978042603, "epoch": 0.410958904109589, "grad_norm": 16753.19118195896, "learning_rate": 8.238636363636363e-07, "logits": -1.481634259223938, "logps": -85.03217315673828, "loss": 148.4437, "objective": 142.04251098632812, "ranking_idealized": 0.6666666865348816, "ranking_idealized_expo": 0.5583333373069763, "ranking_simple": 0.6083333492279053, "regularize": 0.34694570302963257, "step": 145, "wo_beta": 14.103859901428223 }, { "dpo_loss": 0.6397809386253357, "epoch": 0.42512990080302315, "grad_norm": 15467.131473675328, "learning_rate": 8.522727272727273e-07, "logits": -1.5027910470962524, "logps": -85.37592315673828, "loss": 145.838, "objective": 148.38160705566406, "ranking_idealized": 0.5958333611488342, "ranking_idealized_expo": 0.5541666746139526, "ranking_simple": 0.5625, "regularize": 0.3435940146446228, "step": 150, "wo_beta": 17.392671585083008 }, { "epoch": 0.42512990080302315, "eval_dpo_loss": 0.6789573431015015, "eval_logits": -1.4503501653671265, "eval_logps": -90.70494842529297, "eval_loss": 180.64788818359375, "eval_objective": 178.1704864501953, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5383023023605347, "eval_regularize": 0.40225014090538025, "eval_runtime": 484.5212, "eval_samples_per_second": 11.95, "eval_steps_per_second": 0.997, "eval_wo_beta": 16.587987899780273, "step": 150 }, { "dpo_loss": 0.6387067437171936, "epoch": 0.43930089749645723, "grad_norm": 15641.193562303264, "learning_rate": 8.806818181818182e-07, "logits": -1.5433834791183472, "logps": -83.86524200439453, "loss": 145.3558, "objective": 149.48431396484375, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.5041666626930237, "regularize": 0.3321545720100403, "step": 155, "wo_beta": 15.563851356506348 }, { "dpo_loss": 0.6263092160224915, "epoch": 0.45347189418989137, "grad_norm": 17105.26137174702, "learning_rate": 9.09090909090909e-07, "logits": -1.4153720140457153, "logps": -85.28386688232422, "loss": 153.0504, "objective": 153.1988067626953, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5583333373069763, "regularize": 0.3481307625770569, "step": 160, "wo_beta": 14.662632942199707 }, { "dpo_loss": 0.6309160590171814, "epoch": 0.46764289088332545, "grad_norm": 17759.815020595273, "learning_rate": 9.374999999999999e-07, "logits": -1.4963940382003784, "logps": -87.69454956054688, "loss": 139.2377, "objective": 131.2418670654297, "ranking_idealized": 0.6083333492279053, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.550000011920929, "regularize": 0.3078847825527191, "step": 165, "wo_beta": 15.923318862915039 }, { "dpo_loss": 0.6393815279006958, "epoch": 0.4818138875767596, "grad_norm": 14258.083724870265, "learning_rate": 9.65909090909091e-07, "logits": -1.525942087173462, "logps": -87.34074401855469, "loss": 149.6952, "objective": 141.63162231445312, "ranking_idealized": 0.5583333373069763, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.5208333134651184, "regularize": 0.3343699276447296, "step": 170, "wo_beta": 16.248130798339844 }, { "dpo_loss": 0.6326501369476318, "epoch": 0.49598488427019366, "grad_norm": 15096.239809153309, "learning_rate": 9.943181818181817e-07, "logits": -1.3718321323394775, "logps": -87.4573745727539, "loss": 140.2749, "objective": 132.79156494140625, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5375000238418579, "regularize": 0.3121780455112457, "step": 175, "wo_beta": 17.698331832885742 }, { "dpo_loss": 0.622785747051239, "epoch": 0.5101558809636277, "grad_norm": 16631.252094969073, "learning_rate": 9.999842657116664e-07, "logits": -1.3456240892410278, "logps": -86.42423248291016, "loss": 143.2666, "objective": 151.05718994140625, "ranking_idealized": 0.5708333253860474, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.512499988079071, "regularize": 0.3372686207294464, "step": 180, "wo_beta": 14.807291030883789 }, { "dpo_loss": 0.6099674701690674, "epoch": 0.5243268776570619, "grad_norm": 20691.36637721674, "learning_rate": 9.999203468625015e-07, "logits": -1.3633224964141846, "logps": -85.25286102294922, "loss": 132.6151, "objective": 133.30491638183594, "ranking_idealized": 0.5791666507720947, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.5333333611488342, "regularize": 0.3143846392631531, "step": 185, "wo_beta": 14.758675575256348 }, { "dpo_loss": 0.596558690071106, "epoch": 0.538497874350496, "grad_norm": 16323.28292515014, "learning_rate": 9.998072663403656e-07, "logits": -1.4109238386154175, "logps": -83.85755157470703, "loss": 142.4777, "objective": 132.50650024414062, "ranking_idealized": 0.6625000238418579, "ranking_idealized_expo": 0.5625, "ranking_simple": 0.612500011920929, "regularize": 0.2925921082496643, "step": 190, "wo_beta": 17.561918258666992 }, { "dpo_loss": 0.608472466468811, "epoch": 0.5526688710439301, "grad_norm": 14605.697671098327, "learning_rate": 9.99645035265485e-07, "logits": -1.426125407218933, "logps": -83.3570556640625, "loss": 148.3801, "objective": 154.04542541503906, "ranking_idealized": 0.625, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5708333253860474, "regularize": 0.3404535949230194, "step": 195, "wo_beta": 15.011509895324707 }, { "dpo_loss": 0.6035653948783875, "epoch": 0.5668398677373642, "grad_norm": 14961.86824726212, "learning_rate": 9.99433669591504e-07, "logits": -1.4208530187606812, "logps": -83.7520523071289, "loss": 140.9398, "objective": 150.76983642578125, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5208333134651184, "regularize": 0.34441787004470825, "step": 200, "wo_beta": 16.120277404785156 }, { "epoch": 0.5668398677373642, "eval_dpo_loss": 0.6803466081619263, "eval_logits": -1.3894833326339722, "eval_logps": -90.33295440673828, "eval_loss": 184.49874877929688, "eval_objective": 181.54510498046875, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.532608687877655, "eval_regularize": 0.4101283848285675, "eval_runtime": 475.1985, "eval_samples_per_second": 12.184, "eval_steps_per_second": 1.016, "eval_wo_beta": 16.141496658325195, "step": 200 }, { "dpo_loss": 0.6270676255226135, "epoch": 0.5810108644307983, "grad_norm": 16340.681317011602, "learning_rate": 9.991731901039136e-07, "logits": -1.283570647239685, "logps": -84.95980834960938, "loss": 136.3843, "objective": 133.73294067382812, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.5416666865348816, "regularize": 0.3290613889694214, "step": 205, "wo_beta": 16.529329299926758 }, { "dpo_loss": 0.6101997494697571, "epoch": 0.5951818611242324, "grad_norm": 16979.514024444066, "learning_rate": 9.988636224180095e-07, "logits": -1.3387362957000732, "logps": -85.54541015625, "loss": 149.2125, "objective": 162.19125366210938, "ranking_idealized": 0.6041666865348816, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5625, "regularize": 0.3633294999599457, "step": 210, "wo_beta": 15.476922988891602 }, { "dpo_loss": 0.5931335687637329, "epoch": 0.6093528578176665, "grad_norm": 16588.23739039735, "learning_rate": 9.985049969763719e-07, "logits": -1.458817720413208, "logps": -84.46039581298828, "loss": 133.2822, "objective": 143.83396911621094, "ranking_idealized": 0.5791666507720947, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.5166666507720947, "regularize": 0.3306182324886322, "step": 215, "wo_beta": 16.599056243896484 }, { "dpo_loss": 0.6022905111312866, "epoch": 0.6235238545111006, "grad_norm": 17119.52021011513, "learning_rate": 9.980973490458728e-07, "logits": -1.4839917421340942, "logps": -84.08710479736328, "loss": 143.4095, "objective": 144.29782104492188, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.5083333253860474, "regularize": 0.32684001326560974, "step": 220, "wo_beta": 16.91693878173828 }, { "dpo_loss": 0.5977518558502197, "epoch": 0.6376948512045347, "grad_norm": 14023.197866950057, "learning_rate": 9.976407187142064e-07, "logits": -1.534485936164856, "logps": -85.1946792602539, "loss": 138.8846, "objective": 137.76622009277344, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.5583333373069763, "regularize": 0.31227758526802063, "step": 225, "wo_beta": 15.36359691619873 }, { "dpo_loss": 0.5947220921516418, "epoch": 0.6518658478979689, "grad_norm": 14605.487004157298, "learning_rate": 9.971351508859486e-07, "logits": -1.439586877822876, "logps": -85.27981567382812, "loss": 124.6336, "objective": 121.90718078613281, "ranking_idealized": 0.637499988079071, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.6000000238418579, "regularize": 0.2932659685611725, "step": 230, "wo_beta": 17.20786476135254 }, { "dpo_loss": 0.6003122925758362, "epoch": 0.6660368445914029, "grad_norm": 16685.644038837043, "learning_rate": 9.9658069527814e-07, "logits": -1.3658267259597778, "logps": -86.23738098144531, "loss": 121.5208, "objective": 116.9168472290039, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.5583333373069763, "regularize": 0.2670106589794159, "step": 235, "wo_beta": 16.473398208618164 }, { "dpo_loss": 0.5931513905525208, "epoch": 0.680207841284837, "grad_norm": 18082.47037845429, "learning_rate": 9.959774064153975e-07, "logits": -1.5063189268112183, "logps": -85.80690002441406, "loss": 131.6654, "objective": 136.83932495117188, "ranking_idealized": 0.6041666865348816, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.550000011920929, "regularize": 0.29019126296043396, "step": 240, "wo_beta": 16.562297821044922 }, { "dpo_loss": 0.6120952367782593, "epoch": 0.6943788379782712, "grad_norm": 16231.64241500278, "learning_rate": 9.953253436245516e-07, "logits": -1.5183242559432983, "logps": -85.21266174316406, "loss": 120.6441, "objective": 111.80670928955078, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.5666666626930237, "regularize": 0.2561970055103302, "step": 245, "wo_beta": 16.04390525817871 }, { "dpo_loss": 0.5938802361488342, "epoch": 0.7085498346717053, "grad_norm": 15964.589309173105, "learning_rate": 9.94624571028813e-07, "logits": -1.3114020824432373, "logps": -83.990478515625, "loss": 131.1439, "objective": 132.464599609375, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.5874999761581421, "regularize": 0.29094573855400085, "step": 250, "wo_beta": 15.082120895385742 }, { "epoch": 0.7085498346717053, "eval_dpo_loss": 0.6797458529472351, "eval_logits": -1.478871464729309, "eval_logps": -91.22461700439453, "eval_loss": 182.20773315429688, "eval_objective": 178.44094848632812, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.532608687877655, "eval_regularize": 0.4058575928211212, "eval_runtime": 475.3374, "eval_samples_per_second": 12.181, "eval_steps_per_second": 1.016, "eval_wo_beta": 16.368268966674805, "step": 250 }, { "dpo_loss": 0.6089769005775452, "epoch": 0.7227208313651393, "grad_norm": 14595.146016283225, "learning_rate": 9.938751575414661e-07, "logits": -1.5532639026641846, "logps": -83.39389038085938, "loss": 133.1451, "objective": 121.37617492675781, "ranking_idealized": 0.5708333253860474, "ranking_idealized_expo": 0.42916667461395264, "ranking_simple": 0.5041666626930237, "regularize": 0.2965226471424103, "step": 255, "wo_beta": 15.059760093688965 }, { "dpo_loss": 0.5949603915214539, "epoch": 0.7368918280585735, "grad_norm": 15154.916516529278, "learning_rate": 9.930771768590933e-07, "logits": -1.5184205770492554, "logps": -85.99275970458984, "loss": 128.7971, "objective": 149.26617431640625, "ranking_idealized": 0.5416666865348816, "ranking_idealized_expo": 0.4625000059604645, "ranking_simple": 0.5208333134651184, "regularize": 0.3260208070278168, "step": 260, "wo_beta": 15.812520027160645 }, { "dpo_loss": 0.5942420959472656, "epoch": 0.7510628247520076, "grad_norm": 13672.874013609171, "learning_rate": 9.92230707454326e-07, "logits": -1.438194990158081, "logps": -86.4264907836914, "loss": 119.4127, "objective": 127.40038299560547, "ranking_idealized": 0.6625000238418579, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.6416666507720947, "regularize": 0.29675182700157166, "step": 265, "wo_beta": 16.794330596923828 }, { "dpo_loss": 0.6142985224723816, "epoch": 0.7652338214454416, "grad_norm": 14406.751122728363, "learning_rate": 9.91335832568129e-07, "logits": -1.5249485969543457, "logps": -87.38147735595703, "loss": 129.203, "objective": 141.37374877929688, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.4583333432674408, "ranking_simple": 0.4749999940395355, "regularize": 0.2984028458595276, "step": 270, "wo_beta": 14.417384147644043 }, { "dpo_loss": 0.5954193472862244, "epoch": 0.7794048181388757, "grad_norm": 17040.572933936153, "learning_rate": 9.90392640201615e-07, "logits": -1.3636622428894043, "logps": -86.6485595703125, "loss": 118.1932, "objective": 113.61885833740234, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.5375000238418579, "regularize": 0.2610357701778412, "step": 275, "wo_beta": 15.509458541870117 }, { "dpo_loss": 0.5917832851409912, "epoch": 0.7935758148323099, "grad_norm": 17559.793763685935, "learning_rate": 9.894012231073895e-07, "logits": -1.4590952396392822, "logps": -87.64340209960938, "loss": 132.6812, "objective": 137.506103515625, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5541666746139526, "regularize": 0.2935800850391388, "step": 280, "wo_beta": 15.80073070526123 }, { "dpo_loss": 0.5836724042892456, "epoch": 0.807746811525744, "grad_norm": 14579.651979817574, "learning_rate": 9.88361678780429e-07, "logits": -1.4701313972473145, "logps": -88.11650085449219, "loss": 118.3926, "objective": 111.54865264892578, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.5208333134651184, "regularize": 0.2552913427352905, "step": 285, "wo_beta": 16.792234420776367 }, { "dpo_loss": 0.5677815079689026, "epoch": 0.821917808219178, "grad_norm": 15029.308678016287, "learning_rate": 9.872741094484964e-07, "logits": -1.500461220741272, "logps": -86.58364868164062, "loss": 116.8557, "objective": 106.32292175292969, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.550000011920929, "regularize": 0.24703934788703918, "step": 290, "wo_beta": 16.14396095275879 }, { "dpo_loss": 0.5853282809257507, "epoch": 0.8360888049126122, "grad_norm": 15120.877217642179, "learning_rate": 9.86138622062085e-07, "logits": -1.494510293006897, "logps": -86.35259246826172, "loss": 116.1266, "objective": 112.15760803222656, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.5208333134651184, "regularize": 0.2589784264564514, "step": 295, "wo_beta": 16.28350257873535 }, { "dpo_loss": 0.5893528461456299, "epoch": 0.8502598016060463, "grad_norm": 14818.401223627045, "learning_rate": 9.849553282839024e-07, "logits": -1.4687484502792358, "logps": -85.012939453125, "loss": 118.3192, "objective": 113.60901641845703, "ranking_idealized": 0.625, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.6000000238418579, "regularize": 0.26101434230804443, "step": 300, "wo_beta": 15.157808303833008 }, { "epoch": 0.8502598016060463, "eval_dpo_loss": 0.681740403175354, "eval_logits": -1.4551842212677002, "eval_logps": -92.57705688476562, "eval_loss": 183.44589233398438, "eval_objective": 180.4713592529297, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.532608687877655, "eval_regularize": 0.41230443120002747, "eval_runtime": 479.855, "eval_samples_per_second": 12.066, "eval_steps_per_second": 1.007, "eval_wo_beta": 16.404129028320312, "step": 300 }, { "dpo_loss": 0.5834535956382751, "epoch": 0.8644307982994804, "grad_norm": 14881.03810672454, "learning_rate": 9.837243444778899e-07, "logits": -1.4318089485168457, "logps": -85.52223205566406, "loss": 117.2997, "objective": 119.20571899414062, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.6083333492279053, "regularize": 0.2612408697605133, "step": 305, "wo_beta": 15.858050346374512 }, { "dpo_loss": 0.5729119181632996, "epoch": 0.8786017949929145, "grad_norm": 13728.643717044331, "learning_rate": 9.824457916977784e-07, "logits": -1.430962085723877, "logps": -84.47950744628906, "loss": 113.9787, "objective": 119.12039184570312, "ranking_idealized": 0.5958333611488342, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.5541666746139526, "regularize": 0.2695327699184418, "step": 310, "wo_beta": 15.427461624145508 }, { "dpo_loss": 0.5748838782310486, "epoch": 0.8927727916863486, "grad_norm": 15353.814970462101, "learning_rate": 9.81119795675185e-07, "logits": -1.4459213018417358, "logps": -83.27306365966797, "loss": 112.487, "objective": 110.93157196044922, "ranking_idealized": 0.5541666746139526, "ranking_idealized_expo": 0.47083333134651184, "ranking_simple": 0.5333333611488342, "regularize": 0.2502378225326538, "step": 315, "wo_beta": 15.45988941192627 }, { "dpo_loss": 0.5748109221458435, "epoch": 0.9069437883797827, "grad_norm": 15007.545319328356, "learning_rate": 9.797464868072486e-07, "logits": -1.4066462516784668, "logps": -86.03001403808594, "loss": 110.898, "objective": 109.38225555419922, "ranking_idealized": 0.6208333373069763, "ranking_idealized_expo": 0.5583333373069763, "ranking_simple": 0.6083333492279053, "regularize": 0.2464234083890915, "step": 320, "wo_beta": 15.732470512390137 }, { "dpo_loss": 0.5822945833206177, "epoch": 0.9211147850732169, "grad_norm": 13633.021631468031, "learning_rate": 9.783260001438066e-07, "logits": -1.4706979990005493, "logps": -87.00752258300781, "loss": 114.9803, "objective": 106.17591857910156, "ranking_idealized": 0.6083333492279053, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5708333253860474, "regularize": 0.25146251916885376, "step": 325, "wo_beta": 15.325220108032227 }, { "dpo_loss": 0.5598011016845703, "epoch": 0.9352857817666509, "grad_norm": 14695.63914534257, "learning_rate": 9.768584753741134e-07, "logits": -1.3177284002304077, "logps": -86.90360260009766, "loss": 116.6896, "objective": 123.9805679321289, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.5916666388511658, "regularize": 0.2586965262889862, "step": 330, "wo_beta": 16.747480392456055 }, { "dpo_loss": 0.5746142864227295, "epoch": 0.949456778460085, "grad_norm": 14706.814411020761, "learning_rate": 9.753440568131054e-07, "logits": -1.3514246940612793, "logps": -86.81550598144531, "loss": 115.5651, "objective": 113.5698471069336, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5791666507720947, "regularize": 0.25022396445274353, "step": 335, "wo_beta": 15.857748031616211 }, { "dpo_loss": 0.5717839598655701, "epoch": 0.9636277751535192, "grad_norm": 13577.369360499106, "learning_rate": 9.737828933872073e-07, "logits": -1.400834321975708, "logps": -85.29247283935547, "loss": 118.1002, "objective": 108.19886779785156, "ranking_idealized": 0.6041666865348816, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.5958333611488342, "regularize": 0.24417272210121155, "step": 340, "wo_beta": 16.27320098876953 }, { "dpo_loss": 0.5746095776557922, "epoch": 0.9777987718469532, "grad_norm": 13673.428728913288, "learning_rate": 9.721751386196885e-07, "logits": -1.4508498907089233, "logps": -84.16486358642578, "loss": 110.1951, "objective": 103.0552749633789, "ranking_idealized": 0.6458333134651184, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.6291666626930237, "regularize": 0.23596970736980438, "step": 345, "wo_beta": 15.449429512023926 }, { "dpo_loss": 0.5632264018058777, "epoch": 0.9919697685403873, "grad_norm": 13613.304013119689, "learning_rate": 9.705209506155634e-07, "logits": -1.3619670867919922, "logps": -86.77315521240234, "loss": 108.5029, "objective": 110.73800659179688, "ranking_idealized": 0.6583333611488342, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.625, "regularize": 0.26065030694007874, "step": 350, "wo_beta": 15.869379997253418 }, { "epoch": 0.9919697685403873, "eval_dpo_loss": 0.678183376789093, "eval_logits": -1.4316504001617432, "eval_logps": -92.18038177490234, "eval_loss": 183.9593048095703, "eval_objective": 180.11509704589844, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5320910811424255, "eval_regularize": 0.40945151448249817, "eval_runtime": 476.2355, "eval_samples_per_second": 12.158, "eval_steps_per_second": 1.014, "eval_wo_beta": 16.336669921875, "step": 350 }, { "dpo_loss": 0.5633755326271057, "epoch": 1.0061407652338215, "grad_norm": 13717.944611215353, "learning_rate": 9.688204920460466e-07, "logits": -1.542311191558838, "logps": -84.23912811279297, "loss": 104.9579, "objective": 99.2624740600586, "ranking_idealized": 0.6083333492279053, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5791666507720947, "regularize": 0.2348737269639969, "step": 355, "wo_beta": 16.799049377441406 }, { "dpo_loss": 0.5596449971199036, "epoch": 1.0203117619272555, "grad_norm": 15569.178838691118, "learning_rate": 9.670739301325534e-07, "logits": -1.4423024654388428, "logps": -84.60731506347656, "loss": 97.354, "objective": 96.60607147216797, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.5625, "regularize": 0.2134130448102951, "step": 360, "wo_beta": 16.611034393310547 }, { "dpo_loss": 0.5573465824127197, "epoch": 1.0344827586206897, "grad_norm": 14412.61274623368, "learning_rate": 9.652814366302568e-07, "logits": -1.4710925817489624, "logps": -84.47969818115234, "loss": 109.2182, "objective": 110.00160217285156, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5833333134651184, "regularize": 0.2383657544851303, "step": 365, "wo_beta": 14.846599578857422 }, { "dpo_loss": 0.559634804725647, "epoch": 1.0486537553141237, "grad_norm": 15121.427522934051, "learning_rate": 9.63443187811197e-07, "logits": -1.407724142074585, "logps": -82.60728454589844, "loss": 94.8917, "objective": 93.84876251220703, "ranking_idealized": 0.5958333611488342, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5666666626930237, "regularize": 0.21665388345718384, "step": 370, "wo_beta": 15.743396759033203 }, { "dpo_loss": 0.5503371357917786, "epoch": 1.0628247520075578, "grad_norm": 14225.520073845873, "learning_rate": 9.61559364446946e-07, "logits": -1.4566776752471924, "logps": -84.27056121826172, "loss": 96.0324, "objective": 91.85355377197266, "ranking_idealized": 0.6083333492279053, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5791666507720947, "regularize": 0.20962905883789062, "step": 375, "wo_beta": 16.301313400268555 }, { "dpo_loss": 0.5628603100776672, "epoch": 1.076995748700992, "grad_norm": 14108.362094897184, "learning_rate": 9.596301517908328e-07, "logits": -1.4387798309326172, "logps": -86.27851867675781, "loss": 98.7923, "objective": 108.01164245605469, "ranking_idealized": 0.6625000238418579, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.6291666626930237, "regularize": 0.2488705962896347, "step": 380, "wo_beta": 15.773112297058105 }, { "dpo_loss": 0.5771984457969666, "epoch": 1.091166745394426, "grad_norm": 13105.168740611702, "learning_rate": 9.576557395597236e-07, "logits": -1.4021495580673218, "logps": -85.1259536743164, "loss": 99.6716, "objective": 109.83814239501953, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5583333373069763, "ranking_simple": 0.5791666507720947, "regularize": 0.23721593618392944, "step": 385, "wo_beta": 15.801533699035645 }, { "dpo_loss": 0.5509606599807739, "epoch": 1.10533774208786, "grad_norm": 13663.873020268169, "learning_rate": 9.556363219153662e-07, "logits": -1.3366678953170776, "logps": -86.07147979736328, "loss": 96.1117, "objective": 90.10648345947266, "ranking_idealized": 0.637499988079071, "ranking_idealized_expo": 0.5625, "ranking_simple": 0.6333333253860474, "regularize": 0.2138025164604187, "step": 390, "wo_beta": 16.90329933166504 }, { "dpo_loss": 0.5398973822593689, "epoch": 1.1195087387812943, "grad_norm": 14913.448008058538, "learning_rate": 9.53572097445297e-07, "logits": -1.3910351991653442, "logps": -84.76091766357422, "loss": 99.588, "objective": 102.71925354003906, "ranking_idealized": 0.5791666507720947, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.5708333253860474, "regularize": 0.21998313069343567, "step": 395, "wo_beta": 14.880850791931152 }, { "dpo_loss": 0.5650266408920288, "epoch": 1.1336797354747283, "grad_norm": 14606.821946811386, "learning_rate": 9.514632691433106e-07, "logits": -1.4497681856155396, "logps": -82.1307373046875, "loss": 104.6813, "objective": 107.99799346923828, "ranking_idealized": 0.6291666626930237, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.6166666746139526, "regularize": 0.23726312816143036, "step": 400, "wo_beta": 15.854341506958008 }, { "epoch": 1.1336797354747283, "eval_dpo_loss": 0.6800939440727234, "eval_logits": -1.3930206298828125, "eval_logps": -89.72613525390625, "eval_loss": 183.87586975097656, "eval_objective": 180.28396606445312, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5310559272766113, "eval_regularize": 0.40940526127815247, "eval_runtime": 478.3822, "eval_samples_per_second": 12.103, "eval_steps_per_second": 1.01, "eval_wo_beta": 16.22085189819336, "step": 400 }, { "dpo_loss": 0.5639857053756714, "epoch": 1.1478507321681626, "grad_norm": 15414.866076924996, "learning_rate": 9.493100443894984e-07, "logits": -1.416764736175537, "logps": -84.40596771240234, "loss": 97.7792, "objective": 106.99815368652344, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.4625000059604645, "ranking_simple": 0.5416666865348816, "regularize": 0.22935108840465546, "step": 405, "wo_beta": 17.16730499267578 }, { "dpo_loss": 0.5612814426422119, "epoch": 1.1620217288615966, "grad_norm": 13730.11308532576, "learning_rate": 9.471126349298556e-07, "logits": -1.4282060861587524, "logps": -84.3336410522461, "loss": 96.1344, "objective": 93.89948272705078, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5874999761581421, "regularize": 0.20958545804023743, "step": 410, "wo_beta": 16.73971939086914 }, { "dpo_loss": 0.5569156408309937, "epoch": 1.1761927255550306, "grad_norm": 11975.058144386021, "learning_rate": 9.448712568554571e-07, "logits": -1.3549463748931885, "logps": -83.00645446777344, "loss": 93.1875, "objective": 96.11307525634766, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5833333134651184, "regularize": 0.22849011421203613, "step": 415, "wo_beta": 16.471454620361328 }, { "dpo_loss": 0.5578625202178955, "epoch": 1.1903637222484649, "grad_norm": 13553.103377125492, "learning_rate": 9.425861305812081e-07, "logits": -1.3200798034667969, "logps": -84.18423461914062, "loss": 99.8958, "objective": 90.86384582519531, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.5666666626930237, "regularize": 0.2039288878440857, "step": 420, "wo_beta": 16.64999008178711 }, { "dpo_loss": 0.5598068237304688, "epoch": 1.204534718941899, "grad_norm": 13382.98806426423, "learning_rate": 9.40257480824169e-07, "logits": -1.368670105934143, "logps": -82.51498413085938, "loss": 95.7898, "objective": 98.82903289794922, "ranking_idealized": 0.5958333611488342, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5791666507720947, "regularize": 0.21656714379787445, "step": 425, "wo_beta": 15.240234375 }, { "dpo_loss": 0.5631528496742249, "epoch": 1.2187057156353331, "grad_norm": 13379.590249575365, "learning_rate": 9.378855365814557e-07, "logits": -1.3373157978057861, "logps": -84.1694107055664, "loss": 89.0871, "objective": 83.64144897460938, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.5708333253860474, "regularize": 0.19078685343265533, "step": 430, "wo_beta": 16.387685775756836 }, { "dpo_loss": 0.5549448132514954, "epoch": 1.2328767123287672, "grad_norm": 13813.435024161312, "learning_rate": 9.354705311077218e-07, "logits": -1.287793755531311, "logps": -83.4052963256836, "loss": 93.9205, "objective": 94.07813262939453, "ranking_idealized": 0.6458333134651184, "ranking_idealized_expo": 0.5958333611488342, "ranking_simple": 0.637499988079071, "regularize": 0.21654988825321198, "step": 435, "wo_beta": 17.72869110107422 }, { "dpo_loss": 0.5550996661186218, "epoch": 1.2470477090222012, "grad_norm": 15408.139135942378, "learning_rate": 9.330127018922193e-07, "logits": -1.302925705909729, "logps": -83.39546203613281, "loss": 87.7477, "objective": 81.88467407226562, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.5541666746139526, "ranking_simple": 0.612500011920929, "regularize": 0.18918146193027496, "step": 440, "wo_beta": 15.06383991241455 }, { "dpo_loss": 0.544273316860199, "epoch": 1.2612187057156352, "grad_norm": 13358.127194753248, "learning_rate": 9.305122906354448e-07, "logits": -1.3234721422195435, "logps": -85.1892318725586, "loss": 91.347, "objective": 87.14881896972656, "ranking_idealized": 0.637499988079071, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.6208333373069763, "regularize": 0.2082992047071457, "step": 445, "wo_beta": 17.48933219909668 }, { "dpo_loss": 0.5497770309448242, "epoch": 1.2753897024090695, "grad_norm": 13860.879601223209, "learning_rate": 9.279695432253708e-07, "logits": -1.4758702516555786, "logps": -84.91988372802734, "loss": 90.5585, "objective": 87.8936996459961, "ranking_idealized": 0.5541666746139526, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5666666626930237, "regularize": 0.2043653279542923, "step": 450, "wo_beta": 15.034831047058105 }, { "epoch": 1.2753897024090695, "eval_dpo_loss": 0.6794779300689697, "eval_logits": -1.3663489818572998, "eval_logps": -91.20365905761719, "eval_loss": 184.06732177734375, "eval_objective": 180.62957763671875, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5357142686843872, "eval_regularize": 0.41047051548957825, "eval_runtime": 479.5938, "eval_samples_per_second": 12.073, "eval_steps_per_second": 1.007, "eval_wo_beta": 16.288923263549805, "step": 450 }, { "dpo_loss": 0.5493736267089844, "epoch": 1.2895606991025035, "grad_norm": 12737.57571248245, "learning_rate": 9.253847097132655e-07, "logits": -1.2778384685516357, "logps": -85.39282989501953, "loss": 90.8388, "objective": 97.43504333496094, "ranking_idealized": 0.5458333492279053, "ranking_idealized_expo": 0.49166667461395264, "ranking_simple": 0.5375000238418579, "regularize": 0.21359196305274963, "step": 455, "wo_beta": 15.552309036254883 }, { "dpo_loss": 0.5543228983879089, "epoch": 1.3037316957959377, "grad_norm": 14070.394055394958, "learning_rate": 9.227580442891021e-07, "logits": -1.3934885263442993, "logps": -84.22640991210938, "loss": 89.7715, "objective": 87.21723175048828, "ranking_idealized": 0.5583333373069763, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.5333333611488342, "regularize": 0.1974954754114151, "step": 460, "wo_beta": 16.378904342651367 }, { "dpo_loss": 0.5476227402687073, "epoch": 1.3179026924893718, "grad_norm": 11947.40976577932, "learning_rate": 9.200898052565637e-07, "logits": -1.3618992567062378, "logps": -82.62676239013672, "loss": 89.4031, "objective": 95.53166961669922, "ranking_idealized": 0.5541666746139526, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.5416666865348816, "regularize": 0.2165236622095108, "step": 465, "wo_beta": 14.700522422790527 }, { "dpo_loss": 0.5633279085159302, "epoch": 1.3320736891828058, "grad_norm": 14427.62714295139, "learning_rate": 9.173802550076401e-07, "logits": -1.4394139051437378, "logps": -81.61421203613281, "loss": 96.3098, "objective": 105.95228576660156, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5833333134651184, "regularize": 0.22128254175186157, "step": 470, "wo_beta": 14.451654434204102 }, { "dpo_loss": 0.5512283444404602, "epoch": 1.34624468587624, "grad_norm": 15510.676068153169, "learning_rate": 9.146296599968258e-07, "logits": -1.334899663925171, "logps": -84.10041809082031, "loss": 85.2643, "objective": 97.66104125976562, "ranking_idealized": 0.6416666507720947, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.6208333373069763, "regularize": 0.22577306628227234, "step": 475, "wo_beta": 16.777812957763672 }, { "dpo_loss": 0.5490090847015381, "epoch": 1.360415682569674, "grad_norm": 13039.859969979723, "learning_rate": 9.118382907149163e-07, "logits": -1.396318793296814, "logps": -84.05583953857422, "loss": 92.9048, "objective": 106.32127380371094, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.5291666388511658, "regularize": 0.22388581931591034, "step": 480, "wo_beta": 18.35649871826172 }, { "dpo_loss": 0.55390864610672, "epoch": 1.3745866792631083, "grad_norm": 14303.492597277622, "learning_rate": 9.090064216624092e-07, "logits": -1.3780549764633179, "logps": -81.48451232910156, "loss": 89.9123, "objective": 85.18955993652344, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.5666666626930237, "regularize": 0.19940294325351715, "step": 485, "wo_beta": 16.11130714416504 }, { "dpo_loss": 0.5646805167198181, "epoch": 1.3887576759565423, "grad_norm": 13569.748240897005, "learning_rate": 9.061343313225087e-07, "logits": -1.3297451734542847, "logps": -84.58447265625, "loss": 91.7915, "objective": 92.44489288330078, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5708333253860474, "regularize": 0.20839503407478333, "step": 490, "wo_beta": 15.887747764587402 }, { "dpo_loss": 0.5439994931221008, "epoch": 1.4029286726499763, "grad_norm": 14224.725006990095, "learning_rate": 9.032223021337413e-07, "logits": -1.3493283987045288, "logps": -84.9798355102539, "loss": 89.0675, "objective": 84.06017303466797, "ranking_idealized": 0.5583333373069763, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.550000011920929, "regularize": 0.1910681426525116, "step": 495, "wo_beta": 15.590251922607422 }, { "dpo_loss": 0.539610743522644, "epoch": 1.4170996693434104, "grad_norm": 14123.937473491551, "learning_rate": 9.002706204621802e-07, "logits": -1.278394341468811, "logps": -83.08454132080078, "loss": 91.2372, "objective": 89.69623565673828, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.574999988079071, "regularize": 0.20472820103168488, "step": 500, "wo_beta": 15.177144050598145 }, { "epoch": 1.4170996693434104, "eval_dpo_loss": 0.6782248020172119, "eval_logits": -1.3281084299087524, "eval_logps": -89.4298095703125, "eval_loss": 185.71939086914062, "eval_objective": 180.8789520263672, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.534679114818573, "eval_regularize": 0.41098901629447937, "eval_runtime": 475.7419, "eval_samples_per_second": 12.17, "eval_steps_per_second": 1.015, "eval_wo_beta": 16.044300079345703, "step": 500 }, { "dpo_loss": 0.5400077700614929, "epoch": 1.4312706660368446, "grad_norm": 13097.852775439285, "learning_rate": 8.972795765732846e-07, "logits": -1.3413732051849365, "logps": -82.83694458007812, "loss": 96.4414, "objective": 99.75823211669922, "ranking_idealized": 0.625, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5874999761581421, "regularize": 0.20853710174560547, "step": 505, "wo_beta": 16.662084579467773 }, { "dpo_loss": 0.5544862151145935, "epoch": 1.4454416627302786, "grad_norm": 13707.829813480788, "learning_rate": 8.942494646033554e-07, "logits": -1.3700981140136719, "logps": -84.05197143554688, "loss": 86.1912, "objective": 85.1514663696289, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5833333134651184, "regularize": 0.18519388139247894, "step": 510, "wo_beta": 15.12604808807373 }, { "dpo_loss": 0.5474262833595276, "epoch": 1.4596126594237129, "grad_norm": 16384.18532468762, "learning_rate": 8.911805825306096e-07, "logits": -1.4208234548568726, "logps": -85.2526626586914, "loss": 86.2928, "objective": 94.92349243164062, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.4625000059604645, "ranking_simple": 0.5249999761581421, "regularize": 0.21247318387031555, "step": 515, "wo_beta": 16.363656997680664 }, { "dpo_loss": 0.5538729429244995, "epoch": 1.473783656117147, "grad_norm": 14068.735921521182, "learning_rate": 8.880732321458784e-07, "logits": -1.3451961278915405, "logps": -81.92323303222656, "loss": 85.1002, "objective": 84.61219024658203, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.49166667461395264, "ranking_simple": 0.5416666865348816, "regularize": 0.19058094918727875, "step": 520, "wo_beta": 16.039138793945312 }, { "dpo_loss": 0.5211088061332703, "epoch": 1.487954652810581, "grad_norm": 12355.643543079665, "learning_rate": 8.849277190229283e-07, "logits": -1.2561639547348022, "logps": -80.8559341430664, "loss": 87.4323, "objective": 88.15239715576172, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5, "ranking_simple": 0.6000000238418579, "regularize": 0.19789734482765198, "step": 525, "wo_beta": 15.64743423461914 }, { "dpo_loss": 0.5529366731643677, "epoch": 1.5021256495040152, "grad_norm": 13792.295805387279, "learning_rate": 8.817443524884117e-07, "logits": -1.4202781915664673, "logps": -84.57428741455078, "loss": 89.1332, "objective": 93.31535339355469, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5708333253860474, "regularize": 0.20485611259937286, "step": 530, "wo_beta": 15.418906211853027 }, { "dpo_loss": 0.5380304455757141, "epoch": 1.5162966461974492, "grad_norm": 12748.671458728879, "learning_rate": 8.785234455914488e-07, "logits": -1.4013686180114746, "logps": -83.34593963623047, "loss": 86.3246, "objective": 83.55619812011719, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5791666507720947, "regularize": 0.19464156031608582, "step": 535, "wo_beta": 15.718771934509277 }, { "dpo_loss": 0.5602856874465942, "epoch": 1.5304676428908834, "grad_norm": 13600.712518077142, "learning_rate": 8.752653150728411e-07, "logits": -1.3116246461868286, "logps": -83.8393783569336, "loss": 85.7548, "objective": 85.53334045410156, "ranking_idealized": 0.6291666626930237, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5874999761581421, "regularize": 0.19387957453727722, "step": 540, "wo_beta": 15.35750961303711 }, { "dpo_loss": 0.5608557462692261, "epoch": 1.5446386395843175, "grad_norm": 13202.179692261727, "learning_rate": 8.719702813339247e-07, "logits": -1.4217339754104614, "logps": -85.13090515136719, "loss": 78.3728, "objective": 73.64112854003906, "ranking_idealized": 0.6583333611488342, "ranking_idealized_expo": 0.5874999761581421, "ranking_simple": 0.6499999761581421, "regularize": 0.17463207244873047, "step": 545, "wo_beta": 14.742905616760254 }, { "dpo_loss": 0.5433780550956726, "epoch": 1.5588096362777515, "grad_norm": 13773.885858068237, "learning_rate": 8.68638668405062e-07, "logits": -1.4105440378189087, "logps": -85.73950958251953, "loss": 85.7307, "objective": 91.58641815185547, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.46666666865348816, "ranking_simple": 0.5291666388511658, "regularize": 0.20735137164592743, "step": 550, "wo_beta": 15.781331062316895 }, { "epoch": 1.5588096362777515, "eval_dpo_loss": 0.6799347400665283, "eval_logits": -1.36829674243927, "eval_logps": -91.68656921386719, "eval_loss": 186.22413635253906, "eval_objective": 182.13821411132812, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.533643901348114, "eval_regularize": 0.4147377014160156, "eval_runtime": 478.899, "eval_samples_per_second": 12.09, "eval_steps_per_second": 1.009, "eval_wo_beta": 16.186290740966797, "step": 550 }, { "dpo_loss": 0.5562130212783813, "epoch": 1.5729806329711855, "grad_norm": 13716.988937741002, "learning_rate": 8.652708039137766e-07, "logits": -1.2273495197296143, "logps": -85.2579116821289, "loss": 90.1931, "objective": 91.27943420410156, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5833333134651184, "regularize": 0.19078856706619263, "step": 555, "wo_beta": 14.82008171081543 }, { "dpo_loss": 0.5405426621437073, "epoch": 1.5871516296646198, "grad_norm": 13222.290128913079, "learning_rate": 8.61867019052535e-07, "logits": -1.3004463911056519, "logps": -84.03120422363281, "loss": 82.5026, "objective": 82.23470306396484, "ranking_idealized": 0.6083333492279053, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.574999988079071, "regularize": 0.19372278451919556, "step": 560, "wo_beta": 16.210308074951172 }, { "dpo_loss": 0.545985221862793, "epoch": 1.601322626358054, "grad_norm": 13798.95251346989, "learning_rate": 8.584276485461775e-07, "logits": -1.2903294563293457, "logps": -85.43083190917969, "loss": 87.1773, "objective": 87.97190856933594, "ranking_idealized": 0.5458333492279053, "ranking_idealized_expo": 0.44999998807907104, "ranking_simple": 0.5458333492279053, "regularize": 0.18329960107803345, "step": 565, "wo_beta": 15.259784698486328 }, { "dpo_loss": 0.5544782280921936, "epoch": 1.615493623051488, "grad_norm": 14310.342902213652, "learning_rate": 8.549530306190014e-07, "logits": -1.4501588344573975, "logps": -85.62173461914062, "loss": 86.1569, "objective": 88.04158020019531, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5708333253860474, "regularize": 0.19149872660636902, "step": 570, "wo_beta": 15.673080444335938 }, { "dpo_loss": 0.5482128858566284, "epoch": 1.629664619744922, "grad_norm": 13673.298787796572, "learning_rate": 8.514435069615004e-07, "logits": -1.380743384361267, "logps": -83.26321411132812, "loss": 78.7831, "objective": 86.95629119873047, "ranking_idealized": 0.637499988079071, "ranking_idealized_expo": 0.5583333373069763, "ranking_simple": 0.6166666746139526, "regularize": 0.1899789720773697, "step": 575, "wo_beta": 16.415205001831055 }, { "dpo_loss": 0.5394086241722107, "epoch": 1.643835616438356, "grad_norm": 13082.53312626321, "learning_rate": 8.478994226967638e-07, "logits": -1.4001491069793701, "logps": -83.10562133789062, "loss": 76.6065, "objective": 77.20848846435547, "ranking_idealized": 0.6083333492279053, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.6000000238418579, "regularize": 0.1699313372373581, "step": 580, "wo_beta": 14.931032180786133 }, { "dpo_loss": 0.5450774431228638, "epoch": 1.6580066131317903, "grad_norm": 14803.266258769623, "learning_rate": 8.443211263465362e-07, "logits": -1.2514622211456299, "logps": -82.91756439208984, "loss": 81.1936, "objective": 78.58777618408203, "ranking_idealized": 0.5333333611488342, "ranking_idealized_expo": 0.4625000059604645, "ranking_simple": 0.5291666388511658, "regularize": 0.18020884692668915, "step": 585, "wo_beta": 16.229631423950195 }, { "dpo_loss": 0.5452067852020264, "epoch": 1.6721776098252243, "grad_norm": 14897.05549715986, "learning_rate": 8.407089697969456e-07, "logits": -1.310152530670166, "logps": -82.58568572998047, "loss": 84.6601, "objective": 89.34095764160156, "ranking_idealized": 0.6208333373069763, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.5874999761581421, "regularize": 0.18940496444702148, "step": 590, "wo_beta": 13.480273246765137 }, { "dpo_loss": 0.5498862862586975, "epoch": 1.6863486065186586, "grad_norm": 13388.885538994262, "learning_rate": 8.370633082638975e-07, "logits": -1.2777602672576904, "logps": -84.25193786621094, "loss": 85.2501, "objective": 97.64402770996094, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5791666507720947, "regularize": 0.21810217201709747, "step": 595, "wo_beta": 17.075584411621094 }, { "dpo_loss": 0.541688084602356, "epoch": 1.7005196032120926, "grad_norm": 12810.439354567186, "learning_rate": 8.333845002581458e-07, "logits": -1.3377609252929688, "logps": -85.63569641113281, "loss": 79.9458, "objective": 90.4583740234375, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.612500011920929, "regularize": 0.2045913189649582, "step": 600, "wo_beta": 16.088045120239258 }, { "epoch": 1.7005196032120926, "eval_dpo_loss": 0.6794285774230957, "eval_logits": -1.3519084453582764, "eval_logps": -91.084716796875, "eval_loss": 186.21368408203125, "eval_objective": 181.86863708496094, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5372670888900757, "eval_regularize": 0.4135282337665558, "eval_runtime": 449.0944, "eval_samples_per_second": 12.893, "eval_steps_per_second": 1.075, "eval_wo_beta": 16.10601043701172, "step": 600 }, { "dpo_loss": 0.5528364777565002, "epoch": 1.7146905999055266, "grad_norm": 12864.49342558613, "learning_rate": 8.296729075500343e-07, "logits": -1.2839235067367554, "logps": -85.77102661132812, "loss": 81.7288, "objective": 90.60871124267578, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.574999988079071, "regularize": 0.2010929137468338, "step": 605, "wo_beta": 18.23944091796875 }, { "dpo_loss": 0.5559037327766418, "epoch": 1.7288615965989607, "grad_norm": 14171.44704590598, "learning_rate": 8.259288951339232e-07, "logits": -1.3577406406402588, "logps": -83.76995086669922, "loss": 81.4701, "objective": 75.51998138427734, "ranking_idealized": 0.6291666626930237, "ranking_idealized_expo": 0.5708333253860474, "ranking_simple": 0.6291666626930237, "regularize": 0.17047205567359924, "step": 610, "wo_beta": 16.163959503173828 }, { "dpo_loss": 0.5623223185539246, "epoch": 1.743032593292395, "grad_norm": 14064.695817652162, "learning_rate": 8.221528311922941e-07, "logits": -1.3709431886672974, "logps": -83.62710571289062, "loss": 84.5652, "objective": 85.32384490966797, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.5791666507720947, "regularize": 0.19118142127990723, "step": 615, "wo_beta": 15.722049713134766 }, { "dpo_loss": 0.5426214933395386, "epoch": 1.7572035899858292, "grad_norm": 13161.981948520664, "learning_rate": 8.183450870595441e-07, "logits": -1.3993924856185913, "logps": -83.90966796875, "loss": 81.5518, "objective": 84.29554748535156, "ranking_idealized": 0.6208333373069763, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.6000000238418579, "regularize": 0.19568467140197754, "step": 620, "wo_beta": 16.582374572753906 }, { "dpo_loss": 0.5548843145370483, "epoch": 1.7713745866792632, "grad_norm": 13578.593083281268, "learning_rate": 8.145060371854691e-07, "logits": -1.3166680335998535, "logps": -83.37279510498047, "loss": 77.6344, "objective": 80.62175750732422, "ranking_idealized": 0.6083333492279053, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5916666388511658, "regularize": 0.17566385865211487, "step": 625, "wo_beta": 15.19571304321289 }, { "dpo_loss": 0.548730194568634, "epoch": 1.7855455833726972, "grad_norm": 12867.261945978005, "learning_rate": 8.106360590984404e-07, "logits": -1.3329386711120605, "logps": -85.60625457763672, "loss": 75.8762, "objective": 75.14217376708984, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.49166667461395264, "ranking_simple": 0.5583333373069763, "regularize": 0.17412720620632172, "step": 630, "wo_beta": 16.33298110961914 }, { "dpo_loss": 0.5529462695121765, "epoch": 1.7997165800661312, "grad_norm": 12432.106461076137, "learning_rate": 8.067355333682797e-07, "logits": -1.4188921451568604, "logps": -84.8874282836914, "loss": 78.6516, "objective": 76.64624786376953, "ranking_idealized": 0.637499988079071, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.5916666388511658, "regularize": 0.17813840508460999, "step": 635, "wo_beta": 16.95586395263672 }, { "dpo_loss": 0.5410430431365967, "epoch": 1.8138875767595655, "grad_norm": 12324.183379735212, "learning_rate": 8.028048435688333e-07, "logits": -1.3641606569290161, "logps": -85.47127532958984, "loss": 78.7118, "objective": 82.21182250976562, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.5666666626930237, "regularize": 0.18489192426204681, "step": 640, "wo_beta": 17.22258186340332 }, { "dpo_loss": 0.5470555424690247, "epoch": 1.8280585734529995, "grad_norm": 13971.672253595729, "learning_rate": 7.988443762402523e-07, "logits": -1.4050637483596802, "logps": -85.07406616210938, "loss": 78.6084, "objective": 74.21890258789062, "ranking_idealized": 0.6416666507720947, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.6291666626930237, "regularize": 0.16714391112327576, "step": 645, "wo_beta": 16.80624008178711 }, { "dpo_loss": 0.5424051880836487, "epoch": 1.8422295701464337, "grad_norm": 15285.601428700493, "learning_rate": 7.948545208509811e-07, "logits": -1.440900444984436, "logps": -84.5870590209961, "loss": 86.7578, "objective": 89.12664031982422, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.6000000238418579, "regularize": 0.18858183920383453, "step": 650, "wo_beta": 15.562705039978027 }, { "epoch": 1.8422295701464337, "eval_dpo_loss": 0.6796835660934448, "eval_logits": -1.3402661085128784, "eval_logps": -89.40703582763672, "eval_loss": 186.7196044921875, "eval_objective": 182.49703979492188, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.531573474407196, "eval_regularize": 0.4141009747982025, "eval_runtime": 450.1436, "eval_samples_per_second": 12.863, "eval_steps_per_second": 1.073, "eval_wo_beta": 16.0269832611084, "step": 650 }, { "dpo_loss": 0.5390594601631165, "epoch": 1.8564005668398678, "grad_norm": 14945.717954531257, "learning_rate": 7.90835669759456e-07, "logits": -1.292981505393982, "logps": -81.8280029296875, "loss": 79.8064, "objective": 77.88701629638672, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.6000000238418579, "regularize": 0.18046139180660248, "step": 655, "wo_beta": 15.520308494567871 }, { "dpo_loss": 0.5524376034736633, "epoch": 1.8705715635333018, "grad_norm": 12956.308969791295, "learning_rate": 7.86788218175523e-07, "logits": -1.3386873006820679, "logps": -84.97721862792969, "loss": 77.9731, "objective": 77.8855972290039, "ranking_idealized": 0.6041666865348816, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.6041666865348816, "regularize": 0.17455393075942993, "step": 660, "wo_beta": 17.077417373657227 }, { "dpo_loss": 0.562981903553009, "epoch": 1.8847425602267358, "grad_norm": 12832.376229580192, "learning_rate": 7.827125641215718e-07, "logits": -1.334754228591919, "logps": -83.5533447265625, "loss": 82.4367, "objective": 85.92207336425781, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.625, "regularize": 0.1833416372537613, "step": 665, "wo_beta": 15.230433464050293 }, { "dpo_loss": 0.548839807510376, "epoch": 1.89891355692017, "grad_norm": 13460.183191194346, "learning_rate": 7.786091083933949e-07, "logits": -1.273821473121643, "logps": -81.98705291748047, "loss": 71.3613, "objective": 68.62953186035156, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.5375000238418579, "regularize": 0.16619008779525757, "step": 670, "wo_beta": 16.408151626586914 }, { "dpo_loss": 0.5611833930015564, "epoch": 1.9130845536136043, "grad_norm": 12953.446893922981, "learning_rate": 7.744782545207744e-07, "logits": -1.2947652339935303, "logps": -83.05793762207031, "loss": 71.3196, "objective": 74.63235473632812, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.6083333492279053, "regularize": 0.16350051760673523, "step": 675, "wo_beta": 15.741961479187012 }, { "dpo_loss": 0.5451231598854065, "epoch": 1.9272555503070383, "grad_norm": 13412.02601484903, "learning_rate": 7.703204087277988e-07, "logits": -1.3697810173034668, "logps": -85.1467056274414, "loss": 71.5185, "objective": 70.06403350830078, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5, "ranking_simple": 0.6000000238418579, "regularize": 0.16510257124900818, "step": 680, "wo_beta": 15.431069374084473 }, { "dpo_loss": 0.5437536835670471, "epoch": 1.9414265470004723, "grad_norm": 13070.654673150682, "learning_rate": 7.661359798929152e-07, "logits": -1.2984110116958618, "logps": -82.4813003540039, "loss": 72.6279, "objective": 63.83388137817383, "ranking_idealized": 0.5291666388511658, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.5333333611488342, "regularize": 0.1474105566740036, "step": 685, "wo_beta": 15.765579223632812 }, { "dpo_loss": 0.5520148873329163, "epoch": 1.9555975436939064, "grad_norm": 13585.612422979371, "learning_rate": 7.619253795087208e-07, "logits": -1.3621736764907837, "logps": -83.20579528808594, "loss": 70.4149, "objective": 71.44465637207031, "ranking_idealized": 0.6041666865348816, "ranking_idealized_expo": 0.5583333373069763, "ranking_simple": 0.5916666388511658, "regularize": 0.15733769536018372, "step": 690, "wo_beta": 16.008506774902344 }, { "dpo_loss": 0.5521395802497864, "epoch": 1.9697685403873406, "grad_norm": 12626.830880791873, "learning_rate": 7.576890216414972e-07, "logits": -1.2345752716064453, "logps": -84.00497436523438, "loss": 69.938, "objective": 70.55232238769531, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.5541666746139526, "regularize": 0.15369382500648499, "step": 695, "wo_beta": 16.505474090576172 }, { "dpo_loss": 0.5477771759033203, "epoch": 1.9839395370807746, "grad_norm": 14507.10563022748, "learning_rate": 7.534273228904915e-07, "logits": -1.2208502292633057, "logps": -84.28005981445312, "loss": 76.2665, "objective": 85.08452606201172, "ranking_idealized": 0.6041666865348816, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.574999988079071, "regularize": 0.1893630176782608, "step": 700, "wo_beta": 15.212244987487793 }, { "epoch": 1.9839395370807746, "eval_dpo_loss": 0.6800020337104797, "eval_logits": -1.3222942352294922, "eval_logps": -89.5856704711914, "eval_loss": 186.28018188476562, "eval_objective": 182.39332580566406, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5310559272766113, "eval_regularize": 0.4136333167552948, "eval_runtime": 489.8617, "eval_samples_per_second": 11.82, "eval_steps_per_second": 0.986, "eval_wo_beta": 16.111663818359375, "step": 700 }, { "dpo_loss": 0.5436014533042908, "epoch": 1.9981105337742089, "grad_norm": 14761.110739737924, "learning_rate": 7.49140702346948e-07, "logits": -1.1587742567062378, "logps": -83.4106216430664, "loss": 71.0478, "objective": 77.40288543701172, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5541666746139526, "regularize": 0.1687079817056656, "step": 705, "wo_beta": 17.46946907043457 }, { "dpo_loss": 0.5400715470314026, "epoch": 2.012281530467643, "grad_norm": 13854.290443619322, "learning_rate": 7.448295815528956e-07, "logits": -1.3091672658920288, "logps": -83.20928192138672, "loss": 68.6235, "objective": 74.59980773925781, "ranking_idealized": 0.6083333492279053, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.6000000238418579, "regularize": 0.15744589269161224, "step": 710, "wo_beta": 16.282772064208984 }, { "dpo_loss": 0.5266523957252502, "epoch": 2.026452527161077, "grad_norm": 12772.47402835887, "learning_rate": 7.404943844596938e-07, "logits": -1.3287214040756226, "logps": -82.50818634033203, "loss": 67.4219, "objective": 67.50071716308594, "ranking_idealized": 0.6791666746139526, "ranking_idealized_expo": 0.5708333253860474, "ranking_simple": 0.6625000238418579, "regularize": 0.15344351530075073, "step": 715, "wo_beta": 15.63277816772461 }, { "dpo_loss": 0.5342952013015747, "epoch": 2.040623523854511, "grad_norm": 12280.29550374932, "learning_rate": 7.361355373863413e-07, "logits": -1.3206126689910889, "logps": -83.4239273071289, "loss": 65.7671, "objective": 62.988590240478516, "ranking_idealized": 0.6291666626930237, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.6041666865348816, "regularize": 0.13976921141147614, "step": 720, "wo_beta": 16.120634078979492 }, { "dpo_loss": 0.5416182279586792, "epoch": 2.0547945205479454, "grad_norm": 11934.95995024634, "learning_rate": 7.317534689775527e-07, "logits": -1.329419732093811, "logps": -86.18152618408203, "loss": 73.1378, "objective": 77.66006469726562, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.574999988079071, "regularize": 0.1658337563276291, "step": 725, "wo_beta": 14.640992164611816 }, { "dpo_loss": 0.5336278080940247, "epoch": 2.0689655172413794, "grad_norm": 13017.829141332633, "learning_rate": 7.273486101616056e-07, "logits": -1.4032765626907349, "logps": -83.5689468383789, "loss": 73.2891, "objective": 73.26839447021484, "ranking_idealized": 0.5791666507720947, "ranking_idealized_expo": 0.49166667461395264, "ranking_simple": 0.5874999761581421, "regularize": 0.15773232281208038, "step": 730, "wo_beta": 15.76942253112793 }, { "dpo_loss": 0.5291448831558228, "epoch": 2.0831365139348135, "grad_norm": 13426.194750558408, "learning_rate": 7.229213941079639e-07, "logits": -1.3250770568847656, "logps": -82.74713897705078, "loss": 59.2054, "objective": 57.16627883911133, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.47083333134651184, "ranking_simple": 0.550000011920929, "regularize": 0.14032262563705444, "step": 735, "wo_beta": 17.056970596313477 }, { "dpo_loss": 0.5465752482414246, "epoch": 2.0973075106282475, "grad_norm": 11906.26841829341, "learning_rate": 7.184722561846797e-07, "logits": -1.3804094791412354, "logps": -82.77980041503906, "loss": 62.2469, "objective": 65.71126556396484, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.6083333492279053, "regularize": 0.14654967188835144, "step": 740, "wo_beta": 15.721449851989746 }, { "dpo_loss": 0.5360319018363953, "epoch": 2.1114785073216815, "grad_norm": 13337.057180758171, "learning_rate": 7.14001633915581e-07, "logits": -1.313341498374939, "logps": -83.15229797363281, "loss": 60.0244, "objective": 60.3892822265625, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.5583333373069763, "ranking_simple": 0.625, "regularize": 0.13975684344768524, "step": 745, "wo_beta": 15.697921752929688 }, { "dpo_loss": 0.5399072170257568, "epoch": 2.1256495040151155, "grad_norm": 13331.418550163386, "learning_rate": 7.095099669372443e-07, "logits": -1.3453633785247803, "logps": -82.3453369140625, "loss": 65.1575, "objective": 60.51906967163086, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.5666666626930237, "regularize": 0.1316269487142563, "step": 750, "wo_beta": 15.831055641174316 }, { "epoch": 2.1256495040151155, "eval_dpo_loss": 0.6806153059005737, "eval_logits": -1.3253074884414673, "eval_logps": -90.24537658691406, "eval_loss": 188.15711975097656, "eval_objective": 184.20758056640625, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5320910811424255, "eval_regularize": 0.4179456830024719, "eval_runtime": 478.2913, "eval_samples_per_second": 12.106, "eval_steps_per_second": 1.01, "eval_wo_beta": 15.917864799499512, "step": 750 }, { "dpo_loss": 0.5414224863052368, "epoch": 2.13982050070855, "grad_norm": 14942.893679399409, "learning_rate": 7.049976969557623e-07, "logits": -1.3125241994857788, "logps": -85.55477905273438, "loss": 70.5458, "objective": 72.25684356689453, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5874999761581421, "regularize": 0.15286041796207428, "step": 755, "wo_beta": 16.596240997314453 }, { "dpo_loss": 0.5502544641494751, "epoch": 2.153991497401984, "grad_norm": 14884.220119069658, "learning_rate": 7.004652677033068e-07, "logits": -1.2573704719543457, "logps": -81.78999328613281, "loss": 66.5347, "objective": 56.669010162353516, "ranking_idealized": 0.5958333611488342, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5833333134651184, "regularize": 0.1313803344964981, "step": 760, "wo_beta": 14.706622123718262 }, { "dpo_loss": 0.537317156791687, "epoch": 2.168162494095418, "grad_norm": 12849.6702201699, "learning_rate": 6.959131248944922e-07, "logits": -1.3043426275253296, "logps": -82.6404037475586, "loss": 60.5154, "objective": 57.57880401611328, "ranking_idealized": 0.6208333373069763, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.6041666865348816, "regularize": 0.13467958569526672, "step": 765, "wo_beta": 16.29267120361328 }, { "dpo_loss": 0.5396389365196228, "epoch": 2.182333490788852, "grad_norm": 13686.597971217428, "learning_rate": 6.913417161825449e-07, "logits": -1.3148149251937866, "logps": -82.22266387939453, "loss": 66.4186, "objective": 71.55656433105469, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.49166667461395264, "ranking_simple": 0.574999988079071, "regularize": 0.15875324606895447, "step": 770, "wo_beta": 14.279667854309082 }, { "dpo_loss": 0.5356777906417847, "epoch": 2.196504487482286, "grad_norm": 13109.133649943296, "learning_rate": 6.867514911152806e-07, "logits": -1.279820203781128, "logps": -82.98641204833984, "loss": 62.1208, "objective": 65.08477020263672, "ranking_idealized": 0.6291666626930237, "ranking_idealized_expo": 0.5791666507720947, "ranking_simple": 0.625, "regularize": 0.14647550880908966, "step": 775, "wo_beta": 17.69573211669922 }, { "dpo_loss": 0.5467700362205505, "epoch": 2.21067548417572, "grad_norm": 13977.878251046886, "learning_rate": 6.821429010908971e-07, "logits": -1.2058584690093994, "logps": -82.53013610839844, "loss": 63.1931, "objective": 62.46464538574219, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.44583332538604736, "ranking_simple": 0.5041666626930237, "regularize": 0.13306237757205963, "step": 780, "wo_beta": 15.349116325378418 }, { "dpo_loss": 0.5252477526664734, "epoch": 2.2248464808691546, "grad_norm": 13522.027705329157, "learning_rate": 6.775163993135842e-07, "logits": -1.20766019821167, "logps": -81.99567413330078, "loss": 66.8492, "objective": 59.73252487182617, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.5708333253860474, "regularize": 0.13541431725025177, "step": 785, "wo_beta": 15.272583961486816 }, { "dpo_loss": 0.5247431993484497, "epoch": 2.2390174775625886, "grad_norm": 12425.328833284808, "learning_rate": 6.728724407489553e-07, "logits": -1.205735445022583, "logps": -82.88821411132812, "loss": 66.8893, "objective": 59.76982498168945, "ranking_idealized": 0.637499988079071, "ranking_idealized_expo": 0.5625, "ranking_simple": 0.637499988079071, "regularize": 0.14265631139278412, "step": 790, "wo_beta": 15.509627342224121 }, { "dpo_loss": 0.5296502113342285, "epoch": 2.2531884742560226, "grad_norm": 11978.127680414538, "learning_rate": 6.682114820793074e-07, "logits": -1.2859066724777222, "logps": -84.08002471923828, "loss": 63.7577, "objective": 59.34935760498047, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.612500011920929, "regularize": 0.14149998128414154, "step": 795, "wo_beta": 14.346338272094727 }, { "dpo_loss": 0.5199058651924133, "epoch": 2.2673594709494567, "grad_norm": 12421.855115848897, "learning_rate": 6.635339816587108e-07, "logits": -1.3125, "logps": -83.2691879272461, "loss": 66.0375, "objective": 66.00747680664062, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.6041666865348816, "regularize": 0.14774902164936066, "step": 800, "wo_beta": 14.81782341003418 }, { "epoch": 2.2673594709494567, "eval_dpo_loss": 0.6780735850334167, "eval_logits": -1.3137409687042236, "eval_logps": -88.58743286132812, "eval_loss": 186.72210693359375, "eval_objective": 181.93551635742188, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.533643901348114, "eval_regularize": 0.4137285053730011, "eval_runtime": 481.1373, "eval_samples_per_second": 12.034, "eval_steps_per_second": 1.004, "eval_wo_beta": 15.987866401672363, "step": 800 }, { "dpo_loss": 0.5357646942138672, "epoch": 2.2815304676428907, "grad_norm": 12458.299460461743, "learning_rate": 6.588403994679354e-07, "logits": -1.319643497467041, "logps": -81.99591827392578, "loss": 60.8943, "objective": 64.13407135009766, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.574999988079071, "regularize": 0.13611546158790588, "step": 805, "wo_beta": 16.0935001373291 }, { "dpo_loss": 0.5283416509628296, "epoch": 2.295701464336325, "grad_norm": 12943.281420533918, "learning_rate": 6.541311970692162e-07, "logits": -1.4129080772399902, "logps": -81.64440155029297, "loss": 61.2974, "objective": 61.06173324584961, "ranking_idealized": 0.6458333134651184, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.6499999761581421, "regularize": 0.13648824393749237, "step": 810, "wo_beta": 14.963865280151367 }, { "dpo_loss": 0.5351440906524658, "epoch": 2.309872461029759, "grad_norm": 12894.991014128658, "learning_rate": 6.494068375608646e-07, "logits": -1.352980136871338, "logps": -83.23399353027344, "loss": 60.8069, "objective": 63.9875602722168, "ranking_idealized": 0.5791666507720947, "ranking_idealized_expo": 0.47083333134651184, "ranking_simple": 0.5791666507720947, "regularize": 0.13894489407539368, "step": 815, "wo_beta": 15.326094627380371 }, { "dpo_loss": 0.5325611233711243, "epoch": 2.324043457723193, "grad_norm": 12345.798302601574, "learning_rate": 6.446677855317264e-07, "logits": -1.2916339635849, "logps": -81.8837661743164, "loss": 59.9305, "objective": 55.95283126831055, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5708333253860474, "regularize": 0.12038219720125198, "step": 820, "wo_beta": 15.182144165039062 }, { "dpo_loss": 0.5271125435829163, "epoch": 2.3382144544166272, "grad_norm": 12783.217599288302, "learning_rate": 6.39914507015496e-07, "logits": -1.3013333082199097, "logps": -81.13337707519531, "loss": 58.233, "objective": 62.38441467285156, "ranking_idealized": 0.625, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.612500011920929, "regularize": 0.14106927812099457, "step": 825, "wo_beta": 16.586782455444336 }, { "dpo_loss": 0.5309893488883972, "epoch": 2.3523854511100613, "grad_norm": 14368.93982814313, "learning_rate": 6.351474694448864e-07, "logits": -1.2905962467193604, "logps": -83.69612121582031, "loss": 59.3517, "objective": 62.03671646118164, "ranking_idealized": 0.6416666507720947, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.6416666507720947, "regularize": 0.13450145721435547, "step": 830, "wo_beta": 16.384456634521484 }, { "dpo_loss": 0.5386961102485657, "epoch": 2.3665564478034957, "grad_norm": 12278.034874198123, "learning_rate": 6.303671416056621e-07, "logits": -1.2532858848571777, "logps": -83.63367462158203, "loss": 63.5605, "objective": 61.1205940246582, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.6041666865348816, "regularize": 0.1340387463569641, "step": 835, "wo_beta": 15.07408618927002 }, { "dpo_loss": 0.5518457293510437, "epoch": 2.3807274444969297, "grad_norm": 12325.077561512098, "learning_rate": 6.255739935905395e-07, "logits": -1.222998023033142, "logps": -83.31403350830078, "loss": 56.4779, "objective": 54.8234977722168, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.5041666626930237, "regularize": 0.12345383316278458, "step": 840, "wo_beta": 15.817675590515137 }, { "dpo_loss": 0.5455399751663208, "epoch": 2.3948984411903638, "grad_norm": 14534.352470484577, "learning_rate": 6.207684967529592e-07, "logits": -1.2789770364761353, "logps": -84.17676544189453, "loss": 61.3501, "objective": 56.92399978637695, "ranking_idealized": 0.5958333611488342, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.5958333611488342, "regularize": 0.12513183057308197, "step": 845, "wo_beta": 16.274921417236328 }, { "dpo_loss": 0.5384249091148376, "epoch": 2.409069437883798, "grad_norm": 11975.067630184618, "learning_rate": 6.159511236607315e-07, "logits": -1.3067547082901, "logps": -81.92616271972656, "loss": 55.6773, "objective": 53.89519500732422, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.49166667461395264, "ranking_simple": 0.5541666746139526, "regularize": 0.12293127924203873, "step": 850, "wo_beta": 15.953167915344238 }, { "epoch": 2.409069437883798, "eval_dpo_loss": 0.680902361869812, "eval_logits": -1.311160922050476, "eval_logps": -88.26885986328125, "eval_loss": 189.5397491455078, "eval_objective": 185.2095947265625, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5300207138061523, "eval_regularize": 0.42031434178352356, "eval_runtime": 490.419, "eval_samples_per_second": 11.806, "eval_steps_per_second": 0.985, "eval_wo_beta": 15.931052207946777, "step": 850 }, { "dpo_loss": 0.5426651239395142, "epoch": 2.423240434577232, "grad_norm": 13056.278516188751, "learning_rate": 6.111223480495671e-07, "logits": -1.3305928707122803, "logps": -80.8778076171875, "loss": 60.7771, "objective": 57.707275390625, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.5916666388511658, "regularize": 0.1298539638519287, "step": 855, "wo_beta": 15.123750686645508 }, { "dpo_loss": 0.537179172039032, "epoch": 2.4374114312706663, "grad_norm": 13276.37666715339, "learning_rate": 6.062826447764883e-07, "logits": -1.2815066576004028, "logps": -82.55672454833984, "loss": 55.8238, "objective": 53.87760925292969, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.6000000238418579, "regularize": 0.12577569484710693, "step": 860, "wo_beta": 16.197458267211914 }, { "dpo_loss": 0.53245609998703, "epoch": 2.4515824279641003, "grad_norm": 13115.296464572477, "learning_rate": 6.014324897731333e-07, "logits": -1.305693507194519, "logps": -81.65880584716797, "loss": 57.2162, "objective": 57.622314453125, "ranking_idealized": 0.5791666507720947, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.5708333253860474, "regularize": 0.12618619203567505, "step": 865, "wo_beta": 16.600849151611328 }, { "dpo_loss": 0.53475421667099, "epoch": 2.4657534246575343, "grad_norm": 13057.72282671728, "learning_rate": 5.965723599989528e-07, "logits": -1.347506046295166, "logps": -82.02439880371094, "loss": 59.1596, "objective": 58.05669403076172, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5916666388511658, "regularize": 0.12966732680797577, "step": 870, "wo_beta": 15.612308502197266 }, { "dpo_loss": 0.5284960865974426, "epoch": 2.4799244213509684, "grad_norm": 13136.725552830958, "learning_rate": 5.917027333943072e-07, "logits": -1.2931278944015503, "logps": -82.28563690185547, "loss": 52.9771, "objective": 52.34040069580078, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5708333253860474, "ranking_simple": 0.6041666865348816, "regularize": 0.12045804411172867, "step": 875, "wo_beta": 17.299848556518555 }, { "dpo_loss": 0.5465295910835266, "epoch": 2.4940954180444024, "grad_norm": 12177.118012490373, "learning_rate": 5.868240888334652e-07, "logits": -1.206485390663147, "logps": -82.52658081054688, "loss": 59.4905, "objective": 58.06962203979492, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.5, "ranking_simple": 0.574999988079071, "regularize": 0.13045351207256317, "step": 880, "wo_beta": 17.634618759155273 }, { "dpo_loss": 0.5378908514976501, "epoch": 2.5082664147378364, "grad_norm": 12653.821371026783, "learning_rate": 5.819369060775124e-07, "logits": -1.3703595399856567, "logps": -81.24169921875, "loss": 54.0377, "objective": 55.50392150878906, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.6000000238418579, "regularize": 0.1243302971124649, "step": 885, "wo_beta": 16.991498947143555 }, { "dpo_loss": 0.521662712097168, "epoch": 2.5224374114312704, "grad_norm": 13224.96582542829, "learning_rate": 5.770416657271728e-07, "logits": -1.2803348302841187, "logps": -80.2920913696289, "loss": 54.9019, "objective": 55.66249084472656, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5833333134651184, "regularize": 0.12915806472301483, "step": 890, "wo_beta": 14.390849113464355 }, { "dpo_loss": 0.538814902305603, "epoch": 2.536608408124705, "grad_norm": 13679.562551953088, "learning_rate": 5.721388491755455e-07, "logits": -1.2745685577392578, "logps": -82.53682708740234, "loss": 55.8587, "objective": 53.00823211669922, "ranking_idealized": 0.5458333492279053, "ranking_idealized_expo": 0.49166667461395264, "ranking_simple": 0.5458333492279053, "regularize": 0.12104254215955734, "step": 895, "wo_beta": 16.952863693237305 }, { "dpo_loss": 0.5534180998802185, "epoch": 2.550779404818139, "grad_norm": 12973.031921366075, "learning_rate": 5.67228938560766e-07, "logits": -1.2396929264068604, "logps": -81.03583526611328, "loss": 54.3682, "objective": 53.294551849365234, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.6000000238418579, "regularize": 0.12195997685194016, "step": 900, "wo_beta": 15.316643714904785 }, { "epoch": 2.550779404818139, "eval_dpo_loss": 0.6793311238288879, "eval_logits": -1.3258877992630005, "eval_logps": -88.36107635498047, "eval_loss": 188.23812866210938, "eval_objective": 184.16783142089844, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5310559272766113, "eval_regularize": 0.41672980785369873, "eval_runtime": 486.377, "eval_samples_per_second": 11.904, "eval_steps_per_second": 0.993, "eval_wo_beta": 15.968037605285645, "step": 900 }, { "dpo_loss": 0.5381408929824829, "epoch": 2.564950401511573, "grad_norm": 11810.259224351357, "learning_rate": 5.623124167185929e-07, "logits": -1.3189753293991089, "logps": -81.03609466552734, "loss": 51.9527, "objective": 49.01388931274414, "ranking_idealized": 0.5708333253860474, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5541666746139526, "regularize": 0.11513598263263702, "step": 905, "wo_beta": 15.316691398620605 }, { "dpo_loss": 0.5291991829872131, "epoch": 2.579121398205007, "grad_norm": 12343.801160156707, "learning_rate": 5.573897671349268e-07, "logits": -1.2955931425094604, "logps": -83.91735076904297, "loss": 55.8812, "objective": 63.70806121826172, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.5958333611488342, "regularize": 0.13904932141304016, "step": 910, "wo_beta": 16.40995216369629 }, { "dpo_loss": 0.5379226803779602, "epoch": 2.593292394898441, "grad_norm": 12490.177742860027, "learning_rate": 5.524614738982637e-07, "logits": -1.4045764207839966, "logps": -82.5849838256836, "loss": 55.5769, "objective": 54.98591613769531, "ranking_idealized": 0.6291666626930237, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.612500011920929, "regularize": 0.12587417662143707, "step": 915, "wo_beta": 15.309656143188477 }, { "dpo_loss": 0.5216780304908752, "epoch": 2.6074633915918755, "grad_norm": 12017.347028460124, "learning_rate": 5.475280216520912e-07, "logits": -1.2480995655059814, "logps": -82.11782836914062, "loss": 56.8294, "objective": 57.75908660888672, "ranking_idealized": 0.6041666865348816, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.6041666865348816, "regularize": 0.12090341001749039, "step": 920, "wo_beta": 16.191049575805664 }, { "dpo_loss": 0.5298858284950256, "epoch": 2.6216343882853095, "grad_norm": 14009.68291839978, "learning_rate": 5.42589895547229e-07, "logits": -1.280160665512085, "logps": -82.20765686035156, "loss": 53.1774, "objective": 55.67765426635742, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.5625, "regularize": 0.12424833327531815, "step": 925, "wo_beta": 16.476573944091797 }, { "dpo_loss": 0.5387442111968994, "epoch": 2.6358053849787435, "grad_norm": 12640.001047074344, "learning_rate": 5.376475811941191e-07, "logits": -1.2655282020568848, "logps": -82.08385467529297, "loss": 52.6196, "objective": 55.54609680175781, "ranking_idealized": 0.5416666865348816, "ranking_idealized_expo": 0.44583332538604736, "ranking_simple": 0.5333333611488342, "regularize": 0.12455514818429947, "step": 930, "wo_beta": 16.72053337097168 }, { "dpo_loss": 0.5488451719284058, "epoch": 2.6499763816721775, "grad_norm": 12698.751364257567, "learning_rate": 5.327015646150716e-07, "logits": -1.2632043361663818, "logps": -81.3023910522461, "loss": 50.4175, "objective": 51.81110763549805, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.6291666626930237, "regularize": 0.1139976978302002, "step": 935, "wo_beta": 16.381933212280273 }, { "dpo_loss": 0.518785834312439, "epoch": 2.6641473783656116, "grad_norm": 14057.06029309221, "learning_rate": 5.277523321964701e-07, "logits": -1.3097693920135498, "logps": -84.61360931396484, "loss": 52.2129, "objective": 56.00838088989258, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.6041666865348816, "regularize": 0.11512833088636398, "step": 940, "wo_beta": 17.616283416748047 }, { "dpo_loss": 0.5271897912025452, "epoch": 2.678318375059046, "grad_norm": 13084.001689574132, "learning_rate": 5.228003706409409e-07, "logits": -1.3481143712997437, "logps": -83.27128601074219, "loss": 49.6737, "objective": 52.79602813720703, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.5541666746139526, "regularize": 0.11426286399364471, "step": 945, "wo_beta": 16.029043197631836 }, { "dpo_loss": 0.5474939942359924, "epoch": 2.69248937175248, "grad_norm": 13821.932425093552, "learning_rate": 5.178461669194903e-07, "logits": -1.2337779998779297, "logps": -83.05430603027344, "loss": 50.3775, "objective": 45.27042007446289, "ranking_idealized": 0.637499988079071, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.625, "regularize": 0.10929083079099655, "step": 950, "wo_beta": 15.533432006835938 }, { "epoch": 2.69248937175248, "eval_dpo_loss": 0.6802442669868469, "eval_logits": -1.3090835809707642, "eval_logps": -88.80048370361328, "eval_loss": 189.54185485839844, "eval_objective": 185.00436401367188, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5331262946128845, "eval_regularize": 0.418261855840683, "eval_runtime": 491.779, "eval_samples_per_second": 11.774, "eval_steps_per_second": 0.982, "eval_wo_beta": 15.998626708984375, "step": 950 }, { "dpo_loss": 0.5236973166465759, "epoch": 2.706660368445914, "grad_norm": 13266.227245283348, "learning_rate": 5.128902082236175e-07, "logits": -1.319283127784729, "logps": -82.27372741699219, "loss": 46.7135, "objective": 43.35396194458008, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.574999988079071, "regularize": 0.10942530632019043, "step": 955, "wo_beta": 14.039530754089355 }, { "dpo_loss": 0.5411895513534546, "epoch": 2.720831365139348, "grad_norm": 13668.800292035428, "learning_rate": 5.07932981917404e-07, "logits": -1.2875874042510986, "logps": -81.88396453857422, "loss": 53.1799, "objective": 54.5617561340332, "ranking_idealized": 0.5791666507720947, "ranking_idealized_expo": 0.49166667461395264, "ranking_simple": 0.5791666507720947, "regularize": 0.11944962292909622, "step": 960, "wo_beta": 16.39274787902832 }, { "dpo_loss": 0.5236133933067322, "epoch": 2.735002361832782, "grad_norm": 12761.114664799663, "learning_rate": 5.029749754895868e-07, "logits": -1.306726098060608, "logps": -82.27013397216797, "loss": 49.2644, "objective": 47.3409309387207, "ranking_idealized": 0.6416666507720947, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.6291666626930237, "regularize": 0.10921643674373627, "step": 965, "wo_beta": 15.65440559387207 }, { "dpo_loss": 0.5498754382133484, "epoch": 2.7491733585262166, "grad_norm": 12565.339155193906, "learning_rate": 4.980166765056193e-07, "logits": -1.3193691968917847, "logps": -83.46347045898438, "loss": 52.7234, "objective": 56.7745246887207, "ranking_idealized": 0.6208333373069763, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.6083333492279053, "regularize": 0.13472451269626617, "step": 970, "wo_beta": 15.647109031677246 }, { "dpo_loss": 0.5260103344917297, "epoch": 2.7633443552196506, "grad_norm": 13363.677196616523, "learning_rate": 4.930585725597247e-07, "logits": -1.240022897720337, "logps": -81.51500701904297, "loss": 50.997, "objective": 53.95423889160156, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.625, "regularize": 0.12574762105941772, "step": 975, "wo_beta": 16.371328353881836 }, { "dpo_loss": 0.5399420261383057, "epoch": 2.7775153519130846, "grad_norm": 13969.44472204385, "learning_rate": 4.881011512269463e-07, "logits": -1.35780930519104, "logps": -81.2794189453125, "loss": 51.6737, "objective": 55.6290283203125, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5666666626930237, "regularize": 0.12999171018600464, "step": 980, "wo_beta": 14.558424949645996 }, { "dpo_loss": 0.5281099677085876, "epoch": 2.7916863486065187, "grad_norm": 11586.92970672364, "learning_rate": 4.831449000151997e-07, "logits": -1.205262303352356, "logps": -79.56948852539062, "loss": 49.5107, "objective": 46.61149597167969, "ranking_idealized": 0.5791666507720947, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.5791666507720947, "regularize": 0.10813379287719727, "step": 985, "wo_beta": 14.642525672912598 }, { "dpo_loss": 0.5295001268386841, "epoch": 2.8058573452999527, "grad_norm": 12278.903797254565, "learning_rate": 4.78190306317332e-07, "logits": -1.268909215927124, "logps": -82.44329071044922, "loss": 47.3581, "objective": 51.39979553222656, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.5708333253860474, "regularize": 0.11149868369102478, "step": 990, "wo_beta": 16.07427406311035 }, { "dpo_loss": 0.5399483442306519, "epoch": 2.820028341993387, "grad_norm": 12982.312529844054, "learning_rate": 4.732378573631924e-07, "logits": -1.3312995433807373, "logps": -80.66969299316406, "loss": 49.758, "objective": 55.4227409362793, "ranking_idealized": 0.6208333373069763, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.6166666746139526, "regularize": 0.12711945176124573, "step": 995, "wo_beta": 16.746198654174805 }, { "dpo_loss": 0.526489794254303, "epoch": 2.8341993386868207, "grad_norm": 12173.86125870911, "learning_rate": 4.682880401717177e-07, "logits": -1.271032691001892, "logps": -79.56470489501953, "loss": 45.9449, "objective": 40.13682174682617, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.6166666746139526, "regularize": 0.09338556975126266, "step": 1000, "wo_beta": 15.067657470703125 }, { "epoch": 2.8341993386868207, "eval_dpo_loss": 0.6791692972183228, "eval_logits": -1.2989623546600342, "eval_logps": -87.81481170654297, "eval_loss": 187.70785522460938, "eval_objective": 183.56761169433594, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5300207138061523, "eval_regularize": 0.4161270260810852, "eval_runtime": 491.2083, "eval_samples_per_second": 11.787, "eval_steps_per_second": 0.983, "eval_wo_beta": 15.995977401733398, "step": 1000 }, { "dpo_loss": 0.5403110384941101, "epoch": 2.848370335380255, "grad_norm": 13425.378037887134, "learning_rate": 4.633413415030401e-07, "logits": -1.2654575109481812, "logps": -80.49606323242188, "loss": 48.7319, "objective": 47.16264724731445, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.6000000238418579, "regularize": 0.10651734471321106, "step": 1005, "wo_beta": 16.28557586669922 }, { "dpo_loss": 0.5306838750839233, "epoch": 2.862541332073689, "grad_norm": 13143.964606052063, "learning_rate": 4.5839824781061886e-07, "logits": -1.32563316822052, "logps": -81.29505920410156, "loss": 51.8292, "objective": 49.8996467590332, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.6083333492279053, "regularize": 0.11315880715847015, "step": 1010, "wo_beta": 15.957425117492676 }, { "dpo_loss": 0.5331242084503174, "epoch": 2.8767123287671232, "grad_norm": 12600.166168740529, "learning_rate": 4.53459245193404e-07, "logits": -1.2467234134674072, "logps": -80.21656799316406, "loss": 44.7609, "objective": 42.55329895019531, "ranking_idealized": 0.6541666388511658, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.6333333253860474, "regularize": 0.09937479346990585, "step": 1015, "wo_beta": 15.586889266967773 }, { "dpo_loss": 0.5346752405166626, "epoch": 2.8908833254605573, "grad_norm": 14111.243992297606, "learning_rate": 4.4852481934803277e-07, "logits": -1.2140835523605347, "logps": -82.13688659667969, "loss": 46.0337, "objective": 43.36848831176758, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5708333253860474, "regularize": 0.09756989777088165, "step": 1020, "wo_beta": 18.553333282470703 }, { "dpo_loss": 0.5420379042625427, "epoch": 2.9050543221539913, "grad_norm": 12276.868793163067, "learning_rate": 4.435954555210676e-07, "logits": -1.3084660768508911, "logps": -81.93505096435547, "loss": 46.0381, "objective": 48.77103042602539, "ranking_idealized": 0.6208333373069763, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.612500011920929, "regularize": 0.10658075660467148, "step": 1025, "wo_beta": 15.018412590026855 }, { "dpo_loss": 0.5342170000076294, "epoch": 2.9192253188474258, "grad_norm": 12677.814826562366, "learning_rate": 4.3867163846127674e-07, "logits": -1.3350425958633423, "logps": -81.84678649902344, "loss": 47.2693, "objective": 41.97852325439453, "ranking_idealized": 0.5416666865348816, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.5458333492279053, "regularize": 0.09485388547182083, "step": 1030, "wo_beta": 16.526702880859375 }, { "dpo_loss": 0.5289677977561951, "epoch": 2.9333963155408598, "grad_norm": 13399.69328236257, "learning_rate": 4.3375385237196507e-07, "logits": -1.3010871410369873, "logps": -82.80349731445312, "loss": 43.5011, "objective": 41.88113784790039, "ranking_idealized": 0.625, "ranking_idealized_expo": 0.5, "ranking_simple": 0.6083333492279053, "regularize": 0.09509758651256561, "step": 1035, "wo_beta": 14.980511665344238 }, { "dpo_loss": 0.5463218688964844, "epoch": 2.947567312234294, "grad_norm": 12072.270375502065, "learning_rate": 4.2884258086335745e-07, "logits": -1.2975058555603027, "logps": -82.66610717773438, "loss": 45.0537, "objective": 48.81401062011719, "ranking_idealized": 0.5708333253860474, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.5583333373069763, "regularize": 0.10974690318107605, "step": 1040, "wo_beta": 16.447132110595703 }, { "dpo_loss": 0.5381548404693604, "epoch": 2.961738308927728, "grad_norm": 13887.433179664138, "learning_rate": 4.2393830690504165e-07, "logits": -1.2503575086593628, "logps": -84.04967498779297, "loss": 44.8665, "objective": 42.995948791503906, "ranking_idealized": 0.6208333373069763, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.6208333373069763, "regularize": 0.09885497391223907, "step": 1045, "wo_beta": 18.053199768066406 }, { "dpo_loss": 0.5348830819129944, "epoch": 2.975909305621162, "grad_norm": 13502.021630049758, "learning_rate": 4.1904151277847305e-07, "logits": -1.2968212366104126, "logps": -79.87500762939453, "loss": 49.0003, "objective": 50.04111862182617, "ranking_idealized": 0.6541666388511658, "ranking_idealized_expo": 0.5541666746139526, "ranking_simple": 0.6458333134651184, "regularize": 0.11395598948001862, "step": 1050, "wo_beta": 15.20615291595459 }, { "epoch": 2.975909305621162, "eval_dpo_loss": 0.6791855692863464, "eval_logits": -1.2778165340423584, "eval_logps": -88.30037689208984, "eval_loss": 188.00396728515625, "eval_objective": 184.00155639648438, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5341615080833435, "eval_regularize": 0.41730284690856934, "eval_runtime": 486.4753, "eval_samples_per_second": 11.902, "eval_steps_per_second": 0.993, "eval_wo_beta": 16.040319442749023, "step": 1050 }, { "dpo_loss": 0.5416039824485779, "epoch": 2.9900803023145963, "grad_norm": 13186.167879544177, "learning_rate": 4.141526800295481e-07, "logits": -1.2704575061798096, "logps": -81.0667724609375, "loss": 43.7316, "objective": 46.92390441894531, "ranking_idealized": 0.6083333492279053, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.6041666865348816, "regularize": 0.11228723078966141, "step": 1055, "wo_beta": 15.320064544677734 }, { "dpo_loss": 0.5175911784172058, "epoch": 3.0042512990080303, "grad_norm": 11979.18084085825, "learning_rate": 4.092722894212487e-07, "logits": -1.291445255279541, "logps": -82.69534301757812, "loss": 44.4026, "objective": 47.78953552246094, "ranking_idealized": 0.6083333492279053, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.612500011920929, "regularize": 0.10625550150871277, "step": 1060, "wo_beta": 15.794866561889648 }, { "dpo_loss": 0.523690938949585, "epoch": 3.0184222957014644, "grad_norm": 12600.45509733284, "learning_rate": 4.0440082088636546e-07, "logits": -1.3265612125396729, "logps": -84.14775848388672, "loss": 41.3718, "objective": 38.99584197998047, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5958333611488342, "regularize": 0.09401161223649979, "step": 1065, "wo_beta": 16.806358337402344 }, { "dpo_loss": 0.5429927706718445, "epoch": 3.0325932923948984, "grad_norm": 13459.06076930384, "learning_rate": 3.995387534803005e-07, "logits": -1.2817329168319702, "logps": -81.6548080444336, "loss": 44.6891, "objective": 43.239158630371094, "ranking_idealized": 0.637499988079071, "ranking_idealized_expo": 0.5791666507720947, "ranking_simple": 0.625, "regularize": 0.09600695967674255, "step": 1070, "wo_beta": 17.19818878173828 }, { "dpo_loss": 0.5399213433265686, "epoch": 3.0467642890883324, "grad_norm": 12451.835928919867, "learning_rate": 3.9468656533395934e-07, "logits": -1.2840524911880493, "logps": -81.64595031738281, "loss": 38.4816, "objective": 40.692039489746094, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.5833333134651184, "regularize": 0.09315841645002365, "step": 1075, "wo_beta": 15.41653060913086 }, { "dpo_loss": 0.5243366360664368, "epoch": 3.0609352857817664, "grad_norm": 12956.687806008335, "learning_rate": 3.8984473360672967e-07, "logits": -1.3753383159637451, "logps": -82.9805908203125, "loss": 40.18, "objective": 39.79288864135742, "ranking_idealized": 0.6416666507720947, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.6291666626930237, "regularize": 0.09019829332828522, "step": 1080, "wo_beta": 17.60961151123047 }, { "dpo_loss": 0.5346547365188599, "epoch": 3.075106282475201, "grad_norm": 12876.9232360082, "learning_rate": 3.850137344395598e-07, "logits": -1.318056344985962, "logps": -83.30501556396484, "loss": 39.6664, "objective": 41.40624237060547, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.49166667461395264, "ranking_simple": 0.5458333492279053, "regularize": 0.0875302404165268, "step": 1085, "wo_beta": 15.289043426513672 }, { "dpo_loss": 0.5314586162567139, "epoch": 3.089277279168635, "grad_norm": 12423.675708081033, "learning_rate": 3.801940429081345e-07, "logits": -1.297440767288208, "logps": -81.59999084472656, "loss": 40.7964, "objective": 42.56759262084961, "ranking_idealized": 0.6083333492279053, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.6291666626930237, "regularize": 0.09419377893209457, "step": 1090, "wo_beta": 15.919710159301758 }, { "dpo_loss": 0.5284194946289062, "epoch": 3.103448275862069, "grad_norm": 12843.979452626416, "learning_rate": 3.7538613297615706e-07, "logits": -1.2590415477752686, "logps": -83.42412567138672, "loss": 40.9535, "objective": 44.701377868652344, "ranking_idealized": 0.625, "ranking_idealized_expo": 0.5708333253860474, "ranking_simple": 0.612500011920929, "regularize": 0.10055555403232574, "step": 1095, "wo_beta": 16.360620498657227 }, { "dpo_loss": 0.5163142681121826, "epoch": 3.117619272555503, "grad_norm": 11098.073660723994, "learning_rate": 3.7059047744873955e-07, "logits": -1.2521919012069702, "logps": -82.35820770263672, "loss": 40.2428, "objective": 41.402366638183594, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.5833333134651184, "regularize": 0.08817121386528015, "step": 1100, "wo_beta": 16.056493759155273 }, { "epoch": 3.117619272555503, "eval_dpo_loss": 0.680143415927887, "eval_logits": -1.2988417148590088, "eval_logps": -88.64698028564453, "eval_loss": 188.7165985107422, "eval_objective": 184.38153076171875, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.532608687877655, "eval_regularize": 0.41805195808410645, "eval_runtime": 486.8996, "eval_samples_per_second": 11.892, "eval_steps_per_second": 0.992, "eval_wo_beta": 15.998079299926758, "step": 1100 }, { "dpo_loss": 0.532370924949646, "epoch": 3.131790269248937, "grad_norm": 12884.072735206462, "learning_rate": 3.658075479259087e-07, "logits": -1.3051170110702515, "logps": -82.9980239868164, "loss": 43.5912, "objective": 42.78650665283203, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5666666626930237, "regularize": 0.09705787152051926, "step": 1105, "wo_beta": 17.55166244506836 }, { "dpo_loss": 0.5135348439216614, "epoch": 3.1459612659423715, "grad_norm": 13710.402810117148, "learning_rate": 3.6103781475622786e-07, "logits": -1.2103074789047241, "logps": -83.2777328491211, "loss": 35.6812, "objective": 35.80618667602539, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.6083333492279053, "regularize": 0.07978586852550507, "step": 1110, "wo_beta": 16.995450973510742 }, { "dpo_loss": 0.5229103565216064, "epoch": 3.1601322626358055, "grad_norm": 12411.913045675534, "learning_rate": 3.562817469905442e-07, "logits": -1.2619822025299072, "logps": -82.1358642578125, "loss": 38.5951, "objective": 36.70951461791992, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.6083333492279053, "regularize": 0.08537817001342773, "step": 1115, "wo_beta": 16.52168846130371 }, { "dpo_loss": 0.5169024467468262, "epoch": 3.1743032593292395, "grad_norm": 12747.527049209308, "learning_rate": 3.5153981233586274e-07, "logits": -1.2052761316299438, "logps": -80.89930725097656, "loss": 35.9412, "objective": 35.01757049560547, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.6083333492279053, "regularize": 0.08051317185163498, "step": 1120, "wo_beta": 15.474043846130371 }, { "dpo_loss": 0.5393829941749573, "epoch": 3.1884742560226735, "grad_norm": 13071.804290926188, "learning_rate": 3.468124771093519e-07, "logits": -1.263301134109497, "logps": -83.0383529663086, "loss": 37.8478, "objective": 38.899776458740234, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.6000000238418579, "regularize": 0.09170109778642654, "step": 1125, "wo_beta": 15.088132858276367 }, { "dpo_loss": 0.5208443999290466, "epoch": 3.2026452527161076, "grad_norm": 13439.120791203995, "learning_rate": 3.421002061924876e-07, "logits": -1.298660159111023, "logps": -82.7750473022461, "loss": 34.6631, "objective": 33.578922271728516, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.5958333611488342, "regularize": 0.07489284873008728, "step": 1130, "wo_beta": 15.427777290344238 }, { "dpo_loss": 0.5245645642280579, "epoch": 3.2168162494095416, "grad_norm": 11480.67381753106, "learning_rate": 3.374034629853356e-07, "logits": -1.3043017387390137, "logps": -80.89866638183594, "loss": 35.7927, "objective": 35.20330047607422, "ranking_idealized": 0.5291666388511658, "ranking_idealized_expo": 0.4625000059604645, "ranking_simple": 0.5249999761581421, "regularize": 0.08528413623571396, "step": 1135, "wo_beta": 16.220800399780273 }, { "dpo_loss": 0.5402042269706726, "epoch": 3.230987246102976, "grad_norm": 12946.274800579084, "learning_rate": 3.327227093609824e-07, "logits": -1.1506885290145874, "logps": -81.15502166748047, "loss": 40.5475, "objective": 40.8009033203125, "ranking_idealized": 0.5958333611488342, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5833333134651184, "regularize": 0.09735415130853653, "step": 1140, "wo_beta": 16.101863861083984 }, { "dpo_loss": 0.5243603587150574, "epoch": 3.24515824279641, "grad_norm": 13000.005011572795, "learning_rate": 3.2805840562011465e-07, "logits": -1.2146347761154175, "logps": -83.07351684570312, "loss": 40.1207, "objective": 42.64434814453125, "ranking_idealized": 0.6541666388511658, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.6333333253860474, "regularize": 0.09260058403015137, "step": 1145, "wo_beta": 15.928021430969238 }, { "dpo_loss": 0.5314944386482239, "epoch": 3.259329239489844, "grad_norm": 12558.545529727347, "learning_rate": 3.234110104457536e-07, "logits": -1.352626085281372, "logps": -80.92655181884766, "loss": 37.177, "objective": 37.67503356933594, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.5874999761581421, "regularize": 0.08233184367418289, "step": 1150, "wo_beta": 14.541799545288086 }, { "epoch": 3.259329239489844, "eval_dpo_loss": 0.6804221868515015, "eval_logits": -1.2842507362365723, "eval_logps": -87.92387390136719, "eval_loss": 188.25633239746094, "eval_objective": 184.33511352539062, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5357142686843872, "eval_regularize": 0.4183206856250763, "eval_runtime": 488.1945, "eval_samples_per_second": 11.86, "eval_steps_per_second": 0.989, "eval_wo_beta": 16.0123348236084, "step": 1150 }, { "dpo_loss": 0.5527331829071045, "epoch": 3.273500236183278, "grad_norm": 12600.213804572502, "learning_rate": 3.187809808581492e-07, "logits": -1.225222110748291, "logps": -80.92967987060547, "loss": 37.9886, "objective": 43.58564376831055, "ranking_idealized": 0.5958333611488342, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5874999761581421, "regularize": 0.09985193610191345, "step": 1155, "wo_beta": 16.76634407043457 }, { "dpo_loss": 0.5320346355438232, "epoch": 3.287671232876712, "grad_norm": 13215.429208773, "learning_rate": 3.141687721698363e-07, "logits": -1.287786602973938, "logps": -83.13336944580078, "loss": 34.714, "objective": 32.02961349487305, "ranking_idealized": 0.5416666865348816, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.5416666865348816, "regularize": 0.07429231703281403, "step": 1160, "wo_beta": 17.868885040283203 }, { "dpo_loss": 0.5377687215805054, "epoch": 3.3018422295701466, "grad_norm": 12734.199495358569, "learning_rate": 3.095748379408603e-07, "logits": -1.3172459602355957, "logps": -80.96276092529297, "loss": 34.2009, "objective": 33.96812057495117, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.6166666746139526, "regularize": 0.08176220953464508, "step": 1165, "wo_beta": 15.74937629699707 }, { "dpo_loss": 0.5269596576690674, "epoch": 3.3160132262635806, "grad_norm": 14339.996000811438, "learning_rate": 3.049996299341742e-07, "logits": -1.267351746559143, "logps": -82.11973571777344, "loss": 34.9879, "objective": 35.85028076171875, "ranking_idealized": 0.6041666865348816, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.6041666865348816, "regularize": 0.08146883547306061, "step": 1170, "wo_beta": 15.652009963989258 }, { "dpo_loss": 0.531762957572937, "epoch": 3.3301842229570147, "grad_norm": 12543.440661095656, "learning_rate": 3.004435980712129e-07, "logits": -1.257896900177002, "logps": -82.12284088134766, "loss": 38.0949, "objective": 35.93735122680664, "ranking_idealized": 0.5708333253860474, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.574999988079071, "regularize": 0.08384241163730621, "step": 1175, "wo_beta": 13.72645378112793 }, { "dpo_loss": 0.5355243682861328, "epoch": 3.3443552196504487, "grad_norm": 11718.716469797973, "learning_rate": 2.959071903876486e-07, "logits": -1.3486711978912354, "logps": -82.8729248046875, "loss": 35.7799, "objective": 35.360801696777344, "ranking_idealized": 0.5583333373069763, "ranking_idealized_expo": 0.49166667461395264, "ranking_simple": 0.5625, "regularize": 0.07788892835378647, "step": 1180, "wo_beta": 16.274147033691406 }, { "dpo_loss": 0.5254151225090027, "epoch": 3.3585262163438827, "grad_norm": 13437.960403836023, "learning_rate": 2.913908529893304e-07, "logits": -1.1963578462600708, "logps": -83.22509002685547, "loss": 33.4865, "objective": 33.50373840332031, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.5833333134651184, "regularize": 0.07612194865942001, "step": 1185, "wo_beta": 15.737934112548828 }, { "dpo_loss": 0.5395456552505493, "epoch": 3.372697213037317, "grad_norm": 12206.27505785514, "learning_rate": 2.86895030008416e-07, "logits": -1.3092117309570312, "logps": -81.93521118164062, "loss": 33.053, "objective": 29.232421875, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.5874999761581421, "regularize": 0.07262556999921799, "step": 1190, "wo_beta": 15.487491607666016 }, { "dpo_loss": 0.5137616991996765, "epoch": 3.386868209730751, "grad_norm": 11921.58688181337, "learning_rate": 2.824201635596951e-07, "logits": -1.2198973894119263, "logps": -82.35958099365234, "loss": 29.3695, "objective": 29.94867706298828, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.5666666626930237, "regularize": 0.06865646690130234, "step": 1195, "wo_beta": 15.531022071838379 }, { "dpo_loss": 0.5208079814910889, "epoch": 3.4010392064241852, "grad_norm": 12726.149489712327, "learning_rate": 2.779666936971129e-07, "logits": -1.3937805891036987, "logps": -82.6730728149414, "loss": 34.9809, "objective": 31.1435489654541, "ranking_idealized": 0.6208333373069763, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.6166666746139526, "regularize": 0.07431173324584961, "step": 1200, "wo_beta": 18.369197845458984 }, { "epoch": 3.4010392064241852, "eval_dpo_loss": 0.6805519461631775, "eval_logits": -1.289951205253601, "eval_logps": -88.11286926269531, "eval_loss": 189.17047119140625, "eval_objective": 184.87181091308594, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.532608687877655, "eval_regularize": 0.41934508085250854, "eval_runtime": 498.5381, "eval_samples_per_second": 11.614, "eval_steps_per_second": 0.969, "eval_wo_beta": 15.953052520751953, "step": 1200 }, { "dpo_loss": 0.5354551672935486, "epoch": 3.4152102031176192, "grad_norm": 12302.298902716244, "learning_rate": 2.7353505837049583e-07, "logits": -1.293818712234497, "logps": -81.88545989990234, "loss": 33.6714, "objective": 31.525800704956055, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.6208333373069763, "regularize": 0.07086090743541718, "step": 1205, "wo_beta": 15.15488052368164 }, { "dpo_loss": 0.5308886170387268, "epoch": 3.4293811998110533, "grad_norm": 11960.890598119064, "learning_rate": 2.6912569338248315e-07, "logits": -1.300658941268921, "logps": -83.05274200439453, "loss": 36.2356, "objective": 35.77425003051758, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5791666507720947, "regularize": 0.08412022143602371, "step": 1210, "wo_beta": 16.733659744262695 }, { "dpo_loss": 0.528823733329773, "epoch": 3.4435521965044873, "grad_norm": 13078.935439317174, "learning_rate": 2.64739032345671e-07, "logits": -1.3109962940216064, "logps": -84.07682037353516, "loss": 35.0362, "objective": 32.51463317871094, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.5666666626930237, "regularize": 0.0771927461028099, "step": 1215, "wo_beta": 15.212308883666992 }, { "dpo_loss": 0.540026843547821, "epoch": 3.4577231931979218, "grad_norm": 12256.162682293258, "learning_rate": 2.603755066399718e-07, "logits": -1.149971842765808, "logps": -82.9686508178711, "loss": 33.1832, "objective": 32.34642028808594, "ranking_idealized": 0.6041666865348816, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5833333134651184, "regularize": 0.06764715164899826, "step": 1220, "wo_beta": 16.678075790405273 }, { "dpo_loss": 0.524185061454773, "epoch": 3.471894189891356, "grad_norm": 12930.685272364457, "learning_rate": 2.560355453701919e-07, "logits": -1.302108645439148, "logps": -82.00885772705078, "loss": 33.7294, "objective": 32.768775939941406, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.46666666865348816, "ranking_simple": 0.5541666746139526, "regularize": 0.0753529891371727, "step": 1225, "wo_beta": 16.024269104003906 }, { "dpo_loss": 0.5251755118370056, "epoch": 3.48606518658479, "grad_norm": 12434.433063668528, "learning_rate": 2.517195753238345e-07, "logits": -1.325141191482544, "logps": -82.18378448486328, "loss": 35.229, "objective": 33.25638961791992, "ranking_idealized": 0.6291666626930237, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.625, "regularize": 0.0771695226430893, "step": 1230, "wo_beta": 16.292001724243164 }, { "dpo_loss": 0.5132806897163391, "epoch": 3.500236183278224, "grad_norm": 13558.533453277203, "learning_rate": 2.474280209291299e-07, "logits": -1.245792031288147, "logps": -81.74018096923828, "loss": 33.2282, "objective": 33.390872955322266, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.5874999761581421, "regularize": 0.07453177124261856, "step": 1235, "wo_beta": 16.363548278808594 }, { "dpo_loss": 0.5296925902366638, "epoch": 3.514407179971658, "grad_norm": 12949.63094083325, "learning_rate": 2.4316130421329696e-07, "logits": -1.238582968711853, "logps": -82.47282409667969, "loss": 34.0652, "objective": 31.30968475341797, "ranking_idealized": 0.5708333253860474, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.5666666626930237, "regularize": 0.06809426844120026, "step": 1240, "wo_beta": 15.522791862487793 }, { "dpo_loss": 0.5323511362075806, "epoch": 3.528578176665092, "grad_norm": 13527.106344889547, "learning_rate": 2.389198447610418e-07, "logits": -1.3098766803741455, "logps": -83.17538452148438, "loss": 30.2807, "objective": 31.539880752563477, "ranking_idealized": 0.6416666507720947, "ranking_idealized_expo": 0.5541666746139526, "ranking_simple": 0.625, "regularize": 0.07134827226400375, "step": 1245, "wo_beta": 15.821925163269043 }, { "dpo_loss": 0.5260218977928162, "epoch": 3.5427491733585263, "grad_norm": 13239.929991928584, "learning_rate": 2.3470405967329604e-07, "logits": -1.2133029699325562, "logps": -81.8626480102539, "loss": 34.073, "objective": 34.22465515136719, "ranking_idealized": 0.6291666626930237, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.625, "regularize": 0.08072555810213089, "step": 1250, "wo_beta": 15.293652534484863 }, { "epoch": 3.5427491733585263, "eval_dpo_loss": 0.6802147626876831, "eval_logits": -1.289227843284607, "eval_logps": -88.56167602539062, "eval_loss": 188.2202911376953, "eval_objective": 184.19659423828125, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.533643901348114, "eval_regularize": 0.4176720380783081, "eval_runtime": 501.867, "eval_samples_per_second": 11.537, "eval_steps_per_second": 0.962, "eval_wo_beta": 16.002193450927734, "step": 1250 }, { "dpo_loss": 0.5387216806411743, "epoch": 3.5569201700519604, "grad_norm": 12534.49899559166, "learning_rate": 2.3051436352620036e-07, "logits": -1.2683520317077637, "logps": -82.32015991210938, "loss": 36.4025, "objective": 32.02161407470703, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.5666666626930237, "regularize": 0.07073788344860077, "step": 1255, "wo_beta": 15.795002937316895 }, { "dpo_loss": 0.530408501625061, "epoch": 3.5710911667453944, "grad_norm": 12912.721697415427, "learning_rate": 2.2635116833033392e-07, "logits": -1.2373536825180054, "logps": -81.3061294555664, "loss": 30.8038, "objective": 33.21593475341797, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.6208333373069763, "regularize": 0.07438240200281143, "step": 1260, "wo_beta": 16.231142044067383 }, { "dpo_loss": 0.5247560739517212, "epoch": 3.5852621634388284, "grad_norm": 12931.353378545553, "learning_rate": 2.2221488349019902e-07, "logits": -1.2455730438232422, "logps": -80.93061828613281, "loss": 29.6738, "objective": 31.222820281982422, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.612500011920929, "regularize": 0.06749995797872543, "step": 1265, "wo_beta": 14.711896896362305 }, { "dpo_loss": 0.5356096625328064, "epoch": 3.5994331601322624, "grad_norm": 13549.0763306813, "learning_rate": 2.181059157639598e-07, "logits": -1.3499952554702759, "logps": -81.31751251220703, "loss": 30.5338, "objective": 30.125825881958008, "ranking_idealized": 0.5333333611488342, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.5333333611488342, "regularize": 0.06899719685316086, "step": 1270, "wo_beta": 13.772916793823242 }, { "dpo_loss": 0.5210896134376526, "epoch": 3.613604156825697, "grad_norm": 14924.204646126253, "learning_rate": 2.1402466922344303e-07, "logits": -1.210523247718811, "logps": -82.34052276611328, "loss": 29.82, "objective": 29.18175506591797, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.49166667461395264, "ranking_simple": 0.5791666507720947, "regularize": 0.06544475257396698, "step": 1275, "wo_beta": 15.473977088928223 }, { "dpo_loss": 0.5307682752609253, "epoch": 3.627775153519131, "grad_norm": 12824.51476470017, "learning_rate": 2.0997154521440097e-07, "logits": -1.2915035486221313, "logps": -81.79452514648438, "loss": 30.8024, "objective": 29.49608612060547, "ranking_idealized": 0.5541666746139526, "ranking_idealized_expo": 0.4541666805744171, "ranking_simple": 0.5375000238418579, "regularize": 0.06567243486642838, "step": 1280, "wo_beta": 15.875335693359375 }, { "dpo_loss": 0.5249419212341309, "epoch": 3.641946150212565, "grad_norm": 13428.13142246555, "learning_rate": 2.0594694231704373e-07, "logits": -1.2426308393478394, "logps": -81.00833892822266, "loss": 30.3043, "objective": 30.617321014404297, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.5916666388511658, "regularize": 0.07462318986654282, "step": 1285, "wo_beta": 17.443321228027344 }, { "dpo_loss": 0.5173429250717163, "epoch": 3.656117146905999, "grad_norm": 12671.749777744226, "learning_rate": 2.0195125630684428e-07, "logits": -1.245200276374817, "logps": -81.8724594116211, "loss": 28.4671, "objective": 27.68103790283203, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.6000000238418579, "regularize": 0.06590177118778229, "step": 1290, "wo_beta": 17.08915138244629 }, { "dpo_loss": 0.5335291028022766, "epoch": 3.670288143599433, "grad_norm": 13021.653293493737, "learning_rate": 1.979848801156167e-07, "logits": -1.3040084838867188, "logps": -81.88176727294922, "loss": 28.4196, "objective": 28.575376510620117, "ranking_idealized": 0.5958333611488342, "ranking_idealized_expo": 0.49166667461395264, "ranking_simple": 0.5874999761581421, "regularize": 0.0632786899805069, "step": 1295, "wo_beta": 14.829022407531738 }, { "dpo_loss": 0.5276142954826355, "epoch": 3.6844591402928675, "grad_norm": 11978.937253641576, "learning_rate": 1.9404820379287672e-07, "logits": -1.187487244606018, "logps": -80.9906005859375, "loss": 28.4565, "objective": 28.971555709838867, "ranking_idealized": 0.6041666865348816, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.6166666746139526, "regularize": 0.06764063984155655, "step": 1300, "wo_beta": 17.09331512451172 }, { "epoch": 3.6844591402928675, "eval_dpo_loss": 0.680322527885437, "eval_logits": -1.2942335605621338, "eval_logps": -88.08357238769531, "eval_loss": 188.31890869140625, "eval_objective": 184.1293182373047, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5331262946128845, "eval_regularize": 0.4177800714969635, "eval_runtime": 491.4726, "eval_samples_per_second": 11.781, "eval_steps_per_second": 0.983, "eval_wo_beta": 16.008142471313477, "step": 1300 }, { "dpo_loss": 0.5322309732437134, "epoch": 3.6986301369863015, "grad_norm": 13019.22557555901, "learning_rate": 1.9014161446748422e-07, "logits": -1.2798058986663818, "logps": -81.99161529541016, "loss": 30.5992, "objective": 32.30867004394531, "ranking_idealized": 0.5458333492279053, "ranking_idealized_expo": 0.47083333134651184, "ranking_simple": 0.5416666865348816, "regularize": 0.0735287144780159, "step": 1305, "wo_beta": 15.798765182495117 }, { "dpo_loss": 0.5227470397949219, "epoch": 3.7128011336797355, "grad_norm": 12226.371631865619, "learning_rate": 1.8626549630957395e-07, "logits": -1.2566769123077393, "logps": -81.54576110839844, "loss": 28.0805, "objective": 26.042844772338867, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5916666388511658, "regularize": 0.06227840855717659, "step": 1310, "wo_beta": 15.27546501159668 }, { "dpo_loss": 0.537932813167572, "epoch": 3.7269721303731695, "grad_norm": 12444.517818477534, "learning_rate": 1.8242023049277555e-07, "logits": -1.2929528951644897, "logps": -81.47209167480469, "loss": 30.7473, "objective": 30.499658584594727, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.5583333373069763, "regularize": 0.07173587381839752, "step": 1315, "wo_beta": 15.575103759765625 }, { "dpo_loss": 0.5317214131355286, "epoch": 3.7411431270666036, "grad_norm": 13300.946248563114, "learning_rate": 1.7860619515673032e-07, "logits": -1.3597683906555176, "logps": -83.02255249023438, "loss": 29.6239, "objective": 28.020469665527344, "ranking_idealized": 0.6541666388511658, "ranking_idealized_expo": 0.5708333253860474, "ranking_simple": 0.6541666388511658, "regularize": 0.06609723716974258, "step": 1320, "wo_beta": 16.70941734313965 }, { "dpo_loss": 0.5467905402183533, "epoch": 3.755314123760038, "grad_norm": 11933.522036621489, "learning_rate": 1.7482376536990474e-07, "logits": -1.2760491371154785, "logps": -81.77200317382812, "loss": 28.58, "objective": 27.297456741333008, "ranking_idealized": 0.5791666507720947, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5791666507720947, "regularize": 0.061256349086761475, "step": 1325, "wo_beta": 14.996780395507812 }, { "dpo_loss": 0.5155090689659119, "epoch": 3.769485120453472, "grad_norm": 12146.906265203044, "learning_rate": 1.7107331309270684e-07, "logits": -1.2232296466827393, "logps": -81.67552185058594, "loss": 25.7046, "objective": 24.283742904663086, "ranking_idealized": 0.6041666865348816, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.6083333492279053, "regularize": 0.05803535133600235, "step": 1330, "wo_beta": 14.960771560668945 }, { "dpo_loss": 0.5197141766548157, "epoch": 3.783656117146906, "grad_norm": 13269.1257120231, "learning_rate": 1.6735520714090778e-07, "logits": -1.3548495769500732, "logps": -82.88711547851562, "loss": 25.5411, "objective": 23.988988876342773, "ranking_idealized": 0.637499988079071, "ranking_idealized_expo": 0.5541666746139526, "ranking_simple": 0.6333333253860474, "regularize": 0.05831415578722954, "step": 1335, "wo_beta": 15.491255760192871 }, { "dpo_loss": 0.5400987863540649, "epoch": 3.79782711384034, "grad_norm": 12222.682651732252, "learning_rate": 1.6366981314937372e-07, "logits": -1.3011000156402588, "logps": -81.44950866699219, "loss": 26.7414, "objective": 27.633180618286133, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5541666746139526, "ranking_simple": 0.612500011920929, "regularize": 0.06863755732774734, "step": 1340, "wo_beta": 15.513628005981445 }, { "dpo_loss": 0.5207428336143494, "epoch": 3.811998110533774, "grad_norm": 13292.031759115218, "learning_rate": 1.6001749353610815e-07, "logits": -1.2988460063934326, "logps": -81.9979019165039, "loss": 27.5342, "objective": 26.436460494995117, "ranking_idealized": 0.6291666626930237, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.637499988079071, "regularize": 0.060691170394420624, "step": 1345, "wo_beta": 16.72386360168457 }, { "dpo_loss": 0.5372669100761414, "epoch": 3.826169107227208, "grad_norm": 12429.085011694839, "learning_rate": 1.5639860746661338e-07, "logits": -1.3200603723526, "logps": -80.8891830444336, "loss": 27.4636, "objective": 27.883655548095703, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.5666666626930237, "regularize": 0.06708240509033203, "step": 1350, "wo_beta": 15.541132926940918 }, { "epoch": 3.826169107227208, "eval_dpo_loss": 0.6802567839622498, "eval_logits": -1.2973301410675049, "eval_logps": -88.45857238769531, "eval_loss": 188.3022003173828, "eval_objective": 184.21910095214844, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5320910811424255, "eval_regularize": 0.4178454279899597, "eval_runtime": 484.5227, "eval_samples_per_second": 11.95, "eval_steps_per_second": 0.997, "eval_wo_beta": 15.999577522277832, "step": 1350 }, { "dpo_loss": 0.5401098132133484, "epoch": 3.840340103920642, "grad_norm": 12546.873988889934, "learning_rate": 1.5281351081856976e-07, "logits": -1.3091717958450317, "logps": -81.95738983154297, "loss": 24.53, "objective": 23.978574752807617, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.6000000238418579, "regularize": 0.05746602639555931, "step": 1355, "wo_beta": 15.96954345703125 }, { "dpo_loss": 0.5322627425193787, "epoch": 3.8545111006140766, "grad_norm": 12396.074158573574, "learning_rate": 1.492625561468393e-07, "logits": -1.2270203828811646, "logps": -81.94197082519531, "loss": 27.8079, "objective": 25.823699951171875, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5958333611488342, "regularize": 0.06090990826487541, "step": 1360, "wo_beta": 15.92143440246582 }, { "dpo_loss": 0.5215187668800354, "epoch": 3.8686820973075107, "grad_norm": 12924.951740893872, "learning_rate": 1.4574609264879632e-07, "logits": -1.2885017395019531, "logps": -81.9835205078125, "loss": 24.6244, "objective": 21.932554244995117, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.612500011920929, "regularize": 0.05161268636584282, "step": 1365, "wo_beta": 15.276032447814941 }, { "dpo_loss": 0.5322207808494568, "epoch": 3.8828530940009447, "grad_norm": 11760.04729219421, "learning_rate": 1.4226446612998671e-07, "logits": -1.325412631034851, "logps": -82.93399810791016, "loss": 25.2873, "objective": 22.0572566986084, "ranking_idealized": 0.6208333373069763, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.637499988079071, "regularize": 0.05589644983410835, "step": 1370, "wo_beta": 16.43442726135254 }, { "dpo_loss": 0.5177661776542664, "epoch": 3.8970240906943787, "grad_norm": 12668.234366032097, "learning_rate": 1.3881801897012224e-07, "logits": -1.3054790496826172, "logps": -81.97600555419922, "loss": 25.3791, "objective": 25.463533401489258, "ranking_idealized": 0.5708333253860474, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.5791666507720947, "regularize": 0.06239425763487816, "step": 1375, "wo_beta": 15.740779876708984 }, { "dpo_loss": 0.5269008874893188, "epoch": 3.9111950873878127, "grad_norm": 11613.901925945589, "learning_rate": 1.3540709008941147e-07, "logits": -1.2125933170318604, "logps": -81.08470153808594, "loss": 24.5614, "objective": 27.379404067993164, "ranking_idealized": 0.6416666507720947, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.6499999761581421, "regularize": 0.06529100984334946, "step": 1380, "wo_beta": 16.324913024902344 }, { "dpo_loss": 0.5204812288284302, "epoch": 3.925366084081247, "grad_norm": 12262.7455062338, "learning_rate": 1.3203201491523024e-07, "logits": -1.1872669458389282, "logps": -82.68800354003906, "loss": 26.2354, "objective": 27.383338928222656, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.5708333253860474, "regularize": 0.06124182417988777, "step": 1385, "wo_beta": 17.184247970581055 }, { "dpo_loss": 0.5293174982070923, "epoch": 3.9395370807746812, "grad_norm": 11656.13294817261, "learning_rate": 1.2869312534913685e-07, "logits": -1.3625025749206543, "logps": -81.69257354736328, "loss": 25.8656, "objective": 27.87486457824707, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.4541666805744171, "ranking_simple": 0.5208333134651184, "regularize": 0.06757337599992752, "step": 1390, "wo_beta": 14.843222618103027 }, { "dpo_loss": 0.5323649644851685, "epoch": 3.9537080774681153, "grad_norm": 12688.563452750986, "learning_rate": 1.2539074973423204e-07, "logits": -1.344056487083435, "logps": -82.50756072998047, "loss": 25.269, "objective": 20.71147346496582, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5, "ranking_simple": 0.574999988079071, "regularize": 0.05190667137503624, "step": 1395, "wo_beta": 15.608321189880371 }, { "dpo_loss": 0.5348060727119446, "epoch": 3.9678790741615493, "grad_norm": 13248.732573569929, "learning_rate": 1.2212521282287093e-07, "logits": -1.2224748134613037, "logps": -80.45255279541016, "loss": 27.3902, "objective": 28.852842330932617, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5708333253860474, "regularize": 0.0665024146437645, "step": 1400, "wo_beta": 16.69828987121582 }, { "epoch": 3.9678790741615493, "eval_dpo_loss": 0.6798388957977295, "eval_logits": -1.2974461317062378, "eval_logps": -88.3134765625, "eval_loss": 187.96913146972656, "eval_objective": 183.7815704345703, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5320910811424255, "eval_regularize": 0.4168493151664734, "eval_runtime": 519.2809, "eval_samples_per_second": 11.15, "eval_steps_per_second": 0.93, "eval_wo_beta": 15.978778839111328, "step": 1400 }, { "dpo_loss": 0.5318723320960999, "epoch": 3.9820500708549833, "grad_norm": 12626.278495743487, "learning_rate": 1.1889683574472692e-07, "logits": -1.2031117677688599, "logps": -81.43195343017578, "loss": 25.6619, "objective": 22.53989028930664, "ranking_idealized": 0.5958333611488342, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.5874999761581421, "regularize": 0.05257093533873558, "step": 1405, "wo_beta": 14.43735122680664 }, { "dpo_loss": 0.5325983762741089, "epoch": 3.9962210675484178, "grad_norm": 12962.865030589033, "learning_rate": 1.15705935975212e-07, "logits": -1.2109463214874268, "logps": -80.95507049560547, "loss": 25.0327, "objective": 27.48863410949707, "ranking_idealized": 0.6541666388511658, "ranking_idealized_expo": 0.5583333373069763, "ranking_simple": 0.637499988079071, "regularize": 0.06891029328107834, "step": 1410, "wo_beta": 15.6097993850708 }, { "dpo_loss": 0.5317092537879944, "epoch": 4.010392064241851, "grad_norm": 12833.61434685088, "learning_rate": 1.1255282730425708e-07, "logits": -1.2491552829742432, "logps": -81.32047271728516, "loss": 22.2145, "objective": 24.41758155822754, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.5874999761581421, "regularize": 0.06543368101119995, "step": 1415, "wo_beta": 15.283975601196289 }, { "dpo_loss": 0.5239009261131287, "epoch": 4.024563060935286, "grad_norm": 13451.327899072105, "learning_rate": 1.094378198054533e-07, "logits": -1.353010654449463, "logps": -83.2571792602539, "loss": 23.0966, "objective": 24.90163230895996, "ranking_idealized": 0.5291666388511658, "ranking_idealized_expo": 0.4416666626930237, "ranking_simple": 0.5166666507720947, "regularize": 0.0562543049454689, "step": 1420, "wo_beta": 16.40116310119629 }, { "dpo_loss": 0.53034508228302, "epoch": 4.03873405762872, "grad_norm": 13582.157317581643, "learning_rate": 1.063612198055604e-07, "logits": -1.2672284841537476, "logps": -82.41036987304688, "loss": 19.725, "objective": 18.898433685302734, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.5583333373069763, "regularize": 0.04213841259479523, "step": 1425, "wo_beta": 17.573118209838867 }, { "dpo_loss": 0.5290653109550476, "epoch": 4.052905054322154, "grad_norm": 12471.786390228664, "learning_rate": 1.0332332985438247e-07, "logits": -1.2409167289733887, "logps": -82.05091094970703, "loss": 21.8465, "objective": 20.57358741760254, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.6083333492279053, "regularize": 0.05022308602929115, "step": 1430, "wo_beta": 17.054475784301758 }, { "dpo_loss": 0.5352352261543274, "epoch": 4.067076051015588, "grad_norm": 12729.012234556472, "learning_rate": 1.0032444869501577e-07, "logits": -1.1344469785690308, "logps": -84.53145599365234, "loss": 23.6283, "objective": 21.45845603942871, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.550000011920929, "regularize": 0.046408891677856445, "step": 1435, "wo_beta": 17.1253719329834 }, { "dpo_loss": 0.5163091421127319, "epoch": 4.081247047709022, "grad_norm": 12403.62054840324, "learning_rate": 9.736487123447068e-08, "logits": -1.3162797689437866, "logps": -83.0071792602539, "loss": 18.4912, "objective": 19.839466094970703, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5916666388511658, "regularize": 0.04717810079455376, "step": 1440, "wo_beta": 16.756040573120117 }, { "dpo_loss": 0.5394971966743469, "epoch": 4.095418044402456, "grad_norm": 13017.070767832263, "learning_rate": 9.444488851467041e-08, "logits": -1.2141478061676025, "logps": -81.8912582397461, "loss": 22.8616, "objective": 24.104333877563477, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.5916666388511658, "regularize": 0.05671803280711174, "step": 1445, "wo_beta": 15.497802734375 }, { "dpo_loss": 0.5386175513267517, "epoch": 4.109589041095891, "grad_norm": 12319.490850536135, "learning_rate": 9.156478768383058e-08, "logits": -1.2780787944793701, "logps": -82.44509887695312, "loss": 21.2906, "objective": 22.363698959350586, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.5666666626930237, "regularize": 0.05074004456400871, "step": 1450, "wo_beta": 16.318647384643555 }, { "epoch": 4.109589041095891, "eval_dpo_loss": 0.6796455383300781, "eval_logits": -1.2975972890853882, "eval_logps": -88.12124633789062, "eval_loss": 187.89852905273438, "eval_objective": 183.65463256835938, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5320910811424255, "eval_regularize": 0.4164124131202698, "eval_runtime": 516.4821, "eval_samples_per_second": 11.21, "eval_steps_per_second": 0.935, "eval_wo_beta": 15.985260009765625, "step": 1450 }, { "dpo_loss": 0.5313987135887146, "epoch": 4.123760037789324, "grad_norm": 12478.853769070673, "learning_rate": 8.872485196822122e-08, "logits": -1.2814396619796753, "logps": -81.72008514404297, "loss": 22.8821, "objective": 23.81187629699707, "ranking_idealized": 0.6416666507720947, "ranking_idealized_expo": 0.5916666388511658, "ranking_simple": 0.6458333134651184, "regularize": 0.05551544576883316, "step": 1455, "wo_beta": 18.80474090576172 }, { "dpo_loss": 0.5285670161247253, "epoch": 4.137931034482759, "grad_norm": 13016.324616810654, "learning_rate": 8.592536064431466e-08, "logits": -1.3169968128204346, "logps": -82.27637481689453, "loss": 21.0762, "objective": 22.214412689208984, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5708333253860474, "regularize": 0.05284254625439644, "step": 1460, "wo_beta": 16.45089340209961 }, { "dpo_loss": 0.5275595784187317, "epoch": 4.1521020311761925, "grad_norm": 12828.215315021795, "learning_rate": 8.316658901132163e-08, "logits": -1.2044638395309448, "logps": -83.09059143066406, "loss": 20.1235, "objective": 19.89800453186035, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5541666746139526, "ranking_simple": 0.6208333373069763, "regularize": 0.04856906086206436, "step": 1465, "wo_beta": 16.143047332763672 }, { "dpo_loss": 0.5317350029945374, "epoch": 4.166273027869627, "grad_norm": 13452.677353962536, "learning_rate": 8.044880836411888e-08, "logits": -1.312625527381897, "logps": -80.955810546875, "loss": 18.8621, "objective": 22.22332000732422, "ranking_idealized": 0.5583333373069763, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.5583333373069763, "regularize": 0.05493269860744476, "step": 1470, "wo_beta": 14.867803573608398 }, { "dpo_loss": 0.5068629384040833, "epoch": 4.1804440245630605, "grad_norm": 12445.31776981503, "learning_rate": 7.777228596656993e-08, "logits": -1.2618132829666138, "logps": -83.48854064941406, "loss": 18.8691, "objective": 17.614728927612305, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5541666746139526, "ranking_simple": 0.612500011920929, "regularize": 0.04399799555540085, "step": 1475, "wo_beta": 17.06732940673828 }, { "dpo_loss": 0.5202235579490662, "epoch": 4.194615021256495, "grad_norm": 12224.02993997593, "learning_rate": 7.513728502524286e-08, "logits": -1.1893463134765625, "logps": -81.5462417602539, "loss": 19.5471, "objective": 21.709897994995117, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.5916666388511658, "regularize": 0.05476529151201248, "step": 1480, "wo_beta": 16.902223587036133 }, { "dpo_loss": 0.528392493724823, "epoch": 4.2087860179499295, "grad_norm": 12678.153549499324, "learning_rate": 7.25440646635268e-08, "logits": -1.3054612874984741, "logps": -80.2231674194336, "loss": 19.6042, "objective": 19.114337921142578, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.574999988079071, "regularize": 0.04563932120800018, "step": 1485, "wo_beta": 16.017080307006836 }, { "dpo_loss": 0.5332812070846558, "epoch": 4.222957014643363, "grad_norm": 12851.091233563351, "learning_rate": 6.999287989614971e-08, "logits": -1.368248462677002, "logps": -81.43551635742188, "loss": 19.3664, "objective": 18.39341926574707, "ranking_idealized": 0.5583333373069763, "ranking_idealized_expo": 0.47083333134651184, "ranking_simple": 0.5625, "regularize": 0.0449262373149395, "step": 1490, "wo_beta": 14.998396873474121 }, { "dpo_loss": 0.5162668824195862, "epoch": 4.2371280113367975, "grad_norm": 13439.750358421123, "learning_rate": 6.74839816041013e-08, "logits": -1.3570283651351929, "logps": -81.74089050292969, "loss": 16.8521, "objective": 18.91334342956543, "ranking_idealized": 0.5958333611488342, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5958333611488342, "regularize": 0.04384367913007736, "step": 1495, "wo_beta": 18.151466369628906 }, { "dpo_loss": 0.5271181464195251, "epoch": 4.251299008030231, "grad_norm": 12462.836104102607, "learning_rate": 6.501761650996052e-08, "logits": -1.3143360614776611, "logps": -83.34208679199219, "loss": 19.8787, "objective": 20.79971694946289, "ranking_idealized": 0.6083333492279053, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.5958333611488342, "regularize": 0.04957110807299614, "step": 1500, "wo_beta": 16.131967544555664 }, { "epoch": 4.251299008030231, "eval_dpo_loss": 0.679940402507782, "eval_logits": -1.2942196130752563, "eval_logps": -88.3078384399414, "eval_loss": 188.08248901367188, "eval_objective": 183.8683624267578, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5320910811424255, "eval_regularize": 0.4168849587440491, "eval_runtime": 525.9589, "eval_samples_per_second": 11.008, "eval_steps_per_second": 0.918, "eval_wo_beta": 15.983942031860352, "step": 1500 }, { "dpo_loss": 0.5348100066184998, "epoch": 4.2654700047236656, "grad_norm": 12354.445167507907, "learning_rate": 6.259402715363394e-08, "logits": -1.3128606081008911, "logps": -83.40116119384766, "loss": 18.971, "objective": 17.431968688964844, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.5666666626930237, "regularize": 0.0443451851606369, "step": 1505, "wo_beta": 15.766800880432129 }, { "dpo_loss": 0.5173017382621765, "epoch": 4.2796410014171, "grad_norm": 13102.659789592512, "learning_rate": 6.021345186850418e-08, "logits": -1.2090104818344116, "logps": -81.23714447021484, "loss": 21.191, "objective": 21.426023483276367, "ranking_idealized": 0.625, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.6166666746139526, "regularize": 0.04612095281481743, "step": 1510, "wo_beta": 15.78390121459961 }, { "dpo_loss": 0.5307953357696533, "epoch": 4.293811998110534, "grad_norm": 12945.706552780925, "learning_rate": 5.787612475799269e-08, "logits": -1.367775559425354, "logps": -82.44042205810547, "loss": 16.9107, "objective": 16.15281867980957, "ranking_idealized": 0.625, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.6291666626930237, "regularize": 0.03850070759654045, "step": 1515, "wo_beta": 16.397567749023438 }, { "dpo_loss": 0.5217214226722717, "epoch": 4.307982994803968, "grad_norm": 12094.58497098056, "learning_rate": 5.5582275672538316e-08, "logits": -1.2217297554016113, "logps": -81.85955047607422, "loss": 18.4539, "objective": 19.449350357055664, "ranking_idealized": 0.6791666746139526, "ranking_idealized_expo": 0.6041666865348816, "ranking_simple": 0.6791666746139526, "regularize": 0.04723352938890457, "step": 1520, "wo_beta": 16.819021224975586 }, { "dpo_loss": 0.5168942809104919, "epoch": 4.322153991497402, "grad_norm": 12423.169223430634, "learning_rate": 5.333213018699356e-08, "logits": -1.2731564044952393, "logps": -81.88040161132812, "loss": 21.872, "objective": 21.83941078186035, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.6291666626930237, "regularize": 0.054684512317180634, "step": 1525, "wo_beta": 14.882065773010254 }, { "dpo_loss": 0.5241533517837524, "epoch": 4.336324988190836, "grad_norm": 13878.266489791004, "learning_rate": 5.112590957844232e-08, "logits": -1.3176230192184448, "logps": -83.9821548461914, "loss": 20.6818, "objective": 16.49356460571289, "ranking_idealized": 0.625, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.612500011920929, "regularize": 0.04093782603740692, "step": 1530, "wo_beta": 16.301631927490234 }, { "dpo_loss": 0.5228941440582275, "epoch": 4.350495984884271, "grad_norm": 12969.925803784026, "learning_rate": 4.896383080443933e-08, "logits": -1.216440200805664, "logps": -82.53515625, "loss": 18.6444, "objective": 18.548452377319336, "ranking_idealized": 0.5708333253860474, "ranking_idealized_expo": 0.4625000059604645, "ranking_simple": 0.5791666507720947, "regularize": 0.047610316425561905, "step": 1535, "wo_beta": 15.176318168640137 }, { "dpo_loss": 0.5215330123901367, "epoch": 4.364666981577704, "grad_norm": 12724.232692363212, "learning_rate": 4.684610648167503e-08, "logits": -1.3027079105377197, "logps": -81.91221618652344, "loss": 21.6498, "objective": 21.20092010498047, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.5833333134651184, "regularize": 0.05302129685878754, "step": 1540, "wo_beta": 16.298704147338867 }, { "dpo_loss": 0.5356315970420837, "epoch": 4.378837978271139, "grad_norm": 12224.725778808395, "learning_rate": 4.4772944865067055e-08, "logits": -1.3303568363189697, "logps": -83.5517578125, "loss": 17.8525, "objective": 20.373811721801758, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.6166666746139526, "regularize": 0.050720926374197006, "step": 1545, "wo_beta": 15.988405227661133 }, { "dpo_loss": 0.5247156023979187, "epoch": 4.393008974964572, "grad_norm": 12442.357612605178, "learning_rate": 4.274454982728032e-08, "logits": -1.246690034866333, "logps": -81.54380798339844, "loss": 18.4741, "objective": 19.52410316467285, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.5833333134651184, "regularize": 0.049736883491277695, "step": 1550, "wo_beta": 17.08685874938965 }, { "epoch": 4.393008974964572, "eval_dpo_loss": 0.6802076697349548, "eval_logits": -1.2950727939605713, "eval_logps": -88.48546600341797, "eval_loss": 188.04074096679688, "eval_objective": 184.0446319580078, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.532608687877655, "eval_regularize": 0.4172796308994293, "eval_runtime": 533.9808, "eval_samples_per_second": 10.843, "eval_steps_per_second": 0.905, "eval_wo_beta": 15.994985580444336, "step": 1550 }, { "dpo_loss": 0.5181335210800171, "epoch": 4.407179971658007, "grad_norm": 13002.101456533634, "learning_rate": 4.0761120838678776e-08, "logits": -1.3068591356277466, "logps": -81.81246185302734, "loss": 16.5342, "objective": 14.914339065551758, "ranking_idealized": 0.5708333253860474, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.5708333253860474, "regularize": 0.0403703935444355, "step": 1555, "wo_beta": 17.326810836791992 }, { "dpo_loss": 0.5418220162391663, "epoch": 4.42135096835144, "grad_norm": 11889.810698222469, "learning_rate": 3.882285294770937e-08, "logits": -1.2680351734161377, "logps": -80.56555938720703, "loss": 16.764, "objective": 17.03957176208496, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.5916666388511658, "regularize": 0.04209714010357857, "step": 1560, "wo_beta": 14.395126342773438 }, { "dpo_loss": 0.5355924963951111, "epoch": 4.435521965044875, "grad_norm": 12185.339277571, "learning_rate": 3.6929936761721403e-08, "logits": -1.2988630533218384, "logps": -80.5867919921875, "loss": 21.4187, "objective": 21.873271942138672, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5791666507720947, "regularize": 0.05235178396105766, "step": 1565, "wo_beta": 14.826796531677246 }, { "dpo_loss": 0.5378596782684326, "epoch": 4.449692961738309, "grad_norm": 11114.71452911412, "learning_rate": 3.508255842822255e-08, "logits": -1.3118114471435547, "logps": -81.77924346923828, "loss": 18.6149, "objective": 20.33370590209961, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.5958333611488342, "regularize": 0.05320237576961517, "step": 1570, "wo_beta": 15.688643455505371 }, { "dpo_loss": 0.5132429599761963, "epoch": 4.463863958431743, "grad_norm": 12945.538981188476, "learning_rate": 3.3280899616572656e-08, "logits": -1.3532111644744873, "logps": -84.82633209228516, "loss": 17.216, "objective": 17.143177032470703, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.574999988079071, "regularize": 0.042684536427259445, "step": 1575, "wo_beta": 17.00408935546875 }, { "dpo_loss": 0.5311785340309143, "epoch": 4.478034955125177, "grad_norm": 13235.594805356337, "learning_rate": 3.15251375001192e-08, "logits": -1.2649219036102295, "logps": -82.44920349121094, "loss": 17.9899, "objective": 17.875553131103516, "ranking_idealized": 0.5708333253860474, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5708333253860474, "regularize": 0.043413810431957245, "step": 1580, "wo_beta": 17.040142059326172 }, { "dpo_loss": 0.5295437574386597, "epoch": 4.492205951818612, "grad_norm": 13164.392376509253, "learning_rate": 2.98154447387739e-08, "logits": -1.318244457244873, "logps": -81.6868667602539, "loss": 18.7186, "objective": 14.95705509185791, "ranking_idealized": 0.6041666865348816, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.5958333611488342, "regularize": 0.03320387750864029, "step": 1585, "wo_beta": 17.157299041748047 }, { "dpo_loss": 0.527228593826294, "epoch": 4.506376948512045, "grad_norm": 12124.025371614676, "learning_rate": 2.8151989462033787e-08, "logits": -1.1829341650009155, "logps": -83.83565521240234, "loss": 18.9673, "objective": 16.773042678833008, "ranking_idealized": 0.5791666507720947, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.5833333134651184, "regularize": 0.042777713388204575, "step": 1590, "wo_beta": 16.952783584594727 }, { "dpo_loss": 0.5242041349411011, "epoch": 4.52054794520548, "grad_norm": 11927.935212297323, "learning_rate": 2.653493525244721e-08, "logits": -1.2492893934249878, "logps": -82.36843872070312, "loss": 17.1521, "objective": 18.047021865844727, "ranking_idealized": 0.6208333373069763, "ranking_idealized_expo": 0.5, "ranking_simple": 0.6041666865348816, "regularize": 0.04608127102255821, "step": 1595, "wo_beta": 15.581862449645996 }, { "dpo_loss": 0.5243973135948181, "epoch": 4.534718941898913, "grad_norm": 12379.266840127142, "learning_rate": 2.4964441129527335e-08, "logits": -1.2830615043640137, "logps": -82.28716278076172, "loss": 20.4794, "objective": 17.599641799926758, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.6333333253860474, "regularize": 0.042389459908008575, "step": 1600, "wo_beta": 16.58247184753418 }, { "epoch": 4.534718941898913, "eval_dpo_loss": 0.6798632740974426, "eval_logits": -1.2950247526168823, "eval_logps": -88.43807983398438, "eval_loss": 187.9060821533203, "eval_objective": 183.82763671875, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5331262946128845, "eval_regularize": 0.4167550504207611, "eval_runtime": 510.5256, "eval_samples_per_second": 11.341, "eval_steps_per_second": 0.946, "eval_wo_beta": 16.000411987304688, "step": 1600 }, { "dpo_loss": 0.5349418520927429, "epoch": 4.548889938592348, "grad_norm": 13428.292487446544, "learning_rate": 2.3440661534114557e-08, "logits": -1.2768018245697021, "logps": -83.37641906738281, "loss": 17.8123, "objective": 14.984145164489746, "ranking_idealized": 0.5541666746139526, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.550000011920929, "regularize": 0.0358855277299881, "step": 1605, "wo_beta": 16.76499366760254 }, { "dpo_loss": 0.5164486169815063, "epoch": 4.563060935285781, "grad_norm": 12892.913912379732, "learning_rate": 2.1963746313188757e-08, "logits": -1.249220371246338, "logps": -81.78076171875, "loss": 17.1832, "objective": 20.233509063720703, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.47083333134651184, "ranking_simple": 0.574999988079071, "regularize": 0.048116158694028854, "step": 1610, "wo_beta": 15.82449722290039 }, { "dpo_loss": 0.5349178314208984, "epoch": 4.577231931979216, "grad_norm": 12493.396334435913, "learning_rate": 2.053384070513353e-08, "logits": -1.2513455152511597, "logps": -80.9568862915039, "loss": 18.7751, "objective": 20.071449279785156, "ranking_idealized": 0.5291666388511658, "ranking_idealized_expo": 0.46666666865348816, "ranking_simple": 0.5249999761581421, "regularize": 0.04651705548167229, "step": 1615, "wo_beta": 14.514166831970215 }, { "dpo_loss": 0.5360397100448608, "epoch": 4.59140292867265, "grad_norm": 12311.497249141552, "learning_rate": 1.915108532545351e-08, "logits": -1.3831831216812134, "logps": -81.701904296875, "loss": 16.5863, "objective": 13.440372467041016, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.5791666507720947, "regularize": 0.03207644820213318, "step": 1620, "wo_beta": 16.37172508239746 }, { "dpo_loss": 0.5228015780448914, "epoch": 4.605573925366084, "grad_norm": 12520.657843831757, "learning_rate": 1.781561615294652e-08, "logits": -1.3208075761795044, "logps": -82.14677429199219, "loss": 17.2643, "objective": 16.142719268798828, "ranking_idealized": 0.6791666746139526, "ranking_idealized_expo": 0.5916666388511658, "ranking_simple": 0.6875, "regularize": 0.03792344033718109, "step": 1625, "wo_beta": 15.518718719482422 }, { "dpo_loss": 0.5221564173698425, "epoch": 4.619744922059518, "grad_norm": 11926.649260036038, "learning_rate": 1.6527564516331638e-08, "logits": -1.1876474618911743, "logps": -82.74609375, "loss": 17.5255, "objective": 16.14875602722168, "ranking_idealized": 0.6708333492279053, "ranking_idealized_expo": 0.5874999761581421, "ranking_simple": 0.6708333492279053, "regularize": 0.039545025676488876, "step": 1630, "wo_beta": 17.103187561035156 }, { "dpo_loss": 0.5277553796768188, "epoch": 4.633915918752952, "grad_norm": 12387.92239266219, "learning_rate": 1.5287057081333988e-08, "logits": -1.303261399269104, "logps": -82.68264770507812, "loss": 17.5837, "objective": 18.295978546142578, "ranking_idealized": 0.625, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.6291666626930237, "regularize": 0.04383000358939171, "step": 1635, "wo_beta": 16.273590087890625 }, { "dpo_loss": 0.5235089063644409, "epoch": 4.648086915446386, "grad_norm": 13550.591286437839, "learning_rate": 1.4094215838229172e-08, "logits": -1.3104770183563232, "logps": -81.95443725585938, "loss": 16.0714, "objective": 18.62168312072754, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.5958333611488342, "regularize": 0.044566281139850616, "step": 1640, "wo_beta": 14.541909217834473 }, { "dpo_loss": 0.5459772944450378, "epoch": 4.662257912139821, "grad_norm": 12589.25993273719, "learning_rate": 1.2949158089846368e-08, "logits": -1.2789607048034668, "logps": -80.86375427246094, "loss": 15.9698, "objective": 15.747620582580566, "ranking_idealized": 0.5708333253860474, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5708333253860474, "regularize": 0.03958037868142128, "step": 1645, "wo_beta": 16.792747497558594 }, { "dpo_loss": 0.5279684066772461, "epoch": 4.6764289088332545, "grad_norm": 11986.458011152894, "learning_rate": 1.1851996440033318e-08, "logits": -1.224802017211914, "logps": -81.75625610351562, "loss": 17.2115, "objective": 18.047420501708984, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.5958333611488342, "regularize": 0.04608708992600441, "step": 1650, "wo_beta": 17.34733772277832 }, { "epoch": 4.6764289088332545, "eval_dpo_loss": 0.6798492074012756, "eval_logits": -1.293831467628479, "eval_logps": -88.41741943359375, "eval_loss": 187.95040893554688, "eval_objective": 183.85658264160156, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.532608687877655, "eval_regularize": 0.416820764541626, "eval_runtime": 510.4095, "eval_samples_per_second": 11.344, "eval_steps_per_second": 0.946, "eval_wo_beta": 15.994239807128906, "step": 1650 }, { "dpo_loss": 0.5183621048927307, "epoch": 4.690599905526689, "grad_norm": 12001.298881228338, "learning_rate": 1.0802838782582535e-08, "logits": -1.2560440301895142, "logps": -81.986083984375, "loss": 18.141, "objective": 16.23440170288086, "ranking_idealized": 0.5958333611488342, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.6041666865348816, "regularize": 0.0418228842318058, "step": 1655, "wo_beta": 14.709871292114258 }, { "dpo_loss": 0.5308786034584045, "epoch": 4.7047709022201225, "grad_norm": 12471.919482995943, "learning_rate": 9.801788290621505e-09, "logits": -1.242910623550415, "logps": -82.37290954589844, "loss": 19.8764, "objective": 21.41328239440918, "ranking_idealized": 0.6083333492279053, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.6083333492279053, "regularize": 0.04805602878332138, "step": 1660, "wo_beta": 16.161657333374023 }, { "dpo_loss": 0.517335832118988, "epoch": 4.718941898913557, "grad_norm": 12326.624130987268, "learning_rate": 8.848943406466468e-09, "logits": -1.2066967487335205, "logps": -81.63778686523438, "loss": 17.9054, "objective": 18.123321533203125, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.47083333134651184, "ranking_simple": 0.550000011920929, "regularize": 0.04272852838039398, "step": 1665, "wo_beta": 15.821066856384277 }, { "dpo_loss": 0.534516453742981, "epoch": 4.733112895606991, "grad_norm": 13120.765521158273, "learning_rate": 7.944397831941951e-09, "logits": -1.3101601600646973, "logps": -83.31844329833984, "loss": 15.3296, "objective": 14.363126754760742, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.47083333134651184, "ranking_simple": 0.5541666746139526, "regularize": 0.0361357256770134, "step": 1670, "wo_beta": 15.148748397827148 }, { "dpo_loss": 0.5110668540000916, "epoch": 4.747283892300425, "grad_norm": 12106.475879366208, "learning_rate": 7.088240519165955e-09, "logits": -1.2715505361557007, "logps": -83.65233612060547, "loss": 18.7232, "objective": 22.049705505371094, "ranking_idealized": 0.5708333253860474, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.5583333373069763, "regularize": 0.04454280436038971, "step": 1675, "wo_beta": 16.55459213256836 }, { "dpo_loss": 0.5210347771644592, "epoch": 4.7614548889938595, "grad_norm": 13458.285236730762, "learning_rate": 6.280555661802856e-09, "logits": -1.2422146797180176, "logps": -82.28036499023438, "loss": 16.7571, "objective": 16.147016525268555, "ranking_idealized": 0.637499988079071, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.637499988079071, "regularize": 0.03472811356186867, "step": 1680, "wo_beta": 17.648740768432617 }, { "dpo_loss": 0.5201699733734131, "epoch": 4.775625885687293, "grad_norm": 13687.817133347355, "learning_rate": 5.521422686783294e-09, "logits": -1.308603286743164, "logps": -82.1572265625, "loss": 17.374, "objective": 18.0618839263916, "ranking_idealized": 0.5791666507720947, "ranking_idealized_expo": 0.4583333432674408, "ranking_simple": 0.5625, "regularize": 0.039491456001996994, "step": 1685, "wo_beta": 14.411401748657227 }, { "dpo_loss": 0.5319506525993347, "epoch": 4.7897968823807275, "grad_norm": 11765.631080020812, "learning_rate": 4.810916246494157e-09, "logits": -1.3420146703720093, "logps": -81.82181549072266, "loss": 16.2518, "objective": 15.689167976379395, "ranking_idealized": 0.6291666626930237, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.612500011920929, "regularize": 0.03873560577630997, "step": 1690, "wo_beta": 15.535360336303711 }, { "dpo_loss": 0.5226943492889404, "epoch": 4.803967879074161, "grad_norm": 15212.188996211064, "learning_rate": 4.149106211436659e-09, "logits": -1.205290675163269, "logps": -81.14673614501953, "loss": 15.6316, "objective": 14.224554061889648, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.5708333253860474, "ranking_simple": 0.6333333253860474, "regularize": 0.033357344567775726, "step": 1695, "wo_beta": 17.657291412353516 }, { "dpo_loss": 0.5265616178512573, "epoch": 4.818138875767596, "grad_norm": 14041.074803893325, "learning_rate": 3.5360576633558513e-09, "logits": -1.3079345226287842, "logps": -80.5920639038086, "loss": 16.5799, "objective": 17.133312225341797, "ranking_idealized": 0.625, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.6208333373069763, "regularize": 0.042179401963949203, "step": 1700, "wo_beta": 14.612165451049805 }, { "epoch": 4.818138875767596, "eval_dpo_loss": 0.6798617839813232, "eval_logits": -1.2946054935455322, "eval_logps": -88.42201232910156, "eval_loss": 187.93597412109375, "eval_objective": 183.8405303955078, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.532608687877655, "eval_regularize": 0.4168170392513275, "eval_runtime": 537.7382, "eval_samples_per_second": 10.767, "eval_steps_per_second": 0.898, "eval_wo_beta": 15.996342658996582, "step": 1700 }, { "dpo_loss": 0.5323117971420288, "epoch": 4.83230987246103, "grad_norm": 12547.78573915316, "learning_rate": 2.9718308888401767e-09, "logits": -1.3183315992355347, "logps": -81.7763442993164, "loss": 16.0513, "objective": 17.510692596435547, "ranking_idealized": 0.6791666746139526, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.6708333492279053, "regularize": 0.040996309369802475, "step": 1705, "wo_beta": 17.88062858581543 }, { "dpo_loss": 0.5359232425689697, "epoch": 4.846480869154464, "grad_norm": 13302.316035438349, "learning_rate": 2.4564813733932155e-09, "logits": -1.316437840461731, "logps": -81.5803451538086, "loss": 17.058, "objective": 15.084990501403809, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5874999761581421, "regularize": 0.0330289825797081, "step": 1710, "wo_beta": 14.95897102355957 }, { "dpo_loss": 0.5298423171043396, "epoch": 4.860651865847898, "grad_norm": 13131.732232168924, "learning_rate": 1.9900597959770505e-09, "logits": -1.2239762544631958, "logps": -80.93972778320312, "loss": 15.5353, "objective": 14.398134231567383, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5833333134651184, "regularize": 0.03392880782485008, "step": 1715, "wo_beta": 16.470539093017578 }, { "dpo_loss": 0.5080859065055847, "epoch": 4.874822862541333, "grad_norm": 13218.33236233331, "learning_rate": 1.5726120240288631e-09, "logits": -1.2538625001907349, "logps": -80.96495819091797, "loss": 16.0016, "objective": 18.5091552734375, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5874999761581421, "regularize": 0.045930005609989166, "step": 1720, "wo_beta": 17.185333251953125 }, { "dpo_loss": 0.5118470788002014, "epoch": 4.888993859234766, "grad_norm": 12268.487941087904, "learning_rate": 1.2041791089499875e-09, "logits": -1.279910683631897, "logps": -79.85582733154297, "loss": 13.4289, "objective": 14.366524696350098, "ranking_idealized": 0.6416666507720947, "ranking_idealized_expo": 0.5708333253860474, "ranking_simple": 0.6416666507720947, "regularize": 0.03632321581244469, "step": 1725, "wo_beta": 17.114274978637695 }, { "dpo_loss": 0.5286470055580139, "epoch": 4.903164855928201, "grad_norm": 11950.336190164535, "learning_rate": 8.847972820693051e-10, "logits": -1.2914131879806519, "logps": -80.19400787353516, "loss": 16.9458, "objective": 18.679357528686523, "ranking_idealized": 0.5416666865348816, "ranking_idealized_expo": 0.4416666626930237, "ranking_simple": 0.5249999761581421, "regularize": 0.04655119404196739, "step": 1730, "wo_beta": 14.276873588562012 }, { "dpo_loss": 0.528618574142456, "epoch": 4.917335852621634, "grad_norm": 12737.302460928488, "learning_rate": 6.144979510802062e-10, "logits": -1.4132698774337769, "logps": -82.34892272949219, "loss": 18.3815, "objective": 18.776357650756836, "ranking_idealized": 0.6458333134651184, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.637499988079071, "regularize": 0.047753263264894485, "step": 1735, "wo_beta": 15.833959579467773 }, { "dpo_loss": 0.5292457938194275, "epoch": 4.931506849315069, "grad_norm": 13241.609695831672, "learning_rate": 3.933076969516724e-10, "logits": -1.2396830320358276, "logps": -81.28510284423828, "loss": 15.2755, "objective": 15.8608980178833, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5916666388511658, "regularize": 0.041682373732328415, "step": 1740, "wo_beta": 15.47945499420166 }, { "dpo_loss": 0.5308272838592529, "epoch": 4.945677846008502, "grad_norm": 12128.166835896209, "learning_rate": 2.212482713149222e-10, "logits": -1.2960669994354248, "logps": -80.84746551513672, "loss": 15.3037, "objective": 12.663678169250488, "ranking_idealized": 0.6083333492279053, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.6041666865348816, "regularize": 0.03369910642504692, "step": 1745, "wo_beta": 16.19184112548828 }, { "dpo_loss": 0.5277208089828491, "epoch": 4.959848842701937, "grad_norm": 12921.297125323947, "learning_rate": 9.833659432367803e-11, "logits": -1.2565745115280151, "logps": -82.744873046875, "loss": 16.689, "objective": 16.856407165527344, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5874999761581421, "regularize": 0.04240218922495842, "step": 1750, "wo_beta": 16.752824783325195 }, { "epoch": 4.959848842701937, "eval_dpo_loss": 0.6798657774925232, "eval_logits": -1.2945247888565063, "eval_logps": -88.4161605834961, "eval_loss": 187.94732666015625, "eval_objective": 183.85096740722656, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.532608687877655, "eval_regularize": 0.4168415367603302, "eval_runtime": 526.9139, "eval_samples_per_second": 10.989, "eval_steps_per_second": 0.917, "eval_wo_beta": 15.995292663574219, "step": 1750 }, { "dpo_loss": 0.5391930937767029, "epoch": 4.974019839395371, "grad_norm": 11466.754753582296, "learning_rate": 2.4584752990997048e-11, "logits": -1.29628324508667, "logps": -82.2157211303711, "loss": 14.7634, "objective": 15.14171314239502, "ranking_idealized": 0.6041666865348816, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.6041666865348816, "regularize": 0.04220600798726082, "step": 1755, "wo_beta": 16.21957778930664 }, { "dpo_loss": 0.525145411491394, "epoch": 4.988190836088805, "grad_norm": 12278.79483067917, "learning_rate": 0.0, "logits": -1.2211812734603882, "logps": -82.23439025878906, "loss": 15.7539, "objective": 15.124394416809082, "ranking_idealized": 0.6291666626930237, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.6208333373069763, "regularize": 0.035570546984672546, "step": 1760, "wo_beta": 17.11547088623047 }, { "epoch": 4.988190836088805, "step": 1760, "total_flos": 0.0, "train_loss": 67.88850653388283, "train_runtime": 74214.1269, "train_samples_per_second": 3.423, "train_steps_per_second": 0.024 } ], "logging_steps": 5, "max_steps": 1760, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }