diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,6285 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 4.988190836088805, + "eval_steps": 50, + "global_step": 1760, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "dpo_loss": 0.6931471824645996, + "epoch": 0.002834199338686821, + "grad_norm": 17675.585799054454, + "learning_rate": 5.681818181818181e-09, + "logits": -1.2867579460144043, + "logps": -84.34933471679688, + "loss": 169.5214, + "objective": 153.4677734375, + "ranking_idealized": 0.6458333134651184, + "ranking_idealized_expo": 0.5833333134651184, + "ranking_simple": 0.5833333134651184, + "regularize": 0.3618059456348419, + "step": 1, + "wo_beta": 14.83154582977295 + }, + { + "dpo_loss": 0.6930850148200989, + "epoch": 0.014170996693434105, + "grad_norm": 16809.76979726276, + "learning_rate": 2.8409090909090908e-08, + "logits": -1.4291090965270996, + "logps": -83.86122131347656, + "loss": 181.7047, + "objective": 168.55690002441406, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.4895833432674408, + "ranking_simple": 0.4895833432674408, + "regularize": 0.4036543667316437, + "step": 5, + "wo_beta": 16.679672241210938 + }, + { + "dpo_loss": 0.6930658221244812, + "epoch": 0.02834199338686821, + "grad_norm": 18604.90219885959, + "learning_rate": 5.6818181818181815e-08, + "logits": -1.4008290767669678, + "logps": -84.83370971679688, + "loss": 177.0775, + "objective": 170.34666442871094, + "ranking_idealized": 0.6708333492279053, + "ranking_idealized_expo": 0.5833333134651184, + "ranking_simple": 0.5791666507720947, + "regularize": 0.40391480922698975, + "step": 10, + "wo_beta": 15.222626686096191 + }, + { + "dpo_loss": 0.6919592618942261, + "epoch": 0.042512990080302314, + "grad_norm": 17866.85697228391, + "learning_rate": 8.522727272727271e-08, + "logits": -1.5378918647766113, + "logps": -84.51753234863281, + "loss": 178.9384, + "objective": 187.3764190673828, + "ranking_idealized": 0.6499999761581421, + "ranking_idealized_expo": 0.5708333253860474, + "ranking_simple": 0.5666666626930237, + "regularize": 0.44199517369270325, + "step": 15, + "wo_beta": 15.720404624938965 + }, + { + "dpo_loss": 0.6915046572685242, + "epoch": 0.05668398677373642, + "grad_norm": 17562.319543911097, + "learning_rate": 1.1363636363636363e-07, + "logits": -1.3619273900985718, + "logps": -83.62174224853516, + "loss": 185.6226, + "objective": 203.74549865722656, + "ranking_idealized": 0.5833333134651184, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.48750001192092896, + "regularize": 0.4415209889411926, + "step": 20, + "wo_beta": 16.53324317932129 + }, + { + "dpo_loss": 0.6925450563430786, + "epoch": 0.07085498346717052, + "grad_norm": 16842.244030261496, + "learning_rate": 1.4204545454545455e-07, + "logits": -1.369999647140503, + "logps": -83.69309997558594, + "loss": 181.9124, + "objective": 172.8611297607422, + "ranking_idealized": 0.6458333134651184, + "ranking_idealized_expo": 0.5583333373069763, + "ranking_simple": 0.5666666626930237, + "regularize": 0.4071991741657257, + "step": 25, + "wo_beta": 15.610980987548828 + }, + { + "dpo_loss": 0.6898643970489502, + "epoch": 0.08502598016060463, + "grad_norm": 14842.574916726253, + "learning_rate": 1.7045454545454543e-07, + "logits": -1.432415246963501, + "logps": -83.48454284667969, + "loss": 181.3521, + "objective": 176.5283203125, + "ranking_idealized": 0.5541666746139526, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.48750001192092896, + "regularize": 0.4289272427558899, + "step": 30, + "wo_beta": 17.00359344482422 + }, + { + "dpo_loss": 0.6909116506576538, + "epoch": 0.09919697685403873, + "grad_norm": 16058.543561158533, + "learning_rate": 1.9886363636363636e-07, + "logits": -1.4108096361160278, + "logps": -82.71344757080078, + "loss": 183.8373, + "objective": 173.34014892578125, + "ranking_idealized": 0.6291666626930237, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5416666865348816, + "regularize": 0.38034114241600037, + "step": 35, + "wo_beta": 16.153711318969727 + }, + { + "dpo_loss": 0.6891058683395386, + "epoch": 0.11336797354747284, + "grad_norm": 17014.23191466682, + "learning_rate": 2.2727272727272726e-07, + "logits": -1.402835488319397, + "logps": -83.338134765625, + "loss": 187.3552, + "objective": 182.01144409179688, + "ranking_idealized": 0.5708333253860474, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.5166666507720947, + "regularize": 0.4058202803134918, + "step": 40, + "wo_beta": 14.262288093566895 + }, + { + "dpo_loss": 0.684747040271759, + "epoch": 0.12753897024090693, + "grad_norm": 15346.751264548873, + "learning_rate": 2.5568181818181816e-07, + "logits": -1.419245958328247, + "logps": -83.82090759277344, + "loss": 171.4244, + "objective": 183.38385009765625, + "ranking_idealized": 0.5958333611488342, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.5166666507720947, + "regularize": 0.4130297601222992, + "step": 45, + "wo_beta": 14.29751968383789 + }, + { + "dpo_loss": 0.6823928356170654, + "epoch": 0.14170996693434104, + "grad_norm": 16514.084391847242, + "learning_rate": 2.840909090909091e-07, + "logits": -1.4350523948669434, + "logps": -84.8818359375, + "loss": 181.5404, + "objective": 186.33828735351562, + "ranking_idealized": 0.5916666388511658, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5041666626930237, + "regularize": 0.4348808526992798, + "step": 50, + "wo_beta": 15.604106903076172 + }, + { + "epoch": 0.14170996693434104, + "eval_dpo_loss": 0.6889749765396118, + "eval_logits": -1.4233466386795044, + "eval_logps": -90.91888427734375, + "eval_loss": 182.35984802246094, + "eval_objective": 180.32789611816406, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.5263975262641907, + "eval_regularize": 0.40881022810935974, + "eval_runtime": 472.6615, + "eval_samples_per_second": 12.25, + "eval_steps_per_second": 1.022, + "eval_wo_beta": 16.297378540039062, + "step": 50 + }, + { + "dpo_loss": 0.6824547052383423, + "epoch": 0.15588096362777515, + "grad_norm": 17699.4671939912, + "learning_rate": 3.1249999999999997e-07, + "logits": -1.3973591327667236, + "logps": -84.62629699707031, + "loss": 170.6542, + "objective": 174.4287872314453, + "ranking_idealized": 0.5583333373069763, + "ranking_idealized_expo": 0.49166667461395264, + "ranking_simple": 0.4833333194255829, + "regularize": 0.3742547035217285, + "step": 55, + "wo_beta": 15.11441421508789 + }, + { + "dpo_loss": 0.6806777715682983, + "epoch": 0.17005196032120926, + "grad_norm": 16100.449715737686, + "learning_rate": 3.4090909090909085e-07, + "logits": -1.329344391822815, + "logps": -85.16632843017578, + "loss": 174.0689, + "objective": 174.0922393798828, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.5291666388511658, + "regularize": 0.40893226861953735, + "step": 60, + "wo_beta": 14.438634872436523 + }, + { + "dpo_loss": 0.6708158254623413, + "epoch": 0.18422295701464336, + "grad_norm": 16302.471134333027, + "learning_rate": 3.693181818181818e-07, + "logits": -1.428707480430603, + "logps": -82.03670501708984, + "loss": 172.5426, + "objective": 161.09950256347656, + "ranking_idealized": 0.637499988079071, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5333333611488342, + "regularize": 0.36983728408813477, + "step": 65, + "wo_beta": 15.067524909973145 + }, + { + "dpo_loss": 0.6730712652206421, + "epoch": 0.19839395370807747, + "grad_norm": 15662.31236602018, + "learning_rate": 3.977272727272727e-07, + "logits": -1.4695442914962769, + "logps": -84.42548370361328, + "loss": 174.7341, + "objective": 175.19439697265625, + "ranking_idealized": 0.5916666388511658, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.5375000238418579, + "regularize": 0.4013313353061676, + "step": 70, + "wo_beta": 15.375307083129883 + }, + { + "dpo_loss": 0.6741575002670288, + "epoch": 0.21256495040151158, + "grad_norm": 18686.585950552704, + "learning_rate": 4.2613636363636364e-07, + "logits": -1.393960952758789, + "logps": -84.16697692871094, + "loss": 174.6645, + "objective": 164.988525390625, + "ranking_idealized": 0.6083333492279053, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.5208333134651184, + "regularize": 0.38378840684890747, + "step": 75, + "wo_beta": 15.075023651123047 + }, + { + "dpo_loss": 0.669329822063446, + "epoch": 0.22673594709494568, + "grad_norm": 18561.894559157903, + "learning_rate": 4.545454545454545e-07, + "logits": -1.4905359745025635, + "logps": -83.4140396118164, + "loss": 169.0661, + "objective": 177.64450073242188, + "ranking_idealized": 0.6291666626930237, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.5458333492279053, + "regularize": 0.4245981276035309, + "step": 80, + "wo_beta": 16.333538055419922 + }, + { + "dpo_loss": 0.6659378409385681, + "epoch": 0.2409069437883798, + "grad_norm": 15942.37358833672, + "learning_rate": 4.829545454545455e-07, + "logits": -1.4715605974197388, + "logps": -83.54389190673828, + "loss": 171.1414, + "objective": 182.98324584960938, + "ranking_idealized": 0.625, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.5541666746139526, + "regularize": 0.4137464463710785, + "step": 85, + "wo_beta": 15.189921379089355 + }, + { + "dpo_loss": 0.6600526571273804, + "epoch": 0.25507794048181387, + "grad_norm": 16315.909705896804, + "learning_rate": 5.113636363636363e-07, + "logits": -1.571618914604187, + "logps": -84.54931640625, + "loss": 168.3022, + "objective": 174.0519561767578, + "ranking_idealized": 0.612500011920929, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.550000011920929, + "regularize": 0.39906471967697144, + "step": 90, + "wo_beta": 15.517964363098145 + }, + { + "dpo_loss": 0.6545840501785278, + "epoch": 0.269248937175248, + "grad_norm": 17445.518244074756, + "learning_rate": 5.397727272727273e-07, + "logits": -1.49222731590271, + "logps": -84.54743194580078, + "loss": 168.7617, + "objective": 175.46524047851562, + "ranking_idealized": 0.5583333373069763, + "ranking_idealized_expo": 0.4791666567325592, + "ranking_simple": 0.512499988079071, + "regularize": 0.4040308892726898, + "step": 95, + "wo_beta": 16.429697036743164 + }, + { + "dpo_loss": 0.6560600399971008, + "epoch": 0.2834199338686821, + "grad_norm": 16343.369412455128, + "learning_rate": 5.681818181818182e-07, + "logits": -1.370269775390625, + "logps": -83.43912506103516, + "loss": 156.9096, + "objective": 160.82919311523438, + "ranking_idealized": 0.5958333611488342, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.5, + "regularize": 0.3631521761417389, + "step": 100, + "wo_beta": 15.597589492797852 + }, + { + "epoch": 0.2834199338686821, + "eval_dpo_loss": 0.6855058073997498, + "eval_logits": -1.470232367515564, + "eval_logps": -91.45095825195312, + "eval_loss": 181.86407470703125, + "eval_objective": 180.31504821777344, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.531573474407196, + "eval_regularize": 0.41007429361343384, + "eval_runtime": 479.0094, + "eval_samples_per_second": 12.087, + "eval_steps_per_second": 1.008, + "eval_wo_beta": 16.373079299926758, + "step": 100 + }, + { + "dpo_loss": 0.6687707901000977, + "epoch": 0.2975909305621162, + "grad_norm": 20737.972285358017, + "learning_rate": 5.965909090909091e-07, + "logits": -1.572224736213684, + "logps": -86.08336639404297, + "loss": 161.5898, + "objective": 164.3712615966797, + "ranking_idealized": 0.625, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.5416666865348816, + "regularize": 0.3831757605075836, + "step": 105, + "wo_beta": 14.803333282470703 + }, + { + "dpo_loss": 0.6561999917030334, + "epoch": 0.3117619272555503, + "grad_norm": 16188.43984842568, + "learning_rate": 6.249999999999999e-07, + "logits": -1.4707790613174438, + "logps": -84.74868774414062, + "loss": 158.3984, + "objective": 159.52267456054688, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.5166666507720947, + "regularize": 0.38105159997940063, + "step": 110, + "wo_beta": 15.120772361755371 + }, + { + "dpo_loss": 0.6603504419326782, + "epoch": 0.32593292394898443, + "grad_norm": 16290.29619326225, + "learning_rate": 6.534090909090909e-07, + "logits": -1.4433757066726685, + "logps": -83.40989685058594, + "loss": 149.8614, + "objective": 154.2146453857422, + "ranking_idealized": 0.6208333373069763, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5375000238418579, + "regularize": 0.3704533576965332, + "step": 115, + "wo_beta": 16.445148468017578 + }, + { + "dpo_loss": 0.6463068127632141, + "epoch": 0.3401039206424185, + "grad_norm": 15623.51190253056, + "learning_rate": 6.818181818181817e-07, + "logits": -1.4353134632110596, + "logps": -83.36263275146484, + "loss": 156.1384, + "objective": 165.0032501220703, + "ranking_idealized": 0.5916666388511658, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5458333492279053, + "regularize": 0.3623816668987274, + "step": 120, + "wo_beta": 15.72484302520752 + }, + { + "dpo_loss": 0.6474981904029846, + "epoch": 0.35427491733585265, + "grad_norm": 15992.631664901073, + "learning_rate": 7.102272727272727e-07, + "logits": -1.4708176851272583, + "logps": -87.08245086669922, + "loss": 148.8453, + "objective": 139.25869750976562, + "ranking_idealized": 0.5583333373069763, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.5083333253860474, + "regularize": 0.3299652636051178, + "step": 125, + "wo_beta": 16.12550163269043 + }, + { + "dpo_loss": 0.6404248476028442, + "epoch": 0.3684459140292867, + "grad_norm": 18351.934143281596, + "learning_rate": 7.386363636363636e-07, + "logits": -1.4490153789520264, + "logps": -85.12788391113281, + "loss": 156.9957, + "objective": 159.24720764160156, + "ranking_idealized": 0.6458333134651184, + "ranking_idealized_expo": 0.5916666388511658, + "ranking_simple": 0.5791666507720947, + "regularize": 0.3523053526878357, + "step": 130, + "wo_beta": 16.6445255279541 + }, + { + "dpo_loss": 0.6560899615287781, + "epoch": 0.3826169107227208, + "grad_norm": 23473.507695048622, + "learning_rate": 7.670454545454545e-07, + "logits": -1.4993882179260254, + "logps": -85.93272399902344, + "loss": 163.276, + "objective": 171.45176696777344, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.5, + "regularize": 0.3585022985935211, + "step": 135, + "wo_beta": 14.440187454223633 + }, + { + "dpo_loss": 0.6453083753585815, + "epoch": 0.39678790741615494, + "grad_norm": 18800.531975208964, + "learning_rate": 7.954545454545454e-07, + "logits": -1.4704848527908325, + "logps": -84.99346160888672, + "loss": 140.3663, + "objective": 156.8263702392578, + "ranking_idealized": 0.612500011920929, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.5583333373069763, + "regularize": 0.35928240418434143, + "step": 140, + "wo_beta": 14.692411422729492 + }, + { + "dpo_loss": 0.6320348978042603, + "epoch": 0.410958904109589, + "grad_norm": 16753.19118195896, + "learning_rate": 8.238636363636363e-07, + "logits": -1.481634259223938, + "logps": -85.03217315673828, + "loss": 148.4437, + "objective": 142.04251098632812, + "ranking_idealized": 0.6666666865348816, + "ranking_idealized_expo": 0.5583333373069763, + "ranking_simple": 0.6083333492279053, + "regularize": 0.34694570302963257, + "step": 145, + "wo_beta": 14.103859901428223 + }, + { + "dpo_loss": 0.6397809386253357, + "epoch": 0.42512990080302315, + "grad_norm": 15467.131473675328, + "learning_rate": 8.522727272727273e-07, + "logits": -1.5027910470962524, + "logps": -85.37592315673828, + "loss": 145.838, + "objective": 148.38160705566406, + "ranking_idealized": 0.5958333611488342, + "ranking_idealized_expo": 0.5541666746139526, + "ranking_simple": 0.5625, + "regularize": 0.3435940146446228, + "step": 150, + "wo_beta": 17.392671585083008 + }, + { + "epoch": 0.42512990080302315, + "eval_dpo_loss": 0.6789573431015015, + "eval_logits": -1.4503501653671265, + "eval_logps": -90.70494842529297, + "eval_loss": 180.64788818359375, + "eval_objective": 178.1704864501953, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.5383023023605347, + "eval_regularize": 0.40225014090538025, + "eval_runtime": 484.5212, + "eval_samples_per_second": 11.95, + "eval_steps_per_second": 0.997, + "eval_wo_beta": 16.587987899780273, + "step": 150 + }, + { + "dpo_loss": 0.6387067437171936, + "epoch": 0.43930089749645723, + "grad_norm": 15641.193562303264, + "learning_rate": 8.806818181818182e-07, + "logits": -1.5433834791183472, + "logps": -83.86524200439453, + "loss": 145.3558, + "objective": 149.48431396484375, + "ranking_idealized": 0.5666666626930237, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.5041666626930237, + "regularize": 0.3321545720100403, + "step": 155, + "wo_beta": 15.563851356506348 + }, + { + "dpo_loss": 0.6263092160224915, + "epoch": 0.45347189418989137, + "grad_norm": 17105.26137174702, + "learning_rate": 9.09090909090909e-07, + "logits": -1.4153720140457153, + "logps": -85.28386688232422, + "loss": 153.0504, + "objective": 153.1988067626953, + "ranking_idealized": 0.5916666388511658, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.5583333373069763, + "regularize": 0.3481307625770569, + "step": 160, + "wo_beta": 14.662632942199707 + }, + { + "dpo_loss": 0.6309160590171814, + "epoch": 0.46764289088332545, + "grad_norm": 17759.815020595273, + "learning_rate": 9.374999999999999e-07, + "logits": -1.4963940382003784, + "logps": -87.69454956054688, + "loss": 139.2377, + "objective": 131.2418670654297, + "ranking_idealized": 0.6083333492279053, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.550000011920929, + "regularize": 0.3078847825527191, + "step": 165, + "wo_beta": 15.923318862915039 + }, + { + "dpo_loss": 0.6393815279006958, + "epoch": 0.4818138875767596, + "grad_norm": 14258.083724870265, + "learning_rate": 9.65909090909091e-07, + "logits": -1.525942087173462, + "logps": -87.34074401855469, + "loss": 149.6952, + "objective": 141.63162231445312, + "ranking_idealized": 0.5583333373069763, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.5208333134651184, + "regularize": 0.3343699276447296, + "step": 170, + "wo_beta": 16.248130798339844 + }, + { + "dpo_loss": 0.6326501369476318, + "epoch": 0.49598488427019366, + "grad_norm": 15096.239809153309, + "learning_rate": 9.943181818181817e-07, + "logits": -1.3718321323394775, + "logps": -87.4573745727539, + "loss": 140.2749, + "objective": 132.79156494140625, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5375000238418579, + "regularize": 0.3121780455112457, + "step": 175, + "wo_beta": 17.698331832885742 + }, + { + "dpo_loss": 0.622785747051239, + "epoch": 0.5101558809636277, + "grad_norm": 16631.252094969073, + "learning_rate": 9.999842657116664e-07, + "logits": -1.3456240892410278, + "logps": -86.42423248291016, + "loss": 143.2666, + "objective": 151.05718994140625, + "ranking_idealized": 0.5708333253860474, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.512499988079071, + "regularize": 0.3372686207294464, + "step": 180, + "wo_beta": 14.807291030883789 + }, + { + "dpo_loss": 0.6099674701690674, + "epoch": 0.5243268776570619, + "grad_norm": 20691.36637721674, + "learning_rate": 9.999203468625015e-07, + "logits": -1.3633224964141846, + "logps": -85.25286102294922, + "loss": 132.6151, + "objective": 133.30491638183594, + "ranking_idealized": 0.5791666507720947, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.5333333611488342, + "regularize": 0.3143846392631531, + "step": 185, + "wo_beta": 14.758675575256348 + }, + { + "dpo_loss": 0.596558690071106, + "epoch": 0.538497874350496, + "grad_norm": 16323.28292515014, + "learning_rate": 9.998072663403656e-07, + "logits": -1.4109238386154175, + "logps": -83.85755157470703, + "loss": 142.4777, + "objective": 132.50650024414062, + "ranking_idealized": 0.6625000238418579, + "ranking_idealized_expo": 0.5625, + "ranking_simple": 0.612500011920929, + "regularize": 0.2925921082496643, + "step": 190, + "wo_beta": 17.561918258666992 + }, + { + "dpo_loss": 0.608472466468811, + "epoch": 0.5526688710439301, + "grad_norm": 14605.697671098327, + "learning_rate": 9.99645035265485e-07, + "logits": -1.426125407218933, + "logps": -83.3570556640625, + "loss": 148.3801, + "objective": 154.04542541503906, + "ranking_idealized": 0.625, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5708333253860474, + "regularize": 0.3404535949230194, + "step": 195, + "wo_beta": 15.011509895324707 + }, + { + "dpo_loss": 0.6035653948783875, + "epoch": 0.5668398677373642, + "grad_norm": 14961.86824726212, + "learning_rate": 9.99433669591504e-07, + "logits": -1.4208530187606812, + "logps": -83.7520523071289, + "loss": 140.9398, + "objective": 150.76983642578125, + "ranking_idealized": 0.5833333134651184, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5208333134651184, + "regularize": 0.34441787004470825, + "step": 200, + "wo_beta": 16.120277404785156 + }, + { + "epoch": 0.5668398677373642, + "eval_dpo_loss": 0.6803466081619263, + "eval_logits": -1.3894833326339722, + "eval_logps": -90.33295440673828, + "eval_loss": 184.49874877929688, + "eval_objective": 181.54510498046875, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.532608687877655, + "eval_regularize": 0.4101283848285675, + "eval_runtime": 475.1985, + "eval_samples_per_second": 12.184, + "eval_steps_per_second": 1.016, + "eval_wo_beta": 16.141496658325195, + "step": 200 + }, + { + "dpo_loss": 0.6270676255226135, + "epoch": 0.5810108644307983, + "grad_norm": 16340.681317011602, + "learning_rate": 9.991731901039136e-07, + "logits": -1.283570647239685, + "logps": -84.95980834960938, + "loss": 136.3843, + "objective": 133.73294067382812, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.5416666865348816, + "regularize": 0.3290613889694214, + "step": 205, + "wo_beta": 16.529329299926758 + }, + { + "dpo_loss": 0.6101997494697571, + "epoch": 0.5951818611242324, + "grad_norm": 16979.514024444066, + "learning_rate": 9.988636224180095e-07, + "logits": -1.3387362957000732, + "logps": -85.54541015625, + "loss": 149.2125, + "objective": 162.19125366210938, + "ranking_idealized": 0.6041666865348816, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.5625, + "regularize": 0.3633294999599457, + "step": 210, + "wo_beta": 15.476922988891602 + }, + { + "dpo_loss": 0.5931335687637329, + "epoch": 0.6093528578176665, + "grad_norm": 16588.23739039735, + "learning_rate": 9.985049969763719e-07, + "logits": -1.458817720413208, + "logps": -84.46039581298828, + "loss": 133.2822, + "objective": 143.83396911621094, + "ranking_idealized": 0.5791666507720947, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.5166666507720947, + "regularize": 0.3306182324886322, + "step": 215, + "wo_beta": 16.599056243896484 + }, + { + "dpo_loss": 0.6022905111312866, + "epoch": 0.6235238545111006, + "grad_norm": 17119.52021011513, + "learning_rate": 9.980973490458728e-07, + "logits": -1.4839917421340942, + "logps": -84.08710479736328, + "loss": 143.4095, + "objective": 144.29782104492188, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.4791666567325592, + "ranking_simple": 0.5083333253860474, + "regularize": 0.32684001326560974, + "step": 220, + "wo_beta": 16.91693878173828 + }, + { + "dpo_loss": 0.5977518558502197, + "epoch": 0.6376948512045347, + "grad_norm": 14023.197866950057, + "learning_rate": 9.976407187142064e-07, + "logits": -1.534485936164856, + "logps": -85.1946792602539, + "loss": 138.8846, + "objective": 137.76622009277344, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.5583333373069763, + "regularize": 0.31227758526802063, + "step": 225, + "wo_beta": 15.36359691619873 + }, + { + "dpo_loss": 0.5947220921516418, + "epoch": 0.6518658478979689, + "grad_norm": 14605.487004157298, + "learning_rate": 9.971351508859486e-07, + "logits": -1.439586877822876, + "logps": -85.27981567382812, + "loss": 124.6336, + "objective": 121.90718078613281, + "ranking_idealized": 0.637499988079071, + "ranking_idealized_expo": 0.5666666626930237, + "ranking_simple": 0.6000000238418579, + "regularize": 0.2932659685611725, + "step": 230, + "wo_beta": 17.20786476135254 + }, + { + "dpo_loss": 0.6003122925758362, + "epoch": 0.6660368445914029, + "grad_norm": 16685.644038837043, + "learning_rate": 9.9658069527814e-07, + "logits": -1.3658267259597778, + "logps": -86.23738098144531, + "loss": 121.5208, + "objective": 116.9168472290039, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.5583333373069763, + "regularize": 0.2670106589794159, + "step": 235, + "wo_beta": 16.473398208618164 + }, + { + "dpo_loss": 0.5931513905525208, + "epoch": 0.680207841284837, + "grad_norm": 18082.47037845429, + "learning_rate": 9.959774064153975e-07, + "logits": -1.5063189268112183, + "logps": -85.80690002441406, + "loss": 131.6654, + "objective": 136.83932495117188, + "ranking_idealized": 0.6041666865348816, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.550000011920929, + "regularize": 0.29019126296043396, + "step": 240, + "wo_beta": 16.562297821044922 + }, + { + "dpo_loss": 0.6120952367782593, + "epoch": 0.6943788379782712, + "grad_norm": 16231.64241500278, + "learning_rate": 9.953253436245516e-07, + "logits": -1.5183242559432983, + "logps": -85.21266174316406, + "loss": 120.6441, + "objective": 111.80670928955078, + "ranking_idealized": 0.6166666746139526, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.5666666626930237, + "regularize": 0.2561970055103302, + "step": 245, + "wo_beta": 16.04390525817871 + }, + { + "dpo_loss": 0.5938802361488342, + "epoch": 0.7085498346717053, + "grad_norm": 15964.589309173105, + "learning_rate": 9.94624571028813e-07, + "logits": -1.3114020824432373, + "logps": -83.990478515625, + "loss": 131.1439, + "objective": 132.464599609375, + "ranking_idealized": 0.612500011920929, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.5874999761581421, + "regularize": 0.29094573855400085, + "step": 250, + "wo_beta": 15.082120895385742 + }, + { + "epoch": 0.7085498346717053, + "eval_dpo_loss": 0.6797458529472351, + "eval_logits": -1.478871464729309, + "eval_logps": -91.22461700439453, + "eval_loss": 182.20773315429688, + "eval_objective": 178.44094848632812, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.532608687877655, + "eval_regularize": 0.4058575928211212, + "eval_runtime": 475.3374, + "eval_samples_per_second": 12.181, + "eval_steps_per_second": 1.016, + "eval_wo_beta": 16.368268966674805, + "step": 250 + }, + { + "dpo_loss": 0.6089769005775452, + "epoch": 0.7227208313651393, + "grad_norm": 14595.146016283225, + "learning_rate": 9.938751575414661e-07, + "logits": -1.5532639026641846, + "logps": -83.39389038085938, + "loss": 133.1451, + "objective": 121.37617492675781, + "ranking_idealized": 0.5708333253860474, + "ranking_idealized_expo": 0.42916667461395264, + "ranking_simple": 0.5041666626930237, + "regularize": 0.2965226471424103, + "step": 255, + "wo_beta": 15.059760093688965 + }, + { + "dpo_loss": 0.5949603915214539, + "epoch": 0.7368918280585735, + "grad_norm": 15154.916516529278, + "learning_rate": 9.930771768590933e-07, + "logits": -1.5184205770492554, + "logps": -85.99275970458984, + "loss": 128.7971, + "objective": 149.26617431640625, + "ranking_idealized": 0.5416666865348816, + "ranking_idealized_expo": 0.4625000059604645, + "ranking_simple": 0.5208333134651184, + "regularize": 0.3260208070278168, + "step": 260, + "wo_beta": 15.812520027160645 + }, + { + "dpo_loss": 0.5942420959472656, + "epoch": 0.7510628247520076, + "grad_norm": 13672.874013609171, + "learning_rate": 9.92230707454326e-07, + "logits": -1.438194990158081, + "logps": -86.4264907836914, + "loss": 119.4127, + "objective": 127.40038299560547, + "ranking_idealized": 0.6625000238418579, + "ranking_idealized_expo": 0.5666666626930237, + "ranking_simple": 0.6416666507720947, + "regularize": 0.29675182700157166, + "step": 265, + "wo_beta": 16.794330596923828 + }, + { + "dpo_loss": 0.6142985224723816, + "epoch": 0.7652338214454416, + "grad_norm": 14406.751122728363, + "learning_rate": 9.91335832568129e-07, + "logits": -1.5249485969543457, + "logps": -87.38147735595703, + "loss": 129.203, + "objective": 141.37374877929688, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.4583333432674408, + "ranking_simple": 0.4749999940395355, + "regularize": 0.2984028458595276, + "step": 270, + "wo_beta": 14.417384147644043 + }, + { + "dpo_loss": 0.5954193472862244, + "epoch": 0.7794048181388757, + "grad_norm": 17040.572933936153, + "learning_rate": 9.90392640201615e-07, + "logits": -1.3636622428894043, + "logps": -86.6485595703125, + "loss": 118.1932, + "objective": 113.61885833740234, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.5375000238418579, + "regularize": 0.2610357701778412, + "step": 275, + "wo_beta": 15.509458541870117 + }, + { + "dpo_loss": 0.5917832851409912, + "epoch": 0.7935758148323099, + "grad_norm": 17559.793763685935, + "learning_rate": 9.894012231073895e-07, + "logits": -1.4590952396392822, + "logps": -87.64340209960938, + "loss": 132.6812, + "objective": 137.506103515625, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.5541666746139526, + "regularize": 0.2935800850391388, + "step": 280, + "wo_beta": 15.80073070526123 + }, + { + "dpo_loss": 0.5836724042892456, + "epoch": 0.807746811525744, + "grad_norm": 14579.651979817574, + "learning_rate": 9.88361678780429e-07, + "logits": -1.4701313972473145, + "logps": -88.11650085449219, + "loss": 118.3926, + "objective": 111.54865264892578, + "ranking_idealized": 0.5666666626930237, + "ranking_idealized_expo": 0.4791666567325592, + "ranking_simple": 0.5208333134651184, + "regularize": 0.2552913427352905, + "step": 285, + "wo_beta": 16.792234420776367 + }, + { + "dpo_loss": 0.5677815079689026, + "epoch": 0.821917808219178, + "grad_norm": 15029.308678016287, + "learning_rate": 9.872741094484964e-07, + "logits": -1.500461220741272, + "logps": -86.58364868164062, + "loss": 116.8557, + "objective": 106.32292175292969, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.550000011920929, + "regularize": 0.24703934788703918, + "step": 290, + "wo_beta": 16.14396095275879 + }, + { + "dpo_loss": 0.5853282809257507, + "epoch": 0.8360888049126122, + "grad_norm": 15120.877217642179, + "learning_rate": 9.86138622062085e-07, + "logits": -1.494510293006897, + "logps": -86.35259246826172, + "loss": 116.1266, + "objective": 112.15760803222656, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.5208333134651184, + "regularize": 0.2589784264564514, + "step": 295, + "wo_beta": 16.28350257873535 + }, + { + "dpo_loss": 0.5893528461456299, + "epoch": 0.8502598016060463, + "grad_norm": 14818.401223627045, + "learning_rate": 9.849553282839024e-07, + "logits": -1.4687484502792358, + "logps": -85.012939453125, + "loss": 118.3192, + "objective": 113.60901641845703, + "ranking_idealized": 0.625, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.6000000238418579, + "regularize": 0.26101434230804443, + "step": 300, + "wo_beta": 15.157808303833008 + }, + { + "epoch": 0.8502598016060463, + "eval_dpo_loss": 0.681740403175354, + "eval_logits": -1.4551842212677002, + "eval_logps": -92.57705688476562, + "eval_loss": 183.44589233398438, + "eval_objective": 180.4713592529297, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.532608687877655, + "eval_regularize": 0.41230443120002747, + "eval_runtime": 479.855, + "eval_samples_per_second": 12.066, + "eval_steps_per_second": 1.007, + "eval_wo_beta": 16.404129028320312, + "step": 300 + }, + { + "dpo_loss": 0.5834535956382751, + "epoch": 0.8644307982994804, + "grad_norm": 14881.03810672454, + "learning_rate": 9.837243444778899e-07, + "logits": -1.4318089485168457, + "logps": -85.52223205566406, + "loss": 117.2997, + "objective": 119.20571899414062, + "ranking_idealized": 0.6166666746139526, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.6083333492279053, + "regularize": 0.2612408697605133, + "step": 305, + "wo_beta": 15.858050346374512 + }, + { + "dpo_loss": 0.5729119181632996, + "epoch": 0.8786017949929145, + "grad_norm": 13728.643717044331, + "learning_rate": 9.824457916977784e-07, + "logits": -1.430962085723877, + "logps": -84.47950744628906, + "loss": 113.9787, + "objective": 119.12039184570312, + "ranking_idealized": 0.5958333611488342, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.5541666746139526, + "regularize": 0.2695327699184418, + "step": 310, + "wo_beta": 15.427461624145508 + }, + { + "dpo_loss": 0.5748838782310486, + "epoch": 0.8927727916863486, + "grad_norm": 15353.814970462101, + "learning_rate": 9.81119795675185e-07, + "logits": -1.4459213018417358, + "logps": -83.27306365966797, + "loss": 112.487, + "objective": 110.93157196044922, + "ranking_idealized": 0.5541666746139526, + "ranking_idealized_expo": 0.47083333134651184, + "ranking_simple": 0.5333333611488342, + "regularize": 0.2502378225326538, + "step": 315, + "wo_beta": 15.45988941192627 + }, + { + "dpo_loss": 0.5748109221458435, + "epoch": 0.9069437883797827, + "grad_norm": 15007.545319328356, + "learning_rate": 9.797464868072486e-07, + "logits": -1.4066462516784668, + "logps": -86.03001403808594, + "loss": 110.898, + "objective": 109.38225555419922, + "ranking_idealized": 0.6208333373069763, + "ranking_idealized_expo": 0.5583333373069763, + "ranking_simple": 0.6083333492279053, + "regularize": 0.2464234083890915, + "step": 320, + "wo_beta": 15.732470512390137 + }, + { + "dpo_loss": 0.5822945833206177, + "epoch": 0.9211147850732169, + "grad_norm": 13633.021631468031, + "learning_rate": 9.783260001438066e-07, + "logits": -1.4706979990005493, + "logps": -87.00752258300781, + "loss": 114.9803, + "objective": 106.17591857910156, + "ranking_idealized": 0.6083333492279053, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5708333253860474, + "regularize": 0.25146251916885376, + "step": 325, + "wo_beta": 15.325220108032227 + }, + { + "dpo_loss": 0.5598011016845703, + "epoch": 0.9352857817666509, + "grad_norm": 14695.63914534257, + "learning_rate": 9.768584753741134e-07, + "logits": -1.3177284002304077, + "logps": -86.90360260009766, + "loss": 116.6896, + "objective": 123.9805679321289, + "ranking_idealized": 0.6166666746139526, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.5916666388511658, + "regularize": 0.2586965262889862, + "step": 330, + "wo_beta": 16.747480392456055 + }, + { + "dpo_loss": 0.5746142864227295, + "epoch": 0.949456778460085, + "grad_norm": 14706.814411020761, + "learning_rate": 9.753440568131054e-07, + "logits": -1.3514246940612793, + "logps": -86.81550598144531, + "loss": 115.5651, + "objective": 113.5698471069336, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.5791666507720947, + "regularize": 0.25022396445274353, + "step": 335, + "wo_beta": 15.857748031616211 + }, + { + "dpo_loss": 0.5717839598655701, + "epoch": 0.9636277751535192, + "grad_norm": 13577.369360499106, + "learning_rate": 9.737828933872073e-07, + "logits": -1.400834321975708, + "logps": -85.29247283935547, + "loss": 118.1002, + "objective": 108.19886779785156, + "ranking_idealized": 0.6041666865348816, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.5958333611488342, + "regularize": 0.24417272210121155, + "step": 340, + "wo_beta": 16.27320098876953 + }, + { + "dpo_loss": 0.5746095776557922, + "epoch": 0.9777987718469532, + "grad_norm": 13673.428728913288, + "learning_rate": 9.721751386196885e-07, + "logits": -1.4508498907089233, + "logps": -84.16486358642578, + "loss": 110.1951, + "objective": 103.0552749633789, + "ranking_idealized": 0.6458333134651184, + "ranking_idealized_expo": 0.5833333134651184, + "ranking_simple": 0.6291666626930237, + "regularize": 0.23596970736980438, + "step": 345, + "wo_beta": 15.449429512023926 + }, + { + "dpo_loss": 0.5632264018058777, + "epoch": 0.9919697685403873, + "grad_norm": 13613.304013119689, + "learning_rate": 9.705209506155634e-07, + "logits": -1.3619670867919922, + "logps": -86.77315521240234, + "loss": 108.5029, + "objective": 110.73800659179688, + "ranking_idealized": 0.6583333611488342, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.625, + "regularize": 0.26065030694007874, + "step": 350, + "wo_beta": 15.869379997253418 + }, + { + "epoch": 0.9919697685403873, + "eval_dpo_loss": 0.678183376789093, + "eval_logits": -1.4316504001617432, + "eval_logps": -92.18038177490234, + "eval_loss": 183.9593048095703, + "eval_objective": 180.11509704589844, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.5320910811424255, + "eval_regularize": 0.40945151448249817, + "eval_runtime": 476.2355, + "eval_samples_per_second": 12.158, + "eval_steps_per_second": 1.014, + "eval_wo_beta": 16.336669921875, + "step": 350 + }, + { + "dpo_loss": 0.5633755326271057, + "epoch": 1.0061407652338215, + "grad_norm": 13717.944611215353, + "learning_rate": 9.688204920460466e-07, + "logits": -1.542311191558838, + "logps": -84.23912811279297, + "loss": 104.9579, + "objective": 99.2624740600586, + "ranking_idealized": 0.6083333492279053, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5791666507720947, + "regularize": 0.2348737269639969, + "step": 355, + "wo_beta": 16.799049377441406 + }, + { + "dpo_loss": 0.5596449971199036, + "epoch": 1.0203117619272555, + "grad_norm": 15569.178838691118, + "learning_rate": 9.670739301325534e-07, + "logits": -1.4423024654388428, + "logps": -84.60731506347656, + "loss": 97.354, + "objective": 96.60607147216797, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.5625, + "regularize": 0.2134130448102951, + "step": 360, + "wo_beta": 16.611034393310547 + }, + { + "dpo_loss": 0.5573465824127197, + "epoch": 1.0344827586206897, + "grad_norm": 14412.61274623368, + "learning_rate": 9.652814366302568e-07, + "logits": -1.4710925817489624, + "logps": -84.47969818115234, + "loss": 109.2182, + "objective": 110.00160217285156, + "ranking_idealized": 0.5833333134651184, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5833333134651184, + "regularize": 0.2383657544851303, + "step": 365, + "wo_beta": 14.846599578857422 + }, + { + "dpo_loss": 0.559634804725647, + "epoch": 1.0486537553141237, + "grad_norm": 15121.427522934051, + "learning_rate": 9.63443187811197e-07, + "logits": -1.407724142074585, + "logps": -82.60728454589844, + "loss": 94.8917, + "objective": 93.84876251220703, + "ranking_idealized": 0.5958333611488342, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.5666666626930237, + "regularize": 0.21665388345718384, + "step": 370, + "wo_beta": 15.743396759033203 + }, + { + "dpo_loss": 0.5503371357917786, + "epoch": 1.0628247520075578, + "grad_norm": 14225.520073845873, + "learning_rate": 9.61559364446946e-07, + "logits": -1.4566776752471924, + "logps": -84.27056121826172, + "loss": 96.0324, + "objective": 91.85355377197266, + "ranking_idealized": 0.6083333492279053, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.5791666507720947, + "regularize": 0.20962905883789062, + "step": 375, + "wo_beta": 16.301313400268555 + }, + { + "dpo_loss": 0.5628603100776672, + "epoch": 1.076995748700992, + "grad_norm": 14108.362094897184, + "learning_rate": 9.596301517908328e-07, + "logits": -1.4387798309326172, + "logps": -86.27851867675781, + "loss": 98.7923, + "objective": 108.01164245605469, + "ranking_idealized": 0.6625000238418579, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.6291666626930237, + "regularize": 0.2488705962896347, + "step": 380, + "wo_beta": 15.773112297058105 + }, + { + "dpo_loss": 0.5771984457969666, + "epoch": 1.091166745394426, + "grad_norm": 13105.168740611702, + "learning_rate": 9.576557395597236e-07, + "logits": -1.4021495580673218, + "logps": -85.1259536743164, + "loss": 99.6716, + "objective": 109.83814239501953, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5583333373069763, + "ranking_simple": 0.5791666507720947, + "regularize": 0.23721593618392944, + "step": 385, + "wo_beta": 15.801533699035645 + }, + { + "dpo_loss": 0.5509606599807739, + "epoch": 1.10533774208786, + "grad_norm": 13663.873020268169, + "learning_rate": 9.556363219153662e-07, + "logits": -1.3366678953170776, + "logps": -86.07147979736328, + "loss": 96.1117, + "objective": 90.10648345947266, + "ranking_idealized": 0.637499988079071, + "ranking_idealized_expo": 0.5625, + "ranking_simple": 0.6333333253860474, + "regularize": 0.2138025164604187, + "step": 390, + "wo_beta": 16.90329933166504 + }, + { + "dpo_loss": 0.5398973822593689, + "epoch": 1.1195087387812943, + "grad_norm": 14913.448008058538, + "learning_rate": 9.53572097445297e-07, + "logits": -1.3910351991653442, + "logps": -84.76091766357422, + "loss": 99.588, + "objective": 102.71925354003906, + "ranking_idealized": 0.5791666507720947, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.5708333253860474, + "regularize": 0.21998313069343567, + "step": 395, + "wo_beta": 14.880850791931152 + }, + { + "dpo_loss": 0.5650266408920288, + "epoch": 1.1336797354747283, + "grad_norm": 14606.821946811386, + "learning_rate": 9.514632691433106e-07, + "logits": -1.4497681856155396, + "logps": -82.1307373046875, + "loss": 104.6813, + "objective": 107.99799346923828, + "ranking_idealized": 0.6291666626930237, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.6166666746139526, + "regularize": 0.23726312816143036, + "step": 400, + "wo_beta": 15.854341506958008 + }, + { + "epoch": 1.1336797354747283, + "eval_dpo_loss": 0.6800939440727234, + "eval_logits": -1.3930206298828125, + "eval_logps": -89.72613525390625, + "eval_loss": 183.87586975097656, + "eval_objective": 180.28396606445312, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.5310559272766113, + "eval_regularize": 0.40940526127815247, + "eval_runtime": 478.3822, + "eval_samples_per_second": 12.103, + "eval_steps_per_second": 1.01, + "eval_wo_beta": 16.22085189819336, + "step": 400 + }, + { + "dpo_loss": 0.5639857053756714, + "epoch": 1.1478507321681626, + "grad_norm": 15414.866076924996, + "learning_rate": 9.493100443894984e-07, + "logits": -1.416764736175537, + "logps": -84.40596771240234, + "loss": 97.7792, + "objective": 106.99815368652344, + "ranking_idealized": 0.5666666626930237, + "ranking_idealized_expo": 0.4625000059604645, + "ranking_simple": 0.5416666865348816, + "regularize": 0.22935108840465546, + "step": 405, + "wo_beta": 17.16730499267578 + }, + { + "dpo_loss": 0.5612814426422119, + "epoch": 1.1620217288615966, + "grad_norm": 13730.11308532576, + "learning_rate": 9.471126349298556e-07, + "logits": -1.4282060861587524, + "logps": -84.3336410522461, + "loss": 96.1344, + "objective": 93.89948272705078, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.5874999761581421, + "regularize": 0.20958545804023743, + "step": 410, + "wo_beta": 16.73971939086914 + }, + { + "dpo_loss": 0.5569156408309937, + "epoch": 1.1761927255550306, + "grad_norm": 11975.058144386021, + "learning_rate": 9.448712568554571e-07, + "logits": -1.3549463748931885, + "logps": -83.00645446777344, + "loss": 93.1875, + "objective": 96.11307525634766, + "ranking_idealized": 0.612500011920929, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.5833333134651184, + "regularize": 0.22849011421203613, + "step": 415, + "wo_beta": 16.471454620361328 + }, + { + "dpo_loss": 0.5578625202178955, + "epoch": 1.1903637222484649, + "grad_norm": 13553.103377125492, + "learning_rate": 9.425861305812081e-07, + "logits": -1.3200798034667969, + "logps": -84.18423461914062, + "loss": 99.8958, + "objective": 90.86384582519531, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.5666666626930237, + "regularize": 0.2039288878440857, + "step": 420, + "wo_beta": 16.64999008178711 + }, + { + "dpo_loss": 0.5598068237304688, + "epoch": 1.204534718941899, + "grad_norm": 13382.98806426423, + "learning_rate": 9.40257480824169e-07, + "logits": -1.368670105934143, + "logps": -82.51498413085938, + "loss": 95.7898, + "objective": 98.82903289794922, + "ranking_idealized": 0.5958333611488342, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5791666507720947, + "regularize": 0.21656714379787445, + "step": 425, + "wo_beta": 15.240234375 + }, + { + "dpo_loss": 0.5631528496742249, + "epoch": 1.2187057156353331, + "grad_norm": 13379.590249575365, + "learning_rate": 9.378855365814557e-07, + "logits": -1.3373157978057861, + "logps": -84.1694107055664, + "loss": 89.0871, + "objective": 83.64144897460938, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.5708333253860474, + "regularize": 0.19078685343265533, + "step": 430, + "wo_beta": 16.387685775756836 + }, + { + "dpo_loss": 0.5549448132514954, + "epoch": 1.2328767123287672, + "grad_norm": 13813.435024161312, + "learning_rate": 9.354705311077218e-07, + "logits": -1.287793755531311, + "logps": -83.4052963256836, + "loss": 93.9205, + "objective": 94.07813262939453, + "ranking_idealized": 0.6458333134651184, + "ranking_idealized_expo": 0.5958333611488342, + "ranking_simple": 0.637499988079071, + "regularize": 0.21654988825321198, + "step": 435, + "wo_beta": 17.72869110107422 + }, + { + "dpo_loss": 0.5550996661186218, + "epoch": 1.2470477090222012, + "grad_norm": 15408.139135942378, + "learning_rate": 9.330127018922193e-07, + "logits": -1.302925705909729, + "logps": -83.39546203613281, + "loss": 87.7477, + "objective": 81.88467407226562, + "ranking_idealized": 0.612500011920929, + "ranking_idealized_expo": 0.5541666746139526, + "ranking_simple": 0.612500011920929, + "regularize": 0.18918146193027496, + "step": 440, + "wo_beta": 15.06383991241455 + }, + { + "dpo_loss": 0.544273316860199, + "epoch": 1.2612187057156352, + "grad_norm": 13358.127194753248, + "learning_rate": 9.305122906354448e-07, + "logits": -1.3234721422195435, + "logps": -85.1892318725586, + "loss": 91.347, + "objective": 87.14881896972656, + "ranking_idealized": 0.637499988079071, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.6208333373069763, + "regularize": 0.2082992047071457, + "step": 445, + "wo_beta": 17.48933219909668 + }, + { + "dpo_loss": 0.5497770309448242, + "epoch": 1.2753897024090695, + "grad_norm": 13860.879601223209, + "learning_rate": 9.279695432253708e-07, + "logits": -1.4758702516555786, + "logps": -84.91988372802734, + "loss": 90.5585, + "objective": 87.8936996459961, + "ranking_idealized": 0.5541666746139526, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5666666626930237, + "regularize": 0.2043653279542923, + "step": 450, + "wo_beta": 15.034831047058105 + }, + { + "epoch": 1.2753897024090695, + "eval_dpo_loss": 0.6794779300689697, + "eval_logits": -1.3663489818572998, + "eval_logps": -91.20365905761719, + "eval_loss": 184.06732177734375, + "eval_objective": 180.62957763671875, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.5357142686843872, + "eval_regularize": 0.41047051548957825, + "eval_runtime": 479.5938, + "eval_samples_per_second": 12.073, + "eval_steps_per_second": 1.007, + "eval_wo_beta": 16.288923263549805, + "step": 450 + }, + { + "dpo_loss": 0.5493736267089844, + "epoch": 1.2895606991025035, + "grad_norm": 12737.57571248245, + "learning_rate": 9.253847097132655e-07, + "logits": -1.2778384685516357, + "logps": -85.39282989501953, + "loss": 90.8388, + "objective": 97.43504333496094, + "ranking_idealized": 0.5458333492279053, + "ranking_idealized_expo": 0.49166667461395264, + "ranking_simple": 0.5375000238418579, + "regularize": 0.21359196305274963, + "step": 455, + "wo_beta": 15.552309036254883 + }, + { + "dpo_loss": 0.5543228983879089, + "epoch": 1.3037316957959377, + "grad_norm": 14070.394055394958, + "learning_rate": 9.227580442891021e-07, + "logits": -1.3934885263442993, + "logps": -84.22640991210938, + "loss": 89.7715, + "objective": 87.21723175048828, + "ranking_idealized": 0.5583333373069763, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.5333333611488342, + "regularize": 0.1974954754114151, + "step": 460, + "wo_beta": 16.378904342651367 + }, + { + "dpo_loss": 0.5476227402687073, + "epoch": 1.3179026924893718, + "grad_norm": 11947.40976577932, + "learning_rate": 9.200898052565637e-07, + "logits": -1.3618992567062378, + "logps": -82.62676239013672, + "loss": 89.4031, + "objective": 95.53166961669922, + "ranking_idealized": 0.5541666746139526, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.5416666865348816, + "regularize": 0.2165236622095108, + "step": 465, + "wo_beta": 14.700522422790527 + }, + { + "dpo_loss": 0.5633279085159302, + "epoch": 1.3320736891828058, + "grad_norm": 14427.62714295139, + "learning_rate": 9.173802550076401e-07, + "logits": -1.4394139051437378, + "logps": -81.61421203613281, + "loss": 96.3098, + "objective": 105.95228576660156, + "ranking_idealized": 0.612500011920929, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5833333134651184, + "regularize": 0.22128254175186157, + "step": 470, + "wo_beta": 14.451654434204102 + }, + { + "dpo_loss": 0.5512283444404602, + "epoch": 1.34624468587624, + "grad_norm": 15510.676068153169, + "learning_rate": 9.146296599968258e-07, + "logits": -1.334899663925171, + "logps": -84.10041809082031, + "loss": 85.2643, + "objective": 97.66104125976562, + "ranking_idealized": 0.6416666507720947, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.6208333373069763, + "regularize": 0.22577306628227234, + "step": 475, + "wo_beta": 16.777812957763672 + }, + { + "dpo_loss": 0.5490090847015381, + "epoch": 1.360415682569674, + "grad_norm": 13039.859969979723, + "learning_rate": 9.118382907149163e-07, + "logits": -1.396318793296814, + "logps": -84.05583953857422, + "loss": 92.9048, + "objective": 106.32127380371094, + "ranking_idealized": 0.5666666626930237, + "ranking_idealized_expo": 0.4791666567325592, + "ranking_simple": 0.5291666388511658, + "regularize": 0.22388581931591034, + "step": 480, + "wo_beta": 18.35649871826172 + }, + { + "dpo_loss": 0.55390864610672, + "epoch": 1.3745866792631083, + "grad_norm": 14303.492597277622, + "learning_rate": 9.090064216624092e-07, + "logits": -1.3780549764633179, + "logps": -81.48451232910156, + "loss": 89.9123, + "objective": 85.18955993652344, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.5666666626930237, + "regularize": 0.19940294325351715, + "step": 485, + "wo_beta": 16.11130714416504 + }, + { + "dpo_loss": 0.5646805167198181, + "epoch": 1.3887576759565423, + "grad_norm": 13569.748240897005, + "learning_rate": 9.061343313225087e-07, + "logits": -1.3297451734542847, + "logps": -84.58447265625, + "loss": 91.7915, + "objective": 92.44489288330078, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.5708333253860474, + "regularize": 0.20839503407478333, + "step": 490, + "wo_beta": 15.887747764587402 + }, + { + "dpo_loss": 0.5439994931221008, + "epoch": 1.4029286726499763, + "grad_norm": 14224.725006990095, + "learning_rate": 9.032223021337413e-07, + "logits": -1.3493283987045288, + "logps": -84.9798355102539, + "loss": 89.0675, + "objective": 84.06017303466797, + "ranking_idealized": 0.5583333373069763, + "ranking_idealized_expo": 0.4791666567325592, + "ranking_simple": 0.550000011920929, + "regularize": 0.1910681426525116, + "step": 495, + "wo_beta": 15.590251922607422 + }, + { + "dpo_loss": 0.539610743522644, + "epoch": 1.4170996693434104, + "grad_norm": 14123.937473491551, + "learning_rate": 9.002706204621802e-07, + "logits": -1.278394341468811, + "logps": -83.08454132080078, + "loss": 91.2372, + "objective": 89.69623565673828, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.574999988079071, + "regularize": 0.20472820103168488, + "step": 500, + "wo_beta": 15.177144050598145 + }, + { + "epoch": 1.4170996693434104, + "eval_dpo_loss": 0.6782248020172119, + "eval_logits": -1.3281084299087524, + "eval_logps": -89.4298095703125, + "eval_loss": 185.71939086914062, + "eval_objective": 180.8789520263672, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.534679114818573, + "eval_regularize": 0.41098901629447937, + "eval_runtime": 475.7419, + "eval_samples_per_second": 12.17, + "eval_steps_per_second": 1.015, + "eval_wo_beta": 16.044300079345703, + "step": 500 + }, + { + "dpo_loss": 0.5400077700614929, + "epoch": 1.4312706660368446, + "grad_norm": 13097.852775439285, + "learning_rate": 8.972795765732846e-07, + "logits": -1.3413732051849365, + "logps": -82.83694458007812, + "loss": 96.4414, + "objective": 99.75823211669922, + "ranking_idealized": 0.625, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5874999761581421, + "regularize": 0.20853710174560547, + "step": 505, + "wo_beta": 16.662084579467773 + }, + { + "dpo_loss": 0.5544862151145935, + "epoch": 1.4454416627302786, + "grad_norm": 13707.829813480788, + "learning_rate": 8.942494646033554e-07, + "logits": -1.3700981140136719, + "logps": -84.05197143554688, + "loss": 86.1912, + "objective": 85.1514663696289, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.5833333134651184, + "regularize": 0.18519388139247894, + "step": 510, + "wo_beta": 15.12604808807373 + }, + { + "dpo_loss": 0.5474262833595276, + "epoch": 1.4596126594237129, + "grad_norm": 16384.18532468762, + "learning_rate": 8.911805825306096e-07, + "logits": -1.4208234548568726, + "logps": -85.2526626586914, + "loss": 86.2928, + "objective": 94.92349243164062, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.4625000059604645, + "ranking_simple": 0.5249999761581421, + "regularize": 0.21247318387031555, + "step": 515, + "wo_beta": 16.363656997680664 + }, + { + "dpo_loss": 0.5538729429244995, + "epoch": 1.473783656117147, + "grad_norm": 14068.735921521182, + "learning_rate": 8.880732321458784e-07, + "logits": -1.3451961278915405, + "logps": -81.92323303222656, + "loss": 85.1002, + "objective": 84.61219024658203, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.49166667461395264, + "ranking_simple": 0.5416666865348816, + "regularize": 0.19058094918727875, + "step": 520, + "wo_beta": 16.039138793945312 + }, + { + "dpo_loss": 0.5211088061332703, + "epoch": 1.487954652810581, + "grad_norm": 12355.643543079665, + "learning_rate": 8.849277190229283e-07, + "logits": -1.2561639547348022, + "logps": -80.8559341430664, + "loss": 87.4323, + "objective": 88.15239715576172, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.6000000238418579, + "regularize": 0.19789734482765198, + "step": 525, + "wo_beta": 15.64743423461914 + }, + { + "dpo_loss": 0.5529366731643677, + "epoch": 1.5021256495040152, + "grad_norm": 13792.295805387279, + "learning_rate": 8.817443524884117e-07, + "logits": -1.4202781915664673, + "logps": -84.57428741455078, + "loss": 89.1332, + "objective": 93.31535339355469, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5708333253860474, + "regularize": 0.20485611259937286, + "step": 530, + "wo_beta": 15.418906211853027 + }, + { + "dpo_loss": 0.5380304455757141, + "epoch": 1.5162966461974492, + "grad_norm": 12748.671458728879, + "learning_rate": 8.785234455914488e-07, + "logits": -1.4013686180114746, + "logps": -83.34593963623047, + "loss": 86.3246, + "objective": 83.55619812011719, + "ranking_idealized": 0.5833333134651184, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5791666507720947, + "regularize": 0.19464156031608582, + "step": 535, + "wo_beta": 15.718771934509277 + }, + { + "dpo_loss": 0.5602856874465942, + "epoch": 1.5304676428908834, + "grad_norm": 13600.712518077142, + "learning_rate": 8.752653150728411e-07, + "logits": -1.3116246461868286, + "logps": -83.8393783569336, + "loss": 85.7548, + "objective": 85.53334045410156, + "ranking_idealized": 0.6291666626930237, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5874999761581421, + "regularize": 0.19387957453727722, + "step": 540, + "wo_beta": 15.35750961303711 + }, + { + "dpo_loss": 0.5608557462692261, + "epoch": 1.5446386395843175, + "grad_norm": 13202.179692261727, + "learning_rate": 8.719702813339247e-07, + "logits": -1.4217339754104614, + "logps": -85.13090515136719, + "loss": 78.3728, + "objective": 73.64112854003906, + "ranking_idealized": 0.6583333611488342, + "ranking_idealized_expo": 0.5874999761581421, + "ranking_simple": 0.6499999761581421, + "regularize": 0.17463207244873047, + "step": 545, + "wo_beta": 14.742905616760254 + }, + { + "dpo_loss": 0.5433780550956726, + "epoch": 1.5588096362777515, + "grad_norm": 13773.885858068237, + "learning_rate": 8.68638668405062e-07, + "logits": -1.4105440378189087, + "logps": -85.73950958251953, + "loss": 85.7307, + "objective": 91.58641815185547, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.46666666865348816, + "ranking_simple": 0.5291666388511658, + "regularize": 0.20735137164592743, + "step": 550, + "wo_beta": 15.781331062316895 + }, + { + "epoch": 1.5588096362777515, + "eval_dpo_loss": 0.6799347400665283, + "eval_logits": -1.36829674243927, + "eval_logps": -91.68656921386719, + "eval_loss": 186.22413635253906, + "eval_objective": 182.13821411132812, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.533643901348114, + "eval_regularize": 0.4147377014160156, + "eval_runtime": 478.899, + "eval_samples_per_second": 12.09, + "eval_steps_per_second": 1.009, + "eval_wo_beta": 16.186290740966797, + "step": 550 + }, + { + "dpo_loss": 0.5562130212783813, + "epoch": 1.5729806329711855, + "grad_norm": 13716.988937741002, + "learning_rate": 8.652708039137766e-07, + "logits": -1.2273495197296143, + "logps": -85.2579116821289, + "loss": 90.1931, + "objective": 91.27943420410156, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.5833333134651184, + "regularize": 0.19078856706619263, + "step": 555, + "wo_beta": 14.82008171081543 + }, + { + "dpo_loss": 0.5405426621437073, + "epoch": 1.5871516296646198, + "grad_norm": 13222.290128913079, + "learning_rate": 8.61867019052535e-07, + "logits": -1.3004463911056519, + "logps": -84.03120422363281, + "loss": 82.5026, + "objective": 82.23470306396484, + "ranking_idealized": 0.6083333492279053, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.574999988079071, + "regularize": 0.19372278451919556, + "step": 560, + "wo_beta": 16.210308074951172 + }, + { + "dpo_loss": 0.545985221862793, + "epoch": 1.601322626358054, + "grad_norm": 13798.95251346989, + "learning_rate": 8.584276485461775e-07, + "logits": -1.2903294563293457, + "logps": -85.43083190917969, + "loss": 87.1773, + "objective": 87.97190856933594, + "ranking_idealized": 0.5458333492279053, + "ranking_idealized_expo": 0.44999998807907104, + "ranking_simple": 0.5458333492279053, + "regularize": 0.18329960107803345, + "step": 565, + "wo_beta": 15.259784698486328 + }, + { + "dpo_loss": 0.5544782280921936, + "epoch": 1.615493623051488, + "grad_norm": 14310.342902213652, + "learning_rate": 8.549530306190014e-07, + "logits": -1.4501588344573975, + "logps": -85.62173461914062, + "loss": 86.1569, + "objective": 88.04158020019531, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.5708333253860474, + "regularize": 0.19149872660636902, + "step": 570, + "wo_beta": 15.673080444335938 + }, + { + "dpo_loss": 0.5482128858566284, + "epoch": 1.629664619744922, + "grad_norm": 13673.298787796572, + "learning_rate": 8.514435069615004e-07, + "logits": -1.380743384361267, + "logps": -83.26321411132812, + "loss": 78.7831, + "objective": 86.95629119873047, + "ranking_idealized": 0.637499988079071, + "ranking_idealized_expo": 0.5583333373069763, + "ranking_simple": 0.6166666746139526, + "regularize": 0.1899789720773697, + "step": 575, + "wo_beta": 16.415205001831055 + }, + { + "dpo_loss": 0.5394086241722107, + "epoch": 1.643835616438356, + "grad_norm": 13082.53312626321, + "learning_rate": 8.478994226967638e-07, + "logits": -1.4001491069793701, + "logps": -83.10562133789062, + "loss": 76.6065, + "objective": 77.20848846435547, + "ranking_idealized": 0.6083333492279053, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.6000000238418579, + "regularize": 0.1699313372373581, + "step": 580, + "wo_beta": 14.931032180786133 + }, + { + "dpo_loss": 0.5450774431228638, + "epoch": 1.6580066131317903, + "grad_norm": 14803.266258769623, + "learning_rate": 8.443211263465362e-07, + "logits": -1.2514622211456299, + "logps": -82.91756439208984, + "loss": 81.1936, + "objective": 78.58777618408203, + "ranking_idealized": 0.5333333611488342, + "ranking_idealized_expo": 0.4625000059604645, + "ranking_simple": 0.5291666388511658, + "regularize": 0.18020884692668915, + "step": 585, + "wo_beta": 16.229631423950195 + }, + { + "dpo_loss": 0.5452067852020264, + "epoch": 1.6721776098252243, + "grad_norm": 14897.05549715986, + "learning_rate": 8.407089697969456e-07, + "logits": -1.310152530670166, + "logps": -82.58568572998047, + "loss": 84.6601, + "objective": 89.34095764160156, + "ranking_idealized": 0.6208333373069763, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.5874999761581421, + "regularize": 0.18940496444702148, + "step": 590, + "wo_beta": 13.480273246765137 + }, + { + "dpo_loss": 0.5498862862586975, + "epoch": 1.6863486065186586, + "grad_norm": 13388.885538994262, + "learning_rate": 8.370633082638975e-07, + "logits": -1.2777602672576904, + "logps": -84.25193786621094, + "loss": 85.2501, + "objective": 97.64402770996094, + "ranking_idealized": 0.6166666746139526, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5791666507720947, + "regularize": 0.21810217201709747, + "step": 595, + "wo_beta": 17.075584411621094 + }, + { + "dpo_loss": 0.541688084602356, + "epoch": 1.7005196032120926, + "grad_norm": 12810.439354567186, + "learning_rate": 8.333845002581458e-07, + "logits": -1.3377609252929688, + "logps": -85.63569641113281, + "loss": 79.9458, + "objective": 90.4583740234375, + "ranking_idealized": 0.6333333253860474, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.612500011920929, + "regularize": 0.2045913189649582, + "step": 600, + "wo_beta": 16.088045120239258 + }, + { + "epoch": 1.7005196032120926, + "eval_dpo_loss": 0.6794285774230957, + "eval_logits": -1.3519084453582764, + "eval_logps": -91.084716796875, + "eval_loss": 186.21368408203125, + "eval_objective": 181.86863708496094, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.5372670888900757, + "eval_regularize": 0.4135282337665558, + "eval_runtime": 449.0944, + "eval_samples_per_second": 12.893, + "eval_steps_per_second": 1.075, + "eval_wo_beta": 16.10601043701172, + "step": 600 + }, + { + "dpo_loss": 0.5528364777565002, + "epoch": 1.7146905999055266, + "grad_norm": 12864.49342558613, + "learning_rate": 8.296729075500343e-07, + "logits": -1.2839235067367554, + "logps": -85.77102661132812, + "loss": 81.7288, + "objective": 90.60871124267578, + "ranking_idealized": 0.5833333134651184, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.574999988079071, + "regularize": 0.2010929137468338, + "step": 605, + "wo_beta": 18.23944091796875 + }, + { + "dpo_loss": 0.5559037327766418, + "epoch": 1.7288615965989607, + "grad_norm": 14171.44704590598, + "learning_rate": 8.259288951339232e-07, + "logits": -1.3577406406402588, + "logps": -83.76995086669922, + "loss": 81.4701, + "objective": 75.51998138427734, + "ranking_idealized": 0.6291666626930237, + "ranking_idealized_expo": 0.5708333253860474, + "ranking_simple": 0.6291666626930237, + "regularize": 0.17047205567359924, + "step": 610, + "wo_beta": 16.163959503173828 + }, + { + "dpo_loss": 0.5623223185539246, + "epoch": 1.743032593292395, + "grad_norm": 14064.695817652162, + "learning_rate": 8.221528311922941e-07, + "logits": -1.3709431886672974, + "logps": -83.62710571289062, + "loss": 84.5652, + "objective": 85.32384490966797, + "ranking_idealized": 0.5833333134651184, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.5791666507720947, + "regularize": 0.19118142127990723, + "step": 615, + "wo_beta": 15.722049713134766 + }, + { + "dpo_loss": 0.5426214933395386, + "epoch": 1.7572035899858292, + "grad_norm": 13161.981948520664, + "learning_rate": 8.183450870595441e-07, + "logits": -1.3993924856185913, + "logps": -83.90966796875, + "loss": 81.5518, + "objective": 84.29554748535156, + "ranking_idealized": 0.6208333373069763, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.6000000238418579, + "regularize": 0.19568467140197754, + "step": 620, + "wo_beta": 16.582374572753906 + }, + { + "dpo_loss": 0.5548843145370483, + "epoch": 1.7713745866792632, + "grad_norm": 13578.593083281268, + "learning_rate": 8.145060371854691e-07, + "logits": -1.3166680335998535, + "logps": -83.37279510498047, + "loss": 77.6344, + "objective": 80.62175750732422, + "ranking_idealized": 0.6083333492279053, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5916666388511658, + "regularize": 0.17566385865211487, + "step": 625, + "wo_beta": 15.19571304321289 + }, + { + "dpo_loss": 0.548730194568634, + "epoch": 1.7855455833726972, + "grad_norm": 12867.261945978005, + "learning_rate": 8.106360590984404e-07, + "logits": -1.3329386711120605, + "logps": -85.60625457763672, + "loss": 75.8762, + "objective": 75.14217376708984, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.49166667461395264, + "ranking_simple": 0.5583333373069763, + "regularize": 0.17412720620632172, + "step": 630, + "wo_beta": 16.33298110961914 + }, + { + "dpo_loss": 0.5529462695121765, + "epoch": 1.7997165800661312, + "grad_norm": 12432.106461076137, + "learning_rate": 8.067355333682797e-07, + "logits": -1.4188921451568604, + "logps": -84.8874282836914, + "loss": 78.6516, + "objective": 76.64624786376953, + "ranking_idealized": 0.637499988079071, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.5916666388511658, + "regularize": 0.17813840508460999, + "step": 635, + "wo_beta": 16.95586395263672 + }, + { + "dpo_loss": 0.5410430431365967, + "epoch": 1.8138875767595655, + "grad_norm": 12324.183379735212, + "learning_rate": 8.028048435688333e-07, + "logits": -1.3641606569290161, + "logps": -85.47127532958984, + "loss": 78.7118, + "objective": 82.21182250976562, + "ranking_idealized": 0.5666666626930237, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.5666666626930237, + "regularize": 0.18489192426204681, + "step": 640, + "wo_beta": 17.22258186340332 + }, + { + "dpo_loss": 0.5470555424690247, + "epoch": 1.8280585734529995, + "grad_norm": 13971.672253595729, + "learning_rate": 7.988443762402523e-07, + "logits": -1.4050637483596802, + "logps": -85.07406616210938, + "loss": 78.6084, + "objective": 74.21890258789062, + "ranking_idealized": 0.6416666507720947, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.6291666626930237, + "regularize": 0.16714391112327576, + "step": 645, + "wo_beta": 16.80624008178711 + }, + { + "dpo_loss": 0.5424051880836487, + "epoch": 1.8422295701464337, + "grad_norm": 15285.601428700493, + "learning_rate": 7.948545208509811e-07, + "logits": -1.440900444984436, + "logps": -84.5870590209961, + "loss": 86.7578, + "objective": 89.12664031982422, + "ranking_idealized": 0.612500011920929, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.6000000238418579, + "regularize": 0.18858183920383453, + "step": 650, + "wo_beta": 15.562705039978027 + }, + { + "epoch": 1.8422295701464337, + "eval_dpo_loss": 0.6796835660934448, + "eval_logits": -1.3402661085128784, + "eval_logps": -89.40703582763672, + "eval_loss": 186.7196044921875, + "eval_objective": 182.49703979492188, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.531573474407196, + "eval_regularize": 0.4141009747982025, + "eval_runtime": 450.1436, + "eval_samples_per_second": 12.863, + "eval_steps_per_second": 1.073, + "eval_wo_beta": 16.0269832611084, + "step": 650 + }, + { + "dpo_loss": 0.5390594601631165, + "epoch": 1.8564005668398678, + "grad_norm": 14945.717954531257, + "learning_rate": 7.90835669759456e-07, + "logits": -1.292981505393982, + "logps": -81.8280029296875, + "loss": 79.8064, + "objective": 77.88701629638672, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.6000000238418579, + "regularize": 0.18046139180660248, + "step": 655, + "wo_beta": 15.520308494567871 + }, + { + "dpo_loss": 0.5524376034736633, + "epoch": 1.8705715635333018, + "grad_norm": 12956.308969791295, + "learning_rate": 7.86788218175523e-07, + "logits": -1.3386873006820679, + "logps": -84.97721862792969, + "loss": 77.9731, + "objective": 77.8855972290039, + "ranking_idealized": 0.6041666865348816, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.6041666865348816, + "regularize": 0.17455393075942993, + "step": 660, + "wo_beta": 17.077417373657227 + }, + { + "dpo_loss": 0.562981903553009, + "epoch": 1.8847425602267358, + "grad_norm": 12832.376229580192, + "learning_rate": 7.827125641215718e-07, + "logits": -1.334754228591919, + "logps": -83.5533447265625, + "loss": 82.4367, + "objective": 85.92207336425781, + "ranking_idealized": 0.6333333253860474, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.625, + "regularize": 0.1833416372537613, + "step": 665, + "wo_beta": 15.230433464050293 + }, + { + "dpo_loss": 0.548839807510376, + "epoch": 1.89891355692017, + "grad_norm": 13460.183191194346, + "learning_rate": 7.786091083933949e-07, + "logits": -1.273821473121643, + "logps": -81.98705291748047, + "loss": 71.3613, + "objective": 68.62953186035156, + "ranking_idealized": 0.5666666626930237, + "ranking_idealized_expo": 0.4791666567325592, + "ranking_simple": 0.5375000238418579, + "regularize": 0.16619008779525757, + "step": 670, + "wo_beta": 16.408151626586914 + }, + { + "dpo_loss": 0.5611833930015564, + "epoch": 1.9130845536136043, + "grad_norm": 12953.446893922981, + "learning_rate": 7.744782545207744e-07, + "logits": -1.2947652339935303, + "logps": -83.05793762207031, + "loss": 71.3196, + "objective": 74.63235473632812, + "ranking_idealized": 0.6166666746139526, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.6083333492279053, + "regularize": 0.16350051760673523, + "step": 675, + "wo_beta": 15.741961479187012 + }, + { + "dpo_loss": 0.5451231598854065, + "epoch": 1.9272555503070383, + "grad_norm": 13412.02601484903, + "learning_rate": 7.703204087277988e-07, + "logits": -1.3697810173034668, + "logps": -85.1467056274414, + "loss": 71.5185, + "objective": 70.06403350830078, + "ranking_idealized": 0.6166666746139526, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.6000000238418579, + "regularize": 0.16510257124900818, + "step": 680, + "wo_beta": 15.431069374084473 + }, + { + "dpo_loss": 0.5437536835670471, + "epoch": 1.9414265470004723, + "grad_norm": 13070.654673150682, + "learning_rate": 7.661359798929152e-07, + "logits": -1.2984110116958618, + "logps": -82.4813003540039, + "loss": 72.6279, + "objective": 63.83388137817383, + "ranking_idealized": 0.5291666388511658, + "ranking_idealized_expo": 0.4791666567325592, + "ranking_simple": 0.5333333611488342, + "regularize": 0.1474105566740036, + "step": 685, + "wo_beta": 15.765579223632812 + }, + { + "dpo_loss": 0.5520148873329163, + "epoch": 1.9555975436939064, + "grad_norm": 13585.612422979371, + "learning_rate": 7.619253795087208e-07, + "logits": -1.3621736764907837, + "logps": -83.20579528808594, + "loss": 70.4149, + "objective": 71.44465637207031, + "ranking_idealized": 0.6041666865348816, + "ranking_idealized_expo": 0.5583333373069763, + "ranking_simple": 0.5916666388511658, + "regularize": 0.15733769536018372, + "step": 690, + "wo_beta": 16.008506774902344 + }, + { + "dpo_loss": 0.5521395802497864, + "epoch": 1.9697685403873406, + "grad_norm": 12626.830880791873, + "learning_rate": 7.576890216414972e-07, + "logits": -1.2345752716064453, + "logps": -84.00497436523438, + "loss": 69.938, + "objective": 70.55232238769531, + "ranking_idealized": 0.5666666626930237, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.5541666746139526, + "regularize": 0.15369382500648499, + "step": 695, + "wo_beta": 16.505474090576172 + }, + { + "dpo_loss": 0.5477771759033203, + "epoch": 1.9839395370807746, + "grad_norm": 14507.10563022748, + "learning_rate": 7.534273228904915e-07, + "logits": -1.2208502292633057, + "logps": -84.28005981445312, + "loss": 76.2665, + "objective": 85.08452606201172, + "ranking_idealized": 0.6041666865348816, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.574999988079071, + "regularize": 0.1893630176782608, + "step": 700, + "wo_beta": 15.212244987487793 + }, + { + "epoch": 1.9839395370807746, + "eval_dpo_loss": 0.6800020337104797, + "eval_logits": -1.3222942352294922, + "eval_logps": -89.5856704711914, + "eval_loss": 186.28018188476562, + "eval_objective": 182.39332580566406, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.5310559272766113, + "eval_regularize": 0.4136333167552948, + "eval_runtime": 489.8617, + "eval_samples_per_second": 11.82, + "eval_steps_per_second": 0.986, + "eval_wo_beta": 16.111663818359375, + "step": 700 + }, + { + "dpo_loss": 0.5436014533042908, + "epoch": 1.9981105337742089, + "grad_norm": 14761.110739737924, + "learning_rate": 7.49140702346948e-07, + "logits": -1.1587742567062378, + "logps": -83.4106216430664, + "loss": 71.0478, + "objective": 77.40288543701172, + "ranking_idealized": 0.5916666388511658, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5541666746139526, + "regularize": 0.1687079817056656, + "step": 705, + "wo_beta": 17.46946907043457 + }, + { + "dpo_loss": 0.5400715470314026, + "epoch": 2.012281530467643, + "grad_norm": 13854.290443619322, + "learning_rate": 7.448295815528956e-07, + "logits": -1.3091672658920288, + "logps": -83.20928192138672, + "loss": 68.6235, + "objective": 74.59980773925781, + "ranking_idealized": 0.6083333492279053, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.6000000238418579, + "regularize": 0.15744589269161224, + "step": 710, + "wo_beta": 16.282772064208984 + }, + { + "dpo_loss": 0.5266523957252502, + "epoch": 2.026452527161077, + "grad_norm": 12772.47402835887, + "learning_rate": 7.404943844596938e-07, + "logits": -1.3287214040756226, + "logps": -82.50818634033203, + "loss": 67.4219, + "objective": 67.50071716308594, + "ranking_idealized": 0.6791666746139526, + "ranking_idealized_expo": 0.5708333253860474, + "ranking_simple": 0.6625000238418579, + "regularize": 0.15344351530075073, + "step": 715, + "wo_beta": 15.63277816772461 + }, + { + "dpo_loss": 0.5342952013015747, + "epoch": 2.040623523854511, + "grad_norm": 12280.29550374932, + "learning_rate": 7.361355373863413e-07, + "logits": -1.3206126689910889, + "logps": -83.4239273071289, + "loss": 65.7671, + "objective": 62.988590240478516, + "ranking_idealized": 0.6291666626930237, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.6041666865348816, + "regularize": 0.13976921141147614, + "step": 720, + "wo_beta": 16.120634078979492 + }, + { + "dpo_loss": 0.5416182279586792, + "epoch": 2.0547945205479454, + "grad_norm": 11934.95995024634, + "learning_rate": 7.317534689775527e-07, + "logits": -1.329419732093811, + "logps": -86.18152618408203, + "loss": 73.1378, + "objective": 77.66006469726562, + "ranking_idealized": 0.5833333134651184, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.574999988079071, + "regularize": 0.1658337563276291, + "step": 725, + "wo_beta": 14.640992164611816 + }, + { + "dpo_loss": 0.5336278080940247, + "epoch": 2.0689655172413794, + "grad_norm": 13017.829141332633, + "learning_rate": 7.273486101616056e-07, + "logits": -1.4032765626907349, + "logps": -83.5689468383789, + "loss": 73.2891, + "objective": 73.26839447021484, + "ranking_idealized": 0.5791666507720947, + "ranking_idealized_expo": 0.49166667461395264, + "ranking_simple": 0.5874999761581421, + "regularize": 0.15773232281208038, + "step": 730, + "wo_beta": 15.76942253112793 + }, + { + "dpo_loss": 0.5291448831558228, + "epoch": 2.0831365139348135, + "grad_norm": 13426.194750558408, + "learning_rate": 7.229213941079639e-07, + "logits": -1.3250770568847656, + "logps": -82.74713897705078, + "loss": 59.2054, + "objective": 57.16627883911133, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.47083333134651184, + "ranking_simple": 0.550000011920929, + "regularize": 0.14032262563705444, + "step": 735, + "wo_beta": 17.056970596313477 + }, + { + "dpo_loss": 0.5465752482414246, + "epoch": 2.0973075106282475, + "grad_norm": 11906.26841829341, + "learning_rate": 7.184722561846797e-07, + "logits": -1.3804094791412354, + "logps": -82.77980041503906, + "loss": 62.2469, + "objective": 65.71126556396484, + "ranking_idealized": 0.6166666746139526, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.6083333492279053, + "regularize": 0.14654967188835144, + "step": 740, + "wo_beta": 15.721449851989746 + }, + { + "dpo_loss": 0.5360319018363953, + "epoch": 2.1114785073216815, + "grad_norm": 13337.057180758171, + "learning_rate": 7.14001633915581e-07, + "logits": -1.313341498374939, + "logps": -83.15229797363281, + "loss": 60.0244, + "objective": 60.3892822265625, + "ranking_idealized": 0.6333333253860474, + "ranking_idealized_expo": 0.5583333373069763, + "ranking_simple": 0.625, + "regularize": 0.13975684344768524, + "step": 745, + "wo_beta": 15.697921752929688 + }, + { + "dpo_loss": 0.5399072170257568, + "epoch": 2.1256495040151155, + "grad_norm": 13331.418550163386, + "learning_rate": 7.095099669372443e-07, + "logits": -1.3453633785247803, + "logps": -82.3453369140625, + "loss": 65.1575, + "objective": 60.51906967163086, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.5666666626930237, + "regularize": 0.1316269487142563, + "step": 750, + "wo_beta": 15.831055641174316 + }, + { + "epoch": 2.1256495040151155, + "eval_dpo_loss": 0.6806153059005737, + "eval_logits": -1.3253074884414673, + "eval_logps": -90.24537658691406, + "eval_loss": 188.15711975097656, + "eval_objective": 184.20758056640625, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.5320910811424255, + "eval_regularize": 0.4179456830024719, + "eval_runtime": 478.2913, + "eval_samples_per_second": 12.106, + "eval_steps_per_second": 1.01, + "eval_wo_beta": 15.917864799499512, + "step": 750 + }, + { + "dpo_loss": 0.5414224863052368, + "epoch": 2.13982050070855, + "grad_norm": 14942.893679399409, + "learning_rate": 7.049976969557623e-07, + "logits": -1.3125241994857788, + "logps": -85.55477905273438, + "loss": 70.5458, + "objective": 72.25684356689453, + "ranking_idealized": 0.5916666388511658, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.5874999761581421, + "regularize": 0.15286041796207428, + "step": 755, + "wo_beta": 16.596240997314453 + }, + { + "dpo_loss": 0.5502544641494751, + "epoch": 2.153991497401984, + "grad_norm": 14884.220119069658, + "learning_rate": 7.004652677033068e-07, + "logits": -1.2573704719543457, + "logps": -81.78999328613281, + "loss": 66.5347, + "objective": 56.669010162353516, + "ranking_idealized": 0.5958333611488342, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.5833333134651184, + "regularize": 0.1313803344964981, + "step": 760, + "wo_beta": 14.706622123718262 + }, + { + "dpo_loss": 0.537317156791687, + "epoch": 2.168162494095418, + "grad_norm": 12849.6702201699, + "learning_rate": 6.959131248944922e-07, + "logits": -1.3043426275253296, + "logps": -82.6404037475586, + "loss": 60.5154, + "objective": 57.57880401611328, + "ranking_idealized": 0.6208333373069763, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.6041666865348816, + "regularize": 0.13467958569526672, + "step": 765, + "wo_beta": 16.29267120361328 + }, + { + "dpo_loss": 0.5396389365196228, + "epoch": 2.182333490788852, + "grad_norm": 13686.597971217428, + "learning_rate": 6.913417161825449e-07, + "logits": -1.3148149251937866, + "logps": -82.22266387939453, + "loss": 66.4186, + "objective": 71.55656433105469, + "ranking_idealized": 0.5916666388511658, + "ranking_idealized_expo": 0.49166667461395264, + "ranking_simple": 0.574999988079071, + "regularize": 0.15875324606895447, + "step": 770, + "wo_beta": 14.279667854309082 + }, + { + "dpo_loss": 0.5356777906417847, + "epoch": 2.196504487482286, + "grad_norm": 13109.133649943296, + "learning_rate": 6.867514911152806e-07, + "logits": -1.279820203781128, + "logps": -82.98641204833984, + "loss": 62.1208, + "objective": 65.08477020263672, + "ranking_idealized": 0.6291666626930237, + "ranking_idealized_expo": 0.5791666507720947, + "ranking_simple": 0.625, + "regularize": 0.14647550880908966, + "step": 775, + "wo_beta": 17.69573211669922 + }, + { + "dpo_loss": 0.5467700362205505, + "epoch": 2.21067548417572, + "grad_norm": 13977.878251046886, + "learning_rate": 6.821429010908971e-07, + "logits": -1.2058584690093994, + "logps": -82.53013610839844, + "loss": 63.1931, + "objective": 62.46464538574219, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.44583332538604736, + "ranking_simple": 0.5041666626930237, + "regularize": 0.13306237757205963, + "step": 780, + "wo_beta": 15.349116325378418 + }, + { + "dpo_loss": 0.5252477526664734, + "epoch": 2.2248464808691546, + "grad_norm": 13522.027705329157, + "learning_rate": 6.775163993135842e-07, + "logits": -1.20766019821167, + "logps": -81.99567413330078, + "loss": 66.8492, + "objective": 59.73252487182617, + "ranking_idealized": 0.5833333134651184, + "ranking_idealized_expo": 0.4791666567325592, + "ranking_simple": 0.5708333253860474, + "regularize": 0.13541431725025177, + "step": 785, + "wo_beta": 15.272583961486816 + }, + { + "dpo_loss": 0.5247431993484497, + "epoch": 2.2390174775625886, + "grad_norm": 12425.328833284808, + "learning_rate": 6.728724407489553e-07, + "logits": -1.205735445022583, + "logps": -82.88821411132812, + "loss": 66.8893, + "objective": 59.76982498168945, + "ranking_idealized": 0.637499988079071, + "ranking_idealized_expo": 0.5625, + "ranking_simple": 0.637499988079071, + "regularize": 0.14265631139278412, + "step": 790, + "wo_beta": 15.509627342224121 + }, + { + "dpo_loss": 0.5296502113342285, + "epoch": 2.2531884742560226, + "grad_norm": 11978.127680414538, + "learning_rate": 6.682114820793074e-07, + "logits": -1.2859066724777222, + "logps": -84.08002471923828, + "loss": 63.7577, + "objective": 59.34935760498047, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.612500011920929, + "regularize": 0.14149998128414154, + "step": 795, + "wo_beta": 14.346338272094727 + }, + { + "dpo_loss": 0.5199058651924133, + "epoch": 2.2673594709494567, + "grad_norm": 12421.855115848897, + "learning_rate": 6.635339816587108e-07, + "logits": -1.3125, + "logps": -83.2691879272461, + "loss": 66.0375, + "objective": 66.00747680664062, + "ranking_idealized": 0.5916666388511658, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.6041666865348816, + "regularize": 0.14774902164936066, + "step": 800, + "wo_beta": 14.81782341003418 + }, + { + "epoch": 2.2673594709494567, + "eval_dpo_loss": 0.6780735850334167, + "eval_logits": -1.3137409687042236, + "eval_logps": -88.58743286132812, + "eval_loss": 186.72210693359375, + "eval_objective": 181.93551635742188, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.533643901348114, + "eval_regularize": 0.4137285053730011, + "eval_runtime": 481.1373, + "eval_samples_per_second": 12.034, + "eval_steps_per_second": 1.004, + "eval_wo_beta": 15.987866401672363, + "step": 800 + }, + { + "dpo_loss": 0.5357646942138672, + "epoch": 2.2815304676428907, + "grad_norm": 12458.299460461743, + "learning_rate": 6.588403994679354e-07, + "logits": -1.319643497467041, + "logps": -81.99591827392578, + "loss": 60.8943, + "objective": 64.13407135009766, + "ranking_idealized": 0.5833333134651184, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.574999988079071, + "regularize": 0.13611546158790588, + "step": 805, + "wo_beta": 16.0935001373291 + }, + { + "dpo_loss": 0.5283416509628296, + "epoch": 2.295701464336325, + "grad_norm": 12943.281420533918, + "learning_rate": 6.541311970692162e-07, + "logits": -1.4129080772399902, + "logps": -81.64440155029297, + "loss": 61.2974, + "objective": 61.06173324584961, + "ranking_idealized": 0.6458333134651184, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.6499999761581421, + "regularize": 0.13648824393749237, + "step": 810, + "wo_beta": 14.963865280151367 + }, + { + "dpo_loss": 0.5351440906524658, + "epoch": 2.309872461029759, + "grad_norm": 12894.991014128658, + "learning_rate": 6.494068375608646e-07, + "logits": -1.352980136871338, + "logps": -83.23399353027344, + "loss": 60.8069, + "objective": 63.9875602722168, + "ranking_idealized": 0.5791666507720947, + "ranking_idealized_expo": 0.47083333134651184, + "ranking_simple": 0.5791666507720947, + "regularize": 0.13894489407539368, + "step": 815, + "wo_beta": 15.326094627380371 + }, + { + "dpo_loss": 0.5325611233711243, + "epoch": 2.324043457723193, + "grad_norm": 12345.798302601574, + "learning_rate": 6.446677855317264e-07, + "logits": -1.2916339635849, + "logps": -81.8837661743164, + "loss": 59.9305, + "objective": 55.95283126831055, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5708333253860474, + "regularize": 0.12038219720125198, + "step": 820, + "wo_beta": 15.182144165039062 + }, + { + "dpo_loss": 0.5271125435829163, + "epoch": 2.3382144544166272, + "grad_norm": 12783.217599288302, + "learning_rate": 6.39914507015496e-07, + "logits": -1.3013333082199097, + "logps": -81.13337707519531, + "loss": 58.233, + "objective": 62.38441467285156, + "ranking_idealized": 0.625, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.612500011920929, + "regularize": 0.14106927812099457, + "step": 825, + "wo_beta": 16.586782455444336 + }, + { + "dpo_loss": 0.5309893488883972, + "epoch": 2.3523854511100613, + "grad_norm": 14368.93982814313, + "learning_rate": 6.351474694448864e-07, + "logits": -1.2905962467193604, + "logps": -83.69612121582031, + "loss": 59.3517, + "objective": 62.03671646118164, + "ranking_idealized": 0.6416666507720947, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.6416666507720947, + "regularize": 0.13450145721435547, + "step": 830, + "wo_beta": 16.384456634521484 + }, + { + "dpo_loss": 0.5386961102485657, + "epoch": 2.3665564478034957, + "grad_norm": 12278.034874198123, + "learning_rate": 6.303671416056621e-07, + "logits": -1.2532858848571777, + "logps": -83.63367462158203, + "loss": 63.5605, + "objective": 61.1205940246582, + "ranking_idealized": 0.6166666746139526, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.6041666865348816, + "regularize": 0.1340387463569641, + "step": 835, + "wo_beta": 15.07408618927002 + }, + { + "dpo_loss": 0.5518457293510437, + "epoch": 2.3807274444969297, + "grad_norm": 12325.077561512098, + "learning_rate": 6.255739935905395e-07, + "logits": -1.222998023033142, + "logps": -83.31403350830078, + "loss": 56.4779, + "objective": 54.8234977722168, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.5041666626930237, + "regularize": 0.12345383316278458, + "step": 840, + "wo_beta": 15.817675590515137 + }, + { + "dpo_loss": 0.5455399751663208, + "epoch": 2.3948984411903638, + "grad_norm": 14534.352470484577, + "learning_rate": 6.207684967529592e-07, + "logits": -1.2789770364761353, + "logps": -84.17676544189453, + "loss": 61.3501, + "objective": 56.92399978637695, + "ranking_idealized": 0.5958333611488342, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.5958333611488342, + "regularize": 0.12513183057308197, + "step": 845, + "wo_beta": 16.274921417236328 + }, + { + "dpo_loss": 0.5384249091148376, + "epoch": 2.409069437883798, + "grad_norm": 11975.067630184618, + "learning_rate": 6.159511236607315e-07, + "logits": -1.3067547082901, + "logps": -81.92616271972656, + "loss": 55.6773, + "objective": 53.89519500732422, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.49166667461395264, + "ranking_simple": 0.5541666746139526, + "regularize": 0.12293127924203873, + "step": 850, + "wo_beta": 15.953167915344238 + }, + { + "epoch": 2.409069437883798, + "eval_dpo_loss": 0.680902361869812, + "eval_logits": -1.311160922050476, + "eval_logps": -88.26885986328125, + "eval_loss": 189.5397491455078, + "eval_objective": 185.2095947265625, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.5300207138061523, + "eval_regularize": 0.42031434178352356, + "eval_runtime": 490.419, + "eval_samples_per_second": 11.806, + "eval_steps_per_second": 0.985, + "eval_wo_beta": 15.931052207946777, + "step": 850 + }, + { + "dpo_loss": 0.5426651239395142, + "epoch": 2.423240434577232, + "grad_norm": 13056.278516188751, + "learning_rate": 6.111223480495671e-07, + "logits": -1.3305928707122803, + "logps": -80.8778076171875, + "loss": 60.7771, + "objective": 57.707275390625, + "ranking_idealized": 0.5916666388511658, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.5916666388511658, + "regularize": 0.1298539638519287, + "step": 855, + "wo_beta": 15.123750686645508 + }, + { + "dpo_loss": 0.537179172039032, + "epoch": 2.4374114312706663, + "grad_norm": 13276.37666715339, + "learning_rate": 6.062826447764883e-07, + "logits": -1.2815066576004028, + "logps": -82.55672454833984, + "loss": 55.8238, + "objective": 53.87760925292969, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.6000000238418579, + "regularize": 0.12577569484710693, + "step": 860, + "wo_beta": 16.197458267211914 + }, + { + "dpo_loss": 0.53245609998703, + "epoch": 2.4515824279641003, + "grad_norm": 13115.296464572477, + "learning_rate": 6.014324897731333e-07, + "logits": -1.305693507194519, + "logps": -81.65880584716797, + "loss": 57.2162, + "objective": 57.622314453125, + "ranking_idealized": 0.5791666507720947, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.5708333253860474, + "regularize": 0.12618619203567505, + "step": 865, + "wo_beta": 16.600849151611328 + }, + { + "dpo_loss": 0.53475421667099, + "epoch": 2.4657534246575343, + "grad_norm": 13057.72282671728, + "learning_rate": 5.965723599989528e-07, + "logits": -1.347506046295166, + "logps": -82.02439880371094, + "loss": 59.1596, + "objective": 58.05669403076172, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.5916666388511658, + "regularize": 0.12966732680797577, + "step": 870, + "wo_beta": 15.612308502197266 + }, + { + "dpo_loss": 0.5284960865974426, + "epoch": 2.4799244213509684, + "grad_norm": 13136.725552830958, + "learning_rate": 5.917027333943072e-07, + "logits": -1.2931278944015503, + "logps": -82.28563690185547, + "loss": 52.9771, + "objective": 52.34040069580078, + "ranking_idealized": 0.6166666746139526, + "ranking_idealized_expo": 0.5708333253860474, + "ranking_simple": 0.6041666865348816, + "regularize": 0.12045804411172867, + "step": 875, + "wo_beta": 17.299848556518555 + }, + { + "dpo_loss": 0.5465295910835266, + "epoch": 2.4940954180444024, + "grad_norm": 12177.118012490373, + "learning_rate": 5.868240888334652e-07, + "logits": -1.206485390663147, + "logps": -82.52658081054688, + "loss": 59.4905, + "objective": 58.06962203979492, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.574999988079071, + "regularize": 0.13045351207256317, + "step": 880, + "wo_beta": 17.634618759155273 + }, + { + "dpo_loss": 0.5378908514976501, + "epoch": 2.5082664147378364, + "grad_norm": 12653.821371026783, + "learning_rate": 5.819369060775124e-07, + "logits": -1.3703595399856567, + "logps": -81.24169921875, + "loss": 54.0377, + "objective": 55.50392150878906, + "ranking_idealized": 0.5916666388511658, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.6000000238418579, + "regularize": 0.1243302971124649, + "step": 885, + "wo_beta": 16.991498947143555 + }, + { + "dpo_loss": 0.521662712097168, + "epoch": 2.5224374114312704, + "grad_norm": 13224.96582542829, + "learning_rate": 5.770416657271728e-07, + "logits": -1.2803348302841187, + "logps": -80.2920913696289, + "loss": 54.9019, + "objective": 55.66249084472656, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5833333134651184, + "regularize": 0.12915806472301483, + "step": 890, + "wo_beta": 14.390849113464355 + }, + { + "dpo_loss": 0.538814902305603, + "epoch": 2.536608408124705, + "grad_norm": 13679.562551953088, + "learning_rate": 5.721388491755455e-07, + "logits": -1.2745685577392578, + "logps": -82.53682708740234, + "loss": 55.8587, + "objective": 53.00823211669922, + "ranking_idealized": 0.5458333492279053, + "ranking_idealized_expo": 0.49166667461395264, + "ranking_simple": 0.5458333492279053, + "regularize": 0.12104254215955734, + "step": 895, + "wo_beta": 16.952863693237305 + }, + { + "dpo_loss": 0.5534180998802185, + "epoch": 2.550779404818139, + "grad_norm": 12973.031921366075, + "learning_rate": 5.67228938560766e-07, + "logits": -1.2396929264068604, + "logps": -81.03583526611328, + "loss": 54.3682, + "objective": 53.294551849365234, + "ranking_idealized": 0.6166666746139526, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.6000000238418579, + "regularize": 0.12195997685194016, + "step": 900, + "wo_beta": 15.316643714904785 + }, + { + "epoch": 2.550779404818139, + "eval_dpo_loss": 0.6793311238288879, + "eval_logits": -1.3258877992630005, + "eval_logps": -88.36107635498047, + "eval_loss": 188.23812866210938, + "eval_objective": 184.16783142089844, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.5310559272766113, + "eval_regularize": 0.41672980785369873, + "eval_runtime": 486.377, + "eval_samples_per_second": 11.904, + "eval_steps_per_second": 0.993, + "eval_wo_beta": 15.968037605285645, + "step": 900 + }, + { + "dpo_loss": 0.5381408929824829, + "epoch": 2.564950401511573, + "grad_norm": 11810.259224351357, + "learning_rate": 5.623124167185929e-07, + "logits": -1.3189753293991089, + "logps": -81.03609466552734, + "loss": 51.9527, + "objective": 49.01388931274414, + "ranking_idealized": 0.5708333253860474, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5541666746139526, + "regularize": 0.11513598263263702, + "step": 905, + "wo_beta": 15.316691398620605 + }, + { + "dpo_loss": 0.5291991829872131, + "epoch": 2.579121398205007, + "grad_norm": 12343.801160156707, + "learning_rate": 5.573897671349268e-07, + "logits": -1.2955931425094604, + "logps": -83.91735076904297, + "loss": 55.8812, + "objective": 63.70806121826172, + "ranking_idealized": 0.612500011920929, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.5958333611488342, + "regularize": 0.13904932141304016, + "step": 910, + "wo_beta": 16.40995216369629 + }, + { + "dpo_loss": 0.5379226803779602, + "epoch": 2.593292394898441, + "grad_norm": 12490.177742860027, + "learning_rate": 5.524614738982637e-07, + "logits": -1.4045764207839966, + "logps": -82.5849838256836, + "loss": 55.5769, + "objective": 54.98591613769531, + "ranking_idealized": 0.6291666626930237, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.612500011920929, + "regularize": 0.12587417662143707, + "step": 915, + "wo_beta": 15.309656143188477 + }, + { + "dpo_loss": 0.5216780304908752, + "epoch": 2.6074633915918755, + "grad_norm": 12017.347028460124, + "learning_rate": 5.475280216520912e-07, + "logits": -1.2480995655059814, + "logps": -82.11782836914062, + "loss": 56.8294, + "objective": 57.75908660888672, + "ranking_idealized": 0.6041666865348816, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.6041666865348816, + "regularize": 0.12090341001749039, + "step": 920, + "wo_beta": 16.191049575805664 + }, + { + "dpo_loss": 0.5298858284950256, + "epoch": 2.6216343882853095, + "grad_norm": 14009.68291839978, + "learning_rate": 5.42589895547229e-07, + "logits": -1.280160665512085, + "logps": -82.20765686035156, + "loss": 53.1774, + "objective": 55.67765426635742, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.5625, + "regularize": 0.12424833327531815, + "step": 925, + "wo_beta": 16.476573944091797 + }, + { + "dpo_loss": 0.5387442111968994, + "epoch": 2.6358053849787435, + "grad_norm": 12640.001047074344, + "learning_rate": 5.376475811941191e-07, + "logits": -1.2655282020568848, + "logps": -82.08385467529297, + "loss": 52.6196, + "objective": 55.54609680175781, + "ranking_idealized": 0.5416666865348816, + "ranking_idealized_expo": 0.44583332538604736, + "ranking_simple": 0.5333333611488342, + "regularize": 0.12455514818429947, + "step": 930, + "wo_beta": 16.72053337097168 + }, + { + "dpo_loss": 0.5488451719284058, + "epoch": 2.6499763816721775, + "grad_norm": 12698.751364257567, + "learning_rate": 5.327015646150716e-07, + "logits": -1.2632043361663818, + "logps": -81.3023910522461, + "loss": 50.4175, + "objective": 51.81110763549805, + "ranking_idealized": 0.6499999761581421, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.6291666626930237, + "regularize": 0.1139976978302002, + "step": 935, + "wo_beta": 16.381933212280273 + }, + { + "dpo_loss": 0.518785834312439, + "epoch": 2.6641473783656116, + "grad_norm": 14057.06029309221, + "learning_rate": 5.277523321964701e-07, + "logits": -1.3097693920135498, + "logps": -84.61360931396484, + "loss": 52.2129, + "objective": 56.00838088989258, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.6041666865348816, + "regularize": 0.11512833088636398, + "step": 940, + "wo_beta": 17.616283416748047 + }, + { + "dpo_loss": 0.5271897912025452, + "epoch": 2.678318375059046, + "grad_norm": 13084.001689574132, + "learning_rate": 5.228003706409409e-07, + "logits": -1.3481143712997437, + "logps": -83.27128601074219, + "loss": 49.6737, + "objective": 52.79602813720703, + "ranking_idealized": 0.5666666626930237, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.5541666746139526, + "regularize": 0.11426286399364471, + "step": 945, + "wo_beta": 16.029043197631836 + }, + { + "dpo_loss": 0.5474939942359924, + "epoch": 2.69248937175248, + "grad_norm": 13821.932425093552, + "learning_rate": 5.178461669194903e-07, + "logits": -1.2337779998779297, + "logps": -83.05430603027344, + "loss": 50.3775, + "objective": 45.27042007446289, + "ranking_idealized": 0.637499988079071, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.625, + "regularize": 0.10929083079099655, + "step": 950, + "wo_beta": 15.533432006835938 + }, + { + "epoch": 2.69248937175248, + "eval_dpo_loss": 0.6802442669868469, + "eval_logits": -1.3090835809707642, + "eval_logps": -88.80048370361328, + "eval_loss": 189.54185485839844, + "eval_objective": 185.00436401367188, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.5331262946128845, + "eval_regularize": 0.418261855840683, + "eval_runtime": 491.779, + "eval_samples_per_second": 11.774, + "eval_steps_per_second": 0.982, + "eval_wo_beta": 15.998626708984375, + "step": 950 + }, + { + "dpo_loss": 0.5236973166465759, + "epoch": 2.706660368445914, + "grad_norm": 13266.227245283348, + "learning_rate": 5.128902082236175e-07, + "logits": -1.319283127784729, + "logps": -82.27372741699219, + "loss": 46.7135, + "objective": 43.35396194458008, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.574999988079071, + "regularize": 0.10942530632019043, + "step": 955, + "wo_beta": 14.039530754089355 + }, + { + "dpo_loss": 0.5411895513534546, + "epoch": 2.720831365139348, + "grad_norm": 13668.800292035428, + "learning_rate": 5.07932981917404e-07, + "logits": -1.2875874042510986, + "logps": -81.88396453857422, + "loss": 53.1799, + "objective": 54.5617561340332, + "ranking_idealized": 0.5791666507720947, + "ranking_idealized_expo": 0.49166667461395264, + "ranking_simple": 0.5791666507720947, + "regularize": 0.11944962292909622, + "step": 960, + "wo_beta": 16.39274787902832 + }, + { + "dpo_loss": 0.5236133933067322, + "epoch": 2.735002361832782, + "grad_norm": 12761.114664799663, + "learning_rate": 5.029749754895868e-07, + "logits": -1.306726098060608, + "logps": -82.27013397216797, + "loss": 49.2644, + "objective": 47.3409309387207, + "ranking_idealized": 0.6416666507720947, + "ranking_idealized_expo": 0.5666666626930237, + "ranking_simple": 0.6291666626930237, + "regularize": 0.10921643674373627, + "step": 965, + "wo_beta": 15.65440559387207 + }, + { + "dpo_loss": 0.5498754382133484, + "epoch": 2.7491733585262166, + "grad_norm": 12565.339155193906, + "learning_rate": 4.980166765056193e-07, + "logits": -1.3193691968917847, + "logps": -83.46347045898438, + "loss": 52.7234, + "objective": 56.7745246887207, + "ranking_idealized": 0.6208333373069763, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.6083333492279053, + "regularize": 0.13472451269626617, + "step": 970, + "wo_beta": 15.647109031677246 + }, + { + "dpo_loss": 0.5260103344917297, + "epoch": 2.7633443552196506, + "grad_norm": 13363.677196616523, + "learning_rate": 4.930585725597247e-07, + "logits": -1.240022897720337, + "logps": -81.51500701904297, + "loss": 50.997, + "objective": 53.95423889160156, + "ranking_idealized": 0.6333333253860474, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.625, + "regularize": 0.12574762105941772, + "step": 975, + "wo_beta": 16.371328353881836 + }, + { + "dpo_loss": 0.5399420261383057, + "epoch": 2.7775153519130846, + "grad_norm": 13969.44472204385, + "learning_rate": 4.881011512269463e-07, + "logits": -1.35780930519104, + "logps": -81.2794189453125, + "loss": 51.6737, + "objective": 55.6290283203125, + "ranking_idealized": 0.5916666388511658, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5666666626930237, + "regularize": 0.12999171018600464, + "step": 980, + "wo_beta": 14.558424949645996 + }, + { + "dpo_loss": 0.5281099677085876, + "epoch": 2.7916863486065187, + "grad_norm": 11586.92970672364, + "learning_rate": 4.831449000151997e-07, + "logits": -1.205262303352356, + "logps": -79.56948852539062, + "loss": 49.5107, + "objective": 46.61149597167969, + "ranking_idealized": 0.5791666507720947, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.5791666507720947, + "regularize": 0.10813379287719727, + "step": 985, + "wo_beta": 14.642525672912598 + }, + { + "dpo_loss": 0.5295001268386841, + "epoch": 2.8058573452999527, + "grad_norm": 12278.903797254565, + "learning_rate": 4.78190306317332e-07, + "logits": -1.268909215927124, + "logps": -82.44329071044922, + "loss": 47.3581, + "objective": 51.39979553222656, + "ranking_idealized": 0.5666666626930237, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.5708333253860474, + "regularize": 0.11149868369102478, + "step": 990, + "wo_beta": 16.07427406311035 + }, + { + "dpo_loss": 0.5399483442306519, + "epoch": 2.820028341993387, + "grad_norm": 12982.312529844054, + "learning_rate": 4.732378573631924e-07, + "logits": -1.3312995433807373, + "logps": -80.66969299316406, + "loss": 49.758, + "objective": 55.4227409362793, + "ranking_idealized": 0.6208333373069763, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.6166666746139526, + "regularize": 0.12711945176124573, + "step": 995, + "wo_beta": 16.746198654174805 + }, + { + "dpo_loss": 0.526489794254303, + "epoch": 2.8341993386868207, + "grad_norm": 12173.86125870911, + "learning_rate": 4.682880401717177e-07, + "logits": -1.271032691001892, + "logps": -79.56470489501953, + "loss": 45.9449, + "objective": 40.13682174682617, + "ranking_idealized": 0.6166666746139526, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.6166666746139526, + "regularize": 0.09338556975126266, + "step": 1000, + "wo_beta": 15.067657470703125 + }, + { + "epoch": 2.8341993386868207, + "eval_dpo_loss": 0.6791692972183228, + "eval_logits": -1.2989623546600342, + "eval_logps": -87.81481170654297, + "eval_loss": 187.70785522460938, + "eval_objective": 183.56761169433594, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.5300207138061523, + "eval_regularize": 0.4161270260810852, + "eval_runtime": 491.2083, + "eval_samples_per_second": 11.787, + "eval_steps_per_second": 0.983, + "eval_wo_beta": 15.995977401733398, + "step": 1000 + }, + { + "dpo_loss": 0.5403110384941101, + "epoch": 2.848370335380255, + "grad_norm": 13425.378037887134, + "learning_rate": 4.633413415030401e-07, + "logits": -1.2654575109481812, + "logps": -80.49606323242188, + "loss": 48.7319, + "objective": 47.16264724731445, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.6000000238418579, + "regularize": 0.10651734471321106, + "step": 1005, + "wo_beta": 16.28557586669922 + }, + { + "dpo_loss": 0.5306838750839233, + "epoch": 2.862541332073689, + "grad_norm": 13143.964606052063, + "learning_rate": 4.5839824781061886e-07, + "logits": -1.32563316822052, + "logps": -81.29505920410156, + "loss": 51.8292, + "objective": 49.8996467590332, + "ranking_idealized": 0.612500011920929, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.6083333492279053, + "regularize": 0.11315880715847015, + "step": 1010, + "wo_beta": 15.957425117492676 + }, + { + "dpo_loss": 0.5331242084503174, + "epoch": 2.8767123287671232, + "grad_norm": 12600.166168740529, + "learning_rate": 4.53459245193404e-07, + "logits": -1.2467234134674072, + "logps": -80.21656799316406, + "loss": 44.7609, + "objective": 42.55329895019531, + "ranking_idealized": 0.6541666388511658, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.6333333253860474, + "regularize": 0.09937479346990585, + "step": 1015, + "wo_beta": 15.586889266967773 + }, + { + "dpo_loss": 0.5346752405166626, + "epoch": 2.8908833254605573, + "grad_norm": 14111.243992297606, + "learning_rate": 4.4852481934803277e-07, + "logits": -1.2140835523605347, + "logps": -82.13688659667969, + "loss": 46.0337, + "objective": 43.36848831176758, + "ranking_idealized": 0.5833333134651184, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.5708333253860474, + "regularize": 0.09756989777088165, + "step": 1020, + "wo_beta": 18.553333282470703 + }, + { + "dpo_loss": 0.5420379042625427, + "epoch": 2.9050543221539913, + "grad_norm": 12276.868793163067, + "learning_rate": 4.435954555210676e-07, + "logits": -1.3084660768508911, + "logps": -81.93505096435547, + "loss": 46.0381, + "objective": 48.77103042602539, + "ranking_idealized": 0.6208333373069763, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.612500011920929, + "regularize": 0.10658075660467148, + "step": 1025, + "wo_beta": 15.018412590026855 + }, + { + "dpo_loss": 0.5342170000076294, + "epoch": 2.9192253188474258, + "grad_norm": 12677.814826562366, + "learning_rate": 4.3867163846127674e-07, + "logits": -1.3350425958633423, + "logps": -81.84678649902344, + "loss": 47.2693, + "objective": 41.97852325439453, + "ranking_idealized": 0.5416666865348816, + "ranking_idealized_expo": 0.4791666567325592, + "ranking_simple": 0.5458333492279053, + "regularize": 0.09485388547182083, + "step": 1030, + "wo_beta": 16.526702880859375 + }, + { + "dpo_loss": 0.5289677977561951, + "epoch": 2.9333963155408598, + "grad_norm": 13399.69328236257, + "learning_rate": 4.3375385237196507e-07, + "logits": -1.3010871410369873, + "logps": -82.80349731445312, + "loss": 43.5011, + "objective": 41.88113784790039, + "ranking_idealized": 0.625, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.6083333492279053, + "regularize": 0.09509758651256561, + "step": 1035, + "wo_beta": 14.980511665344238 + }, + { + "dpo_loss": 0.5463218688964844, + "epoch": 2.947567312234294, + "grad_norm": 12072.270375502065, + "learning_rate": 4.2884258086335745e-07, + "logits": -1.2975058555603027, + "logps": -82.66610717773438, + "loss": 45.0537, + "objective": 48.81401062011719, + "ranking_idealized": 0.5708333253860474, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.5583333373069763, + "regularize": 0.10974690318107605, + "step": 1040, + "wo_beta": 16.447132110595703 + }, + { + "dpo_loss": 0.5381548404693604, + "epoch": 2.961738308927728, + "grad_norm": 13887.433179664138, + "learning_rate": 4.2393830690504165e-07, + "logits": -1.2503575086593628, + "logps": -84.04967498779297, + "loss": 44.8665, + "objective": 42.995948791503906, + "ranking_idealized": 0.6208333373069763, + "ranking_idealized_expo": 0.5666666626930237, + "ranking_simple": 0.6208333373069763, + "regularize": 0.09885497391223907, + "step": 1045, + "wo_beta": 18.053199768066406 + }, + { + "dpo_loss": 0.5348830819129944, + "epoch": 2.975909305621162, + "grad_norm": 13502.021630049758, + "learning_rate": 4.1904151277847305e-07, + "logits": -1.2968212366104126, + "logps": -79.87500762939453, + "loss": 49.0003, + "objective": 50.04111862182617, + "ranking_idealized": 0.6541666388511658, + "ranking_idealized_expo": 0.5541666746139526, + "ranking_simple": 0.6458333134651184, + "regularize": 0.11395598948001862, + "step": 1050, + "wo_beta": 15.20615291595459 + }, + { + "epoch": 2.975909305621162, + "eval_dpo_loss": 0.6791855692863464, + "eval_logits": -1.2778165340423584, + "eval_logps": -88.30037689208984, + "eval_loss": 188.00396728515625, + "eval_objective": 184.00155639648438, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.5341615080833435, + "eval_regularize": 0.41730284690856934, + "eval_runtime": 486.4753, + "eval_samples_per_second": 11.902, + "eval_steps_per_second": 0.993, + "eval_wo_beta": 16.040319442749023, + "step": 1050 + }, + { + "dpo_loss": 0.5416039824485779, + "epoch": 2.9900803023145963, + "grad_norm": 13186.167879544177, + "learning_rate": 4.141526800295481e-07, + "logits": -1.2704575061798096, + "logps": -81.0667724609375, + "loss": 43.7316, + "objective": 46.92390441894531, + "ranking_idealized": 0.6083333492279053, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.6041666865348816, + "regularize": 0.11228723078966141, + "step": 1055, + "wo_beta": 15.320064544677734 + }, + { + "dpo_loss": 0.5175911784172058, + "epoch": 3.0042512990080303, + "grad_norm": 11979.18084085825, + "learning_rate": 4.092722894212487e-07, + "logits": -1.291445255279541, + "logps": -82.69534301757812, + "loss": 44.4026, + "objective": 47.78953552246094, + "ranking_idealized": 0.6083333492279053, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.612500011920929, + "regularize": 0.10625550150871277, + "step": 1060, + "wo_beta": 15.794866561889648 + }, + { + "dpo_loss": 0.523690938949585, + "epoch": 3.0184222957014644, + "grad_norm": 12600.45509733284, + "learning_rate": 4.0440082088636546e-07, + "logits": -1.3265612125396729, + "logps": -84.14775848388672, + "loss": 41.3718, + "objective": 38.99584197998047, + "ranking_idealized": 0.5916666388511658, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5958333611488342, + "regularize": 0.09401161223649979, + "step": 1065, + "wo_beta": 16.806358337402344 + }, + { + "dpo_loss": 0.5429927706718445, + "epoch": 3.0325932923948984, + "grad_norm": 13459.06076930384, + "learning_rate": 3.995387534803005e-07, + "logits": -1.2817329168319702, + "logps": -81.6548080444336, + "loss": 44.6891, + "objective": 43.239158630371094, + "ranking_idealized": 0.637499988079071, + "ranking_idealized_expo": 0.5791666507720947, + "ranking_simple": 0.625, + "regularize": 0.09600695967674255, + "step": 1070, + "wo_beta": 17.19818878173828 + }, + { + "dpo_loss": 0.5399213433265686, + "epoch": 3.0467642890883324, + "grad_norm": 12451.835928919867, + "learning_rate": 3.9468656533395934e-07, + "logits": -1.2840524911880493, + "logps": -81.64595031738281, + "loss": 38.4816, + "objective": 40.692039489746094, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.5833333134651184, + "regularize": 0.09315841645002365, + "step": 1075, + "wo_beta": 15.41653060913086 + }, + { + "dpo_loss": 0.5243366360664368, + "epoch": 3.0609352857817664, + "grad_norm": 12956.687806008335, + "learning_rate": 3.8984473360672967e-07, + "logits": -1.3753383159637451, + "logps": -82.9805908203125, + "loss": 40.18, + "objective": 39.79288864135742, + "ranking_idealized": 0.6416666507720947, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.6291666626930237, + "regularize": 0.09019829332828522, + "step": 1080, + "wo_beta": 17.60961151123047 + }, + { + "dpo_loss": 0.5346547365188599, + "epoch": 3.075106282475201, + "grad_norm": 12876.9232360082, + "learning_rate": 3.850137344395598e-07, + "logits": -1.318056344985962, + "logps": -83.30501556396484, + "loss": 39.6664, + "objective": 41.40624237060547, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.49166667461395264, + "ranking_simple": 0.5458333492279053, + "regularize": 0.0875302404165268, + "step": 1085, + "wo_beta": 15.289043426513672 + }, + { + "dpo_loss": 0.5314586162567139, + "epoch": 3.089277279168635, + "grad_norm": 12423.675708081033, + "learning_rate": 3.801940429081345e-07, + "logits": -1.297440767288208, + "logps": -81.59999084472656, + "loss": 40.7964, + "objective": 42.56759262084961, + "ranking_idealized": 0.6083333492279053, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.6291666626930237, + "regularize": 0.09419377893209457, + "step": 1090, + "wo_beta": 15.919710159301758 + }, + { + "dpo_loss": 0.5284194946289062, + "epoch": 3.103448275862069, + "grad_norm": 12843.979452626416, + "learning_rate": 3.7538613297615706e-07, + "logits": -1.2590415477752686, + "logps": -83.42412567138672, + "loss": 40.9535, + "objective": 44.701377868652344, + "ranking_idealized": 0.625, + "ranking_idealized_expo": 0.5708333253860474, + "ranking_simple": 0.612500011920929, + "regularize": 0.10055555403232574, + "step": 1095, + "wo_beta": 16.360620498657227 + }, + { + "dpo_loss": 0.5163142681121826, + "epoch": 3.117619272555503, + "grad_norm": 11098.073660723994, + "learning_rate": 3.7059047744873955e-07, + "logits": -1.2521919012069702, + "logps": -82.35820770263672, + "loss": 40.2428, + "objective": 41.402366638183594, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.5833333134651184, + "regularize": 0.08817121386528015, + "step": 1100, + "wo_beta": 16.056493759155273 + }, + { + "epoch": 3.117619272555503, + "eval_dpo_loss": 0.680143415927887, + "eval_logits": -1.2988417148590088, + "eval_logps": -88.64698028564453, + "eval_loss": 188.7165985107422, + "eval_objective": 184.38153076171875, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.532608687877655, + "eval_regularize": 0.41805195808410645, + "eval_runtime": 486.8996, + "eval_samples_per_second": 11.892, + "eval_steps_per_second": 0.992, + "eval_wo_beta": 15.998079299926758, + "step": 1100 + }, + { + "dpo_loss": 0.532370924949646, + "epoch": 3.131790269248937, + "grad_norm": 12884.072735206462, + "learning_rate": 3.658075479259087e-07, + "logits": -1.3051170110702515, + "logps": -82.9980239868164, + "loss": 43.5912, + "objective": 42.78650665283203, + "ranking_idealized": 0.5833333134651184, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5666666626930237, + "regularize": 0.09705787152051926, + "step": 1105, + "wo_beta": 17.55166244506836 + }, + { + "dpo_loss": 0.5135348439216614, + "epoch": 3.1459612659423715, + "grad_norm": 13710.402810117148, + "learning_rate": 3.6103781475622786e-07, + "logits": -1.2103074789047241, + "logps": -83.2777328491211, + "loss": 35.6812, + "objective": 35.80618667602539, + "ranking_idealized": 0.612500011920929, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.6083333492279053, + "regularize": 0.07978586852550507, + "step": 1110, + "wo_beta": 16.995450973510742 + }, + { + "dpo_loss": 0.5229103565216064, + "epoch": 3.1601322626358055, + "grad_norm": 12411.913045675534, + "learning_rate": 3.562817469905442e-07, + "logits": -1.2619822025299072, + "logps": -82.1358642578125, + "loss": 38.5951, + "objective": 36.70951461791992, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.6083333492279053, + "regularize": 0.08537817001342773, + "step": 1115, + "wo_beta": 16.52168846130371 + }, + { + "dpo_loss": 0.5169024467468262, + "epoch": 3.1743032593292395, + "grad_norm": 12747.527049209308, + "learning_rate": 3.5153981233586274e-07, + "logits": -1.2052761316299438, + "logps": -80.89930725097656, + "loss": 35.9412, + "objective": 35.01757049560547, + "ranking_idealized": 0.612500011920929, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.6083333492279053, + "regularize": 0.08051317185163498, + "step": 1120, + "wo_beta": 15.474043846130371 + }, + { + "dpo_loss": 0.5393829941749573, + "epoch": 3.1884742560226735, + "grad_norm": 13071.804290926188, + "learning_rate": 3.468124771093519e-07, + "logits": -1.263301134109497, + "logps": -83.0383529663086, + "loss": 37.8478, + "objective": 38.899776458740234, + "ranking_idealized": 0.612500011920929, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.6000000238418579, + "regularize": 0.09170109778642654, + "step": 1125, + "wo_beta": 15.088132858276367 + }, + { + "dpo_loss": 0.5208443999290466, + "epoch": 3.2026452527161076, + "grad_norm": 13439.120791203995, + "learning_rate": 3.421002061924876e-07, + "logits": -1.298660159111023, + "logps": -82.7750473022461, + "loss": 34.6631, + "objective": 33.578922271728516, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.5958333611488342, + "regularize": 0.07489284873008728, + "step": 1130, + "wo_beta": 15.427777290344238 + }, + { + "dpo_loss": 0.5245645642280579, + "epoch": 3.2168162494095416, + "grad_norm": 11480.67381753106, + "learning_rate": 3.374034629853356e-07, + "logits": -1.3043017387390137, + "logps": -80.89866638183594, + "loss": 35.7927, + "objective": 35.20330047607422, + "ranking_idealized": 0.5291666388511658, + "ranking_idealized_expo": 0.4625000059604645, + "ranking_simple": 0.5249999761581421, + "regularize": 0.08528413623571396, + "step": 1135, + "wo_beta": 16.220800399780273 + }, + { + "dpo_loss": 0.5402042269706726, + "epoch": 3.230987246102976, + "grad_norm": 12946.274800579084, + "learning_rate": 3.327227093609824e-07, + "logits": -1.1506885290145874, + "logps": -81.15502166748047, + "loss": 40.5475, + "objective": 40.8009033203125, + "ranking_idealized": 0.5958333611488342, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5833333134651184, + "regularize": 0.09735415130853653, + "step": 1140, + "wo_beta": 16.101863861083984 + }, + { + "dpo_loss": 0.5243603587150574, + "epoch": 3.24515824279641, + "grad_norm": 13000.005011572795, + "learning_rate": 3.2805840562011465e-07, + "logits": -1.2146347761154175, + "logps": -83.07351684570312, + "loss": 40.1207, + "objective": 42.64434814453125, + "ranking_idealized": 0.6541666388511658, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.6333333253860474, + "regularize": 0.09260058403015137, + "step": 1145, + "wo_beta": 15.928021430969238 + }, + { + "dpo_loss": 0.5314944386482239, + "epoch": 3.259329239489844, + "grad_norm": 12558.545529727347, + "learning_rate": 3.234110104457536e-07, + "logits": -1.352626085281372, + "logps": -80.92655181884766, + "loss": 37.177, + "objective": 37.67503356933594, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.5874999761581421, + "regularize": 0.08233184367418289, + "step": 1150, + "wo_beta": 14.541799545288086 + }, + { + "epoch": 3.259329239489844, + "eval_dpo_loss": 0.6804221868515015, + "eval_logits": -1.2842507362365723, + "eval_logps": -87.92387390136719, + "eval_loss": 188.25633239746094, + "eval_objective": 184.33511352539062, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.5357142686843872, + "eval_regularize": 0.4183206856250763, + "eval_runtime": 488.1945, + "eval_samples_per_second": 11.86, + "eval_steps_per_second": 0.989, + "eval_wo_beta": 16.0123348236084, + "step": 1150 + }, + { + "dpo_loss": 0.5527331829071045, + "epoch": 3.273500236183278, + "grad_norm": 12600.213804572502, + "learning_rate": 3.187809808581492e-07, + "logits": -1.225222110748291, + "logps": -80.92967987060547, + "loss": 37.9886, + "objective": 43.58564376831055, + "ranking_idealized": 0.5958333611488342, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5874999761581421, + "regularize": 0.09985193610191345, + "step": 1155, + "wo_beta": 16.76634407043457 + }, + { + "dpo_loss": 0.5320346355438232, + "epoch": 3.287671232876712, + "grad_norm": 13215.429208773, + "learning_rate": 3.141687721698363e-07, + "logits": -1.287786602973938, + "logps": -83.13336944580078, + "loss": 34.714, + "objective": 32.02961349487305, + "ranking_idealized": 0.5416666865348816, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.5416666865348816, + "regularize": 0.07429231703281403, + "step": 1160, + "wo_beta": 17.868885040283203 + }, + { + "dpo_loss": 0.5377687215805054, + "epoch": 3.3018422295701466, + "grad_norm": 12734.199495358569, + "learning_rate": 3.095748379408603e-07, + "logits": -1.3172459602355957, + "logps": -80.96276092529297, + "loss": 34.2009, + "objective": 33.96812057495117, + "ranking_idealized": 0.6333333253860474, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.6166666746139526, + "regularize": 0.08176220953464508, + "step": 1165, + "wo_beta": 15.74937629699707 + }, + { + "dpo_loss": 0.5269596576690674, + "epoch": 3.3160132262635806, + "grad_norm": 14339.996000811438, + "learning_rate": 3.049996299341742e-07, + "logits": -1.267351746559143, + "logps": -82.11973571777344, + "loss": 34.9879, + "objective": 35.85028076171875, + "ranking_idealized": 0.6041666865348816, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.6041666865348816, + "regularize": 0.08146883547306061, + "step": 1170, + "wo_beta": 15.652009963989258 + }, + { + "dpo_loss": 0.531762957572937, + "epoch": 3.3301842229570147, + "grad_norm": 12543.440661095656, + "learning_rate": 3.004435980712129e-07, + "logits": -1.257896900177002, + "logps": -82.12284088134766, + "loss": 38.0949, + "objective": 35.93735122680664, + "ranking_idealized": 0.5708333253860474, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.574999988079071, + "regularize": 0.08384241163730621, + "step": 1175, + "wo_beta": 13.72645378112793 + }, + { + "dpo_loss": 0.5355243682861328, + "epoch": 3.3443552196504487, + "grad_norm": 11718.716469797973, + "learning_rate": 2.959071903876486e-07, + "logits": -1.3486711978912354, + "logps": -82.8729248046875, + "loss": 35.7799, + "objective": 35.360801696777344, + "ranking_idealized": 0.5583333373069763, + "ranking_idealized_expo": 0.49166667461395264, + "ranking_simple": 0.5625, + "regularize": 0.07788892835378647, + "step": 1180, + "wo_beta": 16.274147033691406 + }, + { + "dpo_loss": 0.5254151225090027, + "epoch": 3.3585262163438827, + "grad_norm": 13437.960403836023, + "learning_rate": 2.913908529893304e-07, + "logits": -1.1963578462600708, + "logps": -83.22509002685547, + "loss": 33.4865, + "objective": 33.50373840332031, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.5833333134651184, + "regularize": 0.07612194865942001, + "step": 1185, + "wo_beta": 15.737934112548828 + }, + { + "dpo_loss": 0.5395456552505493, + "epoch": 3.372697213037317, + "grad_norm": 12206.27505785514, + "learning_rate": 2.86895030008416e-07, + "logits": -1.3092117309570312, + "logps": -81.93521118164062, + "loss": 33.053, + "objective": 29.232421875, + "ranking_idealized": 0.5833333134651184, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.5874999761581421, + "regularize": 0.07262556999921799, + "step": 1190, + "wo_beta": 15.487491607666016 + }, + { + "dpo_loss": 0.5137616991996765, + "epoch": 3.386868209730751, + "grad_norm": 11921.58688181337, + "learning_rate": 2.824201635596951e-07, + "logits": -1.2198973894119263, + "logps": -82.35958099365234, + "loss": 29.3695, + "objective": 29.94867706298828, + "ranking_idealized": 0.5833333134651184, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.5666666626930237, + "regularize": 0.06865646690130234, + "step": 1195, + "wo_beta": 15.531022071838379 + }, + { + "dpo_loss": 0.5208079814910889, + "epoch": 3.4010392064241852, + "grad_norm": 12726.149489712327, + "learning_rate": 2.779666936971129e-07, + "logits": -1.3937805891036987, + "logps": -82.6730728149414, + "loss": 34.9809, + "objective": 31.1435489654541, + "ranking_idealized": 0.6208333373069763, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.6166666746139526, + "regularize": 0.07431173324584961, + "step": 1200, + "wo_beta": 18.369197845458984 + }, + { + "epoch": 3.4010392064241852, + "eval_dpo_loss": 0.6805519461631775, + "eval_logits": -1.289951205253601, + "eval_logps": -88.11286926269531, + "eval_loss": 189.17047119140625, + "eval_objective": 184.87181091308594, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.532608687877655, + "eval_regularize": 0.41934508085250854, + "eval_runtime": 498.5381, + "eval_samples_per_second": 11.614, + "eval_steps_per_second": 0.969, + "eval_wo_beta": 15.953052520751953, + "step": 1200 + }, + { + "dpo_loss": 0.5354551672935486, + "epoch": 3.4152102031176192, + "grad_norm": 12302.298902716244, + "learning_rate": 2.7353505837049583e-07, + "logits": -1.293818712234497, + "logps": -81.88545989990234, + "loss": 33.6714, + "objective": 31.525800704956055, + "ranking_idealized": 0.6333333253860474, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.6208333373069763, + "regularize": 0.07086090743541718, + "step": 1205, + "wo_beta": 15.15488052368164 + }, + { + "dpo_loss": 0.5308886170387268, + "epoch": 3.4293811998110533, + "grad_norm": 11960.890598119064, + "learning_rate": 2.6912569338248315e-07, + "logits": -1.300658941268921, + "logps": -83.05274200439453, + "loss": 36.2356, + "objective": 35.77425003051758, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5791666507720947, + "regularize": 0.08412022143602371, + "step": 1210, + "wo_beta": 16.733659744262695 + }, + { + "dpo_loss": 0.528823733329773, + "epoch": 3.4435521965044873, + "grad_norm": 13078.935439317174, + "learning_rate": 2.64739032345671e-07, + "logits": -1.3109962940216064, + "logps": -84.07682037353516, + "loss": 35.0362, + "objective": 32.51463317871094, + "ranking_idealized": 0.5666666626930237, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.5666666626930237, + "regularize": 0.0771927461028099, + "step": 1215, + "wo_beta": 15.212308883666992 + }, + { + "dpo_loss": 0.540026843547821, + "epoch": 3.4577231931979218, + "grad_norm": 12256.162682293258, + "learning_rate": 2.603755066399718e-07, + "logits": -1.149971842765808, + "logps": -82.9686508178711, + "loss": 33.1832, + "objective": 32.34642028808594, + "ranking_idealized": 0.6041666865348816, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.5833333134651184, + "regularize": 0.06764715164899826, + "step": 1220, + "wo_beta": 16.678075790405273 + }, + { + "dpo_loss": 0.524185061454773, + "epoch": 3.471894189891356, + "grad_norm": 12930.685272364457, + "learning_rate": 2.560355453701919e-07, + "logits": -1.302108645439148, + "logps": -82.00885772705078, + "loss": 33.7294, + "objective": 32.768775939941406, + "ranking_idealized": 0.5666666626930237, + "ranking_idealized_expo": 0.46666666865348816, + "ranking_simple": 0.5541666746139526, + "regularize": 0.0753529891371727, + "step": 1225, + "wo_beta": 16.024269104003906 + }, + { + "dpo_loss": 0.5251755118370056, + "epoch": 3.48606518658479, + "grad_norm": 12434.433063668528, + "learning_rate": 2.517195753238345e-07, + "logits": -1.325141191482544, + "logps": -82.18378448486328, + "loss": 35.229, + "objective": 33.25638961791992, + "ranking_idealized": 0.6291666626930237, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.625, + "regularize": 0.0771695226430893, + "step": 1230, + "wo_beta": 16.292001724243164 + }, + { + "dpo_loss": 0.5132806897163391, + "epoch": 3.500236183278224, + "grad_norm": 13558.533453277203, + "learning_rate": 2.474280209291299e-07, + "logits": -1.245792031288147, + "logps": -81.74018096923828, + "loss": 33.2282, + "objective": 33.390872955322266, + "ranking_idealized": 0.5916666388511658, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.5874999761581421, + "regularize": 0.07453177124261856, + "step": 1235, + "wo_beta": 16.363548278808594 + }, + { + "dpo_loss": 0.5296925902366638, + "epoch": 3.514407179971658, + "grad_norm": 12949.63094083325, + "learning_rate": 2.4316130421329696e-07, + "logits": -1.238582968711853, + "logps": -82.47282409667969, + "loss": 34.0652, + "objective": 31.30968475341797, + "ranking_idealized": 0.5708333253860474, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.5666666626930237, + "regularize": 0.06809426844120026, + "step": 1240, + "wo_beta": 15.522791862487793 + }, + { + "dpo_loss": 0.5323511362075806, + "epoch": 3.528578176665092, + "grad_norm": 13527.106344889547, + "learning_rate": 2.389198447610418e-07, + "logits": -1.3098766803741455, + "logps": -83.17538452148438, + "loss": 30.2807, + "objective": 31.539880752563477, + "ranking_idealized": 0.6416666507720947, + "ranking_idealized_expo": 0.5541666746139526, + "ranking_simple": 0.625, + "regularize": 0.07134827226400375, + "step": 1245, + "wo_beta": 15.821925163269043 + }, + { + "dpo_loss": 0.5260218977928162, + "epoch": 3.5427491733585263, + "grad_norm": 13239.929991928584, + "learning_rate": 2.3470405967329604e-07, + "logits": -1.2133029699325562, + "logps": -81.8626480102539, + "loss": 34.073, + "objective": 34.22465515136719, + "ranking_idealized": 0.6291666626930237, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.625, + "regularize": 0.08072555810213089, + "step": 1250, + "wo_beta": 15.293652534484863 + }, + { + "epoch": 3.5427491733585263, + "eval_dpo_loss": 0.6802147626876831, + "eval_logits": -1.289227843284607, + "eval_logps": -88.56167602539062, + "eval_loss": 188.2202911376953, + "eval_objective": 184.19659423828125, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.533643901348114, + "eval_regularize": 0.4176720380783081, + "eval_runtime": 501.867, + "eval_samples_per_second": 11.537, + "eval_steps_per_second": 0.962, + "eval_wo_beta": 16.002193450927734, + "step": 1250 + }, + { + "dpo_loss": 0.5387216806411743, + "epoch": 3.5569201700519604, + "grad_norm": 12534.49899559166, + "learning_rate": 2.3051436352620036e-07, + "logits": -1.2683520317077637, + "logps": -82.32015991210938, + "loss": 36.4025, + "objective": 32.02161407470703, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.4791666567325592, + "ranking_simple": 0.5666666626930237, + "regularize": 0.07073788344860077, + "step": 1255, + "wo_beta": 15.795002937316895 + }, + { + "dpo_loss": 0.530408501625061, + "epoch": 3.5710911667453944, + "grad_norm": 12912.721697415427, + "learning_rate": 2.2635116833033392e-07, + "logits": -1.2373536825180054, + "logps": -81.3061294555664, + "loss": 30.8038, + "objective": 33.21593475341797, + "ranking_idealized": 0.6333333253860474, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.6208333373069763, + "regularize": 0.07438240200281143, + "step": 1260, + "wo_beta": 16.231142044067383 + }, + { + "dpo_loss": 0.5247560739517212, + "epoch": 3.5852621634388284, + "grad_norm": 12931.353378545553, + "learning_rate": 2.2221488349019902e-07, + "logits": -1.2455730438232422, + "logps": -80.93061828613281, + "loss": 29.6738, + "objective": 31.222820281982422, + "ranking_idealized": 0.6166666746139526, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.612500011920929, + "regularize": 0.06749995797872543, + "step": 1265, + "wo_beta": 14.711896896362305 + }, + { + "dpo_loss": 0.5356096625328064, + "epoch": 3.5994331601322624, + "grad_norm": 13549.0763306813, + "learning_rate": 2.181059157639598e-07, + "logits": -1.3499952554702759, + "logps": -81.31751251220703, + "loss": 30.5338, + "objective": 30.125825881958008, + "ranking_idealized": 0.5333333611488342, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.5333333611488342, + "regularize": 0.06899719685316086, + "step": 1270, + "wo_beta": 13.772916793823242 + }, + { + "dpo_loss": 0.5210896134376526, + "epoch": 3.613604156825697, + "grad_norm": 14924.204646126253, + "learning_rate": 2.1402466922344303e-07, + "logits": -1.210523247718811, + "logps": -82.34052276611328, + "loss": 29.82, + "objective": 29.18175506591797, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.49166667461395264, + "ranking_simple": 0.5791666507720947, + "regularize": 0.06544475257396698, + "step": 1275, + "wo_beta": 15.473977088928223 + }, + { + "dpo_loss": 0.5307682752609253, + "epoch": 3.627775153519131, + "grad_norm": 12824.51476470017, + "learning_rate": 2.0997154521440097e-07, + "logits": -1.2915035486221313, + "logps": -81.79452514648438, + "loss": 30.8024, + "objective": 29.49608612060547, + "ranking_idealized": 0.5541666746139526, + "ranking_idealized_expo": 0.4541666805744171, + "ranking_simple": 0.5375000238418579, + "regularize": 0.06567243486642838, + "step": 1280, + "wo_beta": 15.875335693359375 + }, + { + "dpo_loss": 0.5249419212341309, + "epoch": 3.641946150212565, + "grad_norm": 13428.13142246555, + "learning_rate": 2.0594694231704373e-07, + "logits": -1.2426308393478394, + "logps": -81.00833892822266, + "loss": 30.3043, + "objective": 30.617321014404297, + "ranking_idealized": 0.5916666388511658, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.5916666388511658, + "regularize": 0.07462318986654282, + "step": 1285, + "wo_beta": 17.443321228027344 + }, + { + "dpo_loss": 0.5173429250717163, + "epoch": 3.656117146905999, + "grad_norm": 12671.749777744226, + "learning_rate": 2.0195125630684428e-07, + "logits": -1.245200276374817, + "logps": -81.8724594116211, + "loss": 28.4671, + "objective": 27.68103790283203, + "ranking_idealized": 0.6166666746139526, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.6000000238418579, + "regularize": 0.06590177118778229, + "step": 1290, + "wo_beta": 17.08915138244629 + }, + { + "dpo_loss": 0.5335291028022766, + "epoch": 3.670288143599433, + "grad_norm": 13021.653293493737, + "learning_rate": 1.979848801156167e-07, + "logits": -1.3040084838867188, + "logps": -81.88176727294922, + "loss": 28.4196, + "objective": 28.575376510620117, + "ranking_idealized": 0.5958333611488342, + "ranking_idealized_expo": 0.49166667461395264, + "ranking_simple": 0.5874999761581421, + "regularize": 0.0632786899805069, + "step": 1295, + "wo_beta": 14.829022407531738 + }, + { + "dpo_loss": 0.5276142954826355, + "epoch": 3.6844591402928675, + "grad_norm": 11978.937253641576, + "learning_rate": 1.9404820379287672e-07, + "logits": -1.187487244606018, + "logps": -80.9906005859375, + "loss": 28.4565, + "objective": 28.971555709838867, + "ranking_idealized": 0.6041666865348816, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.6166666746139526, + "regularize": 0.06764063984155655, + "step": 1300, + "wo_beta": 17.09331512451172 + }, + { + "epoch": 3.6844591402928675, + "eval_dpo_loss": 0.680322527885437, + "eval_logits": -1.2942335605621338, + "eval_logps": -88.08357238769531, + "eval_loss": 188.31890869140625, + "eval_objective": 184.1293182373047, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.5331262946128845, + "eval_regularize": 0.4177800714969635, + "eval_runtime": 491.4726, + "eval_samples_per_second": 11.781, + "eval_steps_per_second": 0.983, + "eval_wo_beta": 16.008142471313477, + "step": 1300 + }, + { + "dpo_loss": 0.5322309732437134, + "epoch": 3.6986301369863015, + "grad_norm": 13019.22557555901, + "learning_rate": 1.9014161446748422e-07, + "logits": -1.2798058986663818, + "logps": -81.99161529541016, + "loss": 30.5992, + "objective": 32.30867004394531, + "ranking_idealized": 0.5458333492279053, + "ranking_idealized_expo": 0.47083333134651184, + "ranking_simple": 0.5416666865348816, + "regularize": 0.0735287144780159, + "step": 1305, + "wo_beta": 15.798765182495117 + }, + { + "dpo_loss": 0.5227470397949219, + "epoch": 3.7128011336797355, + "grad_norm": 12226.371631865619, + "learning_rate": 1.8626549630957395e-07, + "logits": -1.2566769123077393, + "logps": -81.54576110839844, + "loss": 28.0805, + "objective": 26.042844772338867, + "ranking_idealized": 0.5833333134651184, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5916666388511658, + "regularize": 0.06227840855717659, + "step": 1310, + "wo_beta": 15.27546501159668 + }, + { + "dpo_loss": 0.537932813167572, + "epoch": 3.7269721303731695, + "grad_norm": 12444.517818477534, + "learning_rate": 1.8242023049277555e-07, + "logits": -1.2929528951644897, + "logps": -81.47209167480469, + "loss": 30.7473, + "objective": 30.499658584594727, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.5583333373069763, + "regularize": 0.07173587381839752, + "step": 1315, + "wo_beta": 15.575103759765625 + }, + { + "dpo_loss": 0.5317214131355286, + "epoch": 3.7411431270666036, + "grad_norm": 13300.946248563114, + "learning_rate": 1.7860619515673032e-07, + "logits": -1.3597683906555176, + "logps": -83.02255249023438, + "loss": 29.6239, + "objective": 28.020469665527344, + "ranking_idealized": 0.6541666388511658, + "ranking_idealized_expo": 0.5708333253860474, + "ranking_simple": 0.6541666388511658, + "regularize": 0.06609723716974258, + "step": 1320, + "wo_beta": 16.70941734313965 + }, + { + "dpo_loss": 0.5467905402183533, + "epoch": 3.755314123760038, + "grad_norm": 11933.522036621489, + "learning_rate": 1.7482376536990474e-07, + "logits": -1.2760491371154785, + "logps": -81.77200317382812, + "loss": 28.58, + "objective": 27.297456741333008, + "ranking_idealized": 0.5791666507720947, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5791666507720947, + "regularize": 0.061256349086761475, + "step": 1325, + "wo_beta": 14.996780395507812 + }, + { + "dpo_loss": 0.5155090689659119, + "epoch": 3.769485120453472, + "grad_norm": 12146.906265203044, + "learning_rate": 1.7107331309270684e-07, + "logits": -1.2232296466827393, + "logps": -81.67552185058594, + "loss": 25.7046, + "objective": 24.283742904663086, + "ranking_idealized": 0.6041666865348816, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.6083333492279053, + "regularize": 0.05803535133600235, + "step": 1330, + "wo_beta": 14.960771560668945 + }, + { + "dpo_loss": 0.5197141766548157, + "epoch": 3.783656117146906, + "grad_norm": 13269.1257120231, + "learning_rate": 1.6735520714090778e-07, + "logits": -1.3548495769500732, + "logps": -82.88711547851562, + "loss": 25.5411, + "objective": 23.988988876342773, + "ranking_idealized": 0.637499988079071, + "ranking_idealized_expo": 0.5541666746139526, + "ranking_simple": 0.6333333253860474, + "regularize": 0.05831415578722954, + "step": 1335, + "wo_beta": 15.491255760192871 + }, + { + "dpo_loss": 0.5400987863540649, + "epoch": 3.79782711384034, + "grad_norm": 12222.682651732252, + "learning_rate": 1.6366981314937372e-07, + "logits": -1.3011000156402588, + "logps": -81.44950866699219, + "loss": 26.7414, + "objective": 27.633180618286133, + "ranking_idealized": 0.6166666746139526, + "ranking_idealized_expo": 0.5541666746139526, + "ranking_simple": 0.612500011920929, + "regularize": 0.06863755732774734, + "step": 1340, + "wo_beta": 15.513628005981445 + }, + { + "dpo_loss": 0.5207428336143494, + "epoch": 3.811998110533774, + "grad_norm": 13292.031759115218, + "learning_rate": 1.6001749353610815e-07, + "logits": -1.2988460063934326, + "logps": -81.9979019165039, + "loss": 27.5342, + "objective": 26.436460494995117, + "ranking_idealized": 0.6291666626930237, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.637499988079071, + "regularize": 0.060691170394420624, + "step": 1345, + "wo_beta": 16.72386360168457 + }, + { + "dpo_loss": 0.5372669100761414, + "epoch": 3.826169107227208, + "grad_norm": 12429.085011694839, + "learning_rate": 1.5639860746661338e-07, + "logits": -1.3200603723526, + "logps": -80.8891830444336, + "loss": 27.4636, + "objective": 27.883655548095703, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.5666666626930237, + "regularize": 0.06708240509033203, + "step": 1350, + "wo_beta": 15.541132926940918 + }, + { + "epoch": 3.826169107227208, + "eval_dpo_loss": 0.6802567839622498, + "eval_logits": -1.2973301410675049, + "eval_logps": -88.45857238769531, + "eval_loss": 188.3022003173828, + "eval_objective": 184.21910095214844, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.5320910811424255, + "eval_regularize": 0.4178454279899597, + "eval_runtime": 484.5227, + "eval_samples_per_second": 11.95, + "eval_steps_per_second": 0.997, + "eval_wo_beta": 15.999577522277832, + "step": 1350 + }, + { + "dpo_loss": 0.5401098132133484, + "epoch": 3.840340103920642, + "grad_norm": 12546.873988889934, + "learning_rate": 1.5281351081856976e-07, + "logits": -1.3091717958450317, + "logps": -81.95738983154297, + "loss": 24.53, + "objective": 23.978574752807617, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.6000000238418579, + "regularize": 0.05746602639555931, + "step": 1355, + "wo_beta": 15.96954345703125 + }, + { + "dpo_loss": 0.5322627425193787, + "epoch": 3.8545111006140766, + "grad_norm": 12396.074158573574, + "learning_rate": 1.492625561468393e-07, + "logits": -1.2270203828811646, + "logps": -81.94197082519531, + "loss": 27.8079, + "objective": 25.823699951171875, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5958333611488342, + "regularize": 0.06090990826487541, + "step": 1360, + "wo_beta": 15.92143440246582 + }, + { + "dpo_loss": 0.5215187668800354, + "epoch": 3.8686820973075107, + "grad_norm": 12924.951740893872, + "learning_rate": 1.4574609264879632e-07, + "logits": -1.2885017395019531, + "logps": -81.9835205078125, + "loss": 24.6244, + "objective": 21.932554244995117, + "ranking_idealized": 0.6166666746139526, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.612500011920929, + "regularize": 0.05161268636584282, + "step": 1365, + "wo_beta": 15.276032447814941 + }, + { + "dpo_loss": 0.5322207808494568, + "epoch": 3.8828530940009447, + "grad_norm": 11760.04729219421, + "learning_rate": 1.4226446612998671e-07, + "logits": -1.325412631034851, + "logps": -82.93399810791016, + "loss": 25.2873, + "objective": 22.0572566986084, + "ranking_idealized": 0.6208333373069763, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.637499988079071, + "regularize": 0.05589644983410835, + "step": 1370, + "wo_beta": 16.43442726135254 + }, + { + "dpo_loss": 0.5177661776542664, + "epoch": 3.8970240906943787, + "grad_norm": 12668.234366032097, + "learning_rate": 1.3881801897012224e-07, + "logits": -1.3054790496826172, + "logps": -81.97600555419922, + "loss": 25.3791, + "objective": 25.463533401489258, + "ranking_idealized": 0.5708333253860474, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.5791666507720947, + "regularize": 0.06239425763487816, + "step": 1375, + "wo_beta": 15.740779876708984 + }, + { + "dpo_loss": 0.5269008874893188, + "epoch": 3.9111950873878127, + "grad_norm": 11613.901925945589, + "learning_rate": 1.3540709008941147e-07, + "logits": -1.2125933170318604, + "logps": -81.08470153808594, + "loss": 24.5614, + "objective": 27.379404067993164, + "ranking_idealized": 0.6416666507720947, + "ranking_idealized_expo": 0.5666666626930237, + "ranking_simple": 0.6499999761581421, + "regularize": 0.06529100984334946, + "step": 1380, + "wo_beta": 16.324913024902344 + }, + { + "dpo_loss": 0.5204812288284302, + "epoch": 3.925366084081247, + "grad_norm": 12262.7455062338, + "learning_rate": 1.3203201491523024e-07, + "logits": -1.1872669458389282, + "logps": -82.68800354003906, + "loss": 26.2354, + "objective": 27.383338928222656, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.5708333253860474, + "regularize": 0.06124182417988777, + "step": 1385, + "wo_beta": 17.184247970581055 + }, + { + "dpo_loss": 0.5293174982070923, + "epoch": 3.9395370807746812, + "grad_norm": 11656.13294817261, + "learning_rate": 1.2869312534913685e-07, + "logits": -1.3625025749206543, + "logps": -81.69257354736328, + "loss": 25.8656, + "objective": 27.87486457824707, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.4541666805744171, + "ranking_simple": 0.5208333134651184, + "regularize": 0.06757337599992752, + "step": 1390, + "wo_beta": 14.843222618103027 + }, + { + "dpo_loss": 0.5323649644851685, + "epoch": 3.9537080774681153, + "grad_norm": 12688.563452750986, + "learning_rate": 1.2539074973423204e-07, + "logits": -1.344056487083435, + "logps": -82.50756072998047, + "loss": 25.269, + "objective": 20.71147346496582, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.574999988079071, + "regularize": 0.05190667137503624, + "step": 1395, + "wo_beta": 15.608321189880371 + }, + { + "dpo_loss": 0.5348060727119446, + "epoch": 3.9678790741615493, + "grad_norm": 13248.732573569929, + "learning_rate": 1.2212521282287093e-07, + "logits": -1.2224748134613037, + "logps": -80.45255279541016, + "loss": 27.3902, + "objective": 28.852842330932617, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5708333253860474, + "regularize": 0.0665024146437645, + "step": 1400, + "wo_beta": 16.69828987121582 + }, + { + "epoch": 3.9678790741615493, + "eval_dpo_loss": 0.6798388957977295, + "eval_logits": -1.2974461317062378, + "eval_logps": -88.3134765625, + "eval_loss": 187.96913146972656, + "eval_objective": 183.7815704345703, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.5320910811424255, + "eval_regularize": 0.4168493151664734, + "eval_runtime": 519.2809, + "eval_samples_per_second": 11.15, + "eval_steps_per_second": 0.93, + "eval_wo_beta": 15.978778839111328, + "step": 1400 + }, + { + "dpo_loss": 0.5318723320960999, + "epoch": 3.9820500708549833, + "grad_norm": 12626.278495743487, + "learning_rate": 1.1889683574472692e-07, + "logits": -1.2031117677688599, + "logps": -81.43195343017578, + "loss": 25.6619, + "objective": 22.53989028930664, + "ranking_idealized": 0.5958333611488342, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.5874999761581421, + "regularize": 0.05257093533873558, + "step": 1405, + "wo_beta": 14.43735122680664 + }, + { + "dpo_loss": 0.5325983762741089, + "epoch": 3.9962210675484178, + "grad_norm": 12962.865030589033, + "learning_rate": 1.15705935975212e-07, + "logits": -1.2109463214874268, + "logps": -80.95507049560547, + "loss": 25.0327, + "objective": 27.48863410949707, + "ranking_idealized": 0.6541666388511658, + "ranking_idealized_expo": 0.5583333373069763, + "ranking_simple": 0.637499988079071, + "regularize": 0.06891029328107834, + "step": 1410, + "wo_beta": 15.6097993850708 + }, + { + "dpo_loss": 0.5317092537879944, + "epoch": 4.010392064241851, + "grad_norm": 12833.61434685088, + "learning_rate": 1.1255282730425708e-07, + "logits": -1.2491552829742432, + "logps": -81.32047271728516, + "loss": 22.2145, + "objective": 24.41758155822754, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.5874999761581421, + "regularize": 0.06543368101119995, + "step": 1415, + "wo_beta": 15.283975601196289 + }, + { + "dpo_loss": 0.5239009261131287, + "epoch": 4.024563060935286, + "grad_norm": 13451.327899072105, + "learning_rate": 1.094378198054533e-07, + "logits": -1.353010654449463, + "logps": -83.2571792602539, + "loss": 23.0966, + "objective": 24.90163230895996, + "ranking_idealized": 0.5291666388511658, + "ranking_idealized_expo": 0.4416666626930237, + "ranking_simple": 0.5166666507720947, + "regularize": 0.0562543049454689, + "step": 1420, + "wo_beta": 16.40116310119629 + }, + { + "dpo_loss": 0.53034508228302, + "epoch": 4.03873405762872, + "grad_norm": 13582.157317581643, + "learning_rate": 1.063612198055604e-07, + "logits": -1.2672284841537476, + "logps": -82.41036987304688, + "loss": 19.725, + "objective": 18.898433685302734, + "ranking_idealized": 0.5666666626930237, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.5583333373069763, + "regularize": 0.04213841259479523, + "step": 1425, + "wo_beta": 17.573118209838867 + }, + { + "dpo_loss": 0.5290653109550476, + "epoch": 4.052905054322154, + "grad_norm": 12471.786390228664, + "learning_rate": 1.0332332985438247e-07, + "logits": -1.2409167289733887, + "logps": -82.05091094970703, + "loss": 21.8465, + "objective": 20.57358741760254, + "ranking_idealized": 0.6166666746139526, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.6083333492279053, + "regularize": 0.05022308602929115, + "step": 1430, + "wo_beta": 17.054475784301758 + }, + { + "dpo_loss": 0.5352352261543274, + "epoch": 4.067076051015588, + "grad_norm": 12729.012234556472, + "learning_rate": 1.0032444869501577e-07, + "logits": -1.1344469785690308, + "logps": -84.53145599365234, + "loss": 23.6283, + "objective": 21.45845603942871, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.550000011920929, + "regularize": 0.046408891677856445, + "step": 1435, + "wo_beta": 17.1253719329834 + }, + { + "dpo_loss": 0.5163091421127319, + "epoch": 4.081247047709022, + "grad_norm": 12403.62054840324, + "learning_rate": 9.736487123447068e-08, + "logits": -1.3162797689437866, + "logps": -83.0071792602539, + "loss": 18.4912, + "objective": 19.839466094970703, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.5916666388511658, + "regularize": 0.04717810079455376, + "step": 1440, + "wo_beta": 16.756040573120117 + }, + { + "dpo_loss": 0.5394971966743469, + "epoch": 4.095418044402456, + "grad_norm": 13017.070767832263, + "learning_rate": 9.444488851467041e-08, + "logits": -1.2141478061676025, + "logps": -81.8912582397461, + "loss": 22.8616, + "objective": 24.104333877563477, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.5916666388511658, + "regularize": 0.05671803280711174, + "step": 1445, + "wo_beta": 15.497802734375 + }, + { + "dpo_loss": 0.5386175513267517, + "epoch": 4.109589041095891, + "grad_norm": 12319.490850536135, + "learning_rate": 9.156478768383058e-08, + "logits": -1.2780787944793701, + "logps": -82.44509887695312, + "loss": 21.2906, + "objective": 22.363698959350586, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.5666666626930237, + "regularize": 0.05074004456400871, + "step": 1450, + "wo_beta": 16.318647384643555 + }, + { + "epoch": 4.109589041095891, + "eval_dpo_loss": 0.6796455383300781, + "eval_logits": -1.2975972890853882, + "eval_logps": -88.12124633789062, + "eval_loss": 187.89852905273438, + "eval_objective": 183.65463256835938, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.5320910811424255, + "eval_regularize": 0.4164124131202698, + "eval_runtime": 516.4821, + "eval_samples_per_second": 11.21, + "eval_steps_per_second": 0.935, + "eval_wo_beta": 15.985260009765625, + "step": 1450 + }, + { + "dpo_loss": 0.5313987135887146, + "epoch": 4.123760037789324, + "grad_norm": 12478.853769070673, + "learning_rate": 8.872485196822122e-08, + "logits": -1.2814396619796753, + "logps": -81.72008514404297, + "loss": 22.8821, + "objective": 23.81187629699707, + "ranking_idealized": 0.6416666507720947, + "ranking_idealized_expo": 0.5916666388511658, + "ranking_simple": 0.6458333134651184, + "regularize": 0.05551544576883316, + "step": 1455, + "wo_beta": 18.80474090576172 + }, + { + "dpo_loss": 0.5285670161247253, + "epoch": 4.137931034482759, + "grad_norm": 13016.324616810654, + "learning_rate": 8.592536064431466e-08, + "logits": -1.3169968128204346, + "logps": -82.27637481689453, + "loss": 21.0762, + "objective": 22.214412689208984, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5708333253860474, + "regularize": 0.05284254625439644, + "step": 1460, + "wo_beta": 16.45089340209961 + }, + { + "dpo_loss": 0.5275595784187317, + "epoch": 4.1521020311761925, + "grad_norm": 12828.215315021795, + "learning_rate": 8.316658901132163e-08, + "logits": -1.2044638395309448, + "logps": -83.09059143066406, + "loss": 20.1235, + "objective": 19.89800453186035, + "ranking_idealized": 0.6166666746139526, + "ranking_idealized_expo": 0.5541666746139526, + "ranking_simple": 0.6208333373069763, + "regularize": 0.04856906086206436, + "step": 1465, + "wo_beta": 16.143047332763672 + }, + { + "dpo_loss": 0.5317350029945374, + "epoch": 4.166273027869627, + "grad_norm": 13452.677353962536, + "learning_rate": 8.044880836411888e-08, + "logits": -1.312625527381897, + "logps": -80.955810546875, + "loss": 18.8621, + "objective": 22.22332000732422, + "ranking_idealized": 0.5583333373069763, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.5583333373069763, + "regularize": 0.05493269860744476, + "step": 1470, + "wo_beta": 14.867803573608398 + }, + { + "dpo_loss": 0.5068629384040833, + "epoch": 4.1804440245630605, + "grad_norm": 12445.31776981503, + "learning_rate": 7.777228596656993e-08, + "logits": -1.2618132829666138, + "logps": -83.48854064941406, + "loss": 18.8691, + "objective": 17.614728927612305, + "ranking_idealized": 0.6166666746139526, + "ranking_idealized_expo": 0.5541666746139526, + "ranking_simple": 0.612500011920929, + "regularize": 0.04399799555540085, + "step": 1475, + "wo_beta": 17.06732940673828 + }, + { + "dpo_loss": 0.5202235579490662, + "epoch": 4.194615021256495, + "grad_norm": 12224.02993997593, + "learning_rate": 7.513728502524286e-08, + "logits": -1.1893463134765625, + "logps": -81.5462417602539, + "loss": 19.5471, + "objective": 21.709897994995117, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.5916666388511658, + "regularize": 0.05476529151201248, + "step": 1480, + "wo_beta": 16.902223587036133 + }, + { + "dpo_loss": 0.528392493724823, + "epoch": 4.2087860179499295, + "grad_norm": 12678.153549499324, + "learning_rate": 7.25440646635268e-08, + "logits": -1.3054612874984741, + "logps": -80.2231674194336, + "loss": 19.6042, + "objective": 19.114337921142578, + "ranking_idealized": 0.5833333134651184, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.574999988079071, + "regularize": 0.04563932120800018, + "step": 1485, + "wo_beta": 16.017080307006836 + }, + { + "dpo_loss": 0.5332812070846558, + "epoch": 4.222957014643363, + "grad_norm": 12851.091233563351, + "learning_rate": 6.999287989614971e-08, + "logits": -1.368248462677002, + "logps": -81.43551635742188, + "loss": 19.3664, + "objective": 18.39341926574707, + "ranking_idealized": 0.5583333373069763, + "ranking_idealized_expo": 0.47083333134651184, + "ranking_simple": 0.5625, + "regularize": 0.0449262373149395, + "step": 1490, + "wo_beta": 14.998396873474121 + }, + { + "dpo_loss": 0.5162668824195862, + "epoch": 4.2371280113367975, + "grad_norm": 13439.750358421123, + "learning_rate": 6.74839816041013e-08, + "logits": -1.3570283651351929, + "logps": -81.74089050292969, + "loss": 16.8521, + "objective": 18.91334342956543, + "ranking_idealized": 0.5958333611488342, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5958333611488342, + "regularize": 0.04384367913007736, + "step": 1495, + "wo_beta": 18.151466369628906 + }, + { + "dpo_loss": 0.5271181464195251, + "epoch": 4.251299008030231, + "grad_norm": 12462.836104102607, + "learning_rate": 6.501761650996052e-08, + "logits": -1.3143360614776611, + "logps": -83.34208679199219, + "loss": 19.8787, + "objective": 20.79971694946289, + "ranking_idealized": 0.6083333492279053, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.5958333611488342, + "regularize": 0.04957110807299614, + "step": 1500, + "wo_beta": 16.131967544555664 + }, + { + "epoch": 4.251299008030231, + "eval_dpo_loss": 0.679940402507782, + "eval_logits": -1.2942196130752563, + "eval_logps": -88.3078384399414, + "eval_loss": 188.08248901367188, + "eval_objective": 183.8683624267578, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.5320910811424255, + "eval_regularize": 0.4168849587440491, + "eval_runtime": 525.9589, + "eval_samples_per_second": 11.008, + "eval_steps_per_second": 0.918, + "eval_wo_beta": 15.983942031860352, + "step": 1500 + }, + { + "dpo_loss": 0.5348100066184998, + "epoch": 4.2654700047236656, + "grad_norm": 12354.445167507907, + "learning_rate": 6.259402715363394e-08, + "logits": -1.3128606081008911, + "logps": -83.40116119384766, + "loss": 18.971, + "objective": 17.431968688964844, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.5666666626930237, + "regularize": 0.0443451851606369, + "step": 1505, + "wo_beta": 15.766800880432129 + }, + { + "dpo_loss": 0.5173017382621765, + "epoch": 4.2796410014171, + "grad_norm": 13102.659789592512, + "learning_rate": 6.021345186850418e-08, + "logits": -1.2090104818344116, + "logps": -81.23714447021484, + "loss": 21.191, + "objective": 21.426023483276367, + "ranking_idealized": 0.625, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.6166666746139526, + "regularize": 0.04612095281481743, + "step": 1510, + "wo_beta": 15.78390121459961 + }, + { + "dpo_loss": 0.5307953357696533, + "epoch": 4.293811998110534, + "grad_norm": 12945.706552780925, + "learning_rate": 5.787612475799269e-08, + "logits": -1.367775559425354, + "logps": -82.44042205810547, + "loss": 16.9107, + "objective": 16.15281867980957, + "ranking_idealized": 0.625, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.6291666626930237, + "regularize": 0.03850070759654045, + "step": 1515, + "wo_beta": 16.397567749023438 + }, + { + "dpo_loss": 0.5217214226722717, + "epoch": 4.307982994803968, + "grad_norm": 12094.58497098056, + "learning_rate": 5.5582275672538316e-08, + "logits": -1.2217297554016113, + "logps": -81.85955047607422, + "loss": 18.4539, + "objective": 19.449350357055664, + "ranking_idealized": 0.6791666746139526, + "ranking_idealized_expo": 0.6041666865348816, + "ranking_simple": 0.6791666746139526, + "regularize": 0.04723352938890457, + "step": 1520, + "wo_beta": 16.819021224975586 + }, + { + "dpo_loss": 0.5168942809104919, + "epoch": 4.322153991497402, + "grad_norm": 12423.169223430634, + "learning_rate": 5.333213018699356e-08, + "logits": -1.2731564044952393, + "logps": -81.88040161132812, + "loss": 21.872, + "objective": 21.83941078186035, + "ranking_idealized": 0.6166666746139526, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.6291666626930237, + "regularize": 0.054684512317180634, + "step": 1525, + "wo_beta": 14.882065773010254 + }, + { + "dpo_loss": 0.5241533517837524, + "epoch": 4.336324988190836, + "grad_norm": 13878.266489791004, + "learning_rate": 5.112590957844232e-08, + "logits": -1.3176230192184448, + "logps": -83.9821548461914, + "loss": 20.6818, + "objective": 16.49356460571289, + "ranking_idealized": 0.625, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.612500011920929, + "regularize": 0.04093782603740692, + "step": 1530, + "wo_beta": 16.301631927490234 + }, + { + "dpo_loss": 0.5228941440582275, + "epoch": 4.350495984884271, + "grad_norm": 12969.925803784026, + "learning_rate": 4.896383080443933e-08, + "logits": -1.216440200805664, + "logps": -82.53515625, + "loss": 18.6444, + "objective": 18.548452377319336, + "ranking_idealized": 0.5708333253860474, + "ranking_idealized_expo": 0.4625000059604645, + "ranking_simple": 0.5791666507720947, + "regularize": 0.047610316425561905, + "step": 1535, + "wo_beta": 15.176318168640137 + }, + { + "dpo_loss": 0.5215330123901367, + "epoch": 4.364666981577704, + "grad_norm": 12724.232692363212, + "learning_rate": 4.684610648167503e-08, + "logits": -1.3027079105377197, + "logps": -81.91221618652344, + "loss": 21.6498, + "objective": 21.20092010498047, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.5833333134651184, + "regularize": 0.05302129685878754, + "step": 1540, + "wo_beta": 16.298704147338867 + }, + { + "dpo_loss": 0.5356315970420837, + "epoch": 4.378837978271139, + "grad_norm": 12224.725778808395, + "learning_rate": 4.4772944865067055e-08, + "logits": -1.3303568363189697, + "logps": -83.5517578125, + "loss": 17.8525, + "objective": 20.373811721801758, + "ranking_idealized": 0.6333333253860474, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.6166666746139526, + "regularize": 0.050720926374197006, + "step": 1545, + "wo_beta": 15.988405227661133 + }, + { + "dpo_loss": 0.5247156023979187, + "epoch": 4.393008974964572, + "grad_norm": 12442.357612605178, + "learning_rate": 4.274454982728032e-08, + "logits": -1.246690034866333, + "logps": -81.54380798339844, + "loss": 18.4741, + "objective": 19.52410316467285, + "ranking_idealized": 0.5833333134651184, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.5833333134651184, + "regularize": 0.049736883491277695, + "step": 1550, + "wo_beta": 17.08685874938965 + }, + { + "epoch": 4.393008974964572, + "eval_dpo_loss": 0.6802076697349548, + "eval_logits": -1.2950727939605713, + "eval_logps": -88.48546600341797, + "eval_loss": 188.04074096679688, + "eval_objective": 184.0446319580078, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.532608687877655, + "eval_regularize": 0.4172796308994293, + "eval_runtime": 533.9808, + "eval_samples_per_second": 10.843, + "eval_steps_per_second": 0.905, + "eval_wo_beta": 15.994985580444336, + "step": 1550 + }, + { + "dpo_loss": 0.5181335210800171, + "epoch": 4.407179971658007, + "grad_norm": 13002.101456533634, + "learning_rate": 4.0761120838678776e-08, + "logits": -1.3068591356277466, + "logps": -81.81246185302734, + "loss": 16.5342, + "objective": 14.914339065551758, + "ranking_idealized": 0.5708333253860474, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.5708333253860474, + "regularize": 0.0403703935444355, + "step": 1555, + "wo_beta": 17.326810836791992 + }, + { + "dpo_loss": 0.5418220162391663, + "epoch": 4.42135096835144, + "grad_norm": 11889.810698222469, + "learning_rate": 3.882285294770937e-08, + "logits": -1.2680351734161377, + "logps": -80.56555938720703, + "loss": 16.764, + "objective": 17.03957176208496, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.5916666388511658, + "regularize": 0.04209714010357857, + "step": 1560, + "wo_beta": 14.395126342773438 + }, + { + "dpo_loss": 0.5355924963951111, + "epoch": 4.435521965044875, + "grad_norm": 12185.339277571, + "learning_rate": 3.6929936761721403e-08, + "logits": -1.2988630533218384, + "logps": -80.5867919921875, + "loss": 21.4187, + "objective": 21.873271942138672, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5791666507720947, + "regularize": 0.05235178396105766, + "step": 1565, + "wo_beta": 14.826796531677246 + }, + { + "dpo_loss": 0.5378596782684326, + "epoch": 4.449692961738309, + "grad_norm": 11114.71452911412, + "learning_rate": 3.508255842822255e-08, + "logits": -1.3118114471435547, + "logps": -81.77924346923828, + "loss": 18.6149, + "objective": 20.33370590209961, + "ranking_idealized": 0.612500011920929, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.5958333611488342, + "regularize": 0.05320237576961517, + "step": 1570, + "wo_beta": 15.688643455505371 + }, + { + "dpo_loss": 0.5132429599761963, + "epoch": 4.463863958431743, + "grad_norm": 12945.538981188476, + "learning_rate": 3.3280899616572656e-08, + "logits": -1.3532111644744873, + "logps": -84.82633209228516, + "loss": 17.216, + "objective": 17.143177032470703, + "ranking_idealized": 0.5833333134651184, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.574999988079071, + "regularize": 0.042684536427259445, + "step": 1575, + "wo_beta": 17.00408935546875 + }, + { + "dpo_loss": 0.5311785340309143, + "epoch": 4.478034955125177, + "grad_norm": 13235.594805356337, + "learning_rate": 3.15251375001192e-08, + "logits": -1.2649219036102295, + "logps": -82.44920349121094, + "loss": 17.9899, + "objective": 17.875553131103516, + "ranking_idealized": 0.5708333253860474, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.5708333253860474, + "regularize": 0.043413810431957245, + "step": 1580, + "wo_beta": 17.040142059326172 + }, + { + "dpo_loss": 0.5295437574386597, + "epoch": 4.492205951818612, + "grad_norm": 13164.392376509253, + "learning_rate": 2.98154447387739e-08, + "logits": -1.318244457244873, + "logps": -81.6868667602539, + "loss": 18.7186, + "objective": 14.95705509185791, + "ranking_idealized": 0.6041666865348816, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.5958333611488342, + "regularize": 0.03320387750864029, + "step": 1585, + "wo_beta": 17.157299041748047 + }, + { + "dpo_loss": 0.527228593826294, + "epoch": 4.506376948512045, + "grad_norm": 12124.025371614676, + "learning_rate": 2.8151989462033787e-08, + "logits": -1.1829341650009155, + "logps": -83.83565521240234, + "loss": 18.9673, + "objective": 16.773042678833008, + "ranking_idealized": 0.5791666507720947, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.5833333134651184, + "regularize": 0.042777713388204575, + "step": 1590, + "wo_beta": 16.952783584594727 + }, + { + "dpo_loss": 0.5242041349411011, + "epoch": 4.52054794520548, + "grad_norm": 11927.935212297323, + "learning_rate": 2.653493525244721e-08, + "logits": -1.2492893934249878, + "logps": -82.36843872070312, + "loss": 17.1521, + "objective": 18.047021865844727, + "ranking_idealized": 0.6208333373069763, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.6041666865348816, + "regularize": 0.04608127102255821, + "step": 1595, + "wo_beta": 15.581862449645996 + }, + { + "dpo_loss": 0.5243973135948181, + "epoch": 4.534718941898913, + "grad_norm": 12379.266840127142, + "learning_rate": 2.4964441129527335e-08, + "logits": -1.2830615043640137, + "logps": -82.28716278076172, + "loss": 20.4794, + "objective": 17.599641799926758, + "ranking_idealized": 0.6333333253860474, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.6333333253860474, + "regularize": 0.042389459908008575, + "step": 1600, + "wo_beta": 16.58247184753418 + }, + { + "epoch": 4.534718941898913, + "eval_dpo_loss": 0.6798632740974426, + "eval_logits": -1.2950247526168823, + "eval_logps": -88.43807983398438, + "eval_loss": 187.9060821533203, + "eval_objective": 183.82763671875, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.5331262946128845, + "eval_regularize": 0.4167550504207611, + "eval_runtime": 510.5256, + "eval_samples_per_second": 11.341, + "eval_steps_per_second": 0.946, + "eval_wo_beta": 16.000411987304688, + "step": 1600 + }, + { + "dpo_loss": 0.5349418520927429, + "epoch": 4.548889938592348, + "grad_norm": 13428.292487446544, + "learning_rate": 2.3440661534114557e-08, + "logits": -1.2768018245697021, + "logps": -83.37641906738281, + "loss": 17.8123, + "objective": 14.984145164489746, + "ranking_idealized": 0.5541666746139526, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.550000011920929, + "regularize": 0.0358855277299881, + "step": 1605, + "wo_beta": 16.76499366760254 + }, + { + "dpo_loss": 0.5164486169815063, + "epoch": 4.563060935285781, + "grad_norm": 12892.913912379732, + "learning_rate": 2.1963746313188757e-08, + "logits": -1.249220371246338, + "logps": -81.78076171875, + "loss": 17.1832, + "objective": 20.233509063720703, + "ranking_idealized": 0.5833333134651184, + "ranking_idealized_expo": 0.47083333134651184, + "ranking_simple": 0.574999988079071, + "regularize": 0.048116158694028854, + "step": 1610, + "wo_beta": 15.82449722290039 + }, + { + "dpo_loss": 0.5349178314208984, + "epoch": 4.577231931979216, + "grad_norm": 12493.396334435913, + "learning_rate": 2.053384070513353e-08, + "logits": -1.2513455152511597, + "logps": -80.9568862915039, + "loss": 18.7751, + "objective": 20.071449279785156, + "ranking_idealized": 0.5291666388511658, + "ranking_idealized_expo": 0.46666666865348816, + "ranking_simple": 0.5249999761581421, + "regularize": 0.04651705548167229, + "step": 1615, + "wo_beta": 14.514166831970215 + }, + { + "dpo_loss": 0.5360397100448608, + "epoch": 4.59140292867265, + "grad_norm": 12311.497249141552, + "learning_rate": 1.915108532545351e-08, + "logits": -1.3831831216812134, + "logps": -81.701904296875, + "loss": 16.5863, + "objective": 13.440372467041016, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.5791666507720947, + "regularize": 0.03207644820213318, + "step": 1620, + "wo_beta": 16.37172508239746 + }, + { + "dpo_loss": 0.5228015780448914, + "epoch": 4.605573925366084, + "grad_norm": 12520.657843831757, + "learning_rate": 1.781561615294652e-08, + "logits": -1.3208075761795044, + "logps": -82.14677429199219, + "loss": 17.2643, + "objective": 16.142719268798828, + "ranking_idealized": 0.6791666746139526, + "ranking_idealized_expo": 0.5916666388511658, + "ranking_simple": 0.6875, + "regularize": 0.03792344033718109, + "step": 1625, + "wo_beta": 15.518718719482422 + }, + { + "dpo_loss": 0.5221564173698425, + "epoch": 4.619744922059518, + "grad_norm": 11926.649260036038, + "learning_rate": 1.6527564516331638e-08, + "logits": -1.1876474618911743, + "logps": -82.74609375, + "loss": 17.5255, + "objective": 16.14875602722168, + "ranking_idealized": 0.6708333492279053, + "ranking_idealized_expo": 0.5874999761581421, + "ranking_simple": 0.6708333492279053, + "regularize": 0.039545025676488876, + "step": 1630, + "wo_beta": 17.103187561035156 + }, + { + "dpo_loss": 0.5277553796768188, + "epoch": 4.633915918752952, + "grad_norm": 12387.92239266219, + "learning_rate": 1.5287057081333988e-08, + "logits": -1.303261399269104, + "logps": -82.68264770507812, + "loss": 17.5837, + "objective": 18.295978546142578, + "ranking_idealized": 0.625, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.6291666626930237, + "regularize": 0.04383000358939171, + "step": 1635, + "wo_beta": 16.273590087890625 + }, + { + "dpo_loss": 0.5235089063644409, + "epoch": 4.648086915446386, + "grad_norm": 13550.591286437839, + "learning_rate": 1.4094215838229172e-08, + "logits": -1.3104770183563232, + "logps": -81.95443725585938, + "loss": 16.0714, + "objective": 18.62168312072754, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.5958333611488342, + "regularize": 0.044566281139850616, + "step": 1640, + "wo_beta": 14.541909217834473 + }, + { + "dpo_loss": 0.5459772944450378, + "epoch": 4.662257912139821, + "grad_norm": 12589.25993273719, + "learning_rate": 1.2949158089846368e-08, + "logits": -1.2789607048034668, + "logps": -80.86375427246094, + "loss": 15.9698, + "objective": 15.747620582580566, + "ranking_idealized": 0.5708333253860474, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5708333253860474, + "regularize": 0.03958037868142128, + "step": 1645, + "wo_beta": 16.792747497558594 + }, + { + "dpo_loss": 0.5279684066772461, + "epoch": 4.6764289088332545, + "grad_norm": 11986.458011152894, + "learning_rate": 1.1851996440033318e-08, + "logits": -1.224802017211914, + "logps": -81.75625610351562, + "loss": 17.2115, + "objective": 18.047420501708984, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.5958333611488342, + "regularize": 0.04608708992600441, + "step": 1650, + "wo_beta": 17.34733772277832 + }, + { + "epoch": 4.6764289088332545, + "eval_dpo_loss": 0.6798492074012756, + "eval_logits": -1.293831467628479, + "eval_logps": -88.41741943359375, + "eval_loss": 187.95040893554688, + "eval_objective": 183.85658264160156, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.532608687877655, + "eval_regularize": 0.416820764541626, + "eval_runtime": 510.4095, + "eval_samples_per_second": 11.344, + "eval_steps_per_second": 0.946, + "eval_wo_beta": 15.994239807128906, + "step": 1650 + }, + { + "dpo_loss": 0.5183621048927307, + "epoch": 4.690599905526689, + "grad_norm": 12001.298881228338, + "learning_rate": 1.0802838782582535e-08, + "logits": -1.2560440301895142, + "logps": -81.986083984375, + "loss": 18.141, + "objective": 16.23440170288086, + "ranking_idealized": 0.5958333611488342, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.6041666865348816, + "regularize": 0.0418228842318058, + "step": 1655, + "wo_beta": 14.709871292114258 + }, + { + "dpo_loss": 0.5308786034584045, + "epoch": 4.7047709022201225, + "grad_norm": 12471.919482995943, + "learning_rate": 9.801788290621505e-09, + "logits": -1.242910623550415, + "logps": -82.37290954589844, + "loss": 19.8764, + "objective": 21.41328239440918, + "ranking_idealized": 0.6083333492279053, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.6083333492279053, + "regularize": 0.04805602878332138, + "step": 1660, + "wo_beta": 16.161657333374023 + }, + { + "dpo_loss": 0.517335832118988, + "epoch": 4.718941898913557, + "grad_norm": 12326.624130987268, + "learning_rate": 8.848943406466468e-09, + "logits": -1.2066967487335205, + "logps": -81.63778686523438, + "loss": 17.9054, + "objective": 18.123321533203125, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.47083333134651184, + "ranking_simple": 0.550000011920929, + "regularize": 0.04272852838039398, + "step": 1665, + "wo_beta": 15.821066856384277 + }, + { + "dpo_loss": 0.534516453742981, + "epoch": 4.733112895606991, + "grad_norm": 13120.765521158273, + "learning_rate": 7.944397831941951e-09, + "logits": -1.3101601600646973, + "logps": -83.31844329833984, + "loss": 15.3296, + "objective": 14.363126754760742, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.47083333134651184, + "ranking_simple": 0.5541666746139526, + "regularize": 0.0361357256770134, + "step": 1670, + "wo_beta": 15.148748397827148 + }, + { + "dpo_loss": 0.5110668540000916, + "epoch": 4.747283892300425, + "grad_norm": 12106.475879366208, + "learning_rate": 7.088240519165955e-09, + "logits": -1.2715505361557007, + "logps": -83.65233612060547, + "loss": 18.7232, + "objective": 22.049705505371094, + "ranking_idealized": 0.5708333253860474, + "ranking_idealized_expo": 0.4791666567325592, + "ranking_simple": 0.5583333373069763, + "regularize": 0.04454280436038971, + "step": 1675, + "wo_beta": 16.55459213256836 + }, + { + "dpo_loss": 0.5210347771644592, + "epoch": 4.7614548889938595, + "grad_norm": 13458.285236730762, + "learning_rate": 6.280555661802856e-09, + "logits": -1.2422146797180176, + "logps": -82.28036499023438, + "loss": 16.7571, + "objective": 16.147016525268555, + "ranking_idealized": 0.637499988079071, + "ranking_idealized_expo": 0.5833333134651184, + "ranking_simple": 0.637499988079071, + "regularize": 0.03472811356186867, + "step": 1680, + "wo_beta": 17.648740768432617 + }, + { + "dpo_loss": 0.5201699733734131, + "epoch": 4.775625885687293, + "grad_norm": 13687.817133347355, + "learning_rate": 5.521422686783294e-09, + "logits": -1.308603286743164, + "logps": -82.1572265625, + "loss": 17.374, + "objective": 18.0618839263916, + "ranking_idealized": 0.5791666507720947, + "ranking_idealized_expo": 0.4583333432674408, + "ranking_simple": 0.5625, + "regularize": 0.039491456001996994, + "step": 1685, + "wo_beta": 14.411401748657227 + }, + { + "dpo_loss": 0.5319506525993347, + "epoch": 4.7897968823807275, + "grad_norm": 11765.631080020812, + "learning_rate": 4.810916246494157e-09, + "logits": -1.3420146703720093, + "logps": -81.82181549072266, + "loss": 16.2518, + "objective": 15.689167976379395, + "ranking_idealized": 0.6291666626930237, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.612500011920929, + "regularize": 0.03873560577630997, + "step": 1690, + "wo_beta": 15.535360336303711 + }, + { + "dpo_loss": 0.5226943492889404, + "epoch": 4.803967879074161, + "grad_norm": 15212.188996211064, + "learning_rate": 4.149106211436659e-09, + "logits": -1.205290675163269, + "logps": -81.14673614501953, + "loss": 15.6316, + "objective": 14.224554061889648, + "ranking_idealized": 0.6333333253860474, + "ranking_idealized_expo": 0.5708333253860474, + "ranking_simple": 0.6333333253860474, + "regularize": 0.033357344567775726, + "step": 1695, + "wo_beta": 17.657291412353516 + }, + { + "dpo_loss": 0.5265616178512573, + "epoch": 4.818138875767596, + "grad_norm": 14041.074803893325, + "learning_rate": 3.5360576633558513e-09, + "logits": -1.3079345226287842, + "logps": -80.5920639038086, + "loss": 16.5799, + "objective": 17.133312225341797, + "ranking_idealized": 0.625, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.6208333373069763, + "regularize": 0.042179401963949203, + "step": 1700, + "wo_beta": 14.612165451049805 + }, + { + "epoch": 4.818138875767596, + "eval_dpo_loss": 0.6798617839813232, + "eval_logits": -1.2946054935455322, + "eval_logps": -88.42201232910156, + "eval_loss": 187.93597412109375, + "eval_objective": 183.8405303955078, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.532608687877655, + "eval_regularize": 0.4168170392513275, + "eval_runtime": 537.7382, + "eval_samples_per_second": 10.767, + "eval_steps_per_second": 0.898, + "eval_wo_beta": 15.996342658996582, + "step": 1700 + }, + { + "dpo_loss": 0.5323117971420288, + "epoch": 4.83230987246103, + "grad_norm": 12547.78573915316, + "learning_rate": 2.9718308888401767e-09, + "logits": -1.3183315992355347, + "logps": -81.7763442993164, + "loss": 16.0513, + "objective": 17.510692596435547, + "ranking_idealized": 0.6791666746139526, + "ranking_idealized_expo": 0.5833333134651184, + "ranking_simple": 0.6708333492279053, + "regularize": 0.040996309369802475, + "step": 1705, + "wo_beta": 17.88062858581543 + }, + { + "dpo_loss": 0.5359232425689697, + "epoch": 4.846480869154464, + "grad_norm": 13302.316035438349, + "learning_rate": 2.4564813733932155e-09, + "logits": -1.316437840461731, + "logps": -81.5803451538086, + "loss": 17.058, + "objective": 15.084990501403809, + "ranking_idealized": 0.5916666388511658, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5874999761581421, + "regularize": 0.0330289825797081, + "step": 1710, + "wo_beta": 14.95897102355957 + }, + { + "dpo_loss": 0.5298423171043396, + "epoch": 4.860651865847898, + "grad_norm": 13131.732232168924, + "learning_rate": 1.9900597959770505e-09, + "logits": -1.2239762544631958, + "logps": -80.93972778320312, + "loss": 15.5353, + "objective": 14.398134231567383, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.5833333134651184, + "regularize": 0.03392880782485008, + "step": 1715, + "wo_beta": 16.470539093017578 + }, + { + "dpo_loss": 0.5080859065055847, + "epoch": 4.874822862541333, + "grad_norm": 13218.33236233331, + "learning_rate": 1.5726120240288631e-09, + "logits": -1.2538625001907349, + "logps": -80.96495819091797, + "loss": 16.0016, + "objective": 18.5091552734375, + "ranking_idealized": 0.612500011920929, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5874999761581421, + "regularize": 0.045930005609989166, + "step": 1720, + "wo_beta": 17.185333251953125 + }, + { + "dpo_loss": 0.5118470788002014, + "epoch": 4.888993859234766, + "grad_norm": 12268.487941087904, + "learning_rate": 1.2041791089499875e-09, + "logits": -1.279910683631897, + "logps": -79.85582733154297, + "loss": 13.4289, + "objective": 14.366524696350098, + "ranking_idealized": 0.6416666507720947, + "ranking_idealized_expo": 0.5708333253860474, + "ranking_simple": 0.6416666507720947, + "regularize": 0.03632321581244469, + "step": 1725, + "wo_beta": 17.114274978637695 + }, + { + "dpo_loss": 0.5286470055580139, + "epoch": 4.903164855928201, + "grad_norm": 11950.336190164535, + "learning_rate": 8.847972820693051e-10, + "logits": -1.2914131879806519, + "logps": -80.19400787353516, + "loss": 16.9458, + "objective": 18.679357528686523, + "ranking_idealized": 0.5416666865348816, + "ranking_idealized_expo": 0.4416666626930237, + "ranking_simple": 0.5249999761581421, + "regularize": 0.04655119404196739, + "step": 1730, + "wo_beta": 14.276873588562012 + }, + { + "dpo_loss": 0.528618574142456, + "epoch": 4.917335852621634, + "grad_norm": 12737.302460928488, + "learning_rate": 6.144979510802062e-10, + "logits": -1.4132698774337769, + "logps": -82.34892272949219, + "loss": 18.3815, + "objective": 18.776357650756836, + "ranking_idealized": 0.6458333134651184, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.637499988079071, + "regularize": 0.047753263264894485, + "step": 1735, + "wo_beta": 15.833959579467773 + }, + { + "dpo_loss": 0.5292457938194275, + "epoch": 4.931506849315069, + "grad_norm": 13241.609695831672, + "learning_rate": 3.933076969516724e-10, + "logits": -1.2396830320358276, + "logps": -81.28510284423828, + "loss": 15.2755, + "objective": 15.8608980178833, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5916666388511658, + "regularize": 0.041682373732328415, + "step": 1740, + "wo_beta": 15.47945499420166 + }, + { + "dpo_loss": 0.5308272838592529, + "epoch": 4.945677846008502, + "grad_norm": 12128.166835896209, + "learning_rate": 2.212482713149222e-10, + "logits": -1.2960669994354248, + "logps": -80.84746551513672, + "loss": 15.3037, + "objective": 12.663678169250488, + "ranking_idealized": 0.6083333492279053, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.6041666865348816, + "regularize": 0.03369910642504692, + "step": 1745, + "wo_beta": 16.19184112548828 + }, + { + "dpo_loss": 0.5277208089828491, + "epoch": 4.959848842701937, + "grad_norm": 12921.297125323947, + "learning_rate": 9.833659432367803e-11, + "logits": -1.2565745115280151, + "logps": -82.744873046875, + "loss": 16.689, + "objective": 16.856407165527344, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5874999761581421, + "regularize": 0.04240218922495842, + "step": 1750, + "wo_beta": 16.752824783325195 + }, + { + "epoch": 4.959848842701937, + "eval_dpo_loss": 0.6798657774925232, + "eval_logits": -1.2945247888565063, + "eval_logps": -88.4161605834961, + "eval_loss": 187.94732666015625, + "eval_objective": 183.85096740722656, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.532608687877655, + "eval_regularize": 0.4168415367603302, + "eval_runtime": 526.9139, + "eval_samples_per_second": 10.989, + "eval_steps_per_second": 0.917, + "eval_wo_beta": 15.995292663574219, + "step": 1750 + }, + { + "dpo_loss": 0.5391930937767029, + "epoch": 4.974019839395371, + "grad_norm": 11466.754753582296, + "learning_rate": 2.4584752990997048e-11, + "logits": -1.29628324508667, + "logps": -82.2157211303711, + "loss": 14.7634, + "objective": 15.14171314239502, + "ranking_idealized": 0.6041666865348816, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.6041666865348816, + "regularize": 0.04220600798726082, + "step": 1755, + "wo_beta": 16.21957778930664 + }, + { + "dpo_loss": 0.525145411491394, + "epoch": 4.988190836088805, + "grad_norm": 12278.79483067917, + "learning_rate": 0.0, + "logits": -1.2211812734603882, + "logps": -82.23439025878906, + "loss": 15.7539, + "objective": 15.124394416809082, + "ranking_idealized": 0.6291666626930237, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.6208333373069763, + "regularize": 0.035570546984672546, + "step": 1760, + "wo_beta": 17.11547088623047 + }, + { + "epoch": 4.988190836088805, + "step": 1760, + "total_flos": 0.0, + "train_loss": 67.88850653388283, + "train_runtime": 74214.1269, + "train_samples_per_second": 3.423, + "train_steps_per_second": 0.024 + } + ], + "logging_steps": 5, + "max_steps": 1760, + "num_input_tokens_seen": 0, + "num_train_epochs": 5, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +}