|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 4.988190836088805, |
|
"eval_steps": 50, |
|
"global_step": 1760, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"dpo_loss": 0.6931471824645996, |
|
"epoch": 0.002834199338686821, |
|
"grad_norm": 17675.585799054454, |
|
"learning_rate": 5.681818181818181e-09, |
|
"logits": -1.2867579460144043, |
|
"logps": -84.34933471679688, |
|
"loss": 169.5214, |
|
"objective": 153.4677734375, |
|
"ranking_idealized": 0.6458333134651184, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.3618059456348419, |
|
"step": 1, |
|
"wo_beta": 14.83154582977295 |
|
}, |
|
{ |
|
"dpo_loss": 0.6930850148200989, |
|
"epoch": 0.014170996693434105, |
|
"grad_norm": 16809.76979726276, |
|
"learning_rate": 2.8409090909090908e-08, |
|
"logits": -1.4291090965270996, |
|
"logps": -83.86122131347656, |
|
"loss": 181.7047, |
|
"objective": 168.55690002441406, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.4895833432674408, |
|
"ranking_simple": 0.4895833432674408, |
|
"regularize": 0.4036543667316437, |
|
"step": 5, |
|
"wo_beta": 16.679672241210938 |
|
}, |
|
{ |
|
"dpo_loss": 0.6930658221244812, |
|
"epoch": 0.02834199338686821, |
|
"grad_norm": 18604.90219885959, |
|
"learning_rate": 5.6818181818181815e-08, |
|
"logits": -1.4008290767669678, |
|
"logps": -84.83370971679688, |
|
"loss": 177.0775, |
|
"objective": 170.34666442871094, |
|
"ranking_idealized": 0.6708333492279053, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.40391480922698975, |
|
"step": 10, |
|
"wo_beta": 15.222626686096191 |
|
}, |
|
{ |
|
"dpo_loss": 0.6919592618942261, |
|
"epoch": 0.042512990080302314, |
|
"grad_norm": 17866.85697228391, |
|
"learning_rate": 8.522727272727271e-08, |
|
"logits": -1.5378918647766113, |
|
"logps": -84.51753234863281, |
|
"loss": 178.9384, |
|
"objective": 187.3764190673828, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.5708333253860474, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.44199517369270325, |
|
"step": 15, |
|
"wo_beta": 15.720404624938965 |
|
}, |
|
{ |
|
"dpo_loss": 0.6915046572685242, |
|
"epoch": 0.05668398677373642, |
|
"grad_norm": 17562.319543911097, |
|
"learning_rate": 1.1363636363636363e-07, |
|
"logits": -1.3619273900985718, |
|
"logps": -83.62174224853516, |
|
"loss": 185.6226, |
|
"objective": 203.74549865722656, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.48750001192092896, |
|
"regularize": 0.4415209889411926, |
|
"step": 20, |
|
"wo_beta": 16.53324317932129 |
|
}, |
|
{ |
|
"dpo_loss": 0.6925450563430786, |
|
"epoch": 0.07085498346717052, |
|
"grad_norm": 16842.244030261496, |
|
"learning_rate": 1.4204545454545455e-07, |
|
"logits": -1.369999647140503, |
|
"logps": -83.69309997558594, |
|
"loss": 181.9124, |
|
"objective": 172.8611297607422, |
|
"ranking_idealized": 0.6458333134651184, |
|
"ranking_idealized_expo": 0.5583333373069763, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.4071991741657257, |
|
"step": 25, |
|
"wo_beta": 15.610980987548828 |
|
}, |
|
{ |
|
"dpo_loss": 0.6898643970489502, |
|
"epoch": 0.08502598016060463, |
|
"grad_norm": 14842.574916726253, |
|
"learning_rate": 1.7045454545454543e-07, |
|
"logits": -1.432415246963501, |
|
"logps": -83.48454284667969, |
|
"loss": 181.3521, |
|
"objective": 176.5283203125, |
|
"ranking_idealized": 0.5541666746139526, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.48750001192092896, |
|
"regularize": 0.4289272427558899, |
|
"step": 30, |
|
"wo_beta": 17.00359344482422 |
|
}, |
|
{ |
|
"dpo_loss": 0.6909116506576538, |
|
"epoch": 0.09919697685403873, |
|
"grad_norm": 16058.543561158533, |
|
"learning_rate": 1.9886363636363636e-07, |
|
"logits": -1.4108096361160278, |
|
"logps": -82.71344757080078, |
|
"loss": 183.8373, |
|
"objective": 173.34014892578125, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.38034114241600037, |
|
"step": 35, |
|
"wo_beta": 16.153711318969727 |
|
}, |
|
{ |
|
"dpo_loss": 0.6891058683395386, |
|
"epoch": 0.11336797354747284, |
|
"grad_norm": 17014.23191466682, |
|
"learning_rate": 2.2727272727272726e-07, |
|
"logits": -1.402835488319397, |
|
"logps": -83.338134765625, |
|
"loss": 187.3552, |
|
"objective": 182.01144409179688, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.4058202803134918, |
|
"step": 40, |
|
"wo_beta": 14.262288093566895 |
|
}, |
|
{ |
|
"dpo_loss": 0.684747040271759, |
|
"epoch": 0.12753897024090693, |
|
"grad_norm": 15346.751264548873, |
|
"learning_rate": 2.5568181818181816e-07, |
|
"logits": -1.419245958328247, |
|
"logps": -83.82090759277344, |
|
"loss": 171.4244, |
|
"objective": 183.38385009765625, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.4130297601222992, |
|
"step": 45, |
|
"wo_beta": 14.29751968383789 |
|
}, |
|
{ |
|
"dpo_loss": 0.6823928356170654, |
|
"epoch": 0.14170996693434104, |
|
"grad_norm": 16514.084391847242, |
|
"learning_rate": 2.840909090909091e-07, |
|
"logits": -1.4350523948669434, |
|
"logps": -84.8818359375, |
|
"loss": 181.5404, |
|
"objective": 186.33828735351562, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5041666626930237, |
|
"regularize": 0.4348808526992798, |
|
"step": 50, |
|
"wo_beta": 15.604106903076172 |
|
}, |
|
{ |
|
"epoch": 0.14170996693434104, |
|
"eval_dpo_loss": 0.6889749765396118, |
|
"eval_logits": -1.4233466386795044, |
|
"eval_logps": -90.91888427734375, |
|
"eval_loss": 182.35984802246094, |
|
"eval_objective": 180.32789611816406, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5263975262641907, |
|
"eval_regularize": 0.40881022810935974, |
|
"eval_runtime": 472.6615, |
|
"eval_samples_per_second": 12.25, |
|
"eval_steps_per_second": 1.022, |
|
"eval_wo_beta": 16.297378540039062, |
|
"step": 50 |
|
}, |
|
{ |
|
"dpo_loss": 0.6824547052383423, |
|
"epoch": 0.15588096362777515, |
|
"grad_norm": 17699.4671939912, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits": -1.3973591327667236, |
|
"logps": -84.62629699707031, |
|
"loss": 170.6542, |
|
"objective": 174.4287872314453, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 0.3742547035217285, |
|
"step": 55, |
|
"wo_beta": 15.11441421508789 |
|
}, |
|
{ |
|
"dpo_loss": 0.6806777715682983, |
|
"epoch": 0.17005196032120926, |
|
"grad_norm": 16100.449715737686, |
|
"learning_rate": 3.4090909090909085e-07, |
|
"logits": -1.329344391822815, |
|
"logps": -85.16632843017578, |
|
"loss": 174.0689, |
|
"objective": 174.0922393798828, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 0.40893226861953735, |
|
"step": 60, |
|
"wo_beta": 14.438634872436523 |
|
}, |
|
{ |
|
"dpo_loss": 0.6708158254623413, |
|
"epoch": 0.18422295701464336, |
|
"grad_norm": 16302.471134333027, |
|
"learning_rate": 3.693181818181818e-07, |
|
"logits": -1.428707480430603, |
|
"logps": -82.03670501708984, |
|
"loss": 172.5426, |
|
"objective": 161.09950256347656, |
|
"ranking_idealized": 0.637499988079071, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.36983728408813477, |
|
"step": 65, |
|
"wo_beta": 15.067524909973145 |
|
}, |
|
{ |
|
"dpo_loss": 0.6730712652206421, |
|
"epoch": 0.19839395370807747, |
|
"grad_norm": 15662.31236602018, |
|
"learning_rate": 3.977272727272727e-07, |
|
"logits": -1.4695442914962769, |
|
"logps": -84.42548370361328, |
|
"loss": 174.7341, |
|
"objective": 175.19439697265625, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.4013313353061676, |
|
"step": 70, |
|
"wo_beta": 15.375307083129883 |
|
}, |
|
{ |
|
"dpo_loss": 0.6741575002670288, |
|
"epoch": 0.21256495040151158, |
|
"grad_norm": 18686.585950552704, |
|
"learning_rate": 4.2613636363636364e-07, |
|
"logits": -1.393960952758789, |
|
"logps": -84.16697692871094, |
|
"loss": 174.6645, |
|
"objective": 164.988525390625, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.38378840684890747, |
|
"step": 75, |
|
"wo_beta": 15.075023651123047 |
|
}, |
|
{ |
|
"dpo_loss": 0.669329822063446, |
|
"epoch": 0.22673594709494568, |
|
"grad_norm": 18561.894559157903, |
|
"learning_rate": 4.545454545454545e-07, |
|
"logits": -1.4905359745025635, |
|
"logps": -83.4140396118164, |
|
"loss": 169.0661, |
|
"objective": 177.64450073242188, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.4245981276035309, |
|
"step": 80, |
|
"wo_beta": 16.333538055419922 |
|
}, |
|
{ |
|
"dpo_loss": 0.6659378409385681, |
|
"epoch": 0.2409069437883798, |
|
"grad_norm": 15942.37358833672, |
|
"learning_rate": 4.829545454545455e-07, |
|
"logits": -1.4715605974197388, |
|
"logps": -83.54389190673828, |
|
"loss": 171.1414, |
|
"objective": 182.98324584960938, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.4137464463710785, |
|
"step": 85, |
|
"wo_beta": 15.189921379089355 |
|
}, |
|
{ |
|
"dpo_loss": 0.6600526571273804, |
|
"epoch": 0.25507794048181387, |
|
"grad_norm": 16315.909705896804, |
|
"learning_rate": 5.113636363636363e-07, |
|
"logits": -1.571618914604187, |
|
"logps": -84.54931640625, |
|
"loss": 168.3022, |
|
"objective": 174.0519561767578, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.39906471967697144, |
|
"step": 90, |
|
"wo_beta": 15.517964363098145 |
|
}, |
|
{ |
|
"dpo_loss": 0.6545840501785278, |
|
"epoch": 0.269248937175248, |
|
"grad_norm": 17445.518244074756, |
|
"learning_rate": 5.397727272727273e-07, |
|
"logits": -1.49222731590271, |
|
"logps": -84.54743194580078, |
|
"loss": 168.7617, |
|
"objective": 175.46524047851562, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 0.4040308892726898, |
|
"step": 95, |
|
"wo_beta": 16.429697036743164 |
|
}, |
|
{ |
|
"dpo_loss": 0.6560600399971008, |
|
"epoch": 0.2834199338686821, |
|
"grad_norm": 16343.369412455128, |
|
"learning_rate": 5.681818181818182e-07, |
|
"logits": -1.370269775390625, |
|
"logps": -83.43912506103516, |
|
"loss": 156.9096, |
|
"objective": 160.82919311523438, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.3631521761417389, |
|
"step": 100, |
|
"wo_beta": 15.597589492797852 |
|
}, |
|
{ |
|
"epoch": 0.2834199338686821, |
|
"eval_dpo_loss": 0.6855058073997498, |
|
"eval_logits": -1.470232367515564, |
|
"eval_logps": -91.45095825195312, |
|
"eval_loss": 181.86407470703125, |
|
"eval_objective": 180.31504821777344, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.531573474407196, |
|
"eval_regularize": 0.41007429361343384, |
|
"eval_runtime": 479.0094, |
|
"eval_samples_per_second": 12.087, |
|
"eval_steps_per_second": 1.008, |
|
"eval_wo_beta": 16.373079299926758, |
|
"step": 100 |
|
}, |
|
{ |
|
"dpo_loss": 0.6687707901000977, |
|
"epoch": 0.2975909305621162, |
|
"grad_norm": 20737.972285358017, |
|
"learning_rate": 5.965909090909091e-07, |
|
"logits": -1.572224736213684, |
|
"logps": -86.08336639404297, |
|
"loss": 161.5898, |
|
"objective": 164.3712615966797, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.3831757605075836, |
|
"step": 105, |
|
"wo_beta": 14.803333282470703 |
|
}, |
|
{ |
|
"dpo_loss": 0.6561999917030334, |
|
"epoch": 0.3117619272555503, |
|
"grad_norm": 16188.43984842568, |
|
"learning_rate": 6.249999999999999e-07, |
|
"logits": -1.4707790613174438, |
|
"logps": -84.74868774414062, |
|
"loss": 158.3984, |
|
"objective": 159.52267456054688, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.38105159997940063, |
|
"step": 110, |
|
"wo_beta": 15.120772361755371 |
|
}, |
|
{ |
|
"dpo_loss": 0.6603504419326782, |
|
"epoch": 0.32593292394898443, |
|
"grad_norm": 16290.29619326225, |
|
"learning_rate": 6.534090909090909e-07, |
|
"logits": -1.4433757066726685, |
|
"logps": -83.40989685058594, |
|
"loss": 149.8614, |
|
"objective": 154.2146453857422, |
|
"ranking_idealized": 0.6208333373069763, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.3704533576965332, |
|
"step": 115, |
|
"wo_beta": 16.445148468017578 |
|
}, |
|
{ |
|
"dpo_loss": 0.6463068127632141, |
|
"epoch": 0.3401039206424185, |
|
"grad_norm": 15623.51190253056, |
|
"learning_rate": 6.818181818181817e-07, |
|
"logits": -1.4353134632110596, |
|
"logps": -83.36263275146484, |
|
"loss": 156.1384, |
|
"objective": 165.0032501220703, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.3623816668987274, |
|
"step": 120, |
|
"wo_beta": 15.72484302520752 |
|
}, |
|
{ |
|
"dpo_loss": 0.6474981904029846, |
|
"epoch": 0.35427491733585265, |
|
"grad_norm": 15992.631664901073, |
|
"learning_rate": 7.102272727272727e-07, |
|
"logits": -1.4708176851272583, |
|
"logps": -87.08245086669922, |
|
"loss": 148.8453, |
|
"objective": 139.25869750976562, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 0.3299652636051178, |
|
"step": 125, |
|
"wo_beta": 16.12550163269043 |
|
}, |
|
{ |
|
"dpo_loss": 0.6404248476028442, |
|
"epoch": 0.3684459140292867, |
|
"grad_norm": 18351.934143281596, |
|
"learning_rate": 7.386363636363636e-07, |
|
"logits": -1.4490153789520264, |
|
"logps": -85.12788391113281, |
|
"loss": 156.9957, |
|
"objective": 159.24720764160156, |
|
"ranking_idealized": 0.6458333134651184, |
|
"ranking_idealized_expo": 0.5916666388511658, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.3523053526878357, |
|
"step": 130, |
|
"wo_beta": 16.6445255279541 |
|
}, |
|
{ |
|
"dpo_loss": 0.6560899615287781, |
|
"epoch": 0.3826169107227208, |
|
"grad_norm": 23473.507695048622, |
|
"learning_rate": 7.670454545454545e-07, |
|
"logits": -1.4993882179260254, |
|
"logps": -85.93272399902344, |
|
"loss": 163.276, |
|
"objective": 171.45176696777344, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.3585022985935211, |
|
"step": 135, |
|
"wo_beta": 14.440187454223633 |
|
}, |
|
{ |
|
"dpo_loss": 0.6453083753585815, |
|
"epoch": 0.39678790741615494, |
|
"grad_norm": 18800.531975208964, |
|
"learning_rate": 7.954545454545454e-07, |
|
"logits": -1.4704848527908325, |
|
"logps": -84.99346160888672, |
|
"loss": 140.3663, |
|
"objective": 156.8263702392578, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.35928240418434143, |
|
"step": 140, |
|
"wo_beta": 14.692411422729492 |
|
}, |
|
{ |
|
"dpo_loss": 0.6320348978042603, |
|
"epoch": 0.410958904109589, |
|
"grad_norm": 16753.19118195896, |
|
"learning_rate": 8.238636363636363e-07, |
|
"logits": -1.481634259223938, |
|
"logps": -85.03217315673828, |
|
"loss": 148.4437, |
|
"objective": 142.04251098632812, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.5583333373069763, |
|
"ranking_simple": 0.6083333492279053, |
|
"regularize": 0.34694570302963257, |
|
"step": 145, |
|
"wo_beta": 14.103859901428223 |
|
}, |
|
{ |
|
"dpo_loss": 0.6397809386253357, |
|
"epoch": 0.42512990080302315, |
|
"grad_norm": 15467.131473675328, |
|
"learning_rate": 8.522727272727273e-07, |
|
"logits": -1.5027910470962524, |
|
"logps": -85.37592315673828, |
|
"loss": 145.838, |
|
"objective": 148.38160705566406, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.5541666746139526, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.3435940146446228, |
|
"step": 150, |
|
"wo_beta": 17.392671585083008 |
|
}, |
|
{ |
|
"epoch": 0.42512990080302315, |
|
"eval_dpo_loss": 0.6789573431015015, |
|
"eval_logits": -1.4503501653671265, |
|
"eval_logps": -90.70494842529297, |
|
"eval_loss": 180.64788818359375, |
|
"eval_objective": 178.1704864501953, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5383023023605347, |
|
"eval_regularize": 0.40225014090538025, |
|
"eval_runtime": 484.5212, |
|
"eval_samples_per_second": 11.95, |
|
"eval_steps_per_second": 0.997, |
|
"eval_wo_beta": 16.587987899780273, |
|
"step": 150 |
|
}, |
|
{ |
|
"dpo_loss": 0.6387067437171936, |
|
"epoch": 0.43930089749645723, |
|
"grad_norm": 15641.193562303264, |
|
"learning_rate": 8.806818181818182e-07, |
|
"logits": -1.5433834791183472, |
|
"logps": -83.86524200439453, |
|
"loss": 145.3558, |
|
"objective": 149.48431396484375, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.5041666626930237, |
|
"regularize": 0.3321545720100403, |
|
"step": 155, |
|
"wo_beta": 15.563851356506348 |
|
}, |
|
{ |
|
"dpo_loss": 0.6263092160224915, |
|
"epoch": 0.45347189418989137, |
|
"grad_norm": 17105.26137174702, |
|
"learning_rate": 9.09090909090909e-07, |
|
"logits": -1.4153720140457153, |
|
"logps": -85.28386688232422, |
|
"loss": 153.0504, |
|
"objective": 153.1988067626953, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.3481307625770569, |
|
"step": 160, |
|
"wo_beta": 14.662632942199707 |
|
}, |
|
{ |
|
"dpo_loss": 0.6309160590171814, |
|
"epoch": 0.46764289088332545, |
|
"grad_norm": 17759.815020595273, |
|
"learning_rate": 9.374999999999999e-07, |
|
"logits": -1.4963940382003784, |
|
"logps": -87.69454956054688, |
|
"loss": 139.2377, |
|
"objective": 131.2418670654297, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.3078847825527191, |
|
"step": 165, |
|
"wo_beta": 15.923318862915039 |
|
}, |
|
{ |
|
"dpo_loss": 0.6393815279006958, |
|
"epoch": 0.4818138875767596, |
|
"grad_norm": 14258.083724870265, |
|
"learning_rate": 9.65909090909091e-07, |
|
"logits": -1.525942087173462, |
|
"logps": -87.34074401855469, |
|
"loss": 149.6952, |
|
"objective": 141.63162231445312, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.3343699276447296, |
|
"step": 170, |
|
"wo_beta": 16.248130798339844 |
|
}, |
|
{ |
|
"dpo_loss": 0.6326501369476318, |
|
"epoch": 0.49598488427019366, |
|
"grad_norm": 15096.239809153309, |
|
"learning_rate": 9.943181818181817e-07, |
|
"logits": -1.3718321323394775, |
|
"logps": -87.4573745727539, |
|
"loss": 140.2749, |
|
"objective": 132.79156494140625, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.3121780455112457, |
|
"step": 175, |
|
"wo_beta": 17.698331832885742 |
|
}, |
|
{ |
|
"dpo_loss": 0.622785747051239, |
|
"epoch": 0.5101558809636277, |
|
"grad_norm": 16631.252094969073, |
|
"learning_rate": 9.999842657116664e-07, |
|
"logits": -1.3456240892410278, |
|
"logps": -86.42423248291016, |
|
"loss": 143.2666, |
|
"objective": 151.05718994140625, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 0.3372686207294464, |
|
"step": 180, |
|
"wo_beta": 14.807291030883789 |
|
}, |
|
{ |
|
"dpo_loss": 0.6099674701690674, |
|
"epoch": 0.5243268776570619, |
|
"grad_norm": 20691.36637721674, |
|
"learning_rate": 9.999203468625015e-07, |
|
"logits": -1.3633224964141846, |
|
"logps": -85.25286102294922, |
|
"loss": 132.6151, |
|
"objective": 133.30491638183594, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.3143846392631531, |
|
"step": 185, |
|
"wo_beta": 14.758675575256348 |
|
}, |
|
{ |
|
"dpo_loss": 0.596558690071106, |
|
"epoch": 0.538497874350496, |
|
"grad_norm": 16323.28292515014, |
|
"learning_rate": 9.998072663403656e-07, |
|
"logits": -1.4109238386154175, |
|
"logps": -83.85755157470703, |
|
"loss": 142.4777, |
|
"objective": 132.50650024414062, |
|
"ranking_idealized": 0.6625000238418579, |
|
"ranking_idealized_expo": 0.5625, |
|
"ranking_simple": 0.612500011920929, |
|
"regularize": 0.2925921082496643, |
|
"step": 190, |
|
"wo_beta": 17.561918258666992 |
|
}, |
|
{ |
|
"dpo_loss": 0.608472466468811, |
|
"epoch": 0.5526688710439301, |
|
"grad_norm": 14605.697671098327, |
|
"learning_rate": 9.99645035265485e-07, |
|
"logits": -1.426125407218933, |
|
"logps": -83.3570556640625, |
|
"loss": 148.3801, |
|
"objective": 154.04542541503906, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.3404535949230194, |
|
"step": 195, |
|
"wo_beta": 15.011509895324707 |
|
}, |
|
{ |
|
"dpo_loss": 0.6035653948783875, |
|
"epoch": 0.5668398677373642, |
|
"grad_norm": 14961.86824726212, |
|
"learning_rate": 9.99433669591504e-07, |
|
"logits": -1.4208530187606812, |
|
"logps": -83.7520523071289, |
|
"loss": 140.9398, |
|
"objective": 150.76983642578125, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.34441787004470825, |
|
"step": 200, |
|
"wo_beta": 16.120277404785156 |
|
}, |
|
{ |
|
"epoch": 0.5668398677373642, |
|
"eval_dpo_loss": 0.6803466081619263, |
|
"eval_logits": -1.3894833326339722, |
|
"eval_logps": -90.33295440673828, |
|
"eval_loss": 184.49874877929688, |
|
"eval_objective": 181.54510498046875, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.532608687877655, |
|
"eval_regularize": 0.4101283848285675, |
|
"eval_runtime": 475.1985, |
|
"eval_samples_per_second": 12.184, |
|
"eval_steps_per_second": 1.016, |
|
"eval_wo_beta": 16.141496658325195, |
|
"step": 200 |
|
}, |
|
{ |
|
"dpo_loss": 0.6270676255226135, |
|
"epoch": 0.5810108644307983, |
|
"grad_norm": 16340.681317011602, |
|
"learning_rate": 9.991731901039136e-07, |
|
"logits": -1.283570647239685, |
|
"logps": -84.95980834960938, |
|
"loss": 136.3843, |
|
"objective": 133.73294067382812, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.3290613889694214, |
|
"step": 205, |
|
"wo_beta": 16.529329299926758 |
|
}, |
|
{ |
|
"dpo_loss": 0.6101997494697571, |
|
"epoch": 0.5951818611242324, |
|
"grad_norm": 16979.514024444066, |
|
"learning_rate": 9.988636224180095e-07, |
|
"logits": -1.3387362957000732, |
|
"logps": -85.54541015625, |
|
"loss": 149.2125, |
|
"objective": 162.19125366210938, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.3633294999599457, |
|
"step": 210, |
|
"wo_beta": 15.476922988891602 |
|
}, |
|
{ |
|
"dpo_loss": 0.5931335687637329, |
|
"epoch": 0.6093528578176665, |
|
"grad_norm": 16588.23739039735, |
|
"learning_rate": 9.985049969763719e-07, |
|
"logits": -1.458817720413208, |
|
"logps": -84.46039581298828, |
|
"loss": 133.2822, |
|
"objective": 143.83396911621094, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.3306182324886322, |
|
"step": 215, |
|
"wo_beta": 16.599056243896484 |
|
}, |
|
{ |
|
"dpo_loss": 0.6022905111312866, |
|
"epoch": 0.6235238545111006, |
|
"grad_norm": 17119.52021011513, |
|
"learning_rate": 9.980973490458728e-07, |
|
"logits": -1.4839917421340942, |
|
"logps": -84.08710479736328, |
|
"loss": 143.4095, |
|
"objective": 144.29782104492188, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 0.32684001326560974, |
|
"step": 220, |
|
"wo_beta": 16.91693878173828 |
|
}, |
|
{ |
|
"dpo_loss": 0.5977518558502197, |
|
"epoch": 0.6376948512045347, |
|
"grad_norm": 14023.197866950057, |
|
"learning_rate": 9.976407187142064e-07, |
|
"logits": -1.534485936164856, |
|
"logps": -85.1946792602539, |
|
"loss": 138.8846, |
|
"objective": 137.76622009277344, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.31227758526802063, |
|
"step": 225, |
|
"wo_beta": 15.36359691619873 |
|
}, |
|
{ |
|
"dpo_loss": 0.5947220921516418, |
|
"epoch": 0.6518658478979689, |
|
"grad_norm": 14605.487004157298, |
|
"learning_rate": 9.971351508859486e-07, |
|
"logits": -1.439586877822876, |
|
"logps": -85.27981567382812, |
|
"loss": 124.6336, |
|
"objective": 121.90718078613281, |
|
"ranking_idealized": 0.637499988079071, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.2932659685611725, |
|
"step": 230, |
|
"wo_beta": 17.20786476135254 |
|
}, |
|
{ |
|
"dpo_loss": 0.6003122925758362, |
|
"epoch": 0.6660368445914029, |
|
"grad_norm": 16685.644038837043, |
|
"learning_rate": 9.9658069527814e-07, |
|
"logits": -1.3658267259597778, |
|
"logps": -86.23738098144531, |
|
"loss": 121.5208, |
|
"objective": 116.9168472290039, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.2670106589794159, |
|
"step": 235, |
|
"wo_beta": 16.473398208618164 |
|
}, |
|
{ |
|
"dpo_loss": 0.5931513905525208, |
|
"epoch": 0.680207841284837, |
|
"grad_norm": 18082.47037845429, |
|
"learning_rate": 9.959774064153975e-07, |
|
"logits": -1.5063189268112183, |
|
"logps": -85.80690002441406, |
|
"loss": 131.6654, |
|
"objective": 136.83932495117188, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.29019126296043396, |
|
"step": 240, |
|
"wo_beta": 16.562297821044922 |
|
}, |
|
{ |
|
"dpo_loss": 0.6120952367782593, |
|
"epoch": 0.6943788379782712, |
|
"grad_norm": 16231.64241500278, |
|
"learning_rate": 9.953253436245516e-07, |
|
"logits": -1.5183242559432983, |
|
"logps": -85.21266174316406, |
|
"loss": 120.6441, |
|
"objective": 111.80670928955078, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.2561970055103302, |
|
"step": 245, |
|
"wo_beta": 16.04390525817871 |
|
}, |
|
{ |
|
"dpo_loss": 0.5938802361488342, |
|
"epoch": 0.7085498346717053, |
|
"grad_norm": 15964.589309173105, |
|
"learning_rate": 9.94624571028813e-07, |
|
"logits": -1.3114020824432373, |
|
"logps": -83.990478515625, |
|
"loss": 131.1439, |
|
"objective": 132.464599609375, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.29094573855400085, |
|
"step": 250, |
|
"wo_beta": 15.082120895385742 |
|
}, |
|
{ |
|
"epoch": 0.7085498346717053, |
|
"eval_dpo_loss": 0.6797458529472351, |
|
"eval_logits": -1.478871464729309, |
|
"eval_logps": -91.22461700439453, |
|
"eval_loss": 182.20773315429688, |
|
"eval_objective": 178.44094848632812, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.532608687877655, |
|
"eval_regularize": 0.4058575928211212, |
|
"eval_runtime": 475.3374, |
|
"eval_samples_per_second": 12.181, |
|
"eval_steps_per_second": 1.016, |
|
"eval_wo_beta": 16.368268966674805, |
|
"step": 250 |
|
}, |
|
{ |
|
"dpo_loss": 0.6089769005775452, |
|
"epoch": 0.7227208313651393, |
|
"grad_norm": 14595.146016283225, |
|
"learning_rate": 9.938751575414661e-07, |
|
"logits": -1.5532639026641846, |
|
"logps": -83.39389038085938, |
|
"loss": 133.1451, |
|
"objective": 121.37617492675781, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.42916667461395264, |
|
"ranking_simple": 0.5041666626930237, |
|
"regularize": 0.2965226471424103, |
|
"step": 255, |
|
"wo_beta": 15.059760093688965 |
|
}, |
|
{ |
|
"dpo_loss": 0.5949603915214539, |
|
"epoch": 0.7368918280585735, |
|
"grad_norm": 15154.916516529278, |
|
"learning_rate": 9.930771768590933e-07, |
|
"logits": -1.5184205770492554, |
|
"logps": -85.99275970458984, |
|
"loss": 128.7971, |
|
"objective": 149.26617431640625, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.4625000059604645, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.3260208070278168, |
|
"step": 260, |
|
"wo_beta": 15.812520027160645 |
|
}, |
|
{ |
|
"dpo_loss": 0.5942420959472656, |
|
"epoch": 0.7510628247520076, |
|
"grad_norm": 13672.874013609171, |
|
"learning_rate": 9.92230707454326e-07, |
|
"logits": -1.438194990158081, |
|
"logps": -86.4264907836914, |
|
"loss": 119.4127, |
|
"objective": 127.40038299560547, |
|
"ranking_idealized": 0.6625000238418579, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.6416666507720947, |
|
"regularize": 0.29675182700157166, |
|
"step": 265, |
|
"wo_beta": 16.794330596923828 |
|
}, |
|
{ |
|
"dpo_loss": 0.6142985224723816, |
|
"epoch": 0.7652338214454416, |
|
"grad_norm": 14406.751122728363, |
|
"learning_rate": 9.91335832568129e-07, |
|
"logits": -1.5249485969543457, |
|
"logps": -87.38147735595703, |
|
"loss": 129.203, |
|
"objective": 141.37374877929688, |
|
"ranking_idealized": 0.5249999761581421, |
|
"ranking_idealized_expo": 0.4583333432674408, |
|
"ranking_simple": 0.4749999940395355, |
|
"regularize": 0.2984028458595276, |
|
"step": 270, |
|
"wo_beta": 14.417384147644043 |
|
}, |
|
{ |
|
"dpo_loss": 0.5954193472862244, |
|
"epoch": 0.7794048181388757, |
|
"grad_norm": 17040.572933936153, |
|
"learning_rate": 9.90392640201615e-07, |
|
"logits": -1.3636622428894043, |
|
"logps": -86.6485595703125, |
|
"loss": 118.1932, |
|
"objective": 113.61885833740234, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.2610357701778412, |
|
"step": 275, |
|
"wo_beta": 15.509458541870117 |
|
}, |
|
{ |
|
"dpo_loss": 0.5917832851409912, |
|
"epoch": 0.7935758148323099, |
|
"grad_norm": 17559.793763685935, |
|
"learning_rate": 9.894012231073895e-07, |
|
"logits": -1.4590952396392822, |
|
"logps": -87.64340209960938, |
|
"loss": 132.6812, |
|
"objective": 137.506103515625, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.2935800850391388, |
|
"step": 280, |
|
"wo_beta": 15.80073070526123 |
|
}, |
|
{ |
|
"dpo_loss": 0.5836724042892456, |
|
"epoch": 0.807746811525744, |
|
"grad_norm": 14579.651979817574, |
|
"learning_rate": 9.88361678780429e-07, |
|
"logits": -1.4701313972473145, |
|
"logps": -88.11650085449219, |
|
"loss": 118.3926, |
|
"objective": 111.54865264892578, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.2552913427352905, |
|
"step": 285, |
|
"wo_beta": 16.792234420776367 |
|
}, |
|
{ |
|
"dpo_loss": 0.5677815079689026, |
|
"epoch": 0.821917808219178, |
|
"grad_norm": 15029.308678016287, |
|
"learning_rate": 9.872741094484964e-07, |
|
"logits": -1.500461220741272, |
|
"logps": -86.58364868164062, |
|
"loss": 116.8557, |
|
"objective": 106.32292175292969, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.24703934788703918, |
|
"step": 290, |
|
"wo_beta": 16.14396095275879 |
|
}, |
|
{ |
|
"dpo_loss": 0.5853282809257507, |
|
"epoch": 0.8360888049126122, |
|
"grad_norm": 15120.877217642179, |
|
"learning_rate": 9.86138622062085e-07, |
|
"logits": -1.494510293006897, |
|
"logps": -86.35259246826172, |
|
"loss": 116.1266, |
|
"objective": 112.15760803222656, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.2589784264564514, |
|
"step": 295, |
|
"wo_beta": 16.28350257873535 |
|
}, |
|
{ |
|
"dpo_loss": 0.5893528461456299, |
|
"epoch": 0.8502598016060463, |
|
"grad_norm": 14818.401223627045, |
|
"learning_rate": 9.849553282839024e-07, |
|
"logits": -1.4687484502792358, |
|
"logps": -85.012939453125, |
|
"loss": 118.3192, |
|
"objective": 113.60901641845703, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.26101434230804443, |
|
"step": 300, |
|
"wo_beta": 15.157808303833008 |
|
}, |
|
{ |
|
"epoch": 0.8502598016060463, |
|
"eval_dpo_loss": 0.681740403175354, |
|
"eval_logits": -1.4551842212677002, |
|
"eval_logps": -92.57705688476562, |
|
"eval_loss": 183.44589233398438, |
|
"eval_objective": 180.4713592529297, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.532608687877655, |
|
"eval_regularize": 0.41230443120002747, |
|
"eval_runtime": 479.855, |
|
"eval_samples_per_second": 12.066, |
|
"eval_steps_per_second": 1.007, |
|
"eval_wo_beta": 16.404129028320312, |
|
"step": 300 |
|
}, |
|
{ |
|
"dpo_loss": 0.5834535956382751, |
|
"epoch": 0.8644307982994804, |
|
"grad_norm": 14881.03810672454, |
|
"learning_rate": 9.837243444778899e-07, |
|
"logits": -1.4318089485168457, |
|
"logps": -85.52223205566406, |
|
"loss": 117.2997, |
|
"objective": 119.20571899414062, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.6083333492279053, |
|
"regularize": 0.2612408697605133, |
|
"step": 305, |
|
"wo_beta": 15.858050346374512 |
|
}, |
|
{ |
|
"dpo_loss": 0.5729119181632996, |
|
"epoch": 0.8786017949929145, |
|
"grad_norm": 13728.643717044331, |
|
"learning_rate": 9.824457916977784e-07, |
|
"logits": -1.430962085723877, |
|
"logps": -84.47950744628906, |
|
"loss": 113.9787, |
|
"objective": 119.12039184570312, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.2695327699184418, |
|
"step": 310, |
|
"wo_beta": 15.427461624145508 |
|
}, |
|
{ |
|
"dpo_loss": 0.5748838782310486, |
|
"epoch": 0.8927727916863486, |
|
"grad_norm": 15353.814970462101, |
|
"learning_rate": 9.81119795675185e-07, |
|
"logits": -1.4459213018417358, |
|
"logps": -83.27306365966797, |
|
"loss": 112.487, |
|
"objective": 110.93157196044922, |
|
"ranking_idealized": 0.5541666746139526, |
|
"ranking_idealized_expo": 0.47083333134651184, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.2502378225326538, |
|
"step": 315, |
|
"wo_beta": 15.45988941192627 |
|
}, |
|
{ |
|
"dpo_loss": 0.5748109221458435, |
|
"epoch": 0.9069437883797827, |
|
"grad_norm": 15007.545319328356, |
|
"learning_rate": 9.797464868072486e-07, |
|
"logits": -1.4066462516784668, |
|
"logps": -86.03001403808594, |
|
"loss": 110.898, |
|
"objective": 109.38225555419922, |
|
"ranking_idealized": 0.6208333373069763, |
|
"ranking_idealized_expo": 0.5583333373069763, |
|
"ranking_simple": 0.6083333492279053, |
|
"regularize": 0.2464234083890915, |
|
"step": 320, |
|
"wo_beta": 15.732470512390137 |
|
}, |
|
{ |
|
"dpo_loss": 0.5822945833206177, |
|
"epoch": 0.9211147850732169, |
|
"grad_norm": 13633.021631468031, |
|
"learning_rate": 9.783260001438066e-07, |
|
"logits": -1.4706979990005493, |
|
"logps": -87.00752258300781, |
|
"loss": 114.9803, |
|
"objective": 106.17591857910156, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.25146251916885376, |
|
"step": 325, |
|
"wo_beta": 15.325220108032227 |
|
}, |
|
{ |
|
"dpo_loss": 0.5598011016845703, |
|
"epoch": 0.9352857817666509, |
|
"grad_norm": 14695.63914534257, |
|
"learning_rate": 9.768584753741134e-07, |
|
"logits": -1.3177284002304077, |
|
"logps": -86.90360260009766, |
|
"loss": 116.6896, |
|
"objective": 123.9805679321289, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.2586965262889862, |
|
"step": 330, |
|
"wo_beta": 16.747480392456055 |
|
}, |
|
{ |
|
"dpo_loss": 0.5746142864227295, |
|
"epoch": 0.949456778460085, |
|
"grad_norm": 14706.814411020761, |
|
"learning_rate": 9.753440568131054e-07, |
|
"logits": -1.3514246940612793, |
|
"logps": -86.81550598144531, |
|
"loss": 115.5651, |
|
"objective": 113.5698471069336, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.25022396445274353, |
|
"step": 335, |
|
"wo_beta": 15.857748031616211 |
|
}, |
|
{ |
|
"dpo_loss": 0.5717839598655701, |
|
"epoch": 0.9636277751535192, |
|
"grad_norm": 13577.369360499106, |
|
"learning_rate": 9.737828933872073e-07, |
|
"logits": -1.400834321975708, |
|
"logps": -85.29247283935547, |
|
"loss": 118.1002, |
|
"objective": 108.19886779785156, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.24417272210121155, |
|
"step": 340, |
|
"wo_beta": 16.27320098876953 |
|
}, |
|
{ |
|
"dpo_loss": 0.5746095776557922, |
|
"epoch": 0.9777987718469532, |
|
"grad_norm": 13673.428728913288, |
|
"learning_rate": 9.721751386196885e-07, |
|
"logits": -1.4508498907089233, |
|
"logps": -84.16486358642578, |
|
"loss": 110.1951, |
|
"objective": 103.0552749633789, |
|
"ranking_idealized": 0.6458333134651184, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.6291666626930237, |
|
"regularize": 0.23596970736980438, |
|
"step": 345, |
|
"wo_beta": 15.449429512023926 |
|
}, |
|
{ |
|
"dpo_loss": 0.5632264018058777, |
|
"epoch": 0.9919697685403873, |
|
"grad_norm": 13613.304013119689, |
|
"learning_rate": 9.705209506155634e-07, |
|
"logits": -1.3619670867919922, |
|
"logps": -86.77315521240234, |
|
"loss": 108.5029, |
|
"objective": 110.73800659179688, |
|
"ranking_idealized": 0.6583333611488342, |
|
"ranking_idealized_expo": 0.574999988079071, |
|
"ranking_simple": 0.625, |
|
"regularize": 0.26065030694007874, |
|
"step": 350, |
|
"wo_beta": 15.869379997253418 |
|
}, |
|
{ |
|
"epoch": 0.9919697685403873, |
|
"eval_dpo_loss": 0.678183376789093, |
|
"eval_logits": -1.4316504001617432, |
|
"eval_logps": -92.18038177490234, |
|
"eval_loss": 183.9593048095703, |
|
"eval_objective": 180.11509704589844, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5320910811424255, |
|
"eval_regularize": 0.40945151448249817, |
|
"eval_runtime": 476.2355, |
|
"eval_samples_per_second": 12.158, |
|
"eval_steps_per_second": 1.014, |
|
"eval_wo_beta": 16.336669921875, |
|
"step": 350 |
|
}, |
|
{ |
|
"dpo_loss": 0.5633755326271057, |
|
"epoch": 1.0061407652338215, |
|
"grad_norm": 13717.944611215353, |
|
"learning_rate": 9.688204920460466e-07, |
|
"logits": -1.542311191558838, |
|
"logps": -84.23912811279297, |
|
"loss": 104.9579, |
|
"objective": 99.2624740600586, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.2348737269639969, |
|
"step": 355, |
|
"wo_beta": 16.799049377441406 |
|
}, |
|
{ |
|
"dpo_loss": 0.5596449971199036, |
|
"epoch": 1.0203117619272555, |
|
"grad_norm": 15569.178838691118, |
|
"learning_rate": 9.670739301325534e-07, |
|
"logits": -1.4423024654388428, |
|
"logps": -84.60731506347656, |
|
"loss": 97.354, |
|
"objective": 96.60607147216797, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.2134130448102951, |
|
"step": 360, |
|
"wo_beta": 16.611034393310547 |
|
}, |
|
{ |
|
"dpo_loss": 0.5573465824127197, |
|
"epoch": 1.0344827586206897, |
|
"grad_norm": 14412.61274623368, |
|
"learning_rate": 9.652814366302568e-07, |
|
"logits": -1.4710925817489624, |
|
"logps": -84.47969818115234, |
|
"loss": 109.2182, |
|
"objective": 110.00160217285156, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.2383657544851303, |
|
"step": 365, |
|
"wo_beta": 14.846599578857422 |
|
}, |
|
{ |
|
"dpo_loss": 0.559634804725647, |
|
"epoch": 1.0486537553141237, |
|
"grad_norm": 15121.427522934051, |
|
"learning_rate": 9.63443187811197e-07, |
|
"logits": -1.407724142074585, |
|
"logps": -82.60728454589844, |
|
"loss": 94.8917, |
|
"objective": 93.84876251220703, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.21665388345718384, |
|
"step": 370, |
|
"wo_beta": 15.743396759033203 |
|
}, |
|
{ |
|
"dpo_loss": 0.5503371357917786, |
|
"epoch": 1.0628247520075578, |
|
"grad_norm": 14225.520073845873, |
|
"learning_rate": 9.61559364446946e-07, |
|
"logits": -1.4566776752471924, |
|
"logps": -84.27056121826172, |
|
"loss": 96.0324, |
|
"objective": 91.85355377197266, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.20962905883789062, |
|
"step": 375, |
|
"wo_beta": 16.301313400268555 |
|
}, |
|
{ |
|
"dpo_loss": 0.5628603100776672, |
|
"epoch": 1.076995748700992, |
|
"grad_norm": 14108.362094897184, |
|
"learning_rate": 9.596301517908328e-07, |
|
"logits": -1.4387798309326172, |
|
"logps": -86.27851867675781, |
|
"loss": 98.7923, |
|
"objective": 108.01164245605469, |
|
"ranking_idealized": 0.6625000238418579, |
|
"ranking_idealized_expo": 0.574999988079071, |
|
"ranking_simple": 0.6291666626930237, |
|
"regularize": 0.2488705962896347, |
|
"step": 380, |
|
"wo_beta": 15.773112297058105 |
|
}, |
|
{ |
|
"dpo_loss": 0.5771984457969666, |
|
"epoch": 1.091166745394426, |
|
"grad_norm": 13105.168740611702, |
|
"learning_rate": 9.576557395597236e-07, |
|
"logits": -1.4021495580673218, |
|
"logps": -85.1259536743164, |
|
"loss": 99.6716, |
|
"objective": 109.83814239501953, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5583333373069763, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.23721593618392944, |
|
"step": 385, |
|
"wo_beta": 15.801533699035645 |
|
}, |
|
{ |
|
"dpo_loss": 0.5509606599807739, |
|
"epoch": 1.10533774208786, |
|
"grad_norm": 13663.873020268169, |
|
"learning_rate": 9.556363219153662e-07, |
|
"logits": -1.3366678953170776, |
|
"logps": -86.07147979736328, |
|
"loss": 96.1117, |
|
"objective": 90.10648345947266, |
|
"ranking_idealized": 0.637499988079071, |
|
"ranking_idealized_expo": 0.5625, |
|
"ranking_simple": 0.6333333253860474, |
|
"regularize": 0.2138025164604187, |
|
"step": 390, |
|
"wo_beta": 16.90329933166504 |
|
}, |
|
{ |
|
"dpo_loss": 0.5398973822593689, |
|
"epoch": 1.1195087387812943, |
|
"grad_norm": 14913.448008058538, |
|
"learning_rate": 9.53572097445297e-07, |
|
"logits": -1.3910351991653442, |
|
"logps": -84.76091766357422, |
|
"loss": 99.588, |
|
"objective": 102.71925354003906, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.21998313069343567, |
|
"step": 395, |
|
"wo_beta": 14.880850791931152 |
|
}, |
|
{ |
|
"dpo_loss": 0.5650266408920288, |
|
"epoch": 1.1336797354747283, |
|
"grad_norm": 14606.821946811386, |
|
"learning_rate": 9.514632691433106e-07, |
|
"logits": -1.4497681856155396, |
|
"logps": -82.1307373046875, |
|
"loss": 104.6813, |
|
"objective": 107.99799346923828, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.23726312816143036, |
|
"step": 400, |
|
"wo_beta": 15.854341506958008 |
|
}, |
|
{ |
|
"epoch": 1.1336797354747283, |
|
"eval_dpo_loss": 0.6800939440727234, |
|
"eval_logits": -1.3930206298828125, |
|
"eval_logps": -89.72613525390625, |
|
"eval_loss": 183.87586975097656, |
|
"eval_objective": 180.28396606445312, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5310559272766113, |
|
"eval_regularize": 0.40940526127815247, |
|
"eval_runtime": 478.3822, |
|
"eval_samples_per_second": 12.103, |
|
"eval_steps_per_second": 1.01, |
|
"eval_wo_beta": 16.22085189819336, |
|
"step": 400 |
|
}, |
|
{ |
|
"dpo_loss": 0.5639857053756714, |
|
"epoch": 1.1478507321681626, |
|
"grad_norm": 15414.866076924996, |
|
"learning_rate": 9.493100443894984e-07, |
|
"logits": -1.416764736175537, |
|
"logps": -84.40596771240234, |
|
"loss": 97.7792, |
|
"objective": 106.99815368652344, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.4625000059604645, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.22935108840465546, |
|
"step": 405, |
|
"wo_beta": 17.16730499267578 |
|
}, |
|
{ |
|
"dpo_loss": 0.5612814426422119, |
|
"epoch": 1.1620217288615966, |
|
"grad_norm": 13730.11308532576, |
|
"learning_rate": 9.471126349298556e-07, |
|
"logits": -1.4282060861587524, |
|
"logps": -84.3336410522461, |
|
"loss": 96.1344, |
|
"objective": 93.89948272705078, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.20958545804023743, |
|
"step": 410, |
|
"wo_beta": 16.73971939086914 |
|
}, |
|
{ |
|
"dpo_loss": 0.5569156408309937, |
|
"epoch": 1.1761927255550306, |
|
"grad_norm": 11975.058144386021, |
|
"learning_rate": 9.448712568554571e-07, |
|
"logits": -1.3549463748931885, |
|
"logps": -83.00645446777344, |
|
"loss": 93.1875, |
|
"objective": 96.11307525634766, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.22849011421203613, |
|
"step": 415, |
|
"wo_beta": 16.471454620361328 |
|
}, |
|
{ |
|
"dpo_loss": 0.5578625202178955, |
|
"epoch": 1.1903637222484649, |
|
"grad_norm": 13553.103377125492, |
|
"learning_rate": 9.425861305812081e-07, |
|
"logits": -1.3200798034667969, |
|
"logps": -84.18423461914062, |
|
"loss": 99.8958, |
|
"objective": 90.86384582519531, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.2039288878440857, |
|
"step": 420, |
|
"wo_beta": 16.64999008178711 |
|
}, |
|
{ |
|
"dpo_loss": 0.5598068237304688, |
|
"epoch": 1.204534718941899, |
|
"grad_norm": 13382.98806426423, |
|
"learning_rate": 9.40257480824169e-07, |
|
"logits": -1.368670105934143, |
|
"logps": -82.51498413085938, |
|
"loss": 95.7898, |
|
"objective": 98.82903289794922, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.21656714379787445, |
|
"step": 425, |
|
"wo_beta": 15.240234375 |
|
}, |
|
{ |
|
"dpo_loss": 0.5631528496742249, |
|
"epoch": 1.2187057156353331, |
|
"grad_norm": 13379.590249575365, |
|
"learning_rate": 9.378855365814557e-07, |
|
"logits": -1.3373157978057861, |
|
"logps": -84.1694107055664, |
|
"loss": 89.0871, |
|
"objective": 83.64144897460938, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.19078685343265533, |
|
"step": 430, |
|
"wo_beta": 16.387685775756836 |
|
}, |
|
{ |
|
"dpo_loss": 0.5549448132514954, |
|
"epoch": 1.2328767123287672, |
|
"grad_norm": 13813.435024161312, |
|
"learning_rate": 9.354705311077218e-07, |
|
"logits": -1.287793755531311, |
|
"logps": -83.4052963256836, |
|
"loss": 93.9205, |
|
"objective": 94.07813262939453, |
|
"ranking_idealized": 0.6458333134651184, |
|
"ranking_idealized_expo": 0.5958333611488342, |
|
"ranking_simple": 0.637499988079071, |
|
"regularize": 0.21654988825321198, |
|
"step": 435, |
|
"wo_beta": 17.72869110107422 |
|
}, |
|
{ |
|
"dpo_loss": 0.5550996661186218, |
|
"epoch": 1.2470477090222012, |
|
"grad_norm": 15408.139135942378, |
|
"learning_rate": 9.330127018922193e-07, |
|
"logits": -1.302925705909729, |
|
"logps": -83.39546203613281, |
|
"loss": 87.7477, |
|
"objective": 81.88467407226562, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.5541666746139526, |
|
"ranking_simple": 0.612500011920929, |
|
"regularize": 0.18918146193027496, |
|
"step": 440, |
|
"wo_beta": 15.06383991241455 |
|
}, |
|
{ |
|
"dpo_loss": 0.544273316860199, |
|
"epoch": 1.2612187057156352, |
|
"grad_norm": 13358.127194753248, |
|
"learning_rate": 9.305122906354448e-07, |
|
"logits": -1.3234721422195435, |
|
"logps": -85.1892318725586, |
|
"loss": 91.347, |
|
"objective": 87.14881896972656, |
|
"ranking_idealized": 0.637499988079071, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.6208333373069763, |
|
"regularize": 0.2082992047071457, |
|
"step": 445, |
|
"wo_beta": 17.48933219909668 |
|
}, |
|
{ |
|
"dpo_loss": 0.5497770309448242, |
|
"epoch": 1.2753897024090695, |
|
"grad_norm": 13860.879601223209, |
|
"learning_rate": 9.279695432253708e-07, |
|
"logits": -1.4758702516555786, |
|
"logps": -84.91988372802734, |
|
"loss": 90.5585, |
|
"objective": 87.8936996459961, |
|
"ranking_idealized": 0.5541666746139526, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.2043653279542923, |
|
"step": 450, |
|
"wo_beta": 15.034831047058105 |
|
}, |
|
{ |
|
"epoch": 1.2753897024090695, |
|
"eval_dpo_loss": 0.6794779300689697, |
|
"eval_logits": -1.3663489818572998, |
|
"eval_logps": -91.20365905761719, |
|
"eval_loss": 184.06732177734375, |
|
"eval_objective": 180.62957763671875, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5357142686843872, |
|
"eval_regularize": 0.41047051548957825, |
|
"eval_runtime": 479.5938, |
|
"eval_samples_per_second": 12.073, |
|
"eval_steps_per_second": 1.007, |
|
"eval_wo_beta": 16.288923263549805, |
|
"step": 450 |
|
}, |
|
{ |
|
"dpo_loss": 0.5493736267089844, |
|
"epoch": 1.2895606991025035, |
|
"grad_norm": 12737.57571248245, |
|
"learning_rate": 9.253847097132655e-07, |
|
"logits": -1.2778384685516357, |
|
"logps": -85.39282989501953, |
|
"loss": 90.8388, |
|
"objective": 97.43504333496094, |
|
"ranking_idealized": 0.5458333492279053, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.21359196305274963, |
|
"step": 455, |
|
"wo_beta": 15.552309036254883 |
|
}, |
|
{ |
|
"dpo_loss": 0.5543228983879089, |
|
"epoch": 1.3037316957959377, |
|
"grad_norm": 14070.394055394958, |
|
"learning_rate": 9.227580442891021e-07, |
|
"logits": -1.3934885263442993, |
|
"logps": -84.22640991210938, |
|
"loss": 89.7715, |
|
"objective": 87.21723175048828, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.1974954754114151, |
|
"step": 460, |
|
"wo_beta": 16.378904342651367 |
|
}, |
|
{ |
|
"dpo_loss": 0.5476227402687073, |
|
"epoch": 1.3179026924893718, |
|
"grad_norm": 11947.40976577932, |
|
"learning_rate": 9.200898052565637e-07, |
|
"logits": -1.3618992567062378, |
|
"logps": -82.62676239013672, |
|
"loss": 89.4031, |
|
"objective": 95.53166961669922, |
|
"ranking_idealized": 0.5541666746139526, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.2165236622095108, |
|
"step": 465, |
|
"wo_beta": 14.700522422790527 |
|
}, |
|
{ |
|
"dpo_loss": 0.5633279085159302, |
|
"epoch": 1.3320736891828058, |
|
"grad_norm": 14427.62714295139, |
|
"learning_rate": 9.173802550076401e-07, |
|
"logits": -1.4394139051437378, |
|
"logps": -81.61421203613281, |
|
"loss": 96.3098, |
|
"objective": 105.95228576660156, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.22128254175186157, |
|
"step": 470, |
|
"wo_beta": 14.451654434204102 |
|
}, |
|
{ |
|
"dpo_loss": 0.5512283444404602, |
|
"epoch": 1.34624468587624, |
|
"grad_norm": 15510.676068153169, |
|
"learning_rate": 9.146296599968258e-07, |
|
"logits": -1.334899663925171, |
|
"logps": -84.10041809082031, |
|
"loss": 85.2643, |
|
"objective": 97.66104125976562, |
|
"ranking_idealized": 0.6416666507720947, |
|
"ranking_idealized_expo": 0.574999988079071, |
|
"ranking_simple": 0.6208333373069763, |
|
"regularize": 0.22577306628227234, |
|
"step": 475, |
|
"wo_beta": 16.777812957763672 |
|
}, |
|
{ |
|
"dpo_loss": 0.5490090847015381, |
|
"epoch": 1.360415682569674, |
|
"grad_norm": 13039.859969979723, |
|
"learning_rate": 9.118382907149163e-07, |
|
"logits": -1.396318793296814, |
|
"logps": -84.05583953857422, |
|
"loss": 92.9048, |
|
"objective": 106.32127380371094, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 0.22388581931591034, |
|
"step": 480, |
|
"wo_beta": 18.35649871826172 |
|
}, |
|
{ |
|
"dpo_loss": 0.55390864610672, |
|
"epoch": 1.3745866792631083, |
|
"grad_norm": 14303.492597277622, |
|
"learning_rate": 9.090064216624092e-07, |
|
"logits": -1.3780549764633179, |
|
"logps": -81.48451232910156, |
|
"loss": 89.9123, |
|
"objective": 85.18955993652344, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.19940294325351715, |
|
"step": 485, |
|
"wo_beta": 16.11130714416504 |
|
}, |
|
{ |
|
"dpo_loss": 0.5646805167198181, |
|
"epoch": 1.3887576759565423, |
|
"grad_norm": 13569.748240897005, |
|
"learning_rate": 9.061343313225087e-07, |
|
"logits": -1.3297451734542847, |
|
"logps": -84.58447265625, |
|
"loss": 91.7915, |
|
"objective": 92.44489288330078, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.20839503407478333, |
|
"step": 490, |
|
"wo_beta": 15.887747764587402 |
|
}, |
|
{ |
|
"dpo_loss": 0.5439994931221008, |
|
"epoch": 1.4029286726499763, |
|
"grad_norm": 14224.725006990095, |
|
"learning_rate": 9.032223021337413e-07, |
|
"logits": -1.3493283987045288, |
|
"logps": -84.9798355102539, |
|
"loss": 89.0675, |
|
"objective": 84.06017303466797, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.1910681426525116, |
|
"step": 495, |
|
"wo_beta": 15.590251922607422 |
|
}, |
|
{ |
|
"dpo_loss": 0.539610743522644, |
|
"epoch": 1.4170996693434104, |
|
"grad_norm": 14123.937473491551, |
|
"learning_rate": 9.002706204621802e-07, |
|
"logits": -1.278394341468811, |
|
"logps": -83.08454132080078, |
|
"loss": 91.2372, |
|
"objective": 89.69623565673828, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.20472820103168488, |
|
"step": 500, |
|
"wo_beta": 15.177144050598145 |
|
}, |
|
{ |
|
"epoch": 1.4170996693434104, |
|
"eval_dpo_loss": 0.6782248020172119, |
|
"eval_logits": -1.3281084299087524, |
|
"eval_logps": -89.4298095703125, |
|
"eval_loss": 185.71939086914062, |
|
"eval_objective": 180.8789520263672, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.534679114818573, |
|
"eval_regularize": 0.41098901629447937, |
|
"eval_runtime": 475.7419, |
|
"eval_samples_per_second": 12.17, |
|
"eval_steps_per_second": 1.015, |
|
"eval_wo_beta": 16.044300079345703, |
|
"step": 500 |
|
}, |
|
{ |
|
"dpo_loss": 0.5400077700614929, |
|
"epoch": 1.4312706660368446, |
|
"grad_norm": 13097.852775439285, |
|
"learning_rate": 8.972795765732846e-07, |
|
"logits": -1.3413732051849365, |
|
"logps": -82.83694458007812, |
|
"loss": 96.4414, |
|
"objective": 99.75823211669922, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.20853710174560547, |
|
"step": 505, |
|
"wo_beta": 16.662084579467773 |
|
}, |
|
{ |
|
"dpo_loss": 0.5544862151145935, |
|
"epoch": 1.4454416627302786, |
|
"grad_norm": 13707.829813480788, |
|
"learning_rate": 8.942494646033554e-07, |
|
"logits": -1.3700981140136719, |
|
"logps": -84.05197143554688, |
|
"loss": 86.1912, |
|
"objective": 85.1514663696289, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.18519388139247894, |
|
"step": 510, |
|
"wo_beta": 15.12604808807373 |
|
}, |
|
{ |
|
"dpo_loss": 0.5474262833595276, |
|
"epoch": 1.4596126594237129, |
|
"grad_norm": 16384.18532468762, |
|
"learning_rate": 8.911805825306096e-07, |
|
"logits": -1.4208234548568726, |
|
"logps": -85.2526626586914, |
|
"loss": 86.2928, |
|
"objective": 94.92349243164062, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.4625000059604645, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 0.21247318387031555, |
|
"step": 515, |
|
"wo_beta": 16.363656997680664 |
|
}, |
|
{ |
|
"dpo_loss": 0.5538729429244995, |
|
"epoch": 1.473783656117147, |
|
"grad_norm": 14068.735921521182, |
|
"learning_rate": 8.880732321458784e-07, |
|
"logits": -1.3451961278915405, |
|
"logps": -81.92323303222656, |
|
"loss": 85.1002, |
|
"objective": 84.61219024658203, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.19058094918727875, |
|
"step": 520, |
|
"wo_beta": 16.039138793945312 |
|
}, |
|
{ |
|
"dpo_loss": 0.5211088061332703, |
|
"epoch": 1.487954652810581, |
|
"grad_norm": 12355.643543079665, |
|
"learning_rate": 8.849277190229283e-07, |
|
"logits": -1.2561639547348022, |
|
"logps": -80.8559341430664, |
|
"loss": 87.4323, |
|
"objective": 88.15239715576172, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.19789734482765198, |
|
"step": 525, |
|
"wo_beta": 15.64743423461914 |
|
}, |
|
{ |
|
"dpo_loss": 0.5529366731643677, |
|
"epoch": 1.5021256495040152, |
|
"grad_norm": 13792.295805387279, |
|
"learning_rate": 8.817443524884117e-07, |
|
"logits": -1.4202781915664673, |
|
"logps": -84.57428741455078, |
|
"loss": 89.1332, |
|
"objective": 93.31535339355469, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.20485611259937286, |
|
"step": 530, |
|
"wo_beta": 15.418906211853027 |
|
}, |
|
{ |
|
"dpo_loss": 0.5380304455757141, |
|
"epoch": 1.5162966461974492, |
|
"grad_norm": 12748.671458728879, |
|
"learning_rate": 8.785234455914488e-07, |
|
"logits": -1.4013686180114746, |
|
"logps": -83.34593963623047, |
|
"loss": 86.3246, |
|
"objective": 83.55619812011719, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.19464156031608582, |
|
"step": 535, |
|
"wo_beta": 15.718771934509277 |
|
}, |
|
{ |
|
"dpo_loss": 0.5602856874465942, |
|
"epoch": 1.5304676428908834, |
|
"grad_norm": 13600.712518077142, |
|
"learning_rate": 8.752653150728411e-07, |
|
"logits": -1.3116246461868286, |
|
"logps": -83.8393783569336, |
|
"loss": 85.7548, |
|
"objective": 85.53334045410156, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.19387957453727722, |
|
"step": 540, |
|
"wo_beta": 15.35750961303711 |
|
}, |
|
{ |
|
"dpo_loss": 0.5608557462692261, |
|
"epoch": 1.5446386395843175, |
|
"grad_norm": 13202.179692261727, |
|
"learning_rate": 8.719702813339247e-07, |
|
"logits": -1.4217339754104614, |
|
"logps": -85.13090515136719, |
|
"loss": 78.3728, |
|
"objective": 73.64112854003906, |
|
"ranking_idealized": 0.6583333611488342, |
|
"ranking_idealized_expo": 0.5874999761581421, |
|
"ranking_simple": 0.6499999761581421, |
|
"regularize": 0.17463207244873047, |
|
"step": 545, |
|
"wo_beta": 14.742905616760254 |
|
}, |
|
{ |
|
"dpo_loss": 0.5433780550956726, |
|
"epoch": 1.5588096362777515, |
|
"grad_norm": 13773.885858068237, |
|
"learning_rate": 8.68638668405062e-07, |
|
"logits": -1.4105440378189087, |
|
"logps": -85.73950958251953, |
|
"loss": 85.7307, |
|
"objective": 91.58641815185547, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 0.20735137164592743, |
|
"step": 550, |
|
"wo_beta": 15.781331062316895 |
|
}, |
|
{ |
|
"epoch": 1.5588096362777515, |
|
"eval_dpo_loss": 0.6799347400665283, |
|
"eval_logits": -1.36829674243927, |
|
"eval_logps": -91.68656921386719, |
|
"eval_loss": 186.22413635253906, |
|
"eval_objective": 182.13821411132812, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.533643901348114, |
|
"eval_regularize": 0.4147377014160156, |
|
"eval_runtime": 478.899, |
|
"eval_samples_per_second": 12.09, |
|
"eval_steps_per_second": 1.009, |
|
"eval_wo_beta": 16.186290740966797, |
|
"step": 550 |
|
}, |
|
{ |
|
"dpo_loss": 0.5562130212783813, |
|
"epoch": 1.5729806329711855, |
|
"grad_norm": 13716.988937741002, |
|
"learning_rate": 8.652708039137766e-07, |
|
"logits": -1.2273495197296143, |
|
"logps": -85.2579116821289, |
|
"loss": 90.1931, |
|
"objective": 91.27943420410156, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.19078856706619263, |
|
"step": 555, |
|
"wo_beta": 14.82008171081543 |
|
}, |
|
{ |
|
"dpo_loss": 0.5405426621437073, |
|
"epoch": 1.5871516296646198, |
|
"grad_norm": 13222.290128913079, |
|
"learning_rate": 8.61867019052535e-07, |
|
"logits": -1.3004463911056519, |
|
"logps": -84.03120422363281, |
|
"loss": 82.5026, |
|
"objective": 82.23470306396484, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.19372278451919556, |
|
"step": 560, |
|
"wo_beta": 16.210308074951172 |
|
}, |
|
{ |
|
"dpo_loss": 0.545985221862793, |
|
"epoch": 1.601322626358054, |
|
"grad_norm": 13798.95251346989, |
|
"learning_rate": 8.584276485461775e-07, |
|
"logits": -1.2903294563293457, |
|
"logps": -85.43083190917969, |
|
"loss": 87.1773, |
|
"objective": 87.97190856933594, |
|
"ranking_idealized": 0.5458333492279053, |
|
"ranking_idealized_expo": 0.44999998807907104, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.18329960107803345, |
|
"step": 565, |
|
"wo_beta": 15.259784698486328 |
|
}, |
|
{ |
|
"dpo_loss": 0.5544782280921936, |
|
"epoch": 1.615493623051488, |
|
"grad_norm": 14310.342902213652, |
|
"learning_rate": 8.549530306190014e-07, |
|
"logits": -1.4501588344573975, |
|
"logps": -85.62173461914062, |
|
"loss": 86.1569, |
|
"objective": 88.04158020019531, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.19149872660636902, |
|
"step": 570, |
|
"wo_beta": 15.673080444335938 |
|
}, |
|
{ |
|
"dpo_loss": 0.5482128858566284, |
|
"epoch": 1.629664619744922, |
|
"grad_norm": 13673.298787796572, |
|
"learning_rate": 8.514435069615004e-07, |
|
"logits": -1.380743384361267, |
|
"logps": -83.26321411132812, |
|
"loss": 78.7831, |
|
"objective": 86.95629119873047, |
|
"ranking_idealized": 0.637499988079071, |
|
"ranking_idealized_expo": 0.5583333373069763, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.1899789720773697, |
|
"step": 575, |
|
"wo_beta": 16.415205001831055 |
|
}, |
|
{ |
|
"dpo_loss": 0.5394086241722107, |
|
"epoch": 1.643835616438356, |
|
"grad_norm": 13082.53312626321, |
|
"learning_rate": 8.478994226967638e-07, |
|
"logits": -1.4001491069793701, |
|
"logps": -83.10562133789062, |
|
"loss": 76.6065, |
|
"objective": 77.20848846435547, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.1699313372373581, |
|
"step": 580, |
|
"wo_beta": 14.931032180786133 |
|
}, |
|
{ |
|
"dpo_loss": 0.5450774431228638, |
|
"epoch": 1.6580066131317903, |
|
"grad_norm": 14803.266258769623, |
|
"learning_rate": 8.443211263465362e-07, |
|
"logits": -1.2514622211456299, |
|
"logps": -82.91756439208984, |
|
"loss": 81.1936, |
|
"objective": 78.58777618408203, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.4625000059604645, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 0.18020884692668915, |
|
"step": 585, |
|
"wo_beta": 16.229631423950195 |
|
}, |
|
{ |
|
"dpo_loss": 0.5452067852020264, |
|
"epoch": 1.6721776098252243, |
|
"grad_norm": 14897.05549715986, |
|
"learning_rate": 8.407089697969456e-07, |
|
"logits": -1.310152530670166, |
|
"logps": -82.58568572998047, |
|
"loss": 84.6601, |
|
"objective": 89.34095764160156, |
|
"ranking_idealized": 0.6208333373069763, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.18940496444702148, |
|
"step": 590, |
|
"wo_beta": 13.480273246765137 |
|
}, |
|
{ |
|
"dpo_loss": 0.5498862862586975, |
|
"epoch": 1.6863486065186586, |
|
"grad_norm": 13388.885538994262, |
|
"learning_rate": 8.370633082638975e-07, |
|
"logits": -1.2777602672576904, |
|
"logps": -84.25193786621094, |
|
"loss": 85.2501, |
|
"objective": 97.64402770996094, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.21810217201709747, |
|
"step": 595, |
|
"wo_beta": 17.075584411621094 |
|
}, |
|
{ |
|
"dpo_loss": 0.541688084602356, |
|
"epoch": 1.7005196032120926, |
|
"grad_norm": 12810.439354567186, |
|
"learning_rate": 8.333845002581458e-07, |
|
"logits": -1.3377609252929688, |
|
"logps": -85.63569641113281, |
|
"loss": 79.9458, |
|
"objective": 90.4583740234375, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.612500011920929, |
|
"regularize": 0.2045913189649582, |
|
"step": 600, |
|
"wo_beta": 16.088045120239258 |
|
}, |
|
{ |
|
"epoch": 1.7005196032120926, |
|
"eval_dpo_loss": 0.6794285774230957, |
|
"eval_logits": -1.3519084453582764, |
|
"eval_logps": -91.084716796875, |
|
"eval_loss": 186.21368408203125, |
|
"eval_objective": 181.86863708496094, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5372670888900757, |
|
"eval_regularize": 0.4135282337665558, |
|
"eval_runtime": 449.0944, |
|
"eval_samples_per_second": 12.893, |
|
"eval_steps_per_second": 1.075, |
|
"eval_wo_beta": 16.10601043701172, |
|
"step": 600 |
|
}, |
|
{ |
|
"dpo_loss": 0.5528364777565002, |
|
"epoch": 1.7146905999055266, |
|
"grad_norm": 12864.49342558613, |
|
"learning_rate": 8.296729075500343e-07, |
|
"logits": -1.2839235067367554, |
|
"logps": -85.77102661132812, |
|
"loss": 81.7288, |
|
"objective": 90.60871124267578, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.2010929137468338, |
|
"step": 605, |
|
"wo_beta": 18.23944091796875 |
|
}, |
|
{ |
|
"dpo_loss": 0.5559037327766418, |
|
"epoch": 1.7288615965989607, |
|
"grad_norm": 14171.44704590598, |
|
"learning_rate": 8.259288951339232e-07, |
|
"logits": -1.3577406406402588, |
|
"logps": -83.76995086669922, |
|
"loss": 81.4701, |
|
"objective": 75.51998138427734, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.5708333253860474, |
|
"ranking_simple": 0.6291666626930237, |
|
"regularize": 0.17047205567359924, |
|
"step": 610, |
|
"wo_beta": 16.163959503173828 |
|
}, |
|
{ |
|
"dpo_loss": 0.5623223185539246, |
|
"epoch": 1.743032593292395, |
|
"grad_norm": 14064.695817652162, |
|
"learning_rate": 8.221528311922941e-07, |
|
"logits": -1.3709431886672974, |
|
"logps": -83.62710571289062, |
|
"loss": 84.5652, |
|
"objective": 85.32384490966797, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.19118142127990723, |
|
"step": 615, |
|
"wo_beta": 15.722049713134766 |
|
}, |
|
{ |
|
"dpo_loss": 0.5426214933395386, |
|
"epoch": 1.7572035899858292, |
|
"grad_norm": 13161.981948520664, |
|
"learning_rate": 8.183450870595441e-07, |
|
"logits": -1.3993924856185913, |
|
"logps": -83.90966796875, |
|
"loss": 81.5518, |
|
"objective": 84.29554748535156, |
|
"ranking_idealized": 0.6208333373069763, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.19568467140197754, |
|
"step": 620, |
|
"wo_beta": 16.582374572753906 |
|
}, |
|
{ |
|
"dpo_loss": 0.5548843145370483, |
|
"epoch": 1.7713745866792632, |
|
"grad_norm": 13578.593083281268, |
|
"learning_rate": 8.145060371854691e-07, |
|
"logits": -1.3166680335998535, |
|
"logps": -83.37279510498047, |
|
"loss": 77.6344, |
|
"objective": 80.62175750732422, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.17566385865211487, |
|
"step": 625, |
|
"wo_beta": 15.19571304321289 |
|
}, |
|
{ |
|
"dpo_loss": 0.548730194568634, |
|
"epoch": 1.7855455833726972, |
|
"grad_norm": 12867.261945978005, |
|
"learning_rate": 8.106360590984404e-07, |
|
"logits": -1.3329386711120605, |
|
"logps": -85.60625457763672, |
|
"loss": 75.8762, |
|
"objective": 75.14217376708984, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.17412720620632172, |
|
"step": 630, |
|
"wo_beta": 16.33298110961914 |
|
}, |
|
{ |
|
"dpo_loss": 0.5529462695121765, |
|
"epoch": 1.7997165800661312, |
|
"grad_norm": 12432.106461076137, |
|
"learning_rate": 8.067355333682797e-07, |
|
"logits": -1.4188921451568604, |
|
"logps": -84.8874282836914, |
|
"loss": 78.6516, |
|
"objective": 76.64624786376953, |
|
"ranking_idealized": 0.637499988079071, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.17813840508460999, |
|
"step": 635, |
|
"wo_beta": 16.95586395263672 |
|
}, |
|
{ |
|
"dpo_loss": 0.5410430431365967, |
|
"epoch": 1.8138875767595655, |
|
"grad_norm": 12324.183379735212, |
|
"learning_rate": 8.028048435688333e-07, |
|
"logits": -1.3641606569290161, |
|
"logps": -85.47127532958984, |
|
"loss": 78.7118, |
|
"objective": 82.21182250976562, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.18489192426204681, |
|
"step": 640, |
|
"wo_beta": 17.22258186340332 |
|
}, |
|
{ |
|
"dpo_loss": 0.5470555424690247, |
|
"epoch": 1.8280585734529995, |
|
"grad_norm": 13971.672253595729, |
|
"learning_rate": 7.988443762402523e-07, |
|
"logits": -1.4050637483596802, |
|
"logps": -85.07406616210938, |
|
"loss": 78.6084, |
|
"objective": 74.21890258789062, |
|
"ranking_idealized": 0.6416666507720947, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.6291666626930237, |
|
"regularize": 0.16714391112327576, |
|
"step": 645, |
|
"wo_beta": 16.80624008178711 |
|
}, |
|
{ |
|
"dpo_loss": 0.5424051880836487, |
|
"epoch": 1.8422295701464337, |
|
"grad_norm": 15285.601428700493, |
|
"learning_rate": 7.948545208509811e-07, |
|
"logits": -1.440900444984436, |
|
"logps": -84.5870590209961, |
|
"loss": 86.7578, |
|
"objective": 89.12664031982422, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.18858183920383453, |
|
"step": 650, |
|
"wo_beta": 15.562705039978027 |
|
}, |
|
{ |
|
"epoch": 1.8422295701464337, |
|
"eval_dpo_loss": 0.6796835660934448, |
|
"eval_logits": -1.3402661085128784, |
|
"eval_logps": -89.40703582763672, |
|
"eval_loss": 186.7196044921875, |
|
"eval_objective": 182.49703979492188, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.531573474407196, |
|
"eval_regularize": 0.4141009747982025, |
|
"eval_runtime": 450.1436, |
|
"eval_samples_per_second": 12.863, |
|
"eval_steps_per_second": 1.073, |
|
"eval_wo_beta": 16.0269832611084, |
|
"step": 650 |
|
}, |
|
{ |
|
"dpo_loss": 0.5390594601631165, |
|
"epoch": 1.8564005668398678, |
|
"grad_norm": 14945.717954531257, |
|
"learning_rate": 7.90835669759456e-07, |
|
"logits": -1.292981505393982, |
|
"logps": -81.8280029296875, |
|
"loss": 79.8064, |
|
"objective": 77.88701629638672, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.18046139180660248, |
|
"step": 655, |
|
"wo_beta": 15.520308494567871 |
|
}, |
|
{ |
|
"dpo_loss": 0.5524376034736633, |
|
"epoch": 1.8705715635333018, |
|
"grad_norm": 12956.308969791295, |
|
"learning_rate": 7.86788218175523e-07, |
|
"logits": -1.3386873006820679, |
|
"logps": -84.97721862792969, |
|
"loss": 77.9731, |
|
"objective": 77.8855972290039, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.17455393075942993, |
|
"step": 660, |
|
"wo_beta": 17.077417373657227 |
|
}, |
|
{ |
|
"dpo_loss": 0.562981903553009, |
|
"epoch": 1.8847425602267358, |
|
"grad_norm": 12832.376229580192, |
|
"learning_rate": 7.827125641215718e-07, |
|
"logits": -1.334754228591919, |
|
"logps": -83.5533447265625, |
|
"loss": 82.4367, |
|
"objective": 85.92207336425781, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.625, |
|
"regularize": 0.1833416372537613, |
|
"step": 665, |
|
"wo_beta": 15.230433464050293 |
|
}, |
|
{ |
|
"dpo_loss": 0.548839807510376, |
|
"epoch": 1.89891355692017, |
|
"grad_norm": 13460.183191194346, |
|
"learning_rate": 7.786091083933949e-07, |
|
"logits": -1.273821473121643, |
|
"logps": -81.98705291748047, |
|
"loss": 71.3613, |
|
"objective": 68.62953186035156, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.16619008779525757, |
|
"step": 670, |
|
"wo_beta": 16.408151626586914 |
|
}, |
|
{ |
|
"dpo_loss": 0.5611833930015564, |
|
"epoch": 1.9130845536136043, |
|
"grad_norm": 12953.446893922981, |
|
"learning_rate": 7.744782545207744e-07, |
|
"logits": -1.2947652339935303, |
|
"logps": -83.05793762207031, |
|
"loss": 71.3196, |
|
"objective": 74.63235473632812, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.6083333492279053, |
|
"regularize": 0.16350051760673523, |
|
"step": 675, |
|
"wo_beta": 15.741961479187012 |
|
}, |
|
{ |
|
"dpo_loss": 0.5451231598854065, |
|
"epoch": 1.9272555503070383, |
|
"grad_norm": 13412.02601484903, |
|
"learning_rate": 7.703204087277988e-07, |
|
"logits": -1.3697810173034668, |
|
"logps": -85.1467056274414, |
|
"loss": 71.5185, |
|
"objective": 70.06403350830078, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.16510257124900818, |
|
"step": 680, |
|
"wo_beta": 15.431069374084473 |
|
}, |
|
{ |
|
"dpo_loss": 0.5437536835670471, |
|
"epoch": 1.9414265470004723, |
|
"grad_norm": 13070.654673150682, |
|
"learning_rate": 7.661359798929152e-07, |
|
"logits": -1.2984110116958618, |
|
"logps": -82.4813003540039, |
|
"loss": 72.6279, |
|
"objective": 63.83388137817383, |
|
"ranking_idealized": 0.5291666388511658, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.1474105566740036, |
|
"step": 685, |
|
"wo_beta": 15.765579223632812 |
|
}, |
|
{ |
|
"dpo_loss": 0.5520148873329163, |
|
"epoch": 1.9555975436939064, |
|
"grad_norm": 13585.612422979371, |
|
"learning_rate": 7.619253795087208e-07, |
|
"logits": -1.3621736764907837, |
|
"logps": -83.20579528808594, |
|
"loss": 70.4149, |
|
"objective": 71.44465637207031, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5583333373069763, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.15733769536018372, |
|
"step": 690, |
|
"wo_beta": 16.008506774902344 |
|
}, |
|
{ |
|
"dpo_loss": 0.5521395802497864, |
|
"epoch": 1.9697685403873406, |
|
"grad_norm": 12626.830880791873, |
|
"learning_rate": 7.576890216414972e-07, |
|
"logits": -1.2345752716064453, |
|
"logps": -84.00497436523438, |
|
"loss": 69.938, |
|
"objective": 70.55232238769531, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.15369382500648499, |
|
"step": 695, |
|
"wo_beta": 16.505474090576172 |
|
}, |
|
{ |
|
"dpo_loss": 0.5477771759033203, |
|
"epoch": 1.9839395370807746, |
|
"grad_norm": 14507.10563022748, |
|
"learning_rate": 7.534273228904915e-07, |
|
"logits": -1.2208502292633057, |
|
"logps": -84.28005981445312, |
|
"loss": 76.2665, |
|
"objective": 85.08452606201172, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.1893630176782608, |
|
"step": 700, |
|
"wo_beta": 15.212244987487793 |
|
}, |
|
{ |
|
"epoch": 1.9839395370807746, |
|
"eval_dpo_loss": 0.6800020337104797, |
|
"eval_logits": -1.3222942352294922, |
|
"eval_logps": -89.5856704711914, |
|
"eval_loss": 186.28018188476562, |
|
"eval_objective": 182.39332580566406, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5310559272766113, |
|
"eval_regularize": 0.4136333167552948, |
|
"eval_runtime": 489.8617, |
|
"eval_samples_per_second": 11.82, |
|
"eval_steps_per_second": 0.986, |
|
"eval_wo_beta": 16.111663818359375, |
|
"step": 700 |
|
}, |
|
{ |
|
"dpo_loss": 0.5436014533042908, |
|
"epoch": 1.9981105337742089, |
|
"grad_norm": 14761.110739737924, |
|
"learning_rate": 7.49140702346948e-07, |
|
"logits": -1.1587742567062378, |
|
"logps": -83.4106216430664, |
|
"loss": 71.0478, |
|
"objective": 77.40288543701172, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.1687079817056656, |
|
"step": 705, |
|
"wo_beta": 17.46946907043457 |
|
}, |
|
{ |
|
"dpo_loss": 0.5400715470314026, |
|
"epoch": 2.012281530467643, |
|
"grad_norm": 13854.290443619322, |
|
"learning_rate": 7.448295815528956e-07, |
|
"logits": -1.3091672658920288, |
|
"logps": -83.20928192138672, |
|
"loss": 68.6235, |
|
"objective": 74.59980773925781, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.15744589269161224, |
|
"step": 710, |
|
"wo_beta": 16.282772064208984 |
|
}, |
|
{ |
|
"dpo_loss": 0.5266523957252502, |
|
"epoch": 2.026452527161077, |
|
"grad_norm": 12772.47402835887, |
|
"learning_rate": 7.404943844596938e-07, |
|
"logits": -1.3287214040756226, |
|
"logps": -82.50818634033203, |
|
"loss": 67.4219, |
|
"objective": 67.50071716308594, |
|
"ranking_idealized": 0.6791666746139526, |
|
"ranking_idealized_expo": 0.5708333253860474, |
|
"ranking_simple": 0.6625000238418579, |
|
"regularize": 0.15344351530075073, |
|
"step": 715, |
|
"wo_beta": 15.63277816772461 |
|
}, |
|
{ |
|
"dpo_loss": 0.5342952013015747, |
|
"epoch": 2.040623523854511, |
|
"grad_norm": 12280.29550374932, |
|
"learning_rate": 7.361355373863413e-07, |
|
"logits": -1.3206126689910889, |
|
"logps": -83.4239273071289, |
|
"loss": 65.7671, |
|
"objective": 62.988590240478516, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.13976921141147614, |
|
"step": 720, |
|
"wo_beta": 16.120634078979492 |
|
}, |
|
{ |
|
"dpo_loss": 0.5416182279586792, |
|
"epoch": 2.0547945205479454, |
|
"grad_norm": 11934.95995024634, |
|
"learning_rate": 7.317534689775527e-07, |
|
"logits": -1.329419732093811, |
|
"logps": -86.18152618408203, |
|
"loss": 73.1378, |
|
"objective": 77.66006469726562, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.1658337563276291, |
|
"step": 725, |
|
"wo_beta": 14.640992164611816 |
|
}, |
|
{ |
|
"dpo_loss": 0.5336278080940247, |
|
"epoch": 2.0689655172413794, |
|
"grad_norm": 13017.829141332633, |
|
"learning_rate": 7.273486101616056e-07, |
|
"logits": -1.4032765626907349, |
|
"logps": -83.5689468383789, |
|
"loss": 73.2891, |
|
"objective": 73.26839447021484, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.15773232281208038, |
|
"step": 730, |
|
"wo_beta": 15.76942253112793 |
|
}, |
|
{ |
|
"dpo_loss": 0.5291448831558228, |
|
"epoch": 2.0831365139348135, |
|
"grad_norm": 13426.194750558408, |
|
"learning_rate": 7.229213941079639e-07, |
|
"logits": -1.3250770568847656, |
|
"logps": -82.74713897705078, |
|
"loss": 59.2054, |
|
"objective": 57.16627883911133, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.47083333134651184, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.14032262563705444, |
|
"step": 735, |
|
"wo_beta": 17.056970596313477 |
|
}, |
|
{ |
|
"dpo_loss": 0.5465752482414246, |
|
"epoch": 2.0973075106282475, |
|
"grad_norm": 11906.26841829341, |
|
"learning_rate": 7.184722561846797e-07, |
|
"logits": -1.3804094791412354, |
|
"logps": -82.77980041503906, |
|
"loss": 62.2469, |
|
"objective": 65.71126556396484, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.6083333492279053, |
|
"regularize": 0.14654967188835144, |
|
"step": 740, |
|
"wo_beta": 15.721449851989746 |
|
}, |
|
{ |
|
"dpo_loss": 0.5360319018363953, |
|
"epoch": 2.1114785073216815, |
|
"grad_norm": 13337.057180758171, |
|
"learning_rate": 7.14001633915581e-07, |
|
"logits": -1.313341498374939, |
|
"logps": -83.15229797363281, |
|
"loss": 60.0244, |
|
"objective": 60.3892822265625, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5583333373069763, |
|
"ranking_simple": 0.625, |
|
"regularize": 0.13975684344768524, |
|
"step": 745, |
|
"wo_beta": 15.697921752929688 |
|
}, |
|
{ |
|
"dpo_loss": 0.5399072170257568, |
|
"epoch": 2.1256495040151155, |
|
"grad_norm": 13331.418550163386, |
|
"learning_rate": 7.095099669372443e-07, |
|
"logits": -1.3453633785247803, |
|
"logps": -82.3453369140625, |
|
"loss": 65.1575, |
|
"objective": 60.51906967163086, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.1316269487142563, |
|
"step": 750, |
|
"wo_beta": 15.831055641174316 |
|
}, |
|
{ |
|
"epoch": 2.1256495040151155, |
|
"eval_dpo_loss": 0.6806153059005737, |
|
"eval_logits": -1.3253074884414673, |
|
"eval_logps": -90.24537658691406, |
|
"eval_loss": 188.15711975097656, |
|
"eval_objective": 184.20758056640625, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5320910811424255, |
|
"eval_regularize": 0.4179456830024719, |
|
"eval_runtime": 478.2913, |
|
"eval_samples_per_second": 12.106, |
|
"eval_steps_per_second": 1.01, |
|
"eval_wo_beta": 15.917864799499512, |
|
"step": 750 |
|
}, |
|
{ |
|
"dpo_loss": 0.5414224863052368, |
|
"epoch": 2.13982050070855, |
|
"grad_norm": 14942.893679399409, |
|
"learning_rate": 7.049976969557623e-07, |
|
"logits": -1.3125241994857788, |
|
"logps": -85.55477905273438, |
|
"loss": 70.5458, |
|
"objective": 72.25684356689453, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.15286041796207428, |
|
"step": 755, |
|
"wo_beta": 16.596240997314453 |
|
}, |
|
{ |
|
"dpo_loss": 0.5502544641494751, |
|
"epoch": 2.153991497401984, |
|
"grad_norm": 14884.220119069658, |
|
"learning_rate": 7.004652677033068e-07, |
|
"logits": -1.2573704719543457, |
|
"logps": -81.78999328613281, |
|
"loss": 66.5347, |
|
"objective": 56.669010162353516, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.1313803344964981, |
|
"step": 760, |
|
"wo_beta": 14.706622123718262 |
|
}, |
|
{ |
|
"dpo_loss": 0.537317156791687, |
|
"epoch": 2.168162494095418, |
|
"grad_norm": 12849.6702201699, |
|
"learning_rate": 6.959131248944922e-07, |
|
"logits": -1.3043426275253296, |
|
"logps": -82.6404037475586, |
|
"loss": 60.5154, |
|
"objective": 57.57880401611328, |
|
"ranking_idealized": 0.6208333373069763, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.13467958569526672, |
|
"step": 765, |
|
"wo_beta": 16.29267120361328 |
|
}, |
|
{ |
|
"dpo_loss": 0.5396389365196228, |
|
"epoch": 2.182333490788852, |
|
"grad_norm": 13686.597971217428, |
|
"learning_rate": 6.913417161825449e-07, |
|
"logits": -1.3148149251937866, |
|
"logps": -82.22266387939453, |
|
"loss": 66.4186, |
|
"objective": 71.55656433105469, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.15875324606895447, |
|
"step": 770, |
|
"wo_beta": 14.279667854309082 |
|
}, |
|
{ |
|
"dpo_loss": 0.5356777906417847, |
|
"epoch": 2.196504487482286, |
|
"grad_norm": 13109.133649943296, |
|
"learning_rate": 6.867514911152806e-07, |
|
"logits": -1.279820203781128, |
|
"logps": -82.98641204833984, |
|
"loss": 62.1208, |
|
"objective": 65.08477020263672, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.5791666507720947, |
|
"ranking_simple": 0.625, |
|
"regularize": 0.14647550880908966, |
|
"step": 775, |
|
"wo_beta": 17.69573211669922 |
|
}, |
|
{ |
|
"dpo_loss": 0.5467700362205505, |
|
"epoch": 2.21067548417572, |
|
"grad_norm": 13977.878251046886, |
|
"learning_rate": 6.821429010908971e-07, |
|
"logits": -1.2058584690093994, |
|
"logps": -82.53013610839844, |
|
"loss": 63.1931, |
|
"objective": 62.46464538574219, |
|
"ranking_idealized": 0.512499988079071, |
|
"ranking_idealized_expo": 0.44583332538604736, |
|
"ranking_simple": 0.5041666626930237, |
|
"regularize": 0.13306237757205963, |
|
"step": 780, |
|
"wo_beta": 15.349116325378418 |
|
}, |
|
{ |
|
"dpo_loss": 0.5252477526664734, |
|
"epoch": 2.2248464808691546, |
|
"grad_norm": 13522.027705329157, |
|
"learning_rate": 6.775163993135842e-07, |
|
"logits": -1.20766019821167, |
|
"logps": -81.99567413330078, |
|
"loss": 66.8492, |
|
"objective": 59.73252487182617, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.13541431725025177, |
|
"step": 785, |
|
"wo_beta": 15.272583961486816 |
|
}, |
|
{ |
|
"dpo_loss": 0.5247431993484497, |
|
"epoch": 2.2390174775625886, |
|
"grad_norm": 12425.328833284808, |
|
"learning_rate": 6.728724407489553e-07, |
|
"logits": -1.205735445022583, |
|
"logps": -82.88821411132812, |
|
"loss": 66.8893, |
|
"objective": 59.76982498168945, |
|
"ranking_idealized": 0.637499988079071, |
|
"ranking_idealized_expo": 0.5625, |
|
"ranking_simple": 0.637499988079071, |
|
"regularize": 0.14265631139278412, |
|
"step": 790, |
|
"wo_beta": 15.509627342224121 |
|
}, |
|
{ |
|
"dpo_loss": 0.5296502113342285, |
|
"epoch": 2.2531884742560226, |
|
"grad_norm": 11978.127680414538, |
|
"learning_rate": 6.682114820793074e-07, |
|
"logits": -1.2859066724777222, |
|
"logps": -84.08002471923828, |
|
"loss": 63.7577, |
|
"objective": 59.34935760498047, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.612500011920929, |
|
"regularize": 0.14149998128414154, |
|
"step": 795, |
|
"wo_beta": 14.346338272094727 |
|
}, |
|
{ |
|
"dpo_loss": 0.5199058651924133, |
|
"epoch": 2.2673594709494567, |
|
"grad_norm": 12421.855115848897, |
|
"learning_rate": 6.635339816587108e-07, |
|
"logits": -1.3125, |
|
"logps": -83.2691879272461, |
|
"loss": 66.0375, |
|
"objective": 66.00747680664062, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.14774902164936066, |
|
"step": 800, |
|
"wo_beta": 14.81782341003418 |
|
}, |
|
{ |
|
"epoch": 2.2673594709494567, |
|
"eval_dpo_loss": 0.6780735850334167, |
|
"eval_logits": -1.3137409687042236, |
|
"eval_logps": -88.58743286132812, |
|
"eval_loss": 186.72210693359375, |
|
"eval_objective": 181.93551635742188, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.533643901348114, |
|
"eval_regularize": 0.4137285053730011, |
|
"eval_runtime": 481.1373, |
|
"eval_samples_per_second": 12.034, |
|
"eval_steps_per_second": 1.004, |
|
"eval_wo_beta": 15.987866401672363, |
|
"step": 800 |
|
}, |
|
{ |
|
"dpo_loss": 0.5357646942138672, |
|
"epoch": 2.2815304676428907, |
|
"grad_norm": 12458.299460461743, |
|
"learning_rate": 6.588403994679354e-07, |
|
"logits": -1.319643497467041, |
|
"logps": -81.99591827392578, |
|
"loss": 60.8943, |
|
"objective": 64.13407135009766, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.13611546158790588, |
|
"step": 805, |
|
"wo_beta": 16.0935001373291 |
|
}, |
|
{ |
|
"dpo_loss": 0.5283416509628296, |
|
"epoch": 2.295701464336325, |
|
"grad_norm": 12943.281420533918, |
|
"learning_rate": 6.541311970692162e-07, |
|
"logits": -1.4129080772399902, |
|
"logps": -81.64440155029297, |
|
"loss": 61.2974, |
|
"objective": 61.06173324584961, |
|
"ranking_idealized": 0.6458333134651184, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.6499999761581421, |
|
"regularize": 0.13648824393749237, |
|
"step": 810, |
|
"wo_beta": 14.963865280151367 |
|
}, |
|
{ |
|
"dpo_loss": 0.5351440906524658, |
|
"epoch": 2.309872461029759, |
|
"grad_norm": 12894.991014128658, |
|
"learning_rate": 6.494068375608646e-07, |
|
"logits": -1.352980136871338, |
|
"logps": -83.23399353027344, |
|
"loss": 60.8069, |
|
"objective": 63.9875602722168, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.47083333134651184, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.13894489407539368, |
|
"step": 815, |
|
"wo_beta": 15.326094627380371 |
|
}, |
|
{ |
|
"dpo_loss": 0.5325611233711243, |
|
"epoch": 2.324043457723193, |
|
"grad_norm": 12345.798302601574, |
|
"learning_rate": 6.446677855317264e-07, |
|
"logits": -1.2916339635849, |
|
"logps": -81.8837661743164, |
|
"loss": 59.9305, |
|
"objective": 55.95283126831055, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.12038219720125198, |
|
"step": 820, |
|
"wo_beta": 15.182144165039062 |
|
}, |
|
{ |
|
"dpo_loss": 0.5271125435829163, |
|
"epoch": 2.3382144544166272, |
|
"grad_norm": 12783.217599288302, |
|
"learning_rate": 6.39914507015496e-07, |
|
"logits": -1.3013333082199097, |
|
"logps": -81.13337707519531, |
|
"loss": 58.233, |
|
"objective": 62.38441467285156, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.612500011920929, |
|
"regularize": 0.14106927812099457, |
|
"step": 825, |
|
"wo_beta": 16.586782455444336 |
|
}, |
|
{ |
|
"dpo_loss": 0.5309893488883972, |
|
"epoch": 2.3523854511100613, |
|
"grad_norm": 14368.93982814313, |
|
"learning_rate": 6.351474694448864e-07, |
|
"logits": -1.2905962467193604, |
|
"logps": -83.69612121582031, |
|
"loss": 59.3517, |
|
"objective": 62.03671646118164, |
|
"ranking_idealized": 0.6416666507720947, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.6416666507720947, |
|
"regularize": 0.13450145721435547, |
|
"step": 830, |
|
"wo_beta": 16.384456634521484 |
|
}, |
|
{ |
|
"dpo_loss": 0.5386961102485657, |
|
"epoch": 2.3665564478034957, |
|
"grad_norm": 12278.034874198123, |
|
"learning_rate": 6.303671416056621e-07, |
|
"logits": -1.2532858848571777, |
|
"logps": -83.63367462158203, |
|
"loss": 63.5605, |
|
"objective": 61.1205940246582, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.1340387463569641, |
|
"step": 835, |
|
"wo_beta": 15.07408618927002 |
|
}, |
|
{ |
|
"dpo_loss": 0.5518457293510437, |
|
"epoch": 2.3807274444969297, |
|
"grad_norm": 12325.077561512098, |
|
"learning_rate": 6.255739935905395e-07, |
|
"logits": -1.222998023033142, |
|
"logps": -83.31403350830078, |
|
"loss": 56.4779, |
|
"objective": 54.8234977722168, |
|
"ranking_idealized": 0.5249999761581421, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.5041666626930237, |
|
"regularize": 0.12345383316278458, |
|
"step": 840, |
|
"wo_beta": 15.817675590515137 |
|
}, |
|
{ |
|
"dpo_loss": 0.5455399751663208, |
|
"epoch": 2.3948984411903638, |
|
"grad_norm": 14534.352470484577, |
|
"learning_rate": 6.207684967529592e-07, |
|
"logits": -1.2789770364761353, |
|
"logps": -84.17676544189453, |
|
"loss": 61.3501, |
|
"objective": 56.92399978637695, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.12513183057308197, |
|
"step": 845, |
|
"wo_beta": 16.274921417236328 |
|
}, |
|
{ |
|
"dpo_loss": 0.5384249091148376, |
|
"epoch": 2.409069437883798, |
|
"grad_norm": 11975.067630184618, |
|
"learning_rate": 6.159511236607315e-07, |
|
"logits": -1.3067547082901, |
|
"logps": -81.92616271972656, |
|
"loss": 55.6773, |
|
"objective": 53.89519500732422, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.12293127924203873, |
|
"step": 850, |
|
"wo_beta": 15.953167915344238 |
|
}, |
|
{ |
|
"epoch": 2.409069437883798, |
|
"eval_dpo_loss": 0.680902361869812, |
|
"eval_logits": -1.311160922050476, |
|
"eval_logps": -88.26885986328125, |
|
"eval_loss": 189.5397491455078, |
|
"eval_objective": 185.2095947265625, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5300207138061523, |
|
"eval_regularize": 0.42031434178352356, |
|
"eval_runtime": 490.419, |
|
"eval_samples_per_second": 11.806, |
|
"eval_steps_per_second": 0.985, |
|
"eval_wo_beta": 15.931052207946777, |
|
"step": 850 |
|
}, |
|
{ |
|
"dpo_loss": 0.5426651239395142, |
|
"epoch": 2.423240434577232, |
|
"grad_norm": 13056.278516188751, |
|
"learning_rate": 6.111223480495671e-07, |
|
"logits": -1.3305928707122803, |
|
"logps": -80.8778076171875, |
|
"loss": 60.7771, |
|
"objective": 57.707275390625, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.1298539638519287, |
|
"step": 855, |
|
"wo_beta": 15.123750686645508 |
|
}, |
|
{ |
|
"dpo_loss": 0.537179172039032, |
|
"epoch": 2.4374114312706663, |
|
"grad_norm": 13276.37666715339, |
|
"learning_rate": 6.062826447764883e-07, |
|
"logits": -1.2815066576004028, |
|
"logps": -82.55672454833984, |
|
"loss": 55.8238, |
|
"objective": 53.87760925292969, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.12577569484710693, |
|
"step": 860, |
|
"wo_beta": 16.197458267211914 |
|
}, |
|
{ |
|
"dpo_loss": 0.53245609998703, |
|
"epoch": 2.4515824279641003, |
|
"grad_norm": 13115.296464572477, |
|
"learning_rate": 6.014324897731333e-07, |
|
"logits": -1.305693507194519, |
|
"logps": -81.65880584716797, |
|
"loss": 57.2162, |
|
"objective": 57.622314453125, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.12618619203567505, |
|
"step": 865, |
|
"wo_beta": 16.600849151611328 |
|
}, |
|
{ |
|
"dpo_loss": 0.53475421667099, |
|
"epoch": 2.4657534246575343, |
|
"grad_norm": 13057.72282671728, |
|
"learning_rate": 5.965723599989528e-07, |
|
"logits": -1.347506046295166, |
|
"logps": -82.02439880371094, |
|
"loss": 59.1596, |
|
"objective": 58.05669403076172, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.12966732680797577, |
|
"step": 870, |
|
"wo_beta": 15.612308502197266 |
|
}, |
|
{ |
|
"dpo_loss": 0.5284960865974426, |
|
"epoch": 2.4799244213509684, |
|
"grad_norm": 13136.725552830958, |
|
"learning_rate": 5.917027333943072e-07, |
|
"logits": -1.2931278944015503, |
|
"logps": -82.28563690185547, |
|
"loss": 52.9771, |
|
"objective": 52.34040069580078, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5708333253860474, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.12045804411172867, |
|
"step": 875, |
|
"wo_beta": 17.299848556518555 |
|
}, |
|
{ |
|
"dpo_loss": 0.5465295910835266, |
|
"epoch": 2.4940954180444024, |
|
"grad_norm": 12177.118012490373, |
|
"learning_rate": 5.868240888334652e-07, |
|
"logits": -1.206485390663147, |
|
"logps": -82.52658081054688, |
|
"loss": 59.4905, |
|
"objective": 58.06962203979492, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.13045351207256317, |
|
"step": 880, |
|
"wo_beta": 17.634618759155273 |
|
}, |
|
{ |
|
"dpo_loss": 0.5378908514976501, |
|
"epoch": 2.5082664147378364, |
|
"grad_norm": 12653.821371026783, |
|
"learning_rate": 5.819369060775124e-07, |
|
"logits": -1.3703595399856567, |
|
"logps": -81.24169921875, |
|
"loss": 54.0377, |
|
"objective": 55.50392150878906, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.1243302971124649, |
|
"step": 885, |
|
"wo_beta": 16.991498947143555 |
|
}, |
|
{ |
|
"dpo_loss": 0.521662712097168, |
|
"epoch": 2.5224374114312704, |
|
"grad_norm": 13224.96582542829, |
|
"learning_rate": 5.770416657271728e-07, |
|
"logits": -1.2803348302841187, |
|
"logps": -80.2920913696289, |
|
"loss": 54.9019, |
|
"objective": 55.66249084472656, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.12915806472301483, |
|
"step": 890, |
|
"wo_beta": 14.390849113464355 |
|
}, |
|
{ |
|
"dpo_loss": 0.538814902305603, |
|
"epoch": 2.536608408124705, |
|
"grad_norm": 13679.562551953088, |
|
"learning_rate": 5.721388491755455e-07, |
|
"logits": -1.2745685577392578, |
|
"logps": -82.53682708740234, |
|
"loss": 55.8587, |
|
"objective": 53.00823211669922, |
|
"ranking_idealized": 0.5458333492279053, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.12104254215955734, |
|
"step": 895, |
|
"wo_beta": 16.952863693237305 |
|
}, |
|
{ |
|
"dpo_loss": 0.5534180998802185, |
|
"epoch": 2.550779404818139, |
|
"grad_norm": 12973.031921366075, |
|
"learning_rate": 5.67228938560766e-07, |
|
"logits": -1.2396929264068604, |
|
"logps": -81.03583526611328, |
|
"loss": 54.3682, |
|
"objective": 53.294551849365234, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.12195997685194016, |
|
"step": 900, |
|
"wo_beta": 15.316643714904785 |
|
}, |
|
{ |
|
"epoch": 2.550779404818139, |
|
"eval_dpo_loss": 0.6793311238288879, |
|
"eval_logits": -1.3258877992630005, |
|
"eval_logps": -88.36107635498047, |
|
"eval_loss": 188.23812866210938, |
|
"eval_objective": 184.16783142089844, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5310559272766113, |
|
"eval_regularize": 0.41672980785369873, |
|
"eval_runtime": 486.377, |
|
"eval_samples_per_second": 11.904, |
|
"eval_steps_per_second": 0.993, |
|
"eval_wo_beta": 15.968037605285645, |
|
"step": 900 |
|
}, |
|
{ |
|
"dpo_loss": 0.5381408929824829, |
|
"epoch": 2.564950401511573, |
|
"grad_norm": 11810.259224351357, |
|
"learning_rate": 5.623124167185929e-07, |
|
"logits": -1.3189753293991089, |
|
"logps": -81.03609466552734, |
|
"loss": 51.9527, |
|
"objective": 49.01388931274414, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.11513598263263702, |
|
"step": 905, |
|
"wo_beta": 15.316691398620605 |
|
}, |
|
{ |
|
"dpo_loss": 0.5291991829872131, |
|
"epoch": 2.579121398205007, |
|
"grad_norm": 12343.801160156707, |
|
"learning_rate": 5.573897671349268e-07, |
|
"logits": -1.2955931425094604, |
|
"logps": -83.91735076904297, |
|
"loss": 55.8812, |
|
"objective": 63.70806121826172, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.13904932141304016, |
|
"step": 910, |
|
"wo_beta": 16.40995216369629 |
|
}, |
|
{ |
|
"dpo_loss": 0.5379226803779602, |
|
"epoch": 2.593292394898441, |
|
"grad_norm": 12490.177742860027, |
|
"learning_rate": 5.524614738982637e-07, |
|
"logits": -1.4045764207839966, |
|
"logps": -82.5849838256836, |
|
"loss": 55.5769, |
|
"objective": 54.98591613769531, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.612500011920929, |
|
"regularize": 0.12587417662143707, |
|
"step": 915, |
|
"wo_beta": 15.309656143188477 |
|
}, |
|
{ |
|
"dpo_loss": 0.5216780304908752, |
|
"epoch": 2.6074633915918755, |
|
"grad_norm": 12017.347028460124, |
|
"learning_rate": 5.475280216520912e-07, |
|
"logits": -1.2480995655059814, |
|
"logps": -82.11782836914062, |
|
"loss": 56.8294, |
|
"objective": 57.75908660888672, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.12090341001749039, |
|
"step": 920, |
|
"wo_beta": 16.191049575805664 |
|
}, |
|
{ |
|
"dpo_loss": 0.5298858284950256, |
|
"epoch": 2.6216343882853095, |
|
"grad_norm": 14009.68291839978, |
|
"learning_rate": 5.42589895547229e-07, |
|
"logits": -1.280160665512085, |
|
"logps": -82.20765686035156, |
|
"loss": 53.1774, |
|
"objective": 55.67765426635742, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.12424833327531815, |
|
"step": 925, |
|
"wo_beta": 16.476573944091797 |
|
}, |
|
{ |
|
"dpo_loss": 0.5387442111968994, |
|
"epoch": 2.6358053849787435, |
|
"grad_norm": 12640.001047074344, |
|
"learning_rate": 5.376475811941191e-07, |
|
"logits": -1.2655282020568848, |
|
"logps": -82.08385467529297, |
|
"loss": 52.6196, |
|
"objective": 55.54609680175781, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.44583332538604736, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.12455514818429947, |
|
"step": 930, |
|
"wo_beta": 16.72053337097168 |
|
}, |
|
{ |
|
"dpo_loss": 0.5488451719284058, |
|
"epoch": 2.6499763816721775, |
|
"grad_norm": 12698.751364257567, |
|
"learning_rate": 5.327015646150716e-07, |
|
"logits": -1.2632043361663818, |
|
"logps": -81.3023910522461, |
|
"loss": 50.4175, |
|
"objective": 51.81110763549805, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.574999988079071, |
|
"ranking_simple": 0.6291666626930237, |
|
"regularize": 0.1139976978302002, |
|
"step": 935, |
|
"wo_beta": 16.381933212280273 |
|
}, |
|
{ |
|
"dpo_loss": 0.518785834312439, |
|
"epoch": 2.6641473783656116, |
|
"grad_norm": 14057.06029309221, |
|
"learning_rate": 5.277523321964701e-07, |
|
"logits": -1.3097693920135498, |
|
"logps": -84.61360931396484, |
|
"loss": 52.2129, |
|
"objective": 56.00838088989258, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.11512833088636398, |
|
"step": 940, |
|
"wo_beta": 17.616283416748047 |
|
}, |
|
{ |
|
"dpo_loss": 0.5271897912025452, |
|
"epoch": 2.678318375059046, |
|
"grad_norm": 13084.001689574132, |
|
"learning_rate": 5.228003706409409e-07, |
|
"logits": -1.3481143712997437, |
|
"logps": -83.27128601074219, |
|
"loss": 49.6737, |
|
"objective": 52.79602813720703, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.11426286399364471, |
|
"step": 945, |
|
"wo_beta": 16.029043197631836 |
|
}, |
|
{ |
|
"dpo_loss": 0.5474939942359924, |
|
"epoch": 2.69248937175248, |
|
"grad_norm": 13821.932425093552, |
|
"learning_rate": 5.178461669194903e-07, |
|
"logits": -1.2337779998779297, |
|
"logps": -83.05430603027344, |
|
"loss": 50.3775, |
|
"objective": 45.27042007446289, |
|
"ranking_idealized": 0.637499988079071, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.625, |
|
"regularize": 0.10929083079099655, |
|
"step": 950, |
|
"wo_beta": 15.533432006835938 |
|
}, |
|
{ |
|
"epoch": 2.69248937175248, |
|
"eval_dpo_loss": 0.6802442669868469, |
|
"eval_logits": -1.3090835809707642, |
|
"eval_logps": -88.80048370361328, |
|
"eval_loss": 189.54185485839844, |
|
"eval_objective": 185.00436401367188, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5331262946128845, |
|
"eval_regularize": 0.418261855840683, |
|
"eval_runtime": 491.779, |
|
"eval_samples_per_second": 11.774, |
|
"eval_steps_per_second": 0.982, |
|
"eval_wo_beta": 15.998626708984375, |
|
"step": 950 |
|
}, |
|
{ |
|
"dpo_loss": 0.5236973166465759, |
|
"epoch": 2.706660368445914, |
|
"grad_norm": 13266.227245283348, |
|
"learning_rate": 5.128902082236175e-07, |
|
"logits": -1.319283127784729, |
|
"logps": -82.27372741699219, |
|
"loss": 46.7135, |
|
"objective": 43.35396194458008, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.10942530632019043, |
|
"step": 955, |
|
"wo_beta": 14.039530754089355 |
|
}, |
|
{ |
|
"dpo_loss": 0.5411895513534546, |
|
"epoch": 2.720831365139348, |
|
"grad_norm": 13668.800292035428, |
|
"learning_rate": 5.07932981917404e-07, |
|
"logits": -1.2875874042510986, |
|
"logps": -81.88396453857422, |
|
"loss": 53.1799, |
|
"objective": 54.5617561340332, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.11944962292909622, |
|
"step": 960, |
|
"wo_beta": 16.39274787902832 |
|
}, |
|
{ |
|
"dpo_loss": 0.5236133933067322, |
|
"epoch": 2.735002361832782, |
|
"grad_norm": 12761.114664799663, |
|
"learning_rate": 5.029749754895868e-07, |
|
"logits": -1.306726098060608, |
|
"logps": -82.27013397216797, |
|
"loss": 49.2644, |
|
"objective": 47.3409309387207, |
|
"ranking_idealized": 0.6416666507720947, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.6291666626930237, |
|
"regularize": 0.10921643674373627, |
|
"step": 965, |
|
"wo_beta": 15.65440559387207 |
|
}, |
|
{ |
|
"dpo_loss": 0.5498754382133484, |
|
"epoch": 2.7491733585262166, |
|
"grad_norm": 12565.339155193906, |
|
"learning_rate": 4.980166765056193e-07, |
|
"logits": -1.3193691968917847, |
|
"logps": -83.46347045898438, |
|
"loss": 52.7234, |
|
"objective": 56.7745246887207, |
|
"ranking_idealized": 0.6208333373069763, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.6083333492279053, |
|
"regularize": 0.13472451269626617, |
|
"step": 970, |
|
"wo_beta": 15.647109031677246 |
|
}, |
|
{ |
|
"dpo_loss": 0.5260103344917297, |
|
"epoch": 2.7633443552196506, |
|
"grad_norm": 13363.677196616523, |
|
"learning_rate": 4.930585725597247e-07, |
|
"logits": -1.240022897720337, |
|
"logps": -81.51500701904297, |
|
"loss": 50.997, |
|
"objective": 53.95423889160156, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.625, |
|
"regularize": 0.12574762105941772, |
|
"step": 975, |
|
"wo_beta": 16.371328353881836 |
|
}, |
|
{ |
|
"dpo_loss": 0.5399420261383057, |
|
"epoch": 2.7775153519130846, |
|
"grad_norm": 13969.44472204385, |
|
"learning_rate": 4.881011512269463e-07, |
|
"logits": -1.35780930519104, |
|
"logps": -81.2794189453125, |
|
"loss": 51.6737, |
|
"objective": 55.6290283203125, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.12999171018600464, |
|
"step": 980, |
|
"wo_beta": 14.558424949645996 |
|
}, |
|
{ |
|
"dpo_loss": 0.5281099677085876, |
|
"epoch": 2.7916863486065187, |
|
"grad_norm": 11586.92970672364, |
|
"learning_rate": 4.831449000151997e-07, |
|
"logits": -1.205262303352356, |
|
"logps": -79.56948852539062, |
|
"loss": 49.5107, |
|
"objective": 46.61149597167969, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.10813379287719727, |
|
"step": 985, |
|
"wo_beta": 14.642525672912598 |
|
}, |
|
{ |
|
"dpo_loss": 0.5295001268386841, |
|
"epoch": 2.8058573452999527, |
|
"grad_norm": 12278.903797254565, |
|
"learning_rate": 4.78190306317332e-07, |
|
"logits": -1.268909215927124, |
|
"logps": -82.44329071044922, |
|
"loss": 47.3581, |
|
"objective": 51.39979553222656, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.11149868369102478, |
|
"step": 990, |
|
"wo_beta": 16.07427406311035 |
|
}, |
|
{ |
|
"dpo_loss": 0.5399483442306519, |
|
"epoch": 2.820028341993387, |
|
"grad_norm": 12982.312529844054, |
|
"learning_rate": 4.732378573631924e-07, |
|
"logits": -1.3312995433807373, |
|
"logps": -80.66969299316406, |
|
"loss": 49.758, |
|
"objective": 55.4227409362793, |
|
"ranking_idealized": 0.6208333373069763, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.12711945176124573, |
|
"step": 995, |
|
"wo_beta": 16.746198654174805 |
|
}, |
|
{ |
|
"dpo_loss": 0.526489794254303, |
|
"epoch": 2.8341993386868207, |
|
"grad_norm": 12173.86125870911, |
|
"learning_rate": 4.682880401717177e-07, |
|
"logits": -1.271032691001892, |
|
"logps": -79.56470489501953, |
|
"loss": 45.9449, |
|
"objective": 40.13682174682617, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.09338556975126266, |
|
"step": 1000, |
|
"wo_beta": 15.067657470703125 |
|
}, |
|
{ |
|
"epoch": 2.8341993386868207, |
|
"eval_dpo_loss": 0.6791692972183228, |
|
"eval_logits": -1.2989623546600342, |
|
"eval_logps": -87.81481170654297, |
|
"eval_loss": 187.70785522460938, |
|
"eval_objective": 183.56761169433594, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5300207138061523, |
|
"eval_regularize": 0.4161270260810852, |
|
"eval_runtime": 491.2083, |
|
"eval_samples_per_second": 11.787, |
|
"eval_steps_per_second": 0.983, |
|
"eval_wo_beta": 15.995977401733398, |
|
"step": 1000 |
|
}, |
|
{ |
|
"dpo_loss": 0.5403110384941101, |
|
"epoch": 2.848370335380255, |
|
"grad_norm": 13425.378037887134, |
|
"learning_rate": 4.633413415030401e-07, |
|
"logits": -1.2654575109481812, |
|
"logps": -80.49606323242188, |
|
"loss": 48.7319, |
|
"objective": 47.16264724731445, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.10651734471321106, |
|
"step": 1005, |
|
"wo_beta": 16.28557586669922 |
|
}, |
|
{ |
|
"dpo_loss": 0.5306838750839233, |
|
"epoch": 2.862541332073689, |
|
"grad_norm": 13143.964606052063, |
|
"learning_rate": 4.5839824781061886e-07, |
|
"logits": -1.32563316822052, |
|
"logps": -81.29505920410156, |
|
"loss": 51.8292, |
|
"objective": 49.8996467590332, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.6083333492279053, |
|
"regularize": 0.11315880715847015, |
|
"step": 1010, |
|
"wo_beta": 15.957425117492676 |
|
}, |
|
{ |
|
"dpo_loss": 0.5331242084503174, |
|
"epoch": 2.8767123287671232, |
|
"grad_norm": 12600.166168740529, |
|
"learning_rate": 4.53459245193404e-07, |
|
"logits": -1.2467234134674072, |
|
"logps": -80.21656799316406, |
|
"loss": 44.7609, |
|
"objective": 42.55329895019531, |
|
"ranking_idealized": 0.6541666388511658, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.6333333253860474, |
|
"regularize": 0.09937479346990585, |
|
"step": 1015, |
|
"wo_beta": 15.586889266967773 |
|
}, |
|
{ |
|
"dpo_loss": 0.5346752405166626, |
|
"epoch": 2.8908833254605573, |
|
"grad_norm": 14111.243992297606, |
|
"learning_rate": 4.4852481934803277e-07, |
|
"logits": -1.2140835523605347, |
|
"logps": -82.13688659667969, |
|
"loss": 46.0337, |
|
"objective": 43.36848831176758, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.09756989777088165, |
|
"step": 1020, |
|
"wo_beta": 18.553333282470703 |
|
}, |
|
{ |
|
"dpo_loss": 0.5420379042625427, |
|
"epoch": 2.9050543221539913, |
|
"grad_norm": 12276.868793163067, |
|
"learning_rate": 4.435954555210676e-07, |
|
"logits": -1.3084660768508911, |
|
"logps": -81.93505096435547, |
|
"loss": 46.0381, |
|
"objective": 48.77103042602539, |
|
"ranking_idealized": 0.6208333373069763, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.612500011920929, |
|
"regularize": 0.10658075660467148, |
|
"step": 1025, |
|
"wo_beta": 15.018412590026855 |
|
}, |
|
{ |
|
"dpo_loss": 0.5342170000076294, |
|
"epoch": 2.9192253188474258, |
|
"grad_norm": 12677.814826562366, |
|
"learning_rate": 4.3867163846127674e-07, |
|
"logits": -1.3350425958633423, |
|
"logps": -81.84678649902344, |
|
"loss": 47.2693, |
|
"objective": 41.97852325439453, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.09485388547182083, |
|
"step": 1030, |
|
"wo_beta": 16.526702880859375 |
|
}, |
|
{ |
|
"dpo_loss": 0.5289677977561951, |
|
"epoch": 2.9333963155408598, |
|
"grad_norm": 13399.69328236257, |
|
"learning_rate": 4.3375385237196507e-07, |
|
"logits": -1.3010871410369873, |
|
"logps": -82.80349731445312, |
|
"loss": 43.5011, |
|
"objective": 41.88113784790039, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.6083333492279053, |
|
"regularize": 0.09509758651256561, |
|
"step": 1035, |
|
"wo_beta": 14.980511665344238 |
|
}, |
|
{ |
|
"dpo_loss": 0.5463218688964844, |
|
"epoch": 2.947567312234294, |
|
"grad_norm": 12072.270375502065, |
|
"learning_rate": 4.2884258086335745e-07, |
|
"logits": -1.2975058555603027, |
|
"logps": -82.66610717773438, |
|
"loss": 45.0537, |
|
"objective": 48.81401062011719, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.10974690318107605, |
|
"step": 1040, |
|
"wo_beta": 16.447132110595703 |
|
}, |
|
{ |
|
"dpo_loss": 0.5381548404693604, |
|
"epoch": 2.961738308927728, |
|
"grad_norm": 13887.433179664138, |
|
"learning_rate": 4.2393830690504165e-07, |
|
"logits": -1.2503575086593628, |
|
"logps": -84.04967498779297, |
|
"loss": 44.8665, |
|
"objective": 42.995948791503906, |
|
"ranking_idealized": 0.6208333373069763, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.6208333373069763, |
|
"regularize": 0.09885497391223907, |
|
"step": 1045, |
|
"wo_beta": 18.053199768066406 |
|
}, |
|
{ |
|
"dpo_loss": 0.5348830819129944, |
|
"epoch": 2.975909305621162, |
|
"grad_norm": 13502.021630049758, |
|
"learning_rate": 4.1904151277847305e-07, |
|
"logits": -1.2968212366104126, |
|
"logps": -79.87500762939453, |
|
"loss": 49.0003, |
|
"objective": 50.04111862182617, |
|
"ranking_idealized": 0.6541666388511658, |
|
"ranking_idealized_expo": 0.5541666746139526, |
|
"ranking_simple": 0.6458333134651184, |
|
"regularize": 0.11395598948001862, |
|
"step": 1050, |
|
"wo_beta": 15.20615291595459 |
|
}, |
|
{ |
|
"epoch": 2.975909305621162, |
|
"eval_dpo_loss": 0.6791855692863464, |
|
"eval_logits": -1.2778165340423584, |
|
"eval_logps": -88.30037689208984, |
|
"eval_loss": 188.00396728515625, |
|
"eval_objective": 184.00155639648438, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5341615080833435, |
|
"eval_regularize": 0.41730284690856934, |
|
"eval_runtime": 486.4753, |
|
"eval_samples_per_second": 11.902, |
|
"eval_steps_per_second": 0.993, |
|
"eval_wo_beta": 16.040319442749023, |
|
"step": 1050 |
|
}, |
|
{ |
|
"dpo_loss": 0.5416039824485779, |
|
"epoch": 2.9900803023145963, |
|
"grad_norm": 13186.167879544177, |
|
"learning_rate": 4.141526800295481e-07, |
|
"logits": -1.2704575061798096, |
|
"logps": -81.0667724609375, |
|
"loss": 43.7316, |
|
"objective": 46.92390441894531, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.11228723078966141, |
|
"step": 1055, |
|
"wo_beta": 15.320064544677734 |
|
}, |
|
{ |
|
"dpo_loss": 0.5175911784172058, |
|
"epoch": 3.0042512990080303, |
|
"grad_norm": 11979.18084085825, |
|
"learning_rate": 4.092722894212487e-07, |
|
"logits": -1.291445255279541, |
|
"logps": -82.69534301757812, |
|
"loss": 44.4026, |
|
"objective": 47.78953552246094, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.612500011920929, |
|
"regularize": 0.10625550150871277, |
|
"step": 1060, |
|
"wo_beta": 15.794866561889648 |
|
}, |
|
{ |
|
"dpo_loss": 0.523690938949585, |
|
"epoch": 3.0184222957014644, |
|
"grad_norm": 12600.45509733284, |
|
"learning_rate": 4.0440082088636546e-07, |
|
"logits": -1.3265612125396729, |
|
"logps": -84.14775848388672, |
|
"loss": 41.3718, |
|
"objective": 38.99584197998047, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.09401161223649979, |
|
"step": 1065, |
|
"wo_beta": 16.806358337402344 |
|
}, |
|
{ |
|
"dpo_loss": 0.5429927706718445, |
|
"epoch": 3.0325932923948984, |
|
"grad_norm": 13459.06076930384, |
|
"learning_rate": 3.995387534803005e-07, |
|
"logits": -1.2817329168319702, |
|
"logps": -81.6548080444336, |
|
"loss": 44.6891, |
|
"objective": 43.239158630371094, |
|
"ranking_idealized": 0.637499988079071, |
|
"ranking_idealized_expo": 0.5791666507720947, |
|
"ranking_simple": 0.625, |
|
"regularize": 0.09600695967674255, |
|
"step": 1070, |
|
"wo_beta": 17.19818878173828 |
|
}, |
|
{ |
|
"dpo_loss": 0.5399213433265686, |
|
"epoch": 3.0467642890883324, |
|
"grad_norm": 12451.835928919867, |
|
"learning_rate": 3.9468656533395934e-07, |
|
"logits": -1.2840524911880493, |
|
"logps": -81.64595031738281, |
|
"loss": 38.4816, |
|
"objective": 40.692039489746094, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.09315841645002365, |
|
"step": 1075, |
|
"wo_beta": 15.41653060913086 |
|
}, |
|
{ |
|
"dpo_loss": 0.5243366360664368, |
|
"epoch": 3.0609352857817664, |
|
"grad_norm": 12956.687806008335, |
|
"learning_rate": 3.8984473360672967e-07, |
|
"logits": -1.3753383159637451, |
|
"logps": -82.9805908203125, |
|
"loss": 40.18, |
|
"objective": 39.79288864135742, |
|
"ranking_idealized": 0.6416666507720947, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.6291666626930237, |
|
"regularize": 0.09019829332828522, |
|
"step": 1080, |
|
"wo_beta": 17.60961151123047 |
|
}, |
|
{ |
|
"dpo_loss": 0.5346547365188599, |
|
"epoch": 3.075106282475201, |
|
"grad_norm": 12876.9232360082, |
|
"learning_rate": 3.850137344395598e-07, |
|
"logits": -1.318056344985962, |
|
"logps": -83.30501556396484, |
|
"loss": 39.6664, |
|
"objective": 41.40624237060547, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.0875302404165268, |
|
"step": 1085, |
|
"wo_beta": 15.289043426513672 |
|
}, |
|
{ |
|
"dpo_loss": 0.5314586162567139, |
|
"epoch": 3.089277279168635, |
|
"grad_norm": 12423.675708081033, |
|
"learning_rate": 3.801940429081345e-07, |
|
"logits": -1.297440767288208, |
|
"logps": -81.59999084472656, |
|
"loss": 40.7964, |
|
"objective": 42.56759262084961, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.6291666626930237, |
|
"regularize": 0.09419377893209457, |
|
"step": 1090, |
|
"wo_beta": 15.919710159301758 |
|
}, |
|
{ |
|
"dpo_loss": 0.5284194946289062, |
|
"epoch": 3.103448275862069, |
|
"grad_norm": 12843.979452626416, |
|
"learning_rate": 3.7538613297615706e-07, |
|
"logits": -1.2590415477752686, |
|
"logps": -83.42412567138672, |
|
"loss": 40.9535, |
|
"objective": 44.701377868652344, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5708333253860474, |
|
"ranking_simple": 0.612500011920929, |
|
"regularize": 0.10055555403232574, |
|
"step": 1095, |
|
"wo_beta": 16.360620498657227 |
|
}, |
|
{ |
|
"dpo_loss": 0.5163142681121826, |
|
"epoch": 3.117619272555503, |
|
"grad_norm": 11098.073660723994, |
|
"learning_rate": 3.7059047744873955e-07, |
|
"logits": -1.2521919012069702, |
|
"logps": -82.35820770263672, |
|
"loss": 40.2428, |
|
"objective": 41.402366638183594, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.08817121386528015, |
|
"step": 1100, |
|
"wo_beta": 16.056493759155273 |
|
}, |
|
{ |
|
"epoch": 3.117619272555503, |
|
"eval_dpo_loss": 0.680143415927887, |
|
"eval_logits": -1.2988417148590088, |
|
"eval_logps": -88.64698028564453, |
|
"eval_loss": 188.7165985107422, |
|
"eval_objective": 184.38153076171875, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.532608687877655, |
|
"eval_regularize": 0.41805195808410645, |
|
"eval_runtime": 486.8996, |
|
"eval_samples_per_second": 11.892, |
|
"eval_steps_per_second": 0.992, |
|
"eval_wo_beta": 15.998079299926758, |
|
"step": 1100 |
|
}, |
|
{ |
|
"dpo_loss": 0.532370924949646, |
|
"epoch": 3.131790269248937, |
|
"grad_norm": 12884.072735206462, |
|
"learning_rate": 3.658075479259087e-07, |
|
"logits": -1.3051170110702515, |
|
"logps": -82.9980239868164, |
|
"loss": 43.5912, |
|
"objective": 42.78650665283203, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.09705787152051926, |
|
"step": 1105, |
|
"wo_beta": 17.55166244506836 |
|
}, |
|
{ |
|
"dpo_loss": 0.5135348439216614, |
|
"epoch": 3.1459612659423715, |
|
"grad_norm": 13710.402810117148, |
|
"learning_rate": 3.6103781475622786e-07, |
|
"logits": -1.2103074789047241, |
|
"logps": -83.2777328491211, |
|
"loss": 35.6812, |
|
"objective": 35.80618667602539, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.6083333492279053, |
|
"regularize": 0.07978586852550507, |
|
"step": 1110, |
|
"wo_beta": 16.995450973510742 |
|
}, |
|
{ |
|
"dpo_loss": 0.5229103565216064, |
|
"epoch": 3.1601322626358055, |
|
"grad_norm": 12411.913045675534, |
|
"learning_rate": 3.562817469905442e-07, |
|
"logits": -1.2619822025299072, |
|
"logps": -82.1358642578125, |
|
"loss": 38.5951, |
|
"objective": 36.70951461791992, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.6083333492279053, |
|
"regularize": 0.08537817001342773, |
|
"step": 1115, |
|
"wo_beta": 16.52168846130371 |
|
}, |
|
{ |
|
"dpo_loss": 0.5169024467468262, |
|
"epoch": 3.1743032593292395, |
|
"grad_norm": 12747.527049209308, |
|
"learning_rate": 3.5153981233586274e-07, |
|
"logits": -1.2052761316299438, |
|
"logps": -80.89930725097656, |
|
"loss": 35.9412, |
|
"objective": 35.01757049560547, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.6083333492279053, |
|
"regularize": 0.08051317185163498, |
|
"step": 1120, |
|
"wo_beta": 15.474043846130371 |
|
}, |
|
{ |
|
"dpo_loss": 0.5393829941749573, |
|
"epoch": 3.1884742560226735, |
|
"grad_norm": 13071.804290926188, |
|
"learning_rate": 3.468124771093519e-07, |
|
"logits": -1.263301134109497, |
|
"logps": -83.0383529663086, |
|
"loss": 37.8478, |
|
"objective": 38.899776458740234, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.09170109778642654, |
|
"step": 1125, |
|
"wo_beta": 15.088132858276367 |
|
}, |
|
{ |
|
"dpo_loss": 0.5208443999290466, |
|
"epoch": 3.2026452527161076, |
|
"grad_norm": 13439.120791203995, |
|
"learning_rate": 3.421002061924876e-07, |
|
"logits": -1.298660159111023, |
|
"logps": -82.7750473022461, |
|
"loss": 34.6631, |
|
"objective": 33.578922271728516, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.07489284873008728, |
|
"step": 1130, |
|
"wo_beta": 15.427777290344238 |
|
}, |
|
{ |
|
"dpo_loss": 0.5245645642280579, |
|
"epoch": 3.2168162494095416, |
|
"grad_norm": 11480.67381753106, |
|
"learning_rate": 3.374034629853356e-07, |
|
"logits": -1.3043017387390137, |
|
"logps": -80.89866638183594, |
|
"loss": 35.7927, |
|
"objective": 35.20330047607422, |
|
"ranking_idealized": 0.5291666388511658, |
|
"ranking_idealized_expo": 0.4625000059604645, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 0.08528413623571396, |
|
"step": 1135, |
|
"wo_beta": 16.220800399780273 |
|
}, |
|
{ |
|
"dpo_loss": 0.5402042269706726, |
|
"epoch": 3.230987246102976, |
|
"grad_norm": 12946.274800579084, |
|
"learning_rate": 3.327227093609824e-07, |
|
"logits": -1.1506885290145874, |
|
"logps": -81.15502166748047, |
|
"loss": 40.5475, |
|
"objective": 40.8009033203125, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.09735415130853653, |
|
"step": 1140, |
|
"wo_beta": 16.101863861083984 |
|
}, |
|
{ |
|
"dpo_loss": 0.5243603587150574, |
|
"epoch": 3.24515824279641, |
|
"grad_norm": 13000.005011572795, |
|
"learning_rate": 3.2805840562011465e-07, |
|
"logits": -1.2146347761154175, |
|
"logps": -83.07351684570312, |
|
"loss": 40.1207, |
|
"objective": 42.64434814453125, |
|
"ranking_idealized": 0.6541666388511658, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.6333333253860474, |
|
"regularize": 0.09260058403015137, |
|
"step": 1145, |
|
"wo_beta": 15.928021430969238 |
|
}, |
|
{ |
|
"dpo_loss": 0.5314944386482239, |
|
"epoch": 3.259329239489844, |
|
"grad_norm": 12558.545529727347, |
|
"learning_rate": 3.234110104457536e-07, |
|
"logits": -1.352626085281372, |
|
"logps": -80.92655181884766, |
|
"loss": 37.177, |
|
"objective": 37.67503356933594, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.08233184367418289, |
|
"step": 1150, |
|
"wo_beta": 14.541799545288086 |
|
}, |
|
{ |
|
"epoch": 3.259329239489844, |
|
"eval_dpo_loss": 0.6804221868515015, |
|
"eval_logits": -1.2842507362365723, |
|
"eval_logps": -87.92387390136719, |
|
"eval_loss": 188.25633239746094, |
|
"eval_objective": 184.33511352539062, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5357142686843872, |
|
"eval_regularize": 0.4183206856250763, |
|
"eval_runtime": 488.1945, |
|
"eval_samples_per_second": 11.86, |
|
"eval_steps_per_second": 0.989, |
|
"eval_wo_beta": 16.0123348236084, |
|
"step": 1150 |
|
}, |
|
{ |
|
"dpo_loss": 0.5527331829071045, |
|
"epoch": 3.273500236183278, |
|
"grad_norm": 12600.213804572502, |
|
"learning_rate": 3.187809808581492e-07, |
|
"logits": -1.225222110748291, |
|
"logps": -80.92967987060547, |
|
"loss": 37.9886, |
|
"objective": 43.58564376831055, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.09985193610191345, |
|
"step": 1155, |
|
"wo_beta": 16.76634407043457 |
|
}, |
|
{ |
|
"dpo_loss": 0.5320346355438232, |
|
"epoch": 3.287671232876712, |
|
"grad_norm": 13215.429208773, |
|
"learning_rate": 3.141687721698363e-07, |
|
"logits": -1.287786602973938, |
|
"logps": -83.13336944580078, |
|
"loss": 34.714, |
|
"objective": 32.02961349487305, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.07429231703281403, |
|
"step": 1160, |
|
"wo_beta": 17.868885040283203 |
|
}, |
|
{ |
|
"dpo_loss": 0.5377687215805054, |
|
"epoch": 3.3018422295701466, |
|
"grad_norm": 12734.199495358569, |
|
"learning_rate": 3.095748379408603e-07, |
|
"logits": -1.3172459602355957, |
|
"logps": -80.96276092529297, |
|
"loss": 34.2009, |
|
"objective": 33.96812057495117, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.08176220953464508, |
|
"step": 1165, |
|
"wo_beta": 15.74937629699707 |
|
}, |
|
{ |
|
"dpo_loss": 0.5269596576690674, |
|
"epoch": 3.3160132262635806, |
|
"grad_norm": 14339.996000811438, |
|
"learning_rate": 3.049996299341742e-07, |
|
"logits": -1.267351746559143, |
|
"logps": -82.11973571777344, |
|
"loss": 34.9879, |
|
"objective": 35.85028076171875, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.08146883547306061, |
|
"step": 1170, |
|
"wo_beta": 15.652009963989258 |
|
}, |
|
{ |
|
"dpo_loss": 0.531762957572937, |
|
"epoch": 3.3301842229570147, |
|
"grad_norm": 12543.440661095656, |
|
"learning_rate": 3.004435980712129e-07, |
|
"logits": -1.257896900177002, |
|
"logps": -82.12284088134766, |
|
"loss": 38.0949, |
|
"objective": 35.93735122680664, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.08384241163730621, |
|
"step": 1175, |
|
"wo_beta": 13.72645378112793 |
|
}, |
|
{ |
|
"dpo_loss": 0.5355243682861328, |
|
"epoch": 3.3443552196504487, |
|
"grad_norm": 11718.716469797973, |
|
"learning_rate": 2.959071903876486e-07, |
|
"logits": -1.3486711978912354, |
|
"logps": -82.8729248046875, |
|
"loss": 35.7799, |
|
"objective": 35.360801696777344, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.07788892835378647, |
|
"step": 1180, |
|
"wo_beta": 16.274147033691406 |
|
}, |
|
{ |
|
"dpo_loss": 0.5254151225090027, |
|
"epoch": 3.3585262163438827, |
|
"grad_norm": 13437.960403836023, |
|
"learning_rate": 2.913908529893304e-07, |
|
"logits": -1.1963578462600708, |
|
"logps": -83.22509002685547, |
|
"loss": 33.4865, |
|
"objective": 33.50373840332031, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.07612194865942001, |
|
"step": 1185, |
|
"wo_beta": 15.737934112548828 |
|
}, |
|
{ |
|
"dpo_loss": 0.5395456552505493, |
|
"epoch": 3.372697213037317, |
|
"grad_norm": 12206.27505785514, |
|
"learning_rate": 2.86895030008416e-07, |
|
"logits": -1.3092117309570312, |
|
"logps": -81.93521118164062, |
|
"loss": 33.053, |
|
"objective": 29.232421875, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.07262556999921799, |
|
"step": 1190, |
|
"wo_beta": 15.487491607666016 |
|
}, |
|
{ |
|
"dpo_loss": 0.5137616991996765, |
|
"epoch": 3.386868209730751, |
|
"grad_norm": 11921.58688181337, |
|
"learning_rate": 2.824201635596951e-07, |
|
"logits": -1.2198973894119263, |
|
"logps": -82.35958099365234, |
|
"loss": 29.3695, |
|
"objective": 29.94867706298828, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.06865646690130234, |
|
"step": 1195, |
|
"wo_beta": 15.531022071838379 |
|
}, |
|
{ |
|
"dpo_loss": 0.5208079814910889, |
|
"epoch": 3.4010392064241852, |
|
"grad_norm": 12726.149489712327, |
|
"learning_rate": 2.779666936971129e-07, |
|
"logits": -1.3937805891036987, |
|
"logps": -82.6730728149414, |
|
"loss": 34.9809, |
|
"objective": 31.1435489654541, |
|
"ranking_idealized": 0.6208333373069763, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.07431173324584961, |
|
"step": 1200, |
|
"wo_beta": 18.369197845458984 |
|
}, |
|
{ |
|
"epoch": 3.4010392064241852, |
|
"eval_dpo_loss": 0.6805519461631775, |
|
"eval_logits": -1.289951205253601, |
|
"eval_logps": -88.11286926269531, |
|
"eval_loss": 189.17047119140625, |
|
"eval_objective": 184.87181091308594, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.532608687877655, |
|
"eval_regularize": 0.41934508085250854, |
|
"eval_runtime": 498.5381, |
|
"eval_samples_per_second": 11.614, |
|
"eval_steps_per_second": 0.969, |
|
"eval_wo_beta": 15.953052520751953, |
|
"step": 1200 |
|
}, |
|
{ |
|
"dpo_loss": 0.5354551672935486, |
|
"epoch": 3.4152102031176192, |
|
"grad_norm": 12302.298902716244, |
|
"learning_rate": 2.7353505837049583e-07, |
|
"logits": -1.293818712234497, |
|
"logps": -81.88545989990234, |
|
"loss": 33.6714, |
|
"objective": 31.525800704956055, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.6208333373069763, |
|
"regularize": 0.07086090743541718, |
|
"step": 1205, |
|
"wo_beta": 15.15488052368164 |
|
}, |
|
{ |
|
"dpo_loss": 0.5308886170387268, |
|
"epoch": 3.4293811998110533, |
|
"grad_norm": 11960.890598119064, |
|
"learning_rate": 2.6912569338248315e-07, |
|
"logits": -1.300658941268921, |
|
"logps": -83.05274200439453, |
|
"loss": 36.2356, |
|
"objective": 35.77425003051758, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.08412022143602371, |
|
"step": 1210, |
|
"wo_beta": 16.733659744262695 |
|
}, |
|
{ |
|
"dpo_loss": 0.528823733329773, |
|
"epoch": 3.4435521965044873, |
|
"grad_norm": 13078.935439317174, |
|
"learning_rate": 2.64739032345671e-07, |
|
"logits": -1.3109962940216064, |
|
"logps": -84.07682037353516, |
|
"loss": 35.0362, |
|
"objective": 32.51463317871094, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.0771927461028099, |
|
"step": 1215, |
|
"wo_beta": 15.212308883666992 |
|
}, |
|
{ |
|
"dpo_loss": 0.540026843547821, |
|
"epoch": 3.4577231931979218, |
|
"grad_norm": 12256.162682293258, |
|
"learning_rate": 2.603755066399718e-07, |
|
"logits": -1.149971842765808, |
|
"logps": -82.9686508178711, |
|
"loss": 33.1832, |
|
"objective": 32.34642028808594, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.06764715164899826, |
|
"step": 1220, |
|
"wo_beta": 16.678075790405273 |
|
}, |
|
{ |
|
"dpo_loss": 0.524185061454773, |
|
"epoch": 3.471894189891356, |
|
"grad_norm": 12930.685272364457, |
|
"learning_rate": 2.560355453701919e-07, |
|
"logits": -1.302108645439148, |
|
"logps": -82.00885772705078, |
|
"loss": 33.7294, |
|
"objective": 32.768775939941406, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.0753529891371727, |
|
"step": 1225, |
|
"wo_beta": 16.024269104003906 |
|
}, |
|
{ |
|
"dpo_loss": 0.5251755118370056, |
|
"epoch": 3.48606518658479, |
|
"grad_norm": 12434.433063668528, |
|
"learning_rate": 2.517195753238345e-07, |
|
"logits": -1.325141191482544, |
|
"logps": -82.18378448486328, |
|
"loss": 35.229, |
|
"objective": 33.25638961791992, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.625, |
|
"regularize": 0.0771695226430893, |
|
"step": 1230, |
|
"wo_beta": 16.292001724243164 |
|
}, |
|
{ |
|
"dpo_loss": 0.5132806897163391, |
|
"epoch": 3.500236183278224, |
|
"grad_norm": 13558.533453277203, |
|
"learning_rate": 2.474280209291299e-07, |
|
"logits": -1.245792031288147, |
|
"logps": -81.74018096923828, |
|
"loss": 33.2282, |
|
"objective": 33.390872955322266, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.07453177124261856, |
|
"step": 1235, |
|
"wo_beta": 16.363548278808594 |
|
}, |
|
{ |
|
"dpo_loss": 0.5296925902366638, |
|
"epoch": 3.514407179971658, |
|
"grad_norm": 12949.63094083325, |
|
"learning_rate": 2.4316130421329696e-07, |
|
"logits": -1.238582968711853, |
|
"logps": -82.47282409667969, |
|
"loss": 34.0652, |
|
"objective": 31.30968475341797, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.06809426844120026, |
|
"step": 1240, |
|
"wo_beta": 15.522791862487793 |
|
}, |
|
{ |
|
"dpo_loss": 0.5323511362075806, |
|
"epoch": 3.528578176665092, |
|
"grad_norm": 13527.106344889547, |
|
"learning_rate": 2.389198447610418e-07, |
|
"logits": -1.3098766803741455, |
|
"logps": -83.17538452148438, |
|
"loss": 30.2807, |
|
"objective": 31.539880752563477, |
|
"ranking_idealized": 0.6416666507720947, |
|
"ranking_idealized_expo": 0.5541666746139526, |
|
"ranking_simple": 0.625, |
|
"regularize": 0.07134827226400375, |
|
"step": 1245, |
|
"wo_beta": 15.821925163269043 |
|
}, |
|
{ |
|
"dpo_loss": 0.5260218977928162, |
|
"epoch": 3.5427491733585263, |
|
"grad_norm": 13239.929991928584, |
|
"learning_rate": 2.3470405967329604e-07, |
|
"logits": -1.2133029699325562, |
|
"logps": -81.8626480102539, |
|
"loss": 34.073, |
|
"objective": 34.22465515136719, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.625, |
|
"regularize": 0.08072555810213089, |
|
"step": 1250, |
|
"wo_beta": 15.293652534484863 |
|
}, |
|
{ |
|
"epoch": 3.5427491733585263, |
|
"eval_dpo_loss": 0.6802147626876831, |
|
"eval_logits": -1.289227843284607, |
|
"eval_logps": -88.56167602539062, |
|
"eval_loss": 188.2202911376953, |
|
"eval_objective": 184.19659423828125, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.533643901348114, |
|
"eval_regularize": 0.4176720380783081, |
|
"eval_runtime": 501.867, |
|
"eval_samples_per_second": 11.537, |
|
"eval_steps_per_second": 0.962, |
|
"eval_wo_beta": 16.002193450927734, |
|
"step": 1250 |
|
}, |
|
{ |
|
"dpo_loss": 0.5387216806411743, |
|
"epoch": 3.5569201700519604, |
|
"grad_norm": 12534.49899559166, |
|
"learning_rate": 2.3051436352620036e-07, |
|
"logits": -1.2683520317077637, |
|
"logps": -82.32015991210938, |
|
"loss": 36.4025, |
|
"objective": 32.02161407470703, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.07073788344860077, |
|
"step": 1255, |
|
"wo_beta": 15.795002937316895 |
|
}, |
|
{ |
|
"dpo_loss": 0.530408501625061, |
|
"epoch": 3.5710911667453944, |
|
"grad_norm": 12912.721697415427, |
|
"learning_rate": 2.2635116833033392e-07, |
|
"logits": -1.2373536825180054, |
|
"logps": -81.3061294555664, |
|
"loss": 30.8038, |
|
"objective": 33.21593475341797, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.6208333373069763, |
|
"regularize": 0.07438240200281143, |
|
"step": 1260, |
|
"wo_beta": 16.231142044067383 |
|
}, |
|
{ |
|
"dpo_loss": 0.5247560739517212, |
|
"epoch": 3.5852621634388284, |
|
"grad_norm": 12931.353378545553, |
|
"learning_rate": 2.2221488349019902e-07, |
|
"logits": -1.2455730438232422, |
|
"logps": -80.93061828613281, |
|
"loss": 29.6738, |
|
"objective": 31.222820281982422, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.612500011920929, |
|
"regularize": 0.06749995797872543, |
|
"step": 1265, |
|
"wo_beta": 14.711896896362305 |
|
}, |
|
{ |
|
"dpo_loss": 0.5356096625328064, |
|
"epoch": 3.5994331601322624, |
|
"grad_norm": 13549.0763306813, |
|
"learning_rate": 2.181059157639598e-07, |
|
"logits": -1.3499952554702759, |
|
"logps": -81.31751251220703, |
|
"loss": 30.5338, |
|
"objective": 30.125825881958008, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.06899719685316086, |
|
"step": 1270, |
|
"wo_beta": 13.772916793823242 |
|
}, |
|
{ |
|
"dpo_loss": 0.5210896134376526, |
|
"epoch": 3.613604156825697, |
|
"grad_norm": 14924.204646126253, |
|
"learning_rate": 2.1402466922344303e-07, |
|
"logits": -1.210523247718811, |
|
"logps": -82.34052276611328, |
|
"loss": 29.82, |
|
"objective": 29.18175506591797, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.06544475257396698, |
|
"step": 1275, |
|
"wo_beta": 15.473977088928223 |
|
}, |
|
{ |
|
"dpo_loss": 0.5307682752609253, |
|
"epoch": 3.627775153519131, |
|
"grad_norm": 12824.51476470017, |
|
"learning_rate": 2.0997154521440097e-07, |
|
"logits": -1.2915035486221313, |
|
"logps": -81.79452514648438, |
|
"loss": 30.8024, |
|
"objective": 29.49608612060547, |
|
"ranking_idealized": 0.5541666746139526, |
|
"ranking_idealized_expo": 0.4541666805744171, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.06567243486642838, |
|
"step": 1280, |
|
"wo_beta": 15.875335693359375 |
|
}, |
|
{ |
|
"dpo_loss": 0.5249419212341309, |
|
"epoch": 3.641946150212565, |
|
"grad_norm": 13428.13142246555, |
|
"learning_rate": 2.0594694231704373e-07, |
|
"logits": -1.2426308393478394, |
|
"logps": -81.00833892822266, |
|
"loss": 30.3043, |
|
"objective": 30.617321014404297, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.07462318986654282, |
|
"step": 1285, |
|
"wo_beta": 17.443321228027344 |
|
}, |
|
{ |
|
"dpo_loss": 0.5173429250717163, |
|
"epoch": 3.656117146905999, |
|
"grad_norm": 12671.749777744226, |
|
"learning_rate": 2.0195125630684428e-07, |
|
"logits": -1.245200276374817, |
|
"logps": -81.8724594116211, |
|
"loss": 28.4671, |
|
"objective": 27.68103790283203, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.06590177118778229, |
|
"step": 1290, |
|
"wo_beta": 17.08915138244629 |
|
}, |
|
{ |
|
"dpo_loss": 0.5335291028022766, |
|
"epoch": 3.670288143599433, |
|
"grad_norm": 13021.653293493737, |
|
"learning_rate": 1.979848801156167e-07, |
|
"logits": -1.3040084838867188, |
|
"logps": -81.88176727294922, |
|
"loss": 28.4196, |
|
"objective": 28.575376510620117, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.0632786899805069, |
|
"step": 1295, |
|
"wo_beta": 14.829022407531738 |
|
}, |
|
{ |
|
"dpo_loss": 0.5276142954826355, |
|
"epoch": 3.6844591402928675, |
|
"grad_norm": 11978.937253641576, |
|
"learning_rate": 1.9404820379287672e-07, |
|
"logits": -1.187487244606018, |
|
"logps": -80.9906005859375, |
|
"loss": 28.4565, |
|
"objective": 28.971555709838867, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.06764063984155655, |
|
"step": 1300, |
|
"wo_beta": 17.09331512451172 |
|
}, |
|
{ |
|
"epoch": 3.6844591402928675, |
|
"eval_dpo_loss": 0.680322527885437, |
|
"eval_logits": -1.2942335605621338, |
|
"eval_logps": -88.08357238769531, |
|
"eval_loss": 188.31890869140625, |
|
"eval_objective": 184.1293182373047, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5331262946128845, |
|
"eval_regularize": 0.4177800714969635, |
|
"eval_runtime": 491.4726, |
|
"eval_samples_per_second": 11.781, |
|
"eval_steps_per_second": 0.983, |
|
"eval_wo_beta": 16.008142471313477, |
|
"step": 1300 |
|
}, |
|
{ |
|
"dpo_loss": 0.5322309732437134, |
|
"epoch": 3.6986301369863015, |
|
"grad_norm": 13019.22557555901, |
|
"learning_rate": 1.9014161446748422e-07, |
|
"logits": -1.2798058986663818, |
|
"logps": -81.99161529541016, |
|
"loss": 30.5992, |
|
"objective": 32.30867004394531, |
|
"ranking_idealized": 0.5458333492279053, |
|
"ranking_idealized_expo": 0.47083333134651184, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.0735287144780159, |
|
"step": 1305, |
|
"wo_beta": 15.798765182495117 |
|
}, |
|
{ |
|
"dpo_loss": 0.5227470397949219, |
|
"epoch": 3.7128011336797355, |
|
"grad_norm": 12226.371631865619, |
|
"learning_rate": 1.8626549630957395e-07, |
|
"logits": -1.2566769123077393, |
|
"logps": -81.54576110839844, |
|
"loss": 28.0805, |
|
"objective": 26.042844772338867, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.06227840855717659, |
|
"step": 1310, |
|
"wo_beta": 15.27546501159668 |
|
}, |
|
{ |
|
"dpo_loss": 0.537932813167572, |
|
"epoch": 3.7269721303731695, |
|
"grad_norm": 12444.517818477534, |
|
"learning_rate": 1.8242023049277555e-07, |
|
"logits": -1.2929528951644897, |
|
"logps": -81.47209167480469, |
|
"loss": 30.7473, |
|
"objective": 30.499658584594727, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.07173587381839752, |
|
"step": 1315, |
|
"wo_beta": 15.575103759765625 |
|
}, |
|
{ |
|
"dpo_loss": 0.5317214131355286, |
|
"epoch": 3.7411431270666036, |
|
"grad_norm": 13300.946248563114, |
|
"learning_rate": 1.7860619515673032e-07, |
|
"logits": -1.3597683906555176, |
|
"logps": -83.02255249023438, |
|
"loss": 29.6239, |
|
"objective": 28.020469665527344, |
|
"ranking_idealized": 0.6541666388511658, |
|
"ranking_idealized_expo": 0.5708333253860474, |
|
"ranking_simple": 0.6541666388511658, |
|
"regularize": 0.06609723716974258, |
|
"step": 1320, |
|
"wo_beta": 16.70941734313965 |
|
}, |
|
{ |
|
"dpo_loss": 0.5467905402183533, |
|
"epoch": 3.755314123760038, |
|
"grad_norm": 11933.522036621489, |
|
"learning_rate": 1.7482376536990474e-07, |
|
"logits": -1.2760491371154785, |
|
"logps": -81.77200317382812, |
|
"loss": 28.58, |
|
"objective": 27.297456741333008, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.061256349086761475, |
|
"step": 1325, |
|
"wo_beta": 14.996780395507812 |
|
}, |
|
{ |
|
"dpo_loss": 0.5155090689659119, |
|
"epoch": 3.769485120453472, |
|
"grad_norm": 12146.906265203044, |
|
"learning_rate": 1.7107331309270684e-07, |
|
"logits": -1.2232296466827393, |
|
"logps": -81.67552185058594, |
|
"loss": 25.7046, |
|
"objective": 24.283742904663086, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.6083333492279053, |
|
"regularize": 0.05803535133600235, |
|
"step": 1330, |
|
"wo_beta": 14.960771560668945 |
|
}, |
|
{ |
|
"dpo_loss": 0.5197141766548157, |
|
"epoch": 3.783656117146906, |
|
"grad_norm": 13269.1257120231, |
|
"learning_rate": 1.6735520714090778e-07, |
|
"logits": -1.3548495769500732, |
|
"logps": -82.88711547851562, |
|
"loss": 25.5411, |
|
"objective": 23.988988876342773, |
|
"ranking_idealized": 0.637499988079071, |
|
"ranking_idealized_expo": 0.5541666746139526, |
|
"ranking_simple": 0.6333333253860474, |
|
"regularize": 0.05831415578722954, |
|
"step": 1335, |
|
"wo_beta": 15.491255760192871 |
|
}, |
|
{ |
|
"dpo_loss": 0.5400987863540649, |
|
"epoch": 3.79782711384034, |
|
"grad_norm": 12222.682651732252, |
|
"learning_rate": 1.6366981314937372e-07, |
|
"logits": -1.3011000156402588, |
|
"logps": -81.44950866699219, |
|
"loss": 26.7414, |
|
"objective": 27.633180618286133, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5541666746139526, |
|
"ranking_simple": 0.612500011920929, |
|
"regularize": 0.06863755732774734, |
|
"step": 1340, |
|
"wo_beta": 15.513628005981445 |
|
}, |
|
{ |
|
"dpo_loss": 0.5207428336143494, |
|
"epoch": 3.811998110533774, |
|
"grad_norm": 13292.031759115218, |
|
"learning_rate": 1.6001749353610815e-07, |
|
"logits": -1.2988460063934326, |
|
"logps": -81.9979019165039, |
|
"loss": 27.5342, |
|
"objective": 26.436460494995117, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.637499988079071, |
|
"regularize": 0.060691170394420624, |
|
"step": 1345, |
|
"wo_beta": 16.72386360168457 |
|
}, |
|
{ |
|
"dpo_loss": 0.5372669100761414, |
|
"epoch": 3.826169107227208, |
|
"grad_norm": 12429.085011694839, |
|
"learning_rate": 1.5639860746661338e-07, |
|
"logits": -1.3200603723526, |
|
"logps": -80.8891830444336, |
|
"loss": 27.4636, |
|
"objective": 27.883655548095703, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.06708240509033203, |
|
"step": 1350, |
|
"wo_beta": 15.541132926940918 |
|
}, |
|
{ |
|
"epoch": 3.826169107227208, |
|
"eval_dpo_loss": 0.6802567839622498, |
|
"eval_logits": -1.2973301410675049, |
|
"eval_logps": -88.45857238769531, |
|
"eval_loss": 188.3022003173828, |
|
"eval_objective": 184.21910095214844, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5320910811424255, |
|
"eval_regularize": 0.4178454279899597, |
|
"eval_runtime": 484.5227, |
|
"eval_samples_per_second": 11.95, |
|
"eval_steps_per_second": 0.997, |
|
"eval_wo_beta": 15.999577522277832, |
|
"step": 1350 |
|
}, |
|
{ |
|
"dpo_loss": 0.5401098132133484, |
|
"epoch": 3.840340103920642, |
|
"grad_norm": 12546.873988889934, |
|
"learning_rate": 1.5281351081856976e-07, |
|
"logits": -1.3091717958450317, |
|
"logps": -81.95738983154297, |
|
"loss": 24.53, |
|
"objective": 23.978574752807617, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.05746602639555931, |
|
"step": 1355, |
|
"wo_beta": 15.96954345703125 |
|
}, |
|
{ |
|
"dpo_loss": 0.5322627425193787, |
|
"epoch": 3.8545111006140766, |
|
"grad_norm": 12396.074158573574, |
|
"learning_rate": 1.492625561468393e-07, |
|
"logits": -1.2270203828811646, |
|
"logps": -81.94197082519531, |
|
"loss": 27.8079, |
|
"objective": 25.823699951171875, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.06090990826487541, |
|
"step": 1360, |
|
"wo_beta": 15.92143440246582 |
|
}, |
|
{ |
|
"dpo_loss": 0.5215187668800354, |
|
"epoch": 3.8686820973075107, |
|
"grad_norm": 12924.951740893872, |
|
"learning_rate": 1.4574609264879632e-07, |
|
"logits": -1.2885017395019531, |
|
"logps": -81.9835205078125, |
|
"loss": 24.6244, |
|
"objective": 21.932554244995117, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.612500011920929, |
|
"regularize": 0.05161268636584282, |
|
"step": 1365, |
|
"wo_beta": 15.276032447814941 |
|
}, |
|
{ |
|
"dpo_loss": 0.5322207808494568, |
|
"epoch": 3.8828530940009447, |
|
"grad_norm": 11760.04729219421, |
|
"learning_rate": 1.4226446612998671e-07, |
|
"logits": -1.325412631034851, |
|
"logps": -82.93399810791016, |
|
"loss": 25.2873, |
|
"objective": 22.0572566986084, |
|
"ranking_idealized": 0.6208333373069763, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.637499988079071, |
|
"regularize": 0.05589644983410835, |
|
"step": 1370, |
|
"wo_beta": 16.43442726135254 |
|
}, |
|
{ |
|
"dpo_loss": 0.5177661776542664, |
|
"epoch": 3.8970240906943787, |
|
"grad_norm": 12668.234366032097, |
|
"learning_rate": 1.3881801897012224e-07, |
|
"logits": -1.3054790496826172, |
|
"logps": -81.97600555419922, |
|
"loss": 25.3791, |
|
"objective": 25.463533401489258, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.06239425763487816, |
|
"step": 1375, |
|
"wo_beta": 15.740779876708984 |
|
}, |
|
{ |
|
"dpo_loss": 0.5269008874893188, |
|
"epoch": 3.9111950873878127, |
|
"grad_norm": 11613.901925945589, |
|
"learning_rate": 1.3540709008941147e-07, |
|
"logits": -1.2125933170318604, |
|
"logps": -81.08470153808594, |
|
"loss": 24.5614, |
|
"objective": 27.379404067993164, |
|
"ranking_idealized": 0.6416666507720947, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.6499999761581421, |
|
"regularize": 0.06529100984334946, |
|
"step": 1380, |
|
"wo_beta": 16.324913024902344 |
|
}, |
|
{ |
|
"dpo_loss": 0.5204812288284302, |
|
"epoch": 3.925366084081247, |
|
"grad_norm": 12262.7455062338, |
|
"learning_rate": 1.3203201491523024e-07, |
|
"logits": -1.1872669458389282, |
|
"logps": -82.68800354003906, |
|
"loss": 26.2354, |
|
"objective": 27.383338928222656, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.06124182417988777, |
|
"step": 1385, |
|
"wo_beta": 17.184247970581055 |
|
}, |
|
{ |
|
"dpo_loss": 0.5293174982070923, |
|
"epoch": 3.9395370807746812, |
|
"grad_norm": 11656.13294817261, |
|
"learning_rate": 1.2869312534913685e-07, |
|
"logits": -1.3625025749206543, |
|
"logps": -81.69257354736328, |
|
"loss": 25.8656, |
|
"objective": 27.87486457824707, |
|
"ranking_idealized": 0.5375000238418579, |
|
"ranking_idealized_expo": 0.4541666805744171, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.06757337599992752, |
|
"step": 1390, |
|
"wo_beta": 14.843222618103027 |
|
}, |
|
{ |
|
"dpo_loss": 0.5323649644851685, |
|
"epoch": 3.9537080774681153, |
|
"grad_norm": 12688.563452750986, |
|
"learning_rate": 1.2539074973423204e-07, |
|
"logits": -1.344056487083435, |
|
"logps": -82.50756072998047, |
|
"loss": 25.269, |
|
"objective": 20.71147346496582, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.05190667137503624, |
|
"step": 1395, |
|
"wo_beta": 15.608321189880371 |
|
}, |
|
{ |
|
"dpo_loss": 0.5348060727119446, |
|
"epoch": 3.9678790741615493, |
|
"grad_norm": 13248.732573569929, |
|
"learning_rate": 1.2212521282287093e-07, |
|
"logits": -1.2224748134613037, |
|
"logps": -80.45255279541016, |
|
"loss": 27.3902, |
|
"objective": 28.852842330932617, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.0665024146437645, |
|
"step": 1400, |
|
"wo_beta": 16.69828987121582 |
|
}, |
|
{ |
|
"epoch": 3.9678790741615493, |
|
"eval_dpo_loss": 0.6798388957977295, |
|
"eval_logits": -1.2974461317062378, |
|
"eval_logps": -88.3134765625, |
|
"eval_loss": 187.96913146972656, |
|
"eval_objective": 183.7815704345703, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5320910811424255, |
|
"eval_regularize": 0.4168493151664734, |
|
"eval_runtime": 519.2809, |
|
"eval_samples_per_second": 11.15, |
|
"eval_steps_per_second": 0.93, |
|
"eval_wo_beta": 15.978778839111328, |
|
"step": 1400 |
|
}, |
|
{ |
|
"dpo_loss": 0.5318723320960999, |
|
"epoch": 3.9820500708549833, |
|
"grad_norm": 12626.278495743487, |
|
"learning_rate": 1.1889683574472692e-07, |
|
"logits": -1.2031117677688599, |
|
"logps": -81.43195343017578, |
|
"loss": 25.6619, |
|
"objective": 22.53989028930664, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.05257093533873558, |
|
"step": 1405, |
|
"wo_beta": 14.43735122680664 |
|
}, |
|
{ |
|
"dpo_loss": 0.5325983762741089, |
|
"epoch": 3.9962210675484178, |
|
"grad_norm": 12962.865030589033, |
|
"learning_rate": 1.15705935975212e-07, |
|
"logits": -1.2109463214874268, |
|
"logps": -80.95507049560547, |
|
"loss": 25.0327, |
|
"objective": 27.48863410949707, |
|
"ranking_idealized": 0.6541666388511658, |
|
"ranking_idealized_expo": 0.5583333373069763, |
|
"ranking_simple": 0.637499988079071, |
|
"regularize": 0.06891029328107834, |
|
"step": 1410, |
|
"wo_beta": 15.6097993850708 |
|
}, |
|
{ |
|
"dpo_loss": 0.5317092537879944, |
|
"epoch": 4.010392064241851, |
|
"grad_norm": 12833.61434685088, |
|
"learning_rate": 1.1255282730425708e-07, |
|
"logits": -1.2491552829742432, |
|
"logps": -81.32047271728516, |
|
"loss": 22.2145, |
|
"objective": 24.41758155822754, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.06543368101119995, |
|
"step": 1415, |
|
"wo_beta": 15.283975601196289 |
|
}, |
|
{ |
|
"dpo_loss": 0.5239009261131287, |
|
"epoch": 4.024563060935286, |
|
"grad_norm": 13451.327899072105, |
|
"learning_rate": 1.094378198054533e-07, |
|
"logits": -1.353010654449463, |
|
"logps": -83.2571792602539, |
|
"loss": 23.0966, |
|
"objective": 24.90163230895996, |
|
"ranking_idealized": 0.5291666388511658, |
|
"ranking_idealized_expo": 0.4416666626930237, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.0562543049454689, |
|
"step": 1420, |
|
"wo_beta": 16.40116310119629 |
|
}, |
|
{ |
|
"dpo_loss": 0.53034508228302, |
|
"epoch": 4.03873405762872, |
|
"grad_norm": 13582.157317581643, |
|
"learning_rate": 1.063612198055604e-07, |
|
"logits": -1.2672284841537476, |
|
"logps": -82.41036987304688, |
|
"loss": 19.725, |
|
"objective": 18.898433685302734, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.04213841259479523, |
|
"step": 1425, |
|
"wo_beta": 17.573118209838867 |
|
}, |
|
{ |
|
"dpo_loss": 0.5290653109550476, |
|
"epoch": 4.052905054322154, |
|
"grad_norm": 12471.786390228664, |
|
"learning_rate": 1.0332332985438247e-07, |
|
"logits": -1.2409167289733887, |
|
"logps": -82.05091094970703, |
|
"loss": 21.8465, |
|
"objective": 20.57358741760254, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.6083333492279053, |
|
"regularize": 0.05022308602929115, |
|
"step": 1430, |
|
"wo_beta": 17.054475784301758 |
|
}, |
|
{ |
|
"dpo_loss": 0.5352352261543274, |
|
"epoch": 4.067076051015588, |
|
"grad_norm": 12729.012234556472, |
|
"learning_rate": 1.0032444869501577e-07, |
|
"logits": -1.1344469785690308, |
|
"logps": -84.53145599365234, |
|
"loss": 23.6283, |
|
"objective": 21.45845603942871, |
|
"ranking_idealized": 0.5375000238418579, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.046408891677856445, |
|
"step": 1435, |
|
"wo_beta": 17.1253719329834 |
|
}, |
|
{ |
|
"dpo_loss": 0.5163091421127319, |
|
"epoch": 4.081247047709022, |
|
"grad_norm": 12403.62054840324, |
|
"learning_rate": 9.736487123447068e-08, |
|
"logits": -1.3162797689437866, |
|
"logps": -83.0071792602539, |
|
"loss": 18.4912, |
|
"objective": 19.839466094970703, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.04717810079455376, |
|
"step": 1440, |
|
"wo_beta": 16.756040573120117 |
|
}, |
|
{ |
|
"dpo_loss": 0.5394971966743469, |
|
"epoch": 4.095418044402456, |
|
"grad_norm": 13017.070767832263, |
|
"learning_rate": 9.444488851467041e-08, |
|
"logits": -1.2141478061676025, |
|
"logps": -81.8912582397461, |
|
"loss": 22.8616, |
|
"objective": 24.104333877563477, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.05671803280711174, |
|
"step": 1445, |
|
"wo_beta": 15.497802734375 |
|
}, |
|
{ |
|
"dpo_loss": 0.5386175513267517, |
|
"epoch": 4.109589041095891, |
|
"grad_norm": 12319.490850536135, |
|
"learning_rate": 9.156478768383058e-08, |
|
"logits": -1.2780787944793701, |
|
"logps": -82.44509887695312, |
|
"loss": 21.2906, |
|
"objective": 22.363698959350586, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.05074004456400871, |
|
"step": 1450, |
|
"wo_beta": 16.318647384643555 |
|
}, |
|
{ |
|
"epoch": 4.109589041095891, |
|
"eval_dpo_loss": 0.6796455383300781, |
|
"eval_logits": -1.2975972890853882, |
|
"eval_logps": -88.12124633789062, |
|
"eval_loss": 187.89852905273438, |
|
"eval_objective": 183.65463256835938, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5320910811424255, |
|
"eval_regularize": 0.4164124131202698, |
|
"eval_runtime": 516.4821, |
|
"eval_samples_per_second": 11.21, |
|
"eval_steps_per_second": 0.935, |
|
"eval_wo_beta": 15.985260009765625, |
|
"step": 1450 |
|
}, |
|
{ |
|
"dpo_loss": 0.5313987135887146, |
|
"epoch": 4.123760037789324, |
|
"grad_norm": 12478.853769070673, |
|
"learning_rate": 8.872485196822122e-08, |
|
"logits": -1.2814396619796753, |
|
"logps": -81.72008514404297, |
|
"loss": 22.8821, |
|
"objective": 23.81187629699707, |
|
"ranking_idealized": 0.6416666507720947, |
|
"ranking_idealized_expo": 0.5916666388511658, |
|
"ranking_simple": 0.6458333134651184, |
|
"regularize": 0.05551544576883316, |
|
"step": 1455, |
|
"wo_beta": 18.80474090576172 |
|
}, |
|
{ |
|
"dpo_loss": 0.5285670161247253, |
|
"epoch": 4.137931034482759, |
|
"grad_norm": 13016.324616810654, |
|
"learning_rate": 8.592536064431466e-08, |
|
"logits": -1.3169968128204346, |
|
"logps": -82.27637481689453, |
|
"loss": 21.0762, |
|
"objective": 22.214412689208984, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.05284254625439644, |
|
"step": 1460, |
|
"wo_beta": 16.45089340209961 |
|
}, |
|
{ |
|
"dpo_loss": 0.5275595784187317, |
|
"epoch": 4.1521020311761925, |
|
"grad_norm": 12828.215315021795, |
|
"learning_rate": 8.316658901132163e-08, |
|
"logits": -1.2044638395309448, |
|
"logps": -83.09059143066406, |
|
"loss": 20.1235, |
|
"objective": 19.89800453186035, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5541666746139526, |
|
"ranking_simple": 0.6208333373069763, |
|
"regularize": 0.04856906086206436, |
|
"step": 1465, |
|
"wo_beta": 16.143047332763672 |
|
}, |
|
{ |
|
"dpo_loss": 0.5317350029945374, |
|
"epoch": 4.166273027869627, |
|
"grad_norm": 13452.677353962536, |
|
"learning_rate": 8.044880836411888e-08, |
|
"logits": -1.312625527381897, |
|
"logps": -80.955810546875, |
|
"loss": 18.8621, |
|
"objective": 22.22332000732422, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.05493269860744476, |
|
"step": 1470, |
|
"wo_beta": 14.867803573608398 |
|
}, |
|
{ |
|
"dpo_loss": 0.5068629384040833, |
|
"epoch": 4.1804440245630605, |
|
"grad_norm": 12445.31776981503, |
|
"learning_rate": 7.777228596656993e-08, |
|
"logits": -1.2618132829666138, |
|
"logps": -83.48854064941406, |
|
"loss": 18.8691, |
|
"objective": 17.614728927612305, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5541666746139526, |
|
"ranking_simple": 0.612500011920929, |
|
"regularize": 0.04399799555540085, |
|
"step": 1475, |
|
"wo_beta": 17.06732940673828 |
|
}, |
|
{ |
|
"dpo_loss": 0.5202235579490662, |
|
"epoch": 4.194615021256495, |
|
"grad_norm": 12224.02993997593, |
|
"learning_rate": 7.513728502524286e-08, |
|
"logits": -1.1893463134765625, |
|
"logps": -81.5462417602539, |
|
"loss": 19.5471, |
|
"objective": 21.709897994995117, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.05476529151201248, |
|
"step": 1480, |
|
"wo_beta": 16.902223587036133 |
|
}, |
|
{ |
|
"dpo_loss": 0.528392493724823, |
|
"epoch": 4.2087860179499295, |
|
"grad_norm": 12678.153549499324, |
|
"learning_rate": 7.25440646635268e-08, |
|
"logits": -1.3054612874984741, |
|
"logps": -80.2231674194336, |
|
"loss": 19.6042, |
|
"objective": 19.114337921142578, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.04563932120800018, |
|
"step": 1485, |
|
"wo_beta": 16.017080307006836 |
|
}, |
|
{ |
|
"dpo_loss": 0.5332812070846558, |
|
"epoch": 4.222957014643363, |
|
"grad_norm": 12851.091233563351, |
|
"learning_rate": 6.999287989614971e-08, |
|
"logits": -1.368248462677002, |
|
"logps": -81.43551635742188, |
|
"loss": 19.3664, |
|
"objective": 18.39341926574707, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.47083333134651184, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.0449262373149395, |
|
"step": 1490, |
|
"wo_beta": 14.998396873474121 |
|
}, |
|
{ |
|
"dpo_loss": 0.5162668824195862, |
|
"epoch": 4.2371280113367975, |
|
"grad_norm": 13439.750358421123, |
|
"learning_rate": 6.74839816041013e-08, |
|
"logits": -1.3570283651351929, |
|
"logps": -81.74089050292969, |
|
"loss": 16.8521, |
|
"objective": 18.91334342956543, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.04384367913007736, |
|
"step": 1495, |
|
"wo_beta": 18.151466369628906 |
|
}, |
|
{ |
|
"dpo_loss": 0.5271181464195251, |
|
"epoch": 4.251299008030231, |
|
"grad_norm": 12462.836104102607, |
|
"learning_rate": 6.501761650996052e-08, |
|
"logits": -1.3143360614776611, |
|
"logps": -83.34208679199219, |
|
"loss": 19.8787, |
|
"objective": 20.79971694946289, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.04957110807299614, |
|
"step": 1500, |
|
"wo_beta": 16.131967544555664 |
|
}, |
|
{ |
|
"epoch": 4.251299008030231, |
|
"eval_dpo_loss": 0.679940402507782, |
|
"eval_logits": -1.2942196130752563, |
|
"eval_logps": -88.3078384399414, |
|
"eval_loss": 188.08248901367188, |
|
"eval_objective": 183.8683624267578, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5320910811424255, |
|
"eval_regularize": 0.4168849587440491, |
|
"eval_runtime": 525.9589, |
|
"eval_samples_per_second": 11.008, |
|
"eval_steps_per_second": 0.918, |
|
"eval_wo_beta": 15.983942031860352, |
|
"step": 1500 |
|
}, |
|
{ |
|
"dpo_loss": 0.5348100066184998, |
|
"epoch": 4.2654700047236656, |
|
"grad_norm": 12354.445167507907, |
|
"learning_rate": 6.259402715363394e-08, |
|
"logits": -1.3128606081008911, |
|
"logps": -83.40116119384766, |
|
"loss": 18.971, |
|
"objective": 17.431968688964844, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.0443451851606369, |
|
"step": 1505, |
|
"wo_beta": 15.766800880432129 |
|
}, |
|
{ |
|
"dpo_loss": 0.5173017382621765, |
|
"epoch": 4.2796410014171, |
|
"grad_norm": 13102.659789592512, |
|
"learning_rate": 6.021345186850418e-08, |
|
"logits": -1.2090104818344116, |
|
"logps": -81.23714447021484, |
|
"loss": 21.191, |
|
"objective": 21.426023483276367, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.04612095281481743, |
|
"step": 1510, |
|
"wo_beta": 15.78390121459961 |
|
}, |
|
{ |
|
"dpo_loss": 0.5307953357696533, |
|
"epoch": 4.293811998110534, |
|
"grad_norm": 12945.706552780925, |
|
"learning_rate": 5.787612475799269e-08, |
|
"logits": -1.367775559425354, |
|
"logps": -82.44042205810547, |
|
"loss": 16.9107, |
|
"objective": 16.15281867980957, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.6291666626930237, |
|
"regularize": 0.03850070759654045, |
|
"step": 1515, |
|
"wo_beta": 16.397567749023438 |
|
}, |
|
{ |
|
"dpo_loss": 0.5217214226722717, |
|
"epoch": 4.307982994803968, |
|
"grad_norm": 12094.58497098056, |
|
"learning_rate": 5.5582275672538316e-08, |
|
"logits": -1.2217297554016113, |
|
"logps": -81.85955047607422, |
|
"loss": 18.4539, |
|
"objective": 19.449350357055664, |
|
"ranking_idealized": 0.6791666746139526, |
|
"ranking_idealized_expo": 0.6041666865348816, |
|
"ranking_simple": 0.6791666746139526, |
|
"regularize": 0.04723352938890457, |
|
"step": 1520, |
|
"wo_beta": 16.819021224975586 |
|
}, |
|
{ |
|
"dpo_loss": 0.5168942809104919, |
|
"epoch": 4.322153991497402, |
|
"grad_norm": 12423.169223430634, |
|
"learning_rate": 5.333213018699356e-08, |
|
"logits": -1.2731564044952393, |
|
"logps": -81.88040161132812, |
|
"loss": 21.872, |
|
"objective": 21.83941078186035, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.6291666626930237, |
|
"regularize": 0.054684512317180634, |
|
"step": 1525, |
|
"wo_beta": 14.882065773010254 |
|
}, |
|
{ |
|
"dpo_loss": 0.5241533517837524, |
|
"epoch": 4.336324988190836, |
|
"grad_norm": 13878.266489791004, |
|
"learning_rate": 5.112590957844232e-08, |
|
"logits": -1.3176230192184448, |
|
"logps": -83.9821548461914, |
|
"loss": 20.6818, |
|
"objective": 16.49356460571289, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.612500011920929, |
|
"regularize": 0.04093782603740692, |
|
"step": 1530, |
|
"wo_beta": 16.301631927490234 |
|
}, |
|
{ |
|
"dpo_loss": 0.5228941440582275, |
|
"epoch": 4.350495984884271, |
|
"grad_norm": 12969.925803784026, |
|
"learning_rate": 4.896383080443933e-08, |
|
"logits": -1.216440200805664, |
|
"logps": -82.53515625, |
|
"loss": 18.6444, |
|
"objective": 18.548452377319336, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.4625000059604645, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.047610316425561905, |
|
"step": 1535, |
|
"wo_beta": 15.176318168640137 |
|
}, |
|
{ |
|
"dpo_loss": 0.5215330123901367, |
|
"epoch": 4.364666981577704, |
|
"grad_norm": 12724.232692363212, |
|
"learning_rate": 4.684610648167503e-08, |
|
"logits": -1.3027079105377197, |
|
"logps": -81.91221618652344, |
|
"loss": 21.6498, |
|
"objective": 21.20092010498047, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.05302129685878754, |
|
"step": 1540, |
|
"wo_beta": 16.298704147338867 |
|
}, |
|
{ |
|
"dpo_loss": 0.5356315970420837, |
|
"epoch": 4.378837978271139, |
|
"grad_norm": 12224.725778808395, |
|
"learning_rate": 4.4772944865067055e-08, |
|
"logits": -1.3303568363189697, |
|
"logps": -83.5517578125, |
|
"loss": 17.8525, |
|
"objective": 20.373811721801758, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.050720926374197006, |
|
"step": 1545, |
|
"wo_beta": 15.988405227661133 |
|
}, |
|
{ |
|
"dpo_loss": 0.5247156023979187, |
|
"epoch": 4.393008974964572, |
|
"grad_norm": 12442.357612605178, |
|
"learning_rate": 4.274454982728032e-08, |
|
"logits": -1.246690034866333, |
|
"logps": -81.54380798339844, |
|
"loss": 18.4741, |
|
"objective": 19.52410316467285, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.049736883491277695, |
|
"step": 1550, |
|
"wo_beta": 17.08685874938965 |
|
}, |
|
{ |
|
"epoch": 4.393008974964572, |
|
"eval_dpo_loss": 0.6802076697349548, |
|
"eval_logits": -1.2950727939605713, |
|
"eval_logps": -88.48546600341797, |
|
"eval_loss": 188.04074096679688, |
|
"eval_objective": 184.0446319580078, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.532608687877655, |
|
"eval_regularize": 0.4172796308994293, |
|
"eval_runtime": 533.9808, |
|
"eval_samples_per_second": 10.843, |
|
"eval_steps_per_second": 0.905, |
|
"eval_wo_beta": 15.994985580444336, |
|
"step": 1550 |
|
}, |
|
{ |
|
"dpo_loss": 0.5181335210800171, |
|
"epoch": 4.407179971658007, |
|
"grad_norm": 13002.101456533634, |
|
"learning_rate": 4.0761120838678776e-08, |
|
"logits": -1.3068591356277466, |
|
"logps": -81.81246185302734, |
|
"loss": 16.5342, |
|
"objective": 14.914339065551758, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.0403703935444355, |
|
"step": 1555, |
|
"wo_beta": 17.326810836791992 |
|
}, |
|
{ |
|
"dpo_loss": 0.5418220162391663, |
|
"epoch": 4.42135096835144, |
|
"grad_norm": 11889.810698222469, |
|
"learning_rate": 3.882285294770937e-08, |
|
"logits": -1.2680351734161377, |
|
"logps": -80.56555938720703, |
|
"loss": 16.764, |
|
"objective": 17.03957176208496, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.04209714010357857, |
|
"step": 1560, |
|
"wo_beta": 14.395126342773438 |
|
}, |
|
{ |
|
"dpo_loss": 0.5355924963951111, |
|
"epoch": 4.435521965044875, |
|
"grad_norm": 12185.339277571, |
|
"learning_rate": 3.6929936761721403e-08, |
|
"logits": -1.2988630533218384, |
|
"logps": -80.5867919921875, |
|
"loss": 21.4187, |
|
"objective": 21.873271942138672, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.05235178396105766, |
|
"step": 1565, |
|
"wo_beta": 14.826796531677246 |
|
}, |
|
{ |
|
"dpo_loss": 0.5378596782684326, |
|
"epoch": 4.449692961738309, |
|
"grad_norm": 11114.71452911412, |
|
"learning_rate": 3.508255842822255e-08, |
|
"logits": -1.3118114471435547, |
|
"logps": -81.77924346923828, |
|
"loss": 18.6149, |
|
"objective": 20.33370590209961, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.05320237576961517, |
|
"step": 1570, |
|
"wo_beta": 15.688643455505371 |
|
}, |
|
{ |
|
"dpo_loss": 0.5132429599761963, |
|
"epoch": 4.463863958431743, |
|
"grad_norm": 12945.538981188476, |
|
"learning_rate": 3.3280899616572656e-08, |
|
"logits": -1.3532111644744873, |
|
"logps": -84.82633209228516, |
|
"loss": 17.216, |
|
"objective": 17.143177032470703, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.042684536427259445, |
|
"step": 1575, |
|
"wo_beta": 17.00408935546875 |
|
}, |
|
{ |
|
"dpo_loss": 0.5311785340309143, |
|
"epoch": 4.478034955125177, |
|
"grad_norm": 13235.594805356337, |
|
"learning_rate": 3.15251375001192e-08, |
|
"logits": -1.2649219036102295, |
|
"logps": -82.44920349121094, |
|
"loss": 17.9899, |
|
"objective": 17.875553131103516, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.043413810431957245, |
|
"step": 1580, |
|
"wo_beta": 17.040142059326172 |
|
}, |
|
{ |
|
"dpo_loss": 0.5295437574386597, |
|
"epoch": 4.492205951818612, |
|
"grad_norm": 13164.392376509253, |
|
"learning_rate": 2.98154447387739e-08, |
|
"logits": -1.318244457244873, |
|
"logps": -81.6868667602539, |
|
"loss": 18.7186, |
|
"objective": 14.95705509185791, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.03320387750864029, |
|
"step": 1585, |
|
"wo_beta": 17.157299041748047 |
|
}, |
|
{ |
|
"dpo_loss": 0.527228593826294, |
|
"epoch": 4.506376948512045, |
|
"grad_norm": 12124.025371614676, |
|
"learning_rate": 2.8151989462033787e-08, |
|
"logits": -1.1829341650009155, |
|
"logps": -83.83565521240234, |
|
"loss": 18.9673, |
|
"objective": 16.773042678833008, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.042777713388204575, |
|
"step": 1590, |
|
"wo_beta": 16.952783584594727 |
|
}, |
|
{ |
|
"dpo_loss": 0.5242041349411011, |
|
"epoch": 4.52054794520548, |
|
"grad_norm": 11927.935212297323, |
|
"learning_rate": 2.653493525244721e-08, |
|
"logits": -1.2492893934249878, |
|
"logps": -82.36843872070312, |
|
"loss": 17.1521, |
|
"objective": 18.047021865844727, |
|
"ranking_idealized": 0.6208333373069763, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.04608127102255821, |
|
"step": 1595, |
|
"wo_beta": 15.581862449645996 |
|
}, |
|
{ |
|
"dpo_loss": 0.5243973135948181, |
|
"epoch": 4.534718941898913, |
|
"grad_norm": 12379.266840127142, |
|
"learning_rate": 2.4964441129527335e-08, |
|
"logits": -1.2830615043640137, |
|
"logps": -82.28716278076172, |
|
"loss": 20.4794, |
|
"objective": 17.599641799926758, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.6333333253860474, |
|
"regularize": 0.042389459908008575, |
|
"step": 1600, |
|
"wo_beta": 16.58247184753418 |
|
}, |
|
{ |
|
"epoch": 4.534718941898913, |
|
"eval_dpo_loss": 0.6798632740974426, |
|
"eval_logits": -1.2950247526168823, |
|
"eval_logps": -88.43807983398438, |
|
"eval_loss": 187.9060821533203, |
|
"eval_objective": 183.82763671875, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5331262946128845, |
|
"eval_regularize": 0.4167550504207611, |
|
"eval_runtime": 510.5256, |
|
"eval_samples_per_second": 11.341, |
|
"eval_steps_per_second": 0.946, |
|
"eval_wo_beta": 16.000411987304688, |
|
"step": 1600 |
|
}, |
|
{ |
|
"dpo_loss": 0.5349418520927429, |
|
"epoch": 4.548889938592348, |
|
"grad_norm": 13428.292487446544, |
|
"learning_rate": 2.3440661534114557e-08, |
|
"logits": -1.2768018245697021, |
|
"logps": -83.37641906738281, |
|
"loss": 17.8123, |
|
"objective": 14.984145164489746, |
|
"ranking_idealized": 0.5541666746139526, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.0358855277299881, |
|
"step": 1605, |
|
"wo_beta": 16.76499366760254 |
|
}, |
|
{ |
|
"dpo_loss": 0.5164486169815063, |
|
"epoch": 4.563060935285781, |
|
"grad_norm": 12892.913912379732, |
|
"learning_rate": 2.1963746313188757e-08, |
|
"logits": -1.249220371246338, |
|
"logps": -81.78076171875, |
|
"loss": 17.1832, |
|
"objective": 20.233509063720703, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.47083333134651184, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.048116158694028854, |
|
"step": 1610, |
|
"wo_beta": 15.82449722290039 |
|
}, |
|
{ |
|
"dpo_loss": 0.5349178314208984, |
|
"epoch": 4.577231931979216, |
|
"grad_norm": 12493.396334435913, |
|
"learning_rate": 2.053384070513353e-08, |
|
"logits": -1.2513455152511597, |
|
"logps": -80.9568862915039, |
|
"loss": 18.7751, |
|
"objective": 20.071449279785156, |
|
"ranking_idealized": 0.5291666388511658, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 0.04651705548167229, |
|
"step": 1615, |
|
"wo_beta": 14.514166831970215 |
|
}, |
|
{ |
|
"dpo_loss": 0.5360397100448608, |
|
"epoch": 4.59140292867265, |
|
"grad_norm": 12311.497249141552, |
|
"learning_rate": 1.915108532545351e-08, |
|
"logits": -1.3831831216812134, |
|
"logps": -81.701904296875, |
|
"loss": 16.5863, |
|
"objective": 13.440372467041016, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.03207644820213318, |
|
"step": 1620, |
|
"wo_beta": 16.37172508239746 |
|
}, |
|
{ |
|
"dpo_loss": 0.5228015780448914, |
|
"epoch": 4.605573925366084, |
|
"grad_norm": 12520.657843831757, |
|
"learning_rate": 1.781561615294652e-08, |
|
"logits": -1.3208075761795044, |
|
"logps": -82.14677429199219, |
|
"loss": 17.2643, |
|
"objective": 16.142719268798828, |
|
"ranking_idealized": 0.6791666746139526, |
|
"ranking_idealized_expo": 0.5916666388511658, |
|
"ranking_simple": 0.6875, |
|
"regularize": 0.03792344033718109, |
|
"step": 1625, |
|
"wo_beta": 15.518718719482422 |
|
}, |
|
{ |
|
"dpo_loss": 0.5221564173698425, |
|
"epoch": 4.619744922059518, |
|
"grad_norm": 11926.649260036038, |
|
"learning_rate": 1.6527564516331638e-08, |
|
"logits": -1.1876474618911743, |
|
"logps": -82.74609375, |
|
"loss": 17.5255, |
|
"objective": 16.14875602722168, |
|
"ranking_idealized": 0.6708333492279053, |
|
"ranking_idealized_expo": 0.5874999761581421, |
|
"ranking_simple": 0.6708333492279053, |
|
"regularize": 0.039545025676488876, |
|
"step": 1630, |
|
"wo_beta": 17.103187561035156 |
|
}, |
|
{ |
|
"dpo_loss": 0.5277553796768188, |
|
"epoch": 4.633915918752952, |
|
"grad_norm": 12387.92239266219, |
|
"learning_rate": 1.5287057081333988e-08, |
|
"logits": -1.303261399269104, |
|
"logps": -82.68264770507812, |
|
"loss": 17.5837, |
|
"objective": 18.295978546142578, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.6291666626930237, |
|
"regularize": 0.04383000358939171, |
|
"step": 1635, |
|
"wo_beta": 16.273590087890625 |
|
}, |
|
{ |
|
"dpo_loss": 0.5235089063644409, |
|
"epoch": 4.648086915446386, |
|
"grad_norm": 13550.591286437839, |
|
"learning_rate": 1.4094215838229172e-08, |
|
"logits": -1.3104770183563232, |
|
"logps": -81.95443725585938, |
|
"loss": 16.0714, |
|
"objective": 18.62168312072754, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.044566281139850616, |
|
"step": 1640, |
|
"wo_beta": 14.541909217834473 |
|
}, |
|
{ |
|
"dpo_loss": 0.5459772944450378, |
|
"epoch": 4.662257912139821, |
|
"grad_norm": 12589.25993273719, |
|
"learning_rate": 1.2949158089846368e-08, |
|
"logits": -1.2789607048034668, |
|
"logps": -80.86375427246094, |
|
"loss": 15.9698, |
|
"objective": 15.747620582580566, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.03958037868142128, |
|
"step": 1645, |
|
"wo_beta": 16.792747497558594 |
|
}, |
|
{ |
|
"dpo_loss": 0.5279684066772461, |
|
"epoch": 4.6764289088332545, |
|
"grad_norm": 11986.458011152894, |
|
"learning_rate": 1.1851996440033318e-08, |
|
"logits": -1.224802017211914, |
|
"logps": -81.75625610351562, |
|
"loss": 17.2115, |
|
"objective": 18.047420501708984, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.04608708992600441, |
|
"step": 1650, |
|
"wo_beta": 17.34733772277832 |
|
}, |
|
{ |
|
"epoch": 4.6764289088332545, |
|
"eval_dpo_loss": 0.6798492074012756, |
|
"eval_logits": -1.293831467628479, |
|
"eval_logps": -88.41741943359375, |
|
"eval_loss": 187.95040893554688, |
|
"eval_objective": 183.85658264160156, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.532608687877655, |
|
"eval_regularize": 0.416820764541626, |
|
"eval_runtime": 510.4095, |
|
"eval_samples_per_second": 11.344, |
|
"eval_steps_per_second": 0.946, |
|
"eval_wo_beta": 15.994239807128906, |
|
"step": 1650 |
|
}, |
|
{ |
|
"dpo_loss": 0.5183621048927307, |
|
"epoch": 4.690599905526689, |
|
"grad_norm": 12001.298881228338, |
|
"learning_rate": 1.0802838782582535e-08, |
|
"logits": -1.2560440301895142, |
|
"logps": -81.986083984375, |
|
"loss": 18.141, |
|
"objective": 16.23440170288086, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.0418228842318058, |
|
"step": 1655, |
|
"wo_beta": 14.709871292114258 |
|
}, |
|
{ |
|
"dpo_loss": 0.5308786034584045, |
|
"epoch": 4.7047709022201225, |
|
"grad_norm": 12471.919482995943, |
|
"learning_rate": 9.801788290621505e-09, |
|
"logits": -1.242910623550415, |
|
"logps": -82.37290954589844, |
|
"loss": 19.8764, |
|
"objective": 21.41328239440918, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.6083333492279053, |
|
"regularize": 0.04805602878332138, |
|
"step": 1660, |
|
"wo_beta": 16.161657333374023 |
|
}, |
|
{ |
|
"dpo_loss": 0.517335832118988, |
|
"epoch": 4.718941898913557, |
|
"grad_norm": 12326.624130987268, |
|
"learning_rate": 8.848943406466468e-09, |
|
"logits": -1.2066967487335205, |
|
"logps": -81.63778686523438, |
|
"loss": 17.9054, |
|
"objective": 18.123321533203125, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.47083333134651184, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.04272852838039398, |
|
"step": 1665, |
|
"wo_beta": 15.821066856384277 |
|
}, |
|
{ |
|
"dpo_loss": 0.534516453742981, |
|
"epoch": 4.733112895606991, |
|
"grad_norm": 13120.765521158273, |
|
"learning_rate": 7.944397831941951e-09, |
|
"logits": -1.3101601600646973, |
|
"logps": -83.31844329833984, |
|
"loss": 15.3296, |
|
"objective": 14.363126754760742, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.47083333134651184, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.0361357256770134, |
|
"step": 1670, |
|
"wo_beta": 15.148748397827148 |
|
}, |
|
{ |
|
"dpo_loss": 0.5110668540000916, |
|
"epoch": 4.747283892300425, |
|
"grad_norm": 12106.475879366208, |
|
"learning_rate": 7.088240519165955e-09, |
|
"logits": -1.2715505361557007, |
|
"logps": -83.65233612060547, |
|
"loss": 18.7232, |
|
"objective": 22.049705505371094, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.04454280436038971, |
|
"step": 1675, |
|
"wo_beta": 16.55459213256836 |
|
}, |
|
{ |
|
"dpo_loss": 0.5210347771644592, |
|
"epoch": 4.7614548889938595, |
|
"grad_norm": 13458.285236730762, |
|
"learning_rate": 6.280555661802856e-09, |
|
"logits": -1.2422146797180176, |
|
"logps": -82.28036499023438, |
|
"loss": 16.7571, |
|
"objective": 16.147016525268555, |
|
"ranking_idealized": 0.637499988079071, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.637499988079071, |
|
"regularize": 0.03472811356186867, |
|
"step": 1680, |
|
"wo_beta": 17.648740768432617 |
|
}, |
|
{ |
|
"dpo_loss": 0.5201699733734131, |
|
"epoch": 4.775625885687293, |
|
"grad_norm": 13687.817133347355, |
|
"learning_rate": 5.521422686783294e-09, |
|
"logits": -1.308603286743164, |
|
"logps": -82.1572265625, |
|
"loss": 17.374, |
|
"objective": 18.0618839263916, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.4583333432674408, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.039491456001996994, |
|
"step": 1685, |
|
"wo_beta": 14.411401748657227 |
|
}, |
|
{ |
|
"dpo_loss": 0.5319506525993347, |
|
"epoch": 4.7897968823807275, |
|
"grad_norm": 11765.631080020812, |
|
"learning_rate": 4.810916246494157e-09, |
|
"logits": -1.3420146703720093, |
|
"logps": -81.82181549072266, |
|
"loss": 16.2518, |
|
"objective": 15.689167976379395, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.612500011920929, |
|
"regularize": 0.03873560577630997, |
|
"step": 1690, |
|
"wo_beta": 15.535360336303711 |
|
}, |
|
{ |
|
"dpo_loss": 0.5226943492889404, |
|
"epoch": 4.803967879074161, |
|
"grad_norm": 15212.188996211064, |
|
"learning_rate": 4.149106211436659e-09, |
|
"logits": -1.205290675163269, |
|
"logps": -81.14673614501953, |
|
"loss": 15.6316, |
|
"objective": 14.224554061889648, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5708333253860474, |
|
"ranking_simple": 0.6333333253860474, |
|
"regularize": 0.033357344567775726, |
|
"step": 1695, |
|
"wo_beta": 17.657291412353516 |
|
}, |
|
{ |
|
"dpo_loss": 0.5265616178512573, |
|
"epoch": 4.818138875767596, |
|
"grad_norm": 14041.074803893325, |
|
"learning_rate": 3.5360576633558513e-09, |
|
"logits": -1.3079345226287842, |
|
"logps": -80.5920639038086, |
|
"loss": 16.5799, |
|
"objective": 17.133312225341797, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.6208333373069763, |
|
"regularize": 0.042179401963949203, |
|
"step": 1700, |
|
"wo_beta": 14.612165451049805 |
|
}, |
|
{ |
|
"epoch": 4.818138875767596, |
|
"eval_dpo_loss": 0.6798617839813232, |
|
"eval_logits": -1.2946054935455322, |
|
"eval_logps": -88.42201232910156, |
|
"eval_loss": 187.93597412109375, |
|
"eval_objective": 183.8405303955078, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.532608687877655, |
|
"eval_regularize": 0.4168170392513275, |
|
"eval_runtime": 537.7382, |
|
"eval_samples_per_second": 10.767, |
|
"eval_steps_per_second": 0.898, |
|
"eval_wo_beta": 15.996342658996582, |
|
"step": 1700 |
|
}, |
|
{ |
|
"dpo_loss": 0.5323117971420288, |
|
"epoch": 4.83230987246103, |
|
"grad_norm": 12547.78573915316, |
|
"learning_rate": 2.9718308888401767e-09, |
|
"logits": -1.3183315992355347, |
|
"logps": -81.7763442993164, |
|
"loss": 16.0513, |
|
"objective": 17.510692596435547, |
|
"ranking_idealized": 0.6791666746139526, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.6708333492279053, |
|
"regularize": 0.040996309369802475, |
|
"step": 1705, |
|
"wo_beta": 17.88062858581543 |
|
}, |
|
{ |
|
"dpo_loss": 0.5359232425689697, |
|
"epoch": 4.846480869154464, |
|
"grad_norm": 13302.316035438349, |
|
"learning_rate": 2.4564813733932155e-09, |
|
"logits": -1.316437840461731, |
|
"logps": -81.5803451538086, |
|
"loss": 17.058, |
|
"objective": 15.084990501403809, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.0330289825797081, |
|
"step": 1710, |
|
"wo_beta": 14.95897102355957 |
|
}, |
|
{ |
|
"dpo_loss": 0.5298423171043396, |
|
"epoch": 4.860651865847898, |
|
"grad_norm": 13131.732232168924, |
|
"learning_rate": 1.9900597959770505e-09, |
|
"logits": -1.2239762544631958, |
|
"logps": -80.93972778320312, |
|
"loss": 15.5353, |
|
"objective": 14.398134231567383, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.03392880782485008, |
|
"step": 1715, |
|
"wo_beta": 16.470539093017578 |
|
}, |
|
{ |
|
"dpo_loss": 0.5080859065055847, |
|
"epoch": 4.874822862541333, |
|
"grad_norm": 13218.33236233331, |
|
"learning_rate": 1.5726120240288631e-09, |
|
"logits": -1.2538625001907349, |
|
"logps": -80.96495819091797, |
|
"loss": 16.0016, |
|
"objective": 18.5091552734375, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.045930005609989166, |
|
"step": 1720, |
|
"wo_beta": 17.185333251953125 |
|
}, |
|
{ |
|
"dpo_loss": 0.5118470788002014, |
|
"epoch": 4.888993859234766, |
|
"grad_norm": 12268.487941087904, |
|
"learning_rate": 1.2041791089499875e-09, |
|
"logits": -1.279910683631897, |
|
"logps": -79.85582733154297, |
|
"loss": 13.4289, |
|
"objective": 14.366524696350098, |
|
"ranking_idealized": 0.6416666507720947, |
|
"ranking_idealized_expo": 0.5708333253860474, |
|
"ranking_simple": 0.6416666507720947, |
|
"regularize": 0.03632321581244469, |
|
"step": 1725, |
|
"wo_beta": 17.114274978637695 |
|
}, |
|
{ |
|
"dpo_loss": 0.5286470055580139, |
|
"epoch": 4.903164855928201, |
|
"grad_norm": 11950.336190164535, |
|
"learning_rate": 8.847972820693051e-10, |
|
"logits": -1.2914131879806519, |
|
"logps": -80.19400787353516, |
|
"loss": 16.9458, |
|
"objective": 18.679357528686523, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.4416666626930237, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 0.04655119404196739, |
|
"step": 1730, |
|
"wo_beta": 14.276873588562012 |
|
}, |
|
{ |
|
"dpo_loss": 0.528618574142456, |
|
"epoch": 4.917335852621634, |
|
"grad_norm": 12737.302460928488, |
|
"learning_rate": 6.144979510802062e-10, |
|
"logits": -1.4132698774337769, |
|
"logps": -82.34892272949219, |
|
"loss": 18.3815, |
|
"objective": 18.776357650756836, |
|
"ranking_idealized": 0.6458333134651184, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.637499988079071, |
|
"regularize": 0.047753263264894485, |
|
"step": 1735, |
|
"wo_beta": 15.833959579467773 |
|
}, |
|
{ |
|
"dpo_loss": 0.5292457938194275, |
|
"epoch": 4.931506849315069, |
|
"grad_norm": 13241.609695831672, |
|
"learning_rate": 3.933076969516724e-10, |
|
"logits": -1.2396830320358276, |
|
"logps": -81.28510284423828, |
|
"loss": 15.2755, |
|
"objective": 15.8608980178833, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.041682373732328415, |
|
"step": 1740, |
|
"wo_beta": 15.47945499420166 |
|
}, |
|
{ |
|
"dpo_loss": 0.5308272838592529, |
|
"epoch": 4.945677846008502, |
|
"grad_norm": 12128.166835896209, |
|
"learning_rate": 2.212482713149222e-10, |
|
"logits": -1.2960669994354248, |
|
"logps": -80.84746551513672, |
|
"loss": 15.3037, |
|
"objective": 12.663678169250488, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.03369910642504692, |
|
"step": 1745, |
|
"wo_beta": 16.19184112548828 |
|
}, |
|
{ |
|
"dpo_loss": 0.5277208089828491, |
|
"epoch": 4.959848842701937, |
|
"grad_norm": 12921.297125323947, |
|
"learning_rate": 9.833659432367803e-11, |
|
"logits": -1.2565745115280151, |
|
"logps": -82.744873046875, |
|
"loss": 16.689, |
|
"objective": 16.856407165527344, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.04240218922495842, |
|
"step": 1750, |
|
"wo_beta": 16.752824783325195 |
|
}, |
|
{ |
|
"epoch": 4.959848842701937, |
|
"eval_dpo_loss": 0.6798657774925232, |
|
"eval_logits": -1.2945247888565063, |
|
"eval_logps": -88.4161605834961, |
|
"eval_loss": 187.94732666015625, |
|
"eval_objective": 183.85096740722656, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.532608687877655, |
|
"eval_regularize": 0.4168415367603302, |
|
"eval_runtime": 526.9139, |
|
"eval_samples_per_second": 10.989, |
|
"eval_steps_per_second": 0.917, |
|
"eval_wo_beta": 15.995292663574219, |
|
"step": 1750 |
|
}, |
|
{ |
|
"dpo_loss": 0.5391930937767029, |
|
"epoch": 4.974019839395371, |
|
"grad_norm": 11466.754753582296, |
|
"learning_rate": 2.4584752990997048e-11, |
|
"logits": -1.29628324508667, |
|
"logps": -82.2157211303711, |
|
"loss": 14.7634, |
|
"objective": 15.14171314239502, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.04220600798726082, |
|
"step": 1755, |
|
"wo_beta": 16.21957778930664 |
|
}, |
|
{ |
|
"dpo_loss": 0.525145411491394, |
|
"epoch": 4.988190836088805, |
|
"grad_norm": 12278.79483067917, |
|
"learning_rate": 0.0, |
|
"logits": -1.2211812734603882, |
|
"logps": -82.23439025878906, |
|
"loss": 15.7539, |
|
"objective": 15.124394416809082, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.6208333373069763, |
|
"regularize": 0.035570546984672546, |
|
"step": 1760, |
|
"wo_beta": 17.11547088623047 |
|
}, |
|
{ |
|
"epoch": 4.988190836088805, |
|
"step": 1760, |
|
"total_flos": 0.0, |
|
"train_loss": 67.88850653388283, |
|
"train_runtime": 74214.1269, |
|
"train_samples_per_second": 3.423, |
|
"train_steps_per_second": 0.024 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 1760, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|