hZzy's picture
Model save
29fb157 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.988190836088805,
"eval_steps": 50,
"global_step": 1760,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"dpo_loss": 0.6931471824645996,
"epoch": 0.002834199338686821,
"grad_norm": 17675.585799054454,
"learning_rate": 5.681818181818181e-09,
"logits": -1.2867579460144043,
"logps": -84.34933471679688,
"loss": 169.5214,
"objective": 153.4677734375,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.5833333134651184,
"regularize": 0.3618059456348419,
"step": 1,
"wo_beta": 14.83154582977295
},
{
"dpo_loss": 0.6930850148200989,
"epoch": 0.014170996693434105,
"grad_norm": 16809.76979726276,
"learning_rate": 2.8409090909090908e-08,
"logits": -1.4291090965270996,
"logps": -83.86122131347656,
"loss": 181.7047,
"objective": 168.55690002441406,
"ranking_idealized": 0.5625,
"ranking_idealized_expo": 0.4895833432674408,
"ranking_simple": 0.4895833432674408,
"regularize": 0.4036543667316437,
"step": 5,
"wo_beta": 16.679672241210938
},
{
"dpo_loss": 0.6930658221244812,
"epoch": 0.02834199338686821,
"grad_norm": 18604.90219885959,
"learning_rate": 5.6818181818181815e-08,
"logits": -1.4008290767669678,
"logps": -84.83370971679688,
"loss": 177.0775,
"objective": 170.34666442871094,
"ranking_idealized": 0.6708333492279053,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.5791666507720947,
"regularize": 0.40391480922698975,
"step": 10,
"wo_beta": 15.222626686096191
},
{
"dpo_loss": 0.6919592618942261,
"epoch": 0.042512990080302314,
"grad_norm": 17866.85697228391,
"learning_rate": 8.522727272727271e-08,
"logits": -1.5378918647766113,
"logps": -84.51753234863281,
"loss": 178.9384,
"objective": 187.3764190673828,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.5708333253860474,
"ranking_simple": 0.5666666626930237,
"regularize": 0.44199517369270325,
"step": 15,
"wo_beta": 15.720404624938965
},
{
"dpo_loss": 0.6915046572685242,
"epoch": 0.05668398677373642,
"grad_norm": 17562.319543911097,
"learning_rate": 1.1363636363636363e-07,
"logits": -1.3619273900985718,
"logps": -83.62174224853516,
"loss": 185.6226,
"objective": 203.74549865722656,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.48750001192092896,
"regularize": 0.4415209889411926,
"step": 20,
"wo_beta": 16.53324317932129
},
{
"dpo_loss": 0.6925450563430786,
"epoch": 0.07085498346717052,
"grad_norm": 16842.244030261496,
"learning_rate": 1.4204545454545455e-07,
"logits": -1.369999647140503,
"logps": -83.69309997558594,
"loss": 181.9124,
"objective": 172.8611297607422,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.5666666626930237,
"regularize": 0.4071991741657257,
"step": 25,
"wo_beta": 15.610980987548828
},
{
"dpo_loss": 0.6898643970489502,
"epoch": 0.08502598016060463,
"grad_norm": 14842.574916726253,
"learning_rate": 1.7045454545454543e-07,
"logits": -1.432415246963501,
"logps": -83.48454284667969,
"loss": 181.3521,
"objective": 176.5283203125,
"ranking_idealized": 0.5541666746139526,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.48750001192092896,
"regularize": 0.4289272427558899,
"step": 30,
"wo_beta": 17.00359344482422
},
{
"dpo_loss": 0.6909116506576538,
"epoch": 0.09919697685403873,
"grad_norm": 16058.543561158533,
"learning_rate": 1.9886363636363636e-07,
"logits": -1.4108096361160278,
"logps": -82.71344757080078,
"loss": 183.8373,
"objective": 173.34014892578125,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5416666865348816,
"regularize": 0.38034114241600037,
"step": 35,
"wo_beta": 16.153711318969727
},
{
"dpo_loss": 0.6891058683395386,
"epoch": 0.11336797354747284,
"grad_norm": 17014.23191466682,
"learning_rate": 2.2727272727272726e-07,
"logits": -1.402835488319397,
"logps": -83.338134765625,
"loss": 187.3552,
"objective": 182.01144409179688,
"ranking_idealized": 0.5708333253860474,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5166666507720947,
"regularize": 0.4058202803134918,
"step": 40,
"wo_beta": 14.262288093566895
},
{
"dpo_loss": 0.684747040271759,
"epoch": 0.12753897024090693,
"grad_norm": 15346.751264548873,
"learning_rate": 2.5568181818181816e-07,
"logits": -1.419245958328247,
"logps": -83.82090759277344,
"loss": 171.4244,
"objective": 183.38385009765625,
"ranking_idealized": 0.5958333611488342,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5166666507720947,
"regularize": 0.4130297601222992,
"step": 45,
"wo_beta": 14.29751968383789
},
{
"dpo_loss": 0.6823928356170654,
"epoch": 0.14170996693434104,
"grad_norm": 16514.084391847242,
"learning_rate": 2.840909090909091e-07,
"logits": -1.4350523948669434,
"logps": -84.8818359375,
"loss": 181.5404,
"objective": 186.33828735351562,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5041666626930237,
"regularize": 0.4348808526992798,
"step": 50,
"wo_beta": 15.604106903076172
},
{
"epoch": 0.14170996693434104,
"eval_dpo_loss": 0.6889749765396118,
"eval_logits": -1.4233466386795044,
"eval_logps": -90.91888427734375,
"eval_loss": 182.35984802246094,
"eval_objective": 180.32789611816406,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5263975262641907,
"eval_regularize": 0.40881022810935974,
"eval_runtime": 472.6615,
"eval_samples_per_second": 12.25,
"eval_steps_per_second": 1.022,
"eval_wo_beta": 16.297378540039062,
"step": 50
},
{
"dpo_loss": 0.6824547052383423,
"epoch": 0.15588096362777515,
"grad_norm": 17699.4671939912,
"learning_rate": 3.1249999999999997e-07,
"logits": -1.3973591327667236,
"logps": -84.62629699707031,
"loss": 170.6542,
"objective": 174.4287872314453,
"ranking_idealized": 0.5583333373069763,
"ranking_idealized_expo": 0.49166667461395264,
"ranking_simple": 0.4833333194255829,
"regularize": 0.3742547035217285,
"step": 55,
"wo_beta": 15.11441421508789
},
{
"dpo_loss": 0.6806777715682983,
"epoch": 0.17005196032120926,
"grad_norm": 16100.449715737686,
"learning_rate": 3.4090909090909085e-07,
"logits": -1.329344391822815,
"logps": -85.16632843017578,
"loss": 174.0689,
"objective": 174.0922393798828,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5291666388511658,
"regularize": 0.40893226861953735,
"step": 60,
"wo_beta": 14.438634872436523
},
{
"dpo_loss": 0.6708158254623413,
"epoch": 0.18422295701464336,
"grad_norm": 16302.471134333027,
"learning_rate": 3.693181818181818e-07,
"logits": -1.428707480430603,
"logps": -82.03670501708984,
"loss": 172.5426,
"objective": 161.09950256347656,
"ranking_idealized": 0.637499988079071,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5333333611488342,
"regularize": 0.36983728408813477,
"step": 65,
"wo_beta": 15.067524909973145
},
{
"dpo_loss": 0.6730712652206421,
"epoch": 0.19839395370807747,
"grad_norm": 15662.31236602018,
"learning_rate": 3.977272727272727e-07,
"logits": -1.4695442914962769,
"logps": -84.42548370361328,
"loss": 174.7341,
"objective": 175.19439697265625,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5375000238418579,
"regularize": 0.4013313353061676,
"step": 70,
"wo_beta": 15.375307083129883
},
{
"dpo_loss": 0.6741575002670288,
"epoch": 0.21256495040151158,
"grad_norm": 18686.585950552704,
"learning_rate": 4.2613636363636364e-07,
"logits": -1.393960952758789,
"logps": -84.16697692871094,
"loss": 174.6645,
"objective": 164.988525390625,
"ranking_idealized": 0.6083333492279053,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5208333134651184,
"regularize": 0.38378840684890747,
"step": 75,
"wo_beta": 15.075023651123047
},
{
"dpo_loss": 0.669329822063446,
"epoch": 0.22673594709494568,
"grad_norm": 18561.894559157903,
"learning_rate": 4.545454545454545e-07,
"logits": -1.4905359745025635,
"logps": -83.4140396118164,
"loss": 169.0661,
"objective": 177.64450073242188,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5458333492279053,
"regularize": 0.4245981276035309,
"step": 80,
"wo_beta": 16.333538055419922
},
{
"dpo_loss": 0.6659378409385681,
"epoch": 0.2409069437883798,
"grad_norm": 15942.37358833672,
"learning_rate": 4.829545454545455e-07,
"logits": -1.4715605974197388,
"logps": -83.54389190673828,
"loss": 171.1414,
"objective": 182.98324584960938,
"ranking_idealized": 0.625,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.5541666746139526,
"regularize": 0.4137464463710785,
"step": 85,
"wo_beta": 15.189921379089355
},
{
"dpo_loss": 0.6600526571273804,
"epoch": 0.25507794048181387,
"grad_norm": 16315.909705896804,
"learning_rate": 5.113636363636363e-07,
"logits": -1.571618914604187,
"logps": -84.54931640625,
"loss": 168.3022,
"objective": 174.0519561767578,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.550000011920929,
"regularize": 0.39906471967697144,
"step": 90,
"wo_beta": 15.517964363098145
},
{
"dpo_loss": 0.6545840501785278,
"epoch": 0.269248937175248,
"grad_norm": 17445.518244074756,
"learning_rate": 5.397727272727273e-07,
"logits": -1.49222731590271,
"logps": -84.54743194580078,
"loss": 168.7617,
"objective": 175.46524047851562,
"ranking_idealized": 0.5583333373069763,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.512499988079071,
"regularize": 0.4040308892726898,
"step": 95,
"wo_beta": 16.429697036743164
},
{
"dpo_loss": 0.6560600399971008,
"epoch": 0.2834199338686821,
"grad_norm": 16343.369412455128,
"learning_rate": 5.681818181818182e-07,
"logits": -1.370269775390625,
"logps": -83.43912506103516,
"loss": 156.9096,
"objective": 160.82919311523438,
"ranking_idealized": 0.5958333611488342,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.5,
"regularize": 0.3631521761417389,
"step": 100,
"wo_beta": 15.597589492797852
},
{
"epoch": 0.2834199338686821,
"eval_dpo_loss": 0.6855058073997498,
"eval_logits": -1.470232367515564,
"eval_logps": -91.45095825195312,
"eval_loss": 181.86407470703125,
"eval_objective": 180.31504821777344,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.531573474407196,
"eval_regularize": 0.41007429361343384,
"eval_runtime": 479.0094,
"eval_samples_per_second": 12.087,
"eval_steps_per_second": 1.008,
"eval_wo_beta": 16.373079299926758,
"step": 100
},
{
"dpo_loss": 0.6687707901000977,
"epoch": 0.2975909305621162,
"grad_norm": 20737.972285358017,
"learning_rate": 5.965909090909091e-07,
"logits": -1.572224736213684,
"logps": -86.08336639404297,
"loss": 161.5898,
"objective": 164.3712615966797,
"ranking_idealized": 0.625,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5416666865348816,
"regularize": 0.3831757605075836,
"step": 105,
"wo_beta": 14.803333282470703
},
{
"dpo_loss": 0.6561999917030334,
"epoch": 0.3117619272555503,
"grad_norm": 16188.43984842568,
"learning_rate": 6.249999999999999e-07,
"logits": -1.4707790613174438,
"logps": -84.74868774414062,
"loss": 158.3984,
"objective": 159.52267456054688,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5166666507720947,
"regularize": 0.38105159997940063,
"step": 110,
"wo_beta": 15.120772361755371
},
{
"dpo_loss": 0.6603504419326782,
"epoch": 0.32593292394898443,
"grad_norm": 16290.29619326225,
"learning_rate": 6.534090909090909e-07,
"logits": -1.4433757066726685,
"logps": -83.40989685058594,
"loss": 149.8614,
"objective": 154.2146453857422,
"ranking_idealized": 0.6208333373069763,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5375000238418579,
"regularize": 0.3704533576965332,
"step": 115,
"wo_beta": 16.445148468017578
},
{
"dpo_loss": 0.6463068127632141,
"epoch": 0.3401039206424185,
"grad_norm": 15623.51190253056,
"learning_rate": 6.818181818181817e-07,
"logits": -1.4353134632110596,
"logps": -83.36263275146484,
"loss": 156.1384,
"objective": 165.0032501220703,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5458333492279053,
"regularize": 0.3623816668987274,
"step": 120,
"wo_beta": 15.72484302520752
},
{
"dpo_loss": 0.6474981904029846,
"epoch": 0.35427491733585265,
"grad_norm": 15992.631664901073,
"learning_rate": 7.102272727272727e-07,
"logits": -1.4708176851272583,
"logps": -87.08245086669922,
"loss": 148.8453,
"objective": 139.25869750976562,
"ranking_idealized": 0.5583333373069763,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.5083333253860474,
"regularize": 0.3299652636051178,
"step": 125,
"wo_beta": 16.12550163269043
},
{
"dpo_loss": 0.6404248476028442,
"epoch": 0.3684459140292867,
"grad_norm": 18351.934143281596,
"learning_rate": 7.386363636363636e-07,
"logits": -1.4490153789520264,
"logps": -85.12788391113281,
"loss": 156.9957,
"objective": 159.24720764160156,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.5916666388511658,
"ranking_simple": 0.5791666507720947,
"regularize": 0.3523053526878357,
"step": 130,
"wo_beta": 16.6445255279541
},
{
"dpo_loss": 0.6560899615287781,
"epoch": 0.3826169107227208,
"grad_norm": 23473.507695048622,
"learning_rate": 7.670454545454545e-07,
"logits": -1.4993882179260254,
"logps": -85.93272399902344,
"loss": 163.276,
"objective": 171.45176696777344,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5,
"regularize": 0.3585022985935211,
"step": 135,
"wo_beta": 14.440187454223633
},
{
"dpo_loss": 0.6453083753585815,
"epoch": 0.39678790741615494,
"grad_norm": 18800.531975208964,
"learning_rate": 7.954545454545454e-07,
"logits": -1.4704848527908325,
"logps": -84.99346160888672,
"loss": 140.3663,
"objective": 156.8263702392578,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5583333373069763,
"regularize": 0.35928240418434143,
"step": 140,
"wo_beta": 14.692411422729492
},
{
"dpo_loss": 0.6320348978042603,
"epoch": 0.410958904109589,
"grad_norm": 16753.19118195896,
"learning_rate": 8.238636363636363e-07,
"logits": -1.481634259223938,
"logps": -85.03217315673828,
"loss": 148.4437,
"objective": 142.04251098632812,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.6083333492279053,
"regularize": 0.34694570302963257,
"step": 145,
"wo_beta": 14.103859901428223
},
{
"dpo_loss": 0.6397809386253357,
"epoch": 0.42512990080302315,
"grad_norm": 15467.131473675328,
"learning_rate": 8.522727272727273e-07,
"logits": -1.5027910470962524,
"logps": -85.37592315673828,
"loss": 145.838,
"objective": 148.38160705566406,
"ranking_idealized": 0.5958333611488342,
"ranking_idealized_expo": 0.5541666746139526,
"ranking_simple": 0.5625,
"regularize": 0.3435940146446228,
"step": 150,
"wo_beta": 17.392671585083008
},
{
"epoch": 0.42512990080302315,
"eval_dpo_loss": 0.6789573431015015,
"eval_logits": -1.4503501653671265,
"eval_logps": -90.70494842529297,
"eval_loss": 180.64788818359375,
"eval_objective": 178.1704864501953,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5383023023605347,
"eval_regularize": 0.40225014090538025,
"eval_runtime": 484.5212,
"eval_samples_per_second": 11.95,
"eval_steps_per_second": 0.997,
"eval_wo_beta": 16.587987899780273,
"step": 150
},
{
"dpo_loss": 0.6387067437171936,
"epoch": 0.43930089749645723,
"grad_norm": 15641.193562303264,
"learning_rate": 8.806818181818182e-07,
"logits": -1.5433834791183472,
"logps": -83.86524200439453,
"loss": 145.3558,
"objective": 149.48431396484375,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.5041666626930237,
"regularize": 0.3321545720100403,
"step": 155,
"wo_beta": 15.563851356506348
},
{
"dpo_loss": 0.6263092160224915,
"epoch": 0.45347189418989137,
"grad_norm": 17105.26137174702,
"learning_rate": 9.09090909090909e-07,
"logits": -1.4153720140457153,
"logps": -85.28386688232422,
"loss": 153.0504,
"objective": 153.1988067626953,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5583333373069763,
"regularize": 0.3481307625770569,
"step": 160,
"wo_beta": 14.662632942199707
},
{
"dpo_loss": 0.6309160590171814,
"epoch": 0.46764289088332545,
"grad_norm": 17759.815020595273,
"learning_rate": 9.374999999999999e-07,
"logits": -1.4963940382003784,
"logps": -87.69454956054688,
"loss": 139.2377,
"objective": 131.2418670654297,
"ranking_idealized": 0.6083333492279053,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.550000011920929,
"regularize": 0.3078847825527191,
"step": 165,
"wo_beta": 15.923318862915039
},
{
"dpo_loss": 0.6393815279006958,
"epoch": 0.4818138875767596,
"grad_norm": 14258.083724870265,
"learning_rate": 9.65909090909091e-07,
"logits": -1.525942087173462,
"logps": -87.34074401855469,
"loss": 149.6952,
"objective": 141.63162231445312,
"ranking_idealized": 0.5583333373069763,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5208333134651184,
"regularize": 0.3343699276447296,
"step": 170,
"wo_beta": 16.248130798339844
},
{
"dpo_loss": 0.6326501369476318,
"epoch": 0.49598488427019366,
"grad_norm": 15096.239809153309,
"learning_rate": 9.943181818181817e-07,
"logits": -1.3718321323394775,
"logps": -87.4573745727539,
"loss": 140.2749,
"objective": 132.79156494140625,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5375000238418579,
"regularize": 0.3121780455112457,
"step": 175,
"wo_beta": 17.698331832885742
},
{
"dpo_loss": 0.622785747051239,
"epoch": 0.5101558809636277,
"grad_norm": 16631.252094969073,
"learning_rate": 9.999842657116664e-07,
"logits": -1.3456240892410278,
"logps": -86.42423248291016,
"loss": 143.2666,
"objective": 151.05718994140625,
"ranking_idealized": 0.5708333253860474,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.512499988079071,
"regularize": 0.3372686207294464,
"step": 180,
"wo_beta": 14.807291030883789
},
{
"dpo_loss": 0.6099674701690674,
"epoch": 0.5243268776570619,
"grad_norm": 20691.36637721674,
"learning_rate": 9.999203468625015e-07,
"logits": -1.3633224964141846,
"logps": -85.25286102294922,
"loss": 132.6151,
"objective": 133.30491638183594,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.5333333611488342,
"regularize": 0.3143846392631531,
"step": 185,
"wo_beta": 14.758675575256348
},
{
"dpo_loss": 0.596558690071106,
"epoch": 0.538497874350496,
"grad_norm": 16323.28292515014,
"learning_rate": 9.998072663403656e-07,
"logits": -1.4109238386154175,
"logps": -83.85755157470703,
"loss": 142.4777,
"objective": 132.50650024414062,
"ranking_idealized": 0.6625000238418579,
"ranking_idealized_expo": 0.5625,
"ranking_simple": 0.612500011920929,
"regularize": 0.2925921082496643,
"step": 190,
"wo_beta": 17.561918258666992
},
{
"dpo_loss": 0.608472466468811,
"epoch": 0.5526688710439301,
"grad_norm": 14605.697671098327,
"learning_rate": 9.99645035265485e-07,
"logits": -1.426125407218933,
"logps": -83.3570556640625,
"loss": 148.3801,
"objective": 154.04542541503906,
"ranking_idealized": 0.625,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5708333253860474,
"regularize": 0.3404535949230194,
"step": 195,
"wo_beta": 15.011509895324707
},
{
"dpo_loss": 0.6035653948783875,
"epoch": 0.5668398677373642,
"grad_norm": 14961.86824726212,
"learning_rate": 9.99433669591504e-07,
"logits": -1.4208530187606812,
"logps": -83.7520523071289,
"loss": 140.9398,
"objective": 150.76983642578125,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5208333134651184,
"regularize": 0.34441787004470825,
"step": 200,
"wo_beta": 16.120277404785156
},
{
"epoch": 0.5668398677373642,
"eval_dpo_loss": 0.6803466081619263,
"eval_logits": -1.3894833326339722,
"eval_logps": -90.33295440673828,
"eval_loss": 184.49874877929688,
"eval_objective": 181.54510498046875,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.532608687877655,
"eval_regularize": 0.4101283848285675,
"eval_runtime": 475.1985,
"eval_samples_per_second": 12.184,
"eval_steps_per_second": 1.016,
"eval_wo_beta": 16.141496658325195,
"step": 200
},
{
"dpo_loss": 0.6270676255226135,
"epoch": 0.5810108644307983,
"grad_norm": 16340.681317011602,
"learning_rate": 9.991731901039136e-07,
"logits": -1.283570647239685,
"logps": -84.95980834960938,
"loss": 136.3843,
"objective": 133.73294067382812,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5416666865348816,
"regularize": 0.3290613889694214,
"step": 205,
"wo_beta": 16.529329299926758
},
{
"dpo_loss": 0.6101997494697571,
"epoch": 0.5951818611242324,
"grad_norm": 16979.514024444066,
"learning_rate": 9.988636224180095e-07,
"logits": -1.3387362957000732,
"logps": -85.54541015625,
"loss": 149.2125,
"objective": 162.19125366210938,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5625,
"regularize": 0.3633294999599457,
"step": 210,
"wo_beta": 15.476922988891602
},
{
"dpo_loss": 0.5931335687637329,
"epoch": 0.6093528578176665,
"grad_norm": 16588.23739039735,
"learning_rate": 9.985049969763719e-07,
"logits": -1.458817720413208,
"logps": -84.46039581298828,
"loss": 133.2822,
"objective": 143.83396911621094,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.4749999940395355,
"ranking_simple": 0.5166666507720947,
"regularize": 0.3306182324886322,
"step": 215,
"wo_beta": 16.599056243896484
},
{
"dpo_loss": 0.6022905111312866,
"epoch": 0.6235238545111006,
"grad_norm": 17119.52021011513,
"learning_rate": 9.980973490458728e-07,
"logits": -1.4839917421340942,
"logps": -84.08710479736328,
"loss": 143.4095,
"objective": 144.29782104492188,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.5083333253860474,
"regularize": 0.32684001326560974,
"step": 220,
"wo_beta": 16.91693878173828
},
{
"dpo_loss": 0.5977518558502197,
"epoch": 0.6376948512045347,
"grad_norm": 14023.197866950057,
"learning_rate": 9.976407187142064e-07,
"logits": -1.534485936164856,
"logps": -85.1946792602539,
"loss": 138.8846,
"objective": 137.76622009277344,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5583333373069763,
"regularize": 0.31227758526802063,
"step": 225,
"wo_beta": 15.36359691619873
},
{
"dpo_loss": 0.5947220921516418,
"epoch": 0.6518658478979689,
"grad_norm": 14605.487004157298,
"learning_rate": 9.971351508859486e-07,
"logits": -1.439586877822876,
"logps": -85.27981567382812,
"loss": 124.6336,
"objective": 121.90718078613281,
"ranking_idealized": 0.637499988079071,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.6000000238418579,
"regularize": 0.2932659685611725,
"step": 230,
"wo_beta": 17.20786476135254
},
{
"dpo_loss": 0.6003122925758362,
"epoch": 0.6660368445914029,
"grad_norm": 16685.644038837043,
"learning_rate": 9.9658069527814e-07,
"logits": -1.3658267259597778,
"logps": -86.23738098144531,
"loss": 121.5208,
"objective": 116.9168472290039,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5583333373069763,
"regularize": 0.2670106589794159,
"step": 235,
"wo_beta": 16.473398208618164
},
{
"dpo_loss": 0.5931513905525208,
"epoch": 0.680207841284837,
"grad_norm": 18082.47037845429,
"learning_rate": 9.959774064153975e-07,
"logits": -1.5063189268112183,
"logps": -85.80690002441406,
"loss": 131.6654,
"objective": 136.83932495117188,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.550000011920929,
"regularize": 0.29019126296043396,
"step": 240,
"wo_beta": 16.562297821044922
},
{
"dpo_loss": 0.6120952367782593,
"epoch": 0.6943788379782712,
"grad_norm": 16231.64241500278,
"learning_rate": 9.953253436245516e-07,
"logits": -1.5183242559432983,
"logps": -85.21266174316406,
"loss": 120.6441,
"objective": 111.80670928955078,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.5666666626930237,
"regularize": 0.2561970055103302,
"step": 245,
"wo_beta": 16.04390525817871
},
{
"dpo_loss": 0.5938802361488342,
"epoch": 0.7085498346717053,
"grad_norm": 15964.589309173105,
"learning_rate": 9.94624571028813e-07,
"logits": -1.3114020824432373,
"logps": -83.990478515625,
"loss": 131.1439,
"objective": 132.464599609375,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5874999761581421,
"regularize": 0.29094573855400085,
"step": 250,
"wo_beta": 15.082120895385742
},
{
"epoch": 0.7085498346717053,
"eval_dpo_loss": 0.6797458529472351,
"eval_logits": -1.478871464729309,
"eval_logps": -91.22461700439453,
"eval_loss": 182.20773315429688,
"eval_objective": 178.44094848632812,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.532608687877655,
"eval_regularize": 0.4058575928211212,
"eval_runtime": 475.3374,
"eval_samples_per_second": 12.181,
"eval_steps_per_second": 1.016,
"eval_wo_beta": 16.368268966674805,
"step": 250
},
{
"dpo_loss": 0.6089769005775452,
"epoch": 0.7227208313651393,
"grad_norm": 14595.146016283225,
"learning_rate": 9.938751575414661e-07,
"logits": -1.5532639026641846,
"logps": -83.39389038085938,
"loss": 133.1451,
"objective": 121.37617492675781,
"ranking_idealized": 0.5708333253860474,
"ranking_idealized_expo": 0.42916667461395264,
"ranking_simple": 0.5041666626930237,
"regularize": 0.2965226471424103,
"step": 255,
"wo_beta": 15.059760093688965
},
{
"dpo_loss": 0.5949603915214539,
"epoch": 0.7368918280585735,
"grad_norm": 15154.916516529278,
"learning_rate": 9.930771768590933e-07,
"logits": -1.5184205770492554,
"logps": -85.99275970458984,
"loss": 128.7971,
"objective": 149.26617431640625,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.4625000059604645,
"ranking_simple": 0.5208333134651184,
"regularize": 0.3260208070278168,
"step": 260,
"wo_beta": 15.812520027160645
},
{
"dpo_loss": 0.5942420959472656,
"epoch": 0.7510628247520076,
"grad_norm": 13672.874013609171,
"learning_rate": 9.92230707454326e-07,
"logits": -1.438194990158081,
"logps": -86.4264907836914,
"loss": 119.4127,
"objective": 127.40038299560547,
"ranking_idealized": 0.6625000238418579,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.6416666507720947,
"regularize": 0.29675182700157166,
"step": 265,
"wo_beta": 16.794330596923828
},
{
"dpo_loss": 0.6142985224723816,
"epoch": 0.7652338214454416,
"grad_norm": 14406.751122728363,
"learning_rate": 9.91335832568129e-07,
"logits": -1.5249485969543457,
"logps": -87.38147735595703,
"loss": 129.203,
"objective": 141.37374877929688,
"ranking_idealized": 0.5249999761581421,
"ranking_idealized_expo": 0.4583333432674408,
"ranking_simple": 0.4749999940395355,
"regularize": 0.2984028458595276,
"step": 270,
"wo_beta": 14.417384147644043
},
{
"dpo_loss": 0.5954193472862244,
"epoch": 0.7794048181388757,
"grad_norm": 17040.572933936153,
"learning_rate": 9.90392640201615e-07,
"logits": -1.3636622428894043,
"logps": -86.6485595703125,
"loss": 118.1932,
"objective": 113.61885833740234,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5375000238418579,
"regularize": 0.2610357701778412,
"step": 275,
"wo_beta": 15.509458541870117
},
{
"dpo_loss": 0.5917832851409912,
"epoch": 0.7935758148323099,
"grad_norm": 17559.793763685935,
"learning_rate": 9.894012231073895e-07,
"logits": -1.4590952396392822,
"logps": -87.64340209960938,
"loss": 132.6812,
"objective": 137.506103515625,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5541666746139526,
"regularize": 0.2935800850391388,
"step": 280,
"wo_beta": 15.80073070526123
},
{
"dpo_loss": 0.5836724042892456,
"epoch": 0.807746811525744,
"grad_norm": 14579.651979817574,
"learning_rate": 9.88361678780429e-07,
"logits": -1.4701313972473145,
"logps": -88.11650085449219,
"loss": 118.3926,
"objective": 111.54865264892578,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.5208333134651184,
"regularize": 0.2552913427352905,
"step": 285,
"wo_beta": 16.792234420776367
},
{
"dpo_loss": 0.5677815079689026,
"epoch": 0.821917808219178,
"grad_norm": 15029.308678016287,
"learning_rate": 9.872741094484964e-07,
"logits": -1.500461220741272,
"logps": -86.58364868164062,
"loss": 116.8557,
"objective": 106.32292175292969,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.550000011920929,
"regularize": 0.24703934788703918,
"step": 290,
"wo_beta": 16.14396095275879
},
{
"dpo_loss": 0.5853282809257507,
"epoch": 0.8360888049126122,
"grad_norm": 15120.877217642179,
"learning_rate": 9.86138622062085e-07,
"logits": -1.494510293006897,
"logps": -86.35259246826172,
"loss": 116.1266,
"objective": 112.15760803222656,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5208333134651184,
"regularize": 0.2589784264564514,
"step": 295,
"wo_beta": 16.28350257873535
},
{
"dpo_loss": 0.5893528461456299,
"epoch": 0.8502598016060463,
"grad_norm": 14818.401223627045,
"learning_rate": 9.849553282839024e-07,
"logits": -1.4687484502792358,
"logps": -85.012939453125,
"loss": 118.3192,
"objective": 113.60901641845703,
"ranking_idealized": 0.625,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.6000000238418579,
"regularize": 0.26101434230804443,
"step": 300,
"wo_beta": 15.157808303833008
},
{
"epoch": 0.8502598016060463,
"eval_dpo_loss": 0.681740403175354,
"eval_logits": -1.4551842212677002,
"eval_logps": -92.57705688476562,
"eval_loss": 183.44589233398438,
"eval_objective": 180.4713592529297,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.532608687877655,
"eval_regularize": 0.41230443120002747,
"eval_runtime": 479.855,
"eval_samples_per_second": 12.066,
"eval_steps_per_second": 1.007,
"eval_wo_beta": 16.404129028320312,
"step": 300
},
{
"dpo_loss": 0.5834535956382751,
"epoch": 0.8644307982994804,
"grad_norm": 14881.03810672454,
"learning_rate": 9.837243444778899e-07,
"logits": -1.4318089485168457,
"logps": -85.52223205566406,
"loss": 117.2997,
"objective": 119.20571899414062,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.6083333492279053,
"regularize": 0.2612408697605133,
"step": 305,
"wo_beta": 15.858050346374512
},
{
"dpo_loss": 0.5729119181632996,
"epoch": 0.8786017949929145,
"grad_norm": 13728.643717044331,
"learning_rate": 9.824457916977784e-07,
"logits": -1.430962085723877,
"logps": -84.47950744628906,
"loss": 113.9787,
"objective": 119.12039184570312,
"ranking_idealized": 0.5958333611488342,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.5541666746139526,
"regularize": 0.2695327699184418,
"step": 310,
"wo_beta": 15.427461624145508
},
{
"dpo_loss": 0.5748838782310486,
"epoch": 0.8927727916863486,
"grad_norm": 15353.814970462101,
"learning_rate": 9.81119795675185e-07,
"logits": -1.4459213018417358,
"logps": -83.27306365966797,
"loss": 112.487,
"objective": 110.93157196044922,
"ranking_idealized": 0.5541666746139526,
"ranking_idealized_expo": 0.47083333134651184,
"ranking_simple": 0.5333333611488342,
"regularize": 0.2502378225326538,
"step": 315,
"wo_beta": 15.45988941192627
},
{
"dpo_loss": 0.5748109221458435,
"epoch": 0.9069437883797827,
"grad_norm": 15007.545319328356,
"learning_rate": 9.797464868072486e-07,
"logits": -1.4066462516784668,
"logps": -86.03001403808594,
"loss": 110.898,
"objective": 109.38225555419922,
"ranking_idealized": 0.6208333373069763,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.6083333492279053,
"regularize": 0.2464234083890915,
"step": 320,
"wo_beta": 15.732470512390137
},
{
"dpo_loss": 0.5822945833206177,
"epoch": 0.9211147850732169,
"grad_norm": 13633.021631468031,
"learning_rate": 9.783260001438066e-07,
"logits": -1.4706979990005493,
"logps": -87.00752258300781,
"loss": 114.9803,
"objective": 106.17591857910156,
"ranking_idealized": 0.6083333492279053,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5708333253860474,
"regularize": 0.25146251916885376,
"step": 325,
"wo_beta": 15.325220108032227
},
{
"dpo_loss": 0.5598011016845703,
"epoch": 0.9352857817666509,
"grad_norm": 14695.63914534257,
"learning_rate": 9.768584753741134e-07,
"logits": -1.3177284002304077,
"logps": -86.90360260009766,
"loss": 116.6896,
"objective": 123.9805679321289,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5916666388511658,
"regularize": 0.2586965262889862,
"step": 330,
"wo_beta": 16.747480392456055
},
{
"dpo_loss": 0.5746142864227295,
"epoch": 0.949456778460085,
"grad_norm": 14706.814411020761,
"learning_rate": 9.753440568131054e-07,
"logits": -1.3514246940612793,
"logps": -86.81550598144531,
"loss": 115.5651,
"objective": 113.5698471069336,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5791666507720947,
"regularize": 0.25022396445274353,
"step": 335,
"wo_beta": 15.857748031616211
},
{
"dpo_loss": 0.5717839598655701,
"epoch": 0.9636277751535192,
"grad_norm": 13577.369360499106,
"learning_rate": 9.737828933872073e-07,
"logits": -1.400834321975708,
"logps": -85.29247283935547,
"loss": 118.1002,
"objective": 108.19886779785156,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.5958333611488342,
"regularize": 0.24417272210121155,
"step": 340,
"wo_beta": 16.27320098876953
},
{
"dpo_loss": 0.5746095776557922,
"epoch": 0.9777987718469532,
"grad_norm": 13673.428728913288,
"learning_rate": 9.721751386196885e-07,
"logits": -1.4508498907089233,
"logps": -84.16486358642578,
"loss": 110.1951,
"objective": 103.0552749633789,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.6291666626930237,
"regularize": 0.23596970736980438,
"step": 345,
"wo_beta": 15.449429512023926
},
{
"dpo_loss": 0.5632264018058777,
"epoch": 0.9919697685403873,
"grad_norm": 13613.304013119689,
"learning_rate": 9.705209506155634e-07,
"logits": -1.3619670867919922,
"logps": -86.77315521240234,
"loss": 108.5029,
"objective": 110.73800659179688,
"ranking_idealized": 0.6583333611488342,
"ranking_idealized_expo": 0.574999988079071,
"ranking_simple": 0.625,
"regularize": 0.26065030694007874,
"step": 350,
"wo_beta": 15.869379997253418
},
{
"epoch": 0.9919697685403873,
"eval_dpo_loss": 0.678183376789093,
"eval_logits": -1.4316504001617432,
"eval_logps": -92.18038177490234,
"eval_loss": 183.9593048095703,
"eval_objective": 180.11509704589844,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5320910811424255,
"eval_regularize": 0.40945151448249817,
"eval_runtime": 476.2355,
"eval_samples_per_second": 12.158,
"eval_steps_per_second": 1.014,
"eval_wo_beta": 16.336669921875,
"step": 350
},
{
"dpo_loss": 0.5633755326271057,
"epoch": 1.0061407652338215,
"grad_norm": 13717.944611215353,
"learning_rate": 9.688204920460466e-07,
"logits": -1.542311191558838,
"logps": -84.23912811279297,
"loss": 104.9579,
"objective": 99.2624740600586,
"ranking_idealized": 0.6083333492279053,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5791666507720947,
"regularize": 0.2348737269639969,
"step": 355,
"wo_beta": 16.799049377441406
},
{
"dpo_loss": 0.5596449971199036,
"epoch": 1.0203117619272555,
"grad_norm": 15569.178838691118,
"learning_rate": 9.670739301325534e-07,
"logits": -1.4423024654388428,
"logps": -84.60731506347656,
"loss": 97.354,
"objective": 96.60607147216797,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5625,
"regularize": 0.2134130448102951,
"step": 360,
"wo_beta": 16.611034393310547
},
{
"dpo_loss": 0.5573465824127197,
"epoch": 1.0344827586206897,
"grad_norm": 14412.61274623368,
"learning_rate": 9.652814366302568e-07,
"logits": -1.4710925817489624,
"logps": -84.47969818115234,
"loss": 109.2182,
"objective": 110.00160217285156,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5833333134651184,
"regularize": 0.2383657544851303,
"step": 365,
"wo_beta": 14.846599578857422
},
{
"dpo_loss": 0.559634804725647,
"epoch": 1.0486537553141237,
"grad_norm": 15121.427522934051,
"learning_rate": 9.63443187811197e-07,
"logits": -1.407724142074585,
"logps": -82.60728454589844,
"loss": 94.8917,
"objective": 93.84876251220703,
"ranking_idealized": 0.5958333611488342,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5666666626930237,
"regularize": 0.21665388345718384,
"step": 370,
"wo_beta": 15.743396759033203
},
{
"dpo_loss": 0.5503371357917786,
"epoch": 1.0628247520075578,
"grad_norm": 14225.520073845873,
"learning_rate": 9.61559364446946e-07,
"logits": -1.4566776752471924,
"logps": -84.27056121826172,
"loss": 96.0324,
"objective": 91.85355377197266,
"ranking_idealized": 0.6083333492279053,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5791666507720947,
"regularize": 0.20962905883789062,
"step": 375,
"wo_beta": 16.301313400268555
},
{
"dpo_loss": 0.5628603100776672,
"epoch": 1.076995748700992,
"grad_norm": 14108.362094897184,
"learning_rate": 9.596301517908328e-07,
"logits": -1.4387798309326172,
"logps": -86.27851867675781,
"loss": 98.7923,
"objective": 108.01164245605469,
"ranking_idealized": 0.6625000238418579,
"ranking_idealized_expo": 0.574999988079071,
"ranking_simple": 0.6291666626930237,
"regularize": 0.2488705962896347,
"step": 380,
"wo_beta": 15.773112297058105
},
{
"dpo_loss": 0.5771984457969666,
"epoch": 1.091166745394426,
"grad_norm": 13105.168740611702,
"learning_rate": 9.576557395597236e-07,
"logits": -1.4021495580673218,
"logps": -85.1259536743164,
"loss": 99.6716,
"objective": 109.83814239501953,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.5791666507720947,
"regularize": 0.23721593618392944,
"step": 385,
"wo_beta": 15.801533699035645
},
{
"dpo_loss": 0.5509606599807739,
"epoch": 1.10533774208786,
"grad_norm": 13663.873020268169,
"learning_rate": 9.556363219153662e-07,
"logits": -1.3366678953170776,
"logps": -86.07147979736328,
"loss": 96.1117,
"objective": 90.10648345947266,
"ranking_idealized": 0.637499988079071,
"ranking_idealized_expo": 0.5625,
"ranking_simple": 0.6333333253860474,
"regularize": 0.2138025164604187,
"step": 390,
"wo_beta": 16.90329933166504
},
{
"dpo_loss": 0.5398973822593689,
"epoch": 1.1195087387812943,
"grad_norm": 14913.448008058538,
"learning_rate": 9.53572097445297e-07,
"logits": -1.3910351991653442,
"logps": -84.76091766357422,
"loss": 99.588,
"objective": 102.71925354003906,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5708333253860474,
"regularize": 0.21998313069343567,
"step": 395,
"wo_beta": 14.880850791931152
},
{
"dpo_loss": 0.5650266408920288,
"epoch": 1.1336797354747283,
"grad_norm": 14606.821946811386,
"learning_rate": 9.514632691433106e-07,
"logits": -1.4497681856155396,
"logps": -82.1307373046875,
"loss": 104.6813,
"objective": 107.99799346923828,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.6166666746139526,
"regularize": 0.23726312816143036,
"step": 400,
"wo_beta": 15.854341506958008
},
{
"epoch": 1.1336797354747283,
"eval_dpo_loss": 0.6800939440727234,
"eval_logits": -1.3930206298828125,
"eval_logps": -89.72613525390625,
"eval_loss": 183.87586975097656,
"eval_objective": 180.28396606445312,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5310559272766113,
"eval_regularize": 0.40940526127815247,
"eval_runtime": 478.3822,
"eval_samples_per_second": 12.103,
"eval_steps_per_second": 1.01,
"eval_wo_beta": 16.22085189819336,
"step": 400
},
{
"dpo_loss": 0.5639857053756714,
"epoch": 1.1478507321681626,
"grad_norm": 15414.866076924996,
"learning_rate": 9.493100443894984e-07,
"logits": -1.416764736175537,
"logps": -84.40596771240234,
"loss": 97.7792,
"objective": 106.99815368652344,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.4625000059604645,
"ranking_simple": 0.5416666865348816,
"regularize": 0.22935108840465546,
"step": 405,
"wo_beta": 17.16730499267578
},
{
"dpo_loss": 0.5612814426422119,
"epoch": 1.1620217288615966,
"grad_norm": 13730.11308532576,
"learning_rate": 9.471126349298556e-07,
"logits": -1.4282060861587524,
"logps": -84.3336410522461,
"loss": 96.1344,
"objective": 93.89948272705078,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5874999761581421,
"regularize": 0.20958545804023743,
"step": 410,
"wo_beta": 16.73971939086914
},
{
"dpo_loss": 0.5569156408309937,
"epoch": 1.1761927255550306,
"grad_norm": 11975.058144386021,
"learning_rate": 9.448712568554571e-07,
"logits": -1.3549463748931885,
"logps": -83.00645446777344,
"loss": 93.1875,
"objective": 96.11307525634766,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5833333134651184,
"regularize": 0.22849011421203613,
"step": 415,
"wo_beta": 16.471454620361328
},
{
"dpo_loss": 0.5578625202178955,
"epoch": 1.1903637222484649,
"grad_norm": 13553.103377125492,
"learning_rate": 9.425861305812081e-07,
"logits": -1.3200798034667969,
"logps": -84.18423461914062,
"loss": 99.8958,
"objective": 90.86384582519531,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5666666626930237,
"regularize": 0.2039288878440857,
"step": 420,
"wo_beta": 16.64999008178711
},
{
"dpo_loss": 0.5598068237304688,
"epoch": 1.204534718941899,
"grad_norm": 13382.98806426423,
"learning_rate": 9.40257480824169e-07,
"logits": -1.368670105934143,
"logps": -82.51498413085938,
"loss": 95.7898,
"objective": 98.82903289794922,
"ranking_idealized": 0.5958333611488342,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5791666507720947,
"regularize": 0.21656714379787445,
"step": 425,
"wo_beta": 15.240234375
},
{
"dpo_loss": 0.5631528496742249,
"epoch": 1.2187057156353331,
"grad_norm": 13379.590249575365,
"learning_rate": 9.378855365814557e-07,
"logits": -1.3373157978057861,
"logps": -84.1694107055664,
"loss": 89.0871,
"objective": 83.64144897460938,
"ranking_idealized": 0.5625,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5708333253860474,
"regularize": 0.19078685343265533,
"step": 430,
"wo_beta": 16.387685775756836
},
{
"dpo_loss": 0.5549448132514954,
"epoch": 1.2328767123287672,
"grad_norm": 13813.435024161312,
"learning_rate": 9.354705311077218e-07,
"logits": -1.287793755531311,
"logps": -83.4052963256836,
"loss": 93.9205,
"objective": 94.07813262939453,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.5958333611488342,
"ranking_simple": 0.637499988079071,
"regularize": 0.21654988825321198,
"step": 435,
"wo_beta": 17.72869110107422
},
{
"dpo_loss": 0.5550996661186218,
"epoch": 1.2470477090222012,
"grad_norm": 15408.139135942378,
"learning_rate": 9.330127018922193e-07,
"logits": -1.302925705909729,
"logps": -83.39546203613281,
"loss": 87.7477,
"objective": 81.88467407226562,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.5541666746139526,
"ranking_simple": 0.612500011920929,
"regularize": 0.18918146193027496,
"step": 440,
"wo_beta": 15.06383991241455
},
{
"dpo_loss": 0.544273316860199,
"epoch": 1.2612187057156352,
"grad_norm": 13358.127194753248,
"learning_rate": 9.305122906354448e-07,
"logits": -1.3234721422195435,
"logps": -85.1892318725586,
"loss": 91.347,
"objective": 87.14881896972656,
"ranking_idealized": 0.637499988079071,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.6208333373069763,
"regularize": 0.2082992047071457,
"step": 445,
"wo_beta": 17.48933219909668
},
{
"dpo_loss": 0.5497770309448242,
"epoch": 1.2753897024090695,
"grad_norm": 13860.879601223209,
"learning_rate": 9.279695432253708e-07,
"logits": -1.4758702516555786,
"logps": -84.91988372802734,
"loss": 90.5585,
"objective": 87.8936996459961,
"ranking_idealized": 0.5541666746139526,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5666666626930237,
"regularize": 0.2043653279542923,
"step": 450,
"wo_beta": 15.034831047058105
},
{
"epoch": 1.2753897024090695,
"eval_dpo_loss": 0.6794779300689697,
"eval_logits": -1.3663489818572998,
"eval_logps": -91.20365905761719,
"eval_loss": 184.06732177734375,
"eval_objective": 180.62957763671875,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5357142686843872,
"eval_regularize": 0.41047051548957825,
"eval_runtime": 479.5938,
"eval_samples_per_second": 12.073,
"eval_steps_per_second": 1.007,
"eval_wo_beta": 16.288923263549805,
"step": 450
},
{
"dpo_loss": 0.5493736267089844,
"epoch": 1.2895606991025035,
"grad_norm": 12737.57571248245,
"learning_rate": 9.253847097132655e-07,
"logits": -1.2778384685516357,
"logps": -85.39282989501953,
"loss": 90.8388,
"objective": 97.43504333496094,
"ranking_idealized": 0.5458333492279053,
"ranking_idealized_expo": 0.49166667461395264,
"ranking_simple": 0.5375000238418579,
"regularize": 0.21359196305274963,
"step": 455,
"wo_beta": 15.552309036254883
},
{
"dpo_loss": 0.5543228983879089,
"epoch": 1.3037316957959377,
"grad_norm": 14070.394055394958,
"learning_rate": 9.227580442891021e-07,
"logits": -1.3934885263442993,
"logps": -84.22640991210938,
"loss": 89.7715,
"objective": 87.21723175048828,
"ranking_idealized": 0.5583333373069763,
"ranking_idealized_expo": 0.4749999940395355,
"ranking_simple": 0.5333333611488342,
"regularize": 0.1974954754114151,
"step": 460,
"wo_beta": 16.378904342651367
},
{
"dpo_loss": 0.5476227402687073,
"epoch": 1.3179026924893718,
"grad_norm": 11947.40976577932,
"learning_rate": 9.200898052565637e-07,
"logits": -1.3618992567062378,
"logps": -82.62676239013672,
"loss": 89.4031,
"objective": 95.53166961669922,
"ranking_idealized": 0.5541666746139526,
"ranking_idealized_expo": 0.4749999940395355,
"ranking_simple": 0.5416666865348816,
"regularize": 0.2165236622095108,
"step": 465,
"wo_beta": 14.700522422790527
},
{
"dpo_loss": 0.5633279085159302,
"epoch": 1.3320736891828058,
"grad_norm": 14427.62714295139,
"learning_rate": 9.173802550076401e-07,
"logits": -1.4394139051437378,
"logps": -81.61421203613281,
"loss": 96.3098,
"objective": 105.95228576660156,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5833333134651184,
"regularize": 0.22128254175186157,
"step": 470,
"wo_beta": 14.451654434204102
},
{
"dpo_loss": 0.5512283444404602,
"epoch": 1.34624468587624,
"grad_norm": 15510.676068153169,
"learning_rate": 9.146296599968258e-07,
"logits": -1.334899663925171,
"logps": -84.10041809082031,
"loss": 85.2643,
"objective": 97.66104125976562,
"ranking_idealized": 0.6416666507720947,
"ranking_idealized_expo": 0.574999988079071,
"ranking_simple": 0.6208333373069763,
"regularize": 0.22577306628227234,
"step": 475,
"wo_beta": 16.777812957763672
},
{
"dpo_loss": 0.5490090847015381,
"epoch": 1.360415682569674,
"grad_norm": 13039.859969979723,
"learning_rate": 9.118382907149163e-07,
"logits": -1.396318793296814,
"logps": -84.05583953857422,
"loss": 92.9048,
"objective": 106.32127380371094,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.5291666388511658,
"regularize": 0.22388581931591034,
"step": 480,
"wo_beta": 18.35649871826172
},
{
"dpo_loss": 0.55390864610672,
"epoch": 1.3745866792631083,
"grad_norm": 14303.492597277622,
"learning_rate": 9.090064216624092e-07,
"logits": -1.3780549764633179,
"logps": -81.48451232910156,
"loss": 89.9123,
"objective": 85.18955993652344,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5666666626930237,
"regularize": 0.19940294325351715,
"step": 485,
"wo_beta": 16.11130714416504
},
{
"dpo_loss": 0.5646805167198181,
"epoch": 1.3887576759565423,
"grad_norm": 13569.748240897005,
"learning_rate": 9.061343313225087e-07,
"logits": -1.3297451734542847,
"logps": -84.58447265625,
"loss": 91.7915,
"objective": 92.44489288330078,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5708333253860474,
"regularize": 0.20839503407478333,
"step": 490,
"wo_beta": 15.887747764587402
},
{
"dpo_loss": 0.5439994931221008,
"epoch": 1.4029286726499763,
"grad_norm": 14224.725006990095,
"learning_rate": 9.032223021337413e-07,
"logits": -1.3493283987045288,
"logps": -84.9798355102539,
"loss": 89.0675,
"objective": 84.06017303466797,
"ranking_idealized": 0.5583333373069763,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.550000011920929,
"regularize": 0.1910681426525116,
"step": 495,
"wo_beta": 15.590251922607422
},
{
"dpo_loss": 0.539610743522644,
"epoch": 1.4170996693434104,
"grad_norm": 14123.937473491551,
"learning_rate": 9.002706204621802e-07,
"logits": -1.278394341468811,
"logps": -83.08454132080078,
"loss": 91.2372,
"objective": 89.69623565673828,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.574999988079071,
"regularize": 0.20472820103168488,
"step": 500,
"wo_beta": 15.177144050598145
},
{
"epoch": 1.4170996693434104,
"eval_dpo_loss": 0.6782248020172119,
"eval_logits": -1.3281084299087524,
"eval_logps": -89.4298095703125,
"eval_loss": 185.71939086914062,
"eval_objective": 180.8789520263672,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.534679114818573,
"eval_regularize": 0.41098901629447937,
"eval_runtime": 475.7419,
"eval_samples_per_second": 12.17,
"eval_steps_per_second": 1.015,
"eval_wo_beta": 16.044300079345703,
"step": 500
},
{
"dpo_loss": 0.5400077700614929,
"epoch": 1.4312706660368446,
"grad_norm": 13097.852775439285,
"learning_rate": 8.972795765732846e-07,
"logits": -1.3413732051849365,
"logps": -82.83694458007812,
"loss": 96.4414,
"objective": 99.75823211669922,
"ranking_idealized": 0.625,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5874999761581421,
"regularize": 0.20853710174560547,
"step": 505,
"wo_beta": 16.662084579467773
},
{
"dpo_loss": 0.5544862151145935,
"epoch": 1.4454416627302786,
"grad_norm": 13707.829813480788,
"learning_rate": 8.942494646033554e-07,
"logits": -1.3700981140136719,
"logps": -84.05197143554688,
"loss": 86.1912,
"objective": 85.1514663696289,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5833333134651184,
"regularize": 0.18519388139247894,
"step": 510,
"wo_beta": 15.12604808807373
},
{
"dpo_loss": 0.5474262833595276,
"epoch": 1.4596126594237129,
"grad_norm": 16384.18532468762,
"learning_rate": 8.911805825306096e-07,
"logits": -1.4208234548568726,
"logps": -85.2526626586914,
"loss": 86.2928,
"objective": 94.92349243164062,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.4625000059604645,
"ranking_simple": 0.5249999761581421,
"regularize": 0.21247318387031555,
"step": 515,
"wo_beta": 16.363656997680664
},
{
"dpo_loss": 0.5538729429244995,
"epoch": 1.473783656117147,
"grad_norm": 14068.735921521182,
"learning_rate": 8.880732321458784e-07,
"logits": -1.3451961278915405,
"logps": -81.92323303222656,
"loss": 85.1002,
"objective": 84.61219024658203,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.49166667461395264,
"ranking_simple": 0.5416666865348816,
"regularize": 0.19058094918727875,
"step": 520,
"wo_beta": 16.039138793945312
},
{
"dpo_loss": 0.5211088061332703,
"epoch": 1.487954652810581,
"grad_norm": 12355.643543079665,
"learning_rate": 8.849277190229283e-07,
"logits": -1.2561639547348022,
"logps": -80.8559341430664,
"loss": 87.4323,
"objective": 88.15239715576172,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.6000000238418579,
"regularize": 0.19789734482765198,
"step": 525,
"wo_beta": 15.64743423461914
},
{
"dpo_loss": 0.5529366731643677,
"epoch": 1.5021256495040152,
"grad_norm": 13792.295805387279,
"learning_rate": 8.817443524884117e-07,
"logits": -1.4202781915664673,
"logps": -84.57428741455078,
"loss": 89.1332,
"objective": 93.31535339355469,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5708333253860474,
"regularize": 0.20485611259937286,
"step": 530,
"wo_beta": 15.418906211853027
},
{
"dpo_loss": 0.5380304455757141,
"epoch": 1.5162966461974492,
"grad_norm": 12748.671458728879,
"learning_rate": 8.785234455914488e-07,
"logits": -1.4013686180114746,
"logps": -83.34593963623047,
"loss": 86.3246,
"objective": 83.55619812011719,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5791666507720947,
"regularize": 0.19464156031608582,
"step": 535,
"wo_beta": 15.718771934509277
},
{
"dpo_loss": 0.5602856874465942,
"epoch": 1.5304676428908834,
"grad_norm": 13600.712518077142,
"learning_rate": 8.752653150728411e-07,
"logits": -1.3116246461868286,
"logps": -83.8393783569336,
"loss": 85.7548,
"objective": 85.53334045410156,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5874999761581421,
"regularize": 0.19387957453727722,
"step": 540,
"wo_beta": 15.35750961303711
},
{
"dpo_loss": 0.5608557462692261,
"epoch": 1.5446386395843175,
"grad_norm": 13202.179692261727,
"learning_rate": 8.719702813339247e-07,
"logits": -1.4217339754104614,
"logps": -85.13090515136719,
"loss": 78.3728,
"objective": 73.64112854003906,
"ranking_idealized": 0.6583333611488342,
"ranking_idealized_expo": 0.5874999761581421,
"ranking_simple": 0.6499999761581421,
"regularize": 0.17463207244873047,
"step": 545,
"wo_beta": 14.742905616760254
},
{
"dpo_loss": 0.5433780550956726,
"epoch": 1.5588096362777515,
"grad_norm": 13773.885858068237,
"learning_rate": 8.68638668405062e-07,
"logits": -1.4105440378189087,
"logps": -85.73950958251953,
"loss": 85.7307,
"objective": 91.58641815185547,
"ranking_idealized": 0.5625,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.5291666388511658,
"regularize": 0.20735137164592743,
"step": 550,
"wo_beta": 15.781331062316895
},
{
"epoch": 1.5588096362777515,
"eval_dpo_loss": 0.6799347400665283,
"eval_logits": -1.36829674243927,
"eval_logps": -91.68656921386719,
"eval_loss": 186.22413635253906,
"eval_objective": 182.13821411132812,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.533643901348114,
"eval_regularize": 0.4147377014160156,
"eval_runtime": 478.899,
"eval_samples_per_second": 12.09,
"eval_steps_per_second": 1.009,
"eval_wo_beta": 16.186290740966797,
"step": 550
},
{
"dpo_loss": 0.5562130212783813,
"epoch": 1.5729806329711855,
"grad_norm": 13716.988937741002,
"learning_rate": 8.652708039137766e-07,
"logits": -1.2273495197296143,
"logps": -85.2579116821289,
"loss": 90.1931,
"objective": 91.27943420410156,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5833333134651184,
"regularize": 0.19078856706619263,
"step": 555,
"wo_beta": 14.82008171081543
},
{
"dpo_loss": 0.5405426621437073,
"epoch": 1.5871516296646198,
"grad_norm": 13222.290128913079,
"learning_rate": 8.61867019052535e-07,
"logits": -1.3004463911056519,
"logps": -84.03120422363281,
"loss": 82.5026,
"objective": 82.23470306396484,
"ranking_idealized": 0.6083333492279053,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.574999988079071,
"regularize": 0.19372278451919556,
"step": 560,
"wo_beta": 16.210308074951172
},
{
"dpo_loss": 0.545985221862793,
"epoch": 1.601322626358054,
"grad_norm": 13798.95251346989,
"learning_rate": 8.584276485461775e-07,
"logits": -1.2903294563293457,
"logps": -85.43083190917969,
"loss": 87.1773,
"objective": 87.97190856933594,
"ranking_idealized": 0.5458333492279053,
"ranking_idealized_expo": 0.44999998807907104,
"ranking_simple": 0.5458333492279053,
"regularize": 0.18329960107803345,
"step": 565,
"wo_beta": 15.259784698486328
},
{
"dpo_loss": 0.5544782280921936,
"epoch": 1.615493623051488,
"grad_norm": 14310.342902213652,
"learning_rate": 8.549530306190014e-07,
"logits": -1.4501588344573975,
"logps": -85.62173461914062,
"loss": 86.1569,
"objective": 88.04158020019531,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5708333253860474,
"regularize": 0.19149872660636902,
"step": 570,
"wo_beta": 15.673080444335938
},
{
"dpo_loss": 0.5482128858566284,
"epoch": 1.629664619744922,
"grad_norm": 13673.298787796572,
"learning_rate": 8.514435069615004e-07,
"logits": -1.380743384361267,
"logps": -83.26321411132812,
"loss": 78.7831,
"objective": 86.95629119873047,
"ranking_idealized": 0.637499988079071,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.6166666746139526,
"regularize": 0.1899789720773697,
"step": 575,
"wo_beta": 16.415205001831055
},
{
"dpo_loss": 0.5394086241722107,
"epoch": 1.643835616438356,
"grad_norm": 13082.53312626321,
"learning_rate": 8.478994226967638e-07,
"logits": -1.4001491069793701,
"logps": -83.10562133789062,
"loss": 76.6065,
"objective": 77.20848846435547,
"ranking_idealized": 0.6083333492279053,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.6000000238418579,
"regularize": 0.1699313372373581,
"step": 580,
"wo_beta": 14.931032180786133
},
{
"dpo_loss": 0.5450774431228638,
"epoch": 1.6580066131317903,
"grad_norm": 14803.266258769623,
"learning_rate": 8.443211263465362e-07,
"logits": -1.2514622211456299,
"logps": -82.91756439208984,
"loss": 81.1936,
"objective": 78.58777618408203,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.4625000059604645,
"ranking_simple": 0.5291666388511658,
"regularize": 0.18020884692668915,
"step": 585,
"wo_beta": 16.229631423950195
},
{
"dpo_loss": 0.5452067852020264,
"epoch": 1.6721776098252243,
"grad_norm": 14897.05549715986,
"learning_rate": 8.407089697969456e-07,
"logits": -1.310152530670166,
"logps": -82.58568572998047,
"loss": 84.6601,
"objective": 89.34095764160156,
"ranking_idealized": 0.6208333373069763,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5874999761581421,
"regularize": 0.18940496444702148,
"step": 590,
"wo_beta": 13.480273246765137
},
{
"dpo_loss": 0.5498862862586975,
"epoch": 1.6863486065186586,
"grad_norm": 13388.885538994262,
"learning_rate": 8.370633082638975e-07,
"logits": -1.2777602672576904,
"logps": -84.25193786621094,
"loss": 85.2501,
"objective": 97.64402770996094,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5791666507720947,
"regularize": 0.21810217201709747,
"step": 595,
"wo_beta": 17.075584411621094
},
{
"dpo_loss": 0.541688084602356,
"epoch": 1.7005196032120926,
"grad_norm": 12810.439354567186,
"learning_rate": 8.333845002581458e-07,
"logits": -1.3377609252929688,
"logps": -85.63569641113281,
"loss": 79.9458,
"objective": 90.4583740234375,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.612500011920929,
"regularize": 0.2045913189649582,
"step": 600,
"wo_beta": 16.088045120239258
},
{
"epoch": 1.7005196032120926,
"eval_dpo_loss": 0.6794285774230957,
"eval_logits": -1.3519084453582764,
"eval_logps": -91.084716796875,
"eval_loss": 186.21368408203125,
"eval_objective": 181.86863708496094,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5372670888900757,
"eval_regularize": 0.4135282337665558,
"eval_runtime": 449.0944,
"eval_samples_per_second": 12.893,
"eval_steps_per_second": 1.075,
"eval_wo_beta": 16.10601043701172,
"step": 600
},
{
"dpo_loss": 0.5528364777565002,
"epoch": 1.7146905999055266,
"grad_norm": 12864.49342558613,
"learning_rate": 8.296729075500343e-07,
"logits": -1.2839235067367554,
"logps": -85.77102661132812,
"loss": 81.7288,
"objective": 90.60871124267578,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.574999988079071,
"regularize": 0.2010929137468338,
"step": 605,
"wo_beta": 18.23944091796875
},
{
"dpo_loss": 0.5559037327766418,
"epoch": 1.7288615965989607,
"grad_norm": 14171.44704590598,
"learning_rate": 8.259288951339232e-07,
"logits": -1.3577406406402588,
"logps": -83.76995086669922,
"loss": 81.4701,
"objective": 75.51998138427734,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.5708333253860474,
"ranking_simple": 0.6291666626930237,
"regularize": 0.17047205567359924,
"step": 610,
"wo_beta": 16.163959503173828
},
{
"dpo_loss": 0.5623223185539246,
"epoch": 1.743032593292395,
"grad_norm": 14064.695817652162,
"learning_rate": 8.221528311922941e-07,
"logits": -1.3709431886672974,
"logps": -83.62710571289062,
"loss": 84.5652,
"objective": 85.32384490966797,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5791666507720947,
"regularize": 0.19118142127990723,
"step": 615,
"wo_beta": 15.722049713134766
},
{
"dpo_loss": 0.5426214933395386,
"epoch": 1.7572035899858292,
"grad_norm": 13161.981948520664,
"learning_rate": 8.183450870595441e-07,
"logits": -1.3993924856185913,
"logps": -83.90966796875,
"loss": 81.5518,
"objective": 84.29554748535156,
"ranking_idealized": 0.6208333373069763,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.6000000238418579,
"regularize": 0.19568467140197754,
"step": 620,
"wo_beta": 16.582374572753906
},
{
"dpo_loss": 0.5548843145370483,
"epoch": 1.7713745866792632,
"grad_norm": 13578.593083281268,
"learning_rate": 8.145060371854691e-07,
"logits": -1.3166680335998535,
"logps": -83.37279510498047,
"loss": 77.6344,
"objective": 80.62175750732422,
"ranking_idealized": 0.6083333492279053,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5916666388511658,
"regularize": 0.17566385865211487,
"step": 625,
"wo_beta": 15.19571304321289
},
{
"dpo_loss": 0.548730194568634,
"epoch": 1.7855455833726972,
"grad_norm": 12867.261945978005,
"learning_rate": 8.106360590984404e-07,
"logits": -1.3329386711120605,
"logps": -85.60625457763672,
"loss": 75.8762,
"objective": 75.14217376708984,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.49166667461395264,
"ranking_simple": 0.5583333373069763,
"regularize": 0.17412720620632172,
"step": 630,
"wo_beta": 16.33298110961914
},
{
"dpo_loss": 0.5529462695121765,
"epoch": 1.7997165800661312,
"grad_norm": 12432.106461076137,
"learning_rate": 8.067355333682797e-07,
"logits": -1.4188921451568604,
"logps": -84.8874282836914,
"loss": 78.6516,
"objective": 76.64624786376953,
"ranking_idealized": 0.637499988079071,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5916666388511658,
"regularize": 0.17813840508460999,
"step": 635,
"wo_beta": 16.95586395263672
},
{
"dpo_loss": 0.5410430431365967,
"epoch": 1.8138875767595655,
"grad_norm": 12324.183379735212,
"learning_rate": 8.028048435688333e-07,
"logits": -1.3641606569290161,
"logps": -85.47127532958984,
"loss": 78.7118,
"objective": 82.21182250976562,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5666666626930237,
"regularize": 0.18489192426204681,
"step": 640,
"wo_beta": 17.22258186340332
},
{
"dpo_loss": 0.5470555424690247,
"epoch": 1.8280585734529995,
"grad_norm": 13971.672253595729,
"learning_rate": 7.988443762402523e-07,
"logits": -1.4050637483596802,
"logps": -85.07406616210938,
"loss": 78.6084,
"objective": 74.21890258789062,
"ranking_idealized": 0.6416666507720947,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.6291666626930237,
"regularize": 0.16714391112327576,
"step": 645,
"wo_beta": 16.80624008178711
},
{
"dpo_loss": 0.5424051880836487,
"epoch": 1.8422295701464337,
"grad_norm": 15285.601428700493,
"learning_rate": 7.948545208509811e-07,
"logits": -1.440900444984436,
"logps": -84.5870590209961,
"loss": 86.7578,
"objective": 89.12664031982422,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.6000000238418579,
"regularize": 0.18858183920383453,
"step": 650,
"wo_beta": 15.562705039978027
},
{
"epoch": 1.8422295701464337,
"eval_dpo_loss": 0.6796835660934448,
"eval_logits": -1.3402661085128784,
"eval_logps": -89.40703582763672,
"eval_loss": 186.7196044921875,
"eval_objective": 182.49703979492188,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.531573474407196,
"eval_regularize": 0.4141009747982025,
"eval_runtime": 450.1436,
"eval_samples_per_second": 12.863,
"eval_steps_per_second": 1.073,
"eval_wo_beta": 16.0269832611084,
"step": 650
},
{
"dpo_loss": 0.5390594601631165,
"epoch": 1.8564005668398678,
"grad_norm": 14945.717954531257,
"learning_rate": 7.90835669759456e-07,
"logits": -1.292981505393982,
"logps": -81.8280029296875,
"loss": 79.8064,
"objective": 77.88701629638672,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.6000000238418579,
"regularize": 0.18046139180660248,
"step": 655,
"wo_beta": 15.520308494567871
},
{
"dpo_loss": 0.5524376034736633,
"epoch": 1.8705715635333018,
"grad_norm": 12956.308969791295,
"learning_rate": 7.86788218175523e-07,
"logits": -1.3386873006820679,
"logps": -84.97721862792969,
"loss": 77.9731,
"objective": 77.8855972290039,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.6041666865348816,
"regularize": 0.17455393075942993,
"step": 660,
"wo_beta": 17.077417373657227
},
{
"dpo_loss": 0.562981903553009,
"epoch": 1.8847425602267358,
"grad_norm": 12832.376229580192,
"learning_rate": 7.827125641215718e-07,
"logits": -1.334754228591919,
"logps": -83.5533447265625,
"loss": 82.4367,
"objective": 85.92207336425781,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.625,
"regularize": 0.1833416372537613,
"step": 665,
"wo_beta": 15.230433464050293
},
{
"dpo_loss": 0.548839807510376,
"epoch": 1.89891355692017,
"grad_norm": 13460.183191194346,
"learning_rate": 7.786091083933949e-07,
"logits": -1.273821473121643,
"logps": -81.98705291748047,
"loss": 71.3613,
"objective": 68.62953186035156,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.5375000238418579,
"regularize": 0.16619008779525757,
"step": 670,
"wo_beta": 16.408151626586914
},
{
"dpo_loss": 0.5611833930015564,
"epoch": 1.9130845536136043,
"grad_norm": 12953.446893922981,
"learning_rate": 7.744782545207744e-07,
"logits": -1.2947652339935303,
"logps": -83.05793762207031,
"loss": 71.3196,
"objective": 74.63235473632812,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.6083333492279053,
"regularize": 0.16350051760673523,
"step": 675,
"wo_beta": 15.741961479187012
},
{
"dpo_loss": 0.5451231598854065,
"epoch": 1.9272555503070383,
"grad_norm": 13412.02601484903,
"learning_rate": 7.703204087277988e-07,
"logits": -1.3697810173034668,
"logps": -85.1467056274414,
"loss": 71.5185,
"objective": 70.06403350830078,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.6000000238418579,
"regularize": 0.16510257124900818,
"step": 680,
"wo_beta": 15.431069374084473
},
{
"dpo_loss": 0.5437536835670471,
"epoch": 1.9414265470004723,
"grad_norm": 13070.654673150682,
"learning_rate": 7.661359798929152e-07,
"logits": -1.2984110116958618,
"logps": -82.4813003540039,
"loss": 72.6279,
"objective": 63.83388137817383,
"ranking_idealized": 0.5291666388511658,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.5333333611488342,
"regularize": 0.1474105566740036,
"step": 685,
"wo_beta": 15.765579223632812
},
{
"dpo_loss": 0.5520148873329163,
"epoch": 1.9555975436939064,
"grad_norm": 13585.612422979371,
"learning_rate": 7.619253795087208e-07,
"logits": -1.3621736764907837,
"logps": -83.20579528808594,
"loss": 70.4149,
"objective": 71.44465637207031,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.5916666388511658,
"regularize": 0.15733769536018372,
"step": 690,
"wo_beta": 16.008506774902344
},
{
"dpo_loss": 0.5521395802497864,
"epoch": 1.9697685403873406,
"grad_norm": 12626.830880791873,
"learning_rate": 7.576890216414972e-07,
"logits": -1.2345752716064453,
"logps": -84.00497436523438,
"loss": 69.938,
"objective": 70.55232238769531,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.5541666746139526,
"regularize": 0.15369382500648499,
"step": 695,
"wo_beta": 16.505474090576172
},
{
"dpo_loss": 0.5477771759033203,
"epoch": 1.9839395370807746,
"grad_norm": 14507.10563022748,
"learning_rate": 7.534273228904915e-07,
"logits": -1.2208502292633057,
"logps": -84.28005981445312,
"loss": 76.2665,
"objective": 85.08452606201172,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.574999988079071,
"regularize": 0.1893630176782608,
"step": 700,
"wo_beta": 15.212244987487793
},
{
"epoch": 1.9839395370807746,
"eval_dpo_loss": 0.6800020337104797,
"eval_logits": -1.3222942352294922,
"eval_logps": -89.5856704711914,
"eval_loss": 186.28018188476562,
"eval_objective": 182.39332580566406,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5310559272766113,
"eval_regularize": 0.4136333167552948,
"eval_runtime": 489.8617,
"eval_samples_per_second": 11.82,
"eval_steps_per_second": 0.986,
"eval_wo_beta": 16.111663818359375,
"step": 700
},
{
"dpo_loss": 0.5436014533042908,
"epoch": 1.9981105337742089,
"grad_norm": 14761.110739737924,
"learning_rate": 7.49140702346948e-07,
"logits": -1.1587742567062378,
"logps": -83.4106216430664,
"loss": 71.0478,
"objective": 77.40288543701172,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5541666746139526,
"regularize": 0.1687079817056656,
"step": 705,
"wo_beta": 17.46946907043457
},
{
"dpo_loss": 0.5400715470314026,
"epoch": 2.012281530467643,
"grad_norm": 13854.290443619322,
"learning_rate": 7.448295815528956e-07,
"logits": -1.3091672658920288,
"logps": -83.20928192138672,
"loss": 68.6235,
"objective": 74.59980773925781,
"ranking_idealized": 0.6083333492279053,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.6000000238418579,
"regularize": 0.15744589269161224,
"step": 710,
"wo_beta": 16.282772064208984
},
{
"dpo_loss": 0.5266523957252502,
"epoch": 2.026452527161077,
"grad_norm": 12772.47402835887,
"learning_rate": 7.404943844596938e-07,
"logits": -1.3287214040756226,
"logps": -82.50818634033203,
"loss": 67.4219,
"objective": 67.50071716308594,
"ranking_idealized": 0.6791666746139526,
"ranking_idealized_expo": 0.5708333253860474,
"ranking_simple": 0.6625000238418579,
"regularize": 0.15344351530075073,
"step": 715,
"wo_beta": 15.63277816772461
},
{
"dpo_loss": 0.5342952013015747,
"epoch": 2.040623523854511,
"grad_norm": 12280.29550374932,
"learning_rate": 7.361355373863413e-07,
"logits": -1.3206126689910889,
"logps": -83.4239273071289,
"loss": 65.7671,
"objective": 62.988590240478516,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.6041666865348816,
"regularize": 0.13976921141147614,
"step": 720,
"wo_beta": 16.120634078979492
},
{
"dpo_loss": 0.5416182279586792,
"epoch": 2.0547945205479454,
"grad_norm": 11934.95995024634,
"learning_rate": 7.317534689775527e-07,
"logits": -1.329419732093811,
"logps": -86.18152618408203,
"loss": 73.1378,
"objective": 77.66006469726562,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.574999988079071,
"regularize": 0.1658337563276291,
"step": 725,
"wo_beta": 14.640992164611816
},
{
"dpo_loss": 0.5336278080940247,
"epoch": 2.0689655172413794,
"grad_norm": 13017.829141332633,
"learning_rate": 7.273486101616056e-07,
"logits": -1.4032765626907349,
"logps": -83.5689468383789,
"loss": 73.2891,
"objective": 73.26839447021484,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.49166667461395264,
"ranking_simple": 0.5874999761581421,
"regularize": 0.15773232281208038,
"step": 730,
"wo_beta": 15.76942253112793
},
{
"dpo_loss": 0.5291448831558228,
"epoch": 2.0831365139348135,
"grad_norm": 13426.194750558408,
"learning_rate": 7.229213941079639e-07,
"logits": -1.3250770568847656,
"logps": -82.74713897705078,
"loss": 59.2054,
"objective": 57.16627883911133,
"ranking_idealized": 0.5625,
"ranking_idealized_expo": 0.47083333134651184,
"ranking_simple": 0.550000011920929,
"regularize": 0.14032262563705444,
"step": 735,
"wo_beta": 17.056970596313477
},
{
"dpo_loss": 0.5465752482414246,
"epoch": 2.0973075106282475,
"grad_norm": 11906.26841829341,
"learning_rate": 7.184722561846797e-07,
"logits": -1.3804094791412354,
"logps": -82.77980041503906,
"loss": 62.2469,
"objective": 65.71126556396484,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.6083333492279053,
"regularize": 0.14654967188835144,
"step": 740,
"wo_beta": 15.721449851989746
},
{
"dpo_loss": 0.5360319018363953,
"epoch": 2.1114785073216815,
"grad_norm": 13337.057180758171,
"learning_rate": 7.14001633915581e-07,
"logits": -1.313341498374939,
"logps": -83.15229797363281,
"loss": 60.0244,
"objective": 60.3892822265625,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.625,
"regularize": 0.13975684344768524,
"step": 745,
"wo_beta": 15.697921752929688
},
{
"dpo_loss": 0.5399072170257568,
"epoch": 2.1256495040151155,
"grad_norm": 13331.418550163386,
"learning_rate": 7.095099669372443e-07,
"logits": -1.3453633785247803,
"logps": -82.3453369140625,
"loss": 65.1575,
"objective": 60.51906967163086,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.5666666626930237,
"regularize": 0.1316269487142563,
"step": 750,
"wo_beta": 15.831055641174316
},
{
"epoch": 2.1256495040151155,
"eval_dpo_loss": 0.6806153059005737,
"eval_logits": -1.3253074884414673,
"eval_logps": -90.24537658691406,
"eval_loss": 188.15711975097656,
"eval_objective": 184.20758056640625,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5320910811424255,
"eval_regularize": 0.4179456830024719,
"eval_runtime": 478.2913,
"eval_samples_per_second": 12.106,
"eval_steps_per_second": 1.01,
"eval_wo_beta": 15.917864799499512,
"step": 750
},
{
"dpo_loss": 0.5414224863052368,
"epoch": 2.13982050070855,
"grad_norm": 14942.893679399409,
"learning_rate": 7.049976969557623e-07,
"logits": -1.3125241994857788,
"logps": -85.55477905273438,
"loss": 70.5458,
"objective": 72.25684356689453,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5874999761581421,
"regularize": 0.15286041796207428,
"step": 755,
"wo_beta": 16.596240997314453
},
{
"dpo_loss": 0.5502544641494751,
"epoch": 2.153991497401984,
"grad_norm": 14884.220119069658,
"learning_rate": 7.004652677033068e-07,
"logits": -1.2573704719543457,
"logps": -81.78999328613281,
"loss": 66.5347,
"objective": 56.669010162353516,
"ranking_idealized": 0.5958333611488342,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5833333134651184,
"regularize": 0.1313803344964981,
"step": 760,
"wo_beta": 14.706622123718262
},
{
"dpo_loss": 0.537317156791687,
"epoch": 2.168162494095418,
"grad_norm": 12849.6702201699,
"learning_rate": 6.959131248944922e-07,
"logits": -1.3043426275253296,
"logps": -82.6404037475586,
"loss": 60.5154,
"objective": 57.57880401611328,
"ranking_idealized": 0.6208333373069763,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.6041666865348816,
"regularize": 0.13467958569526672,
"step": 765,
"wo_beta": 16.29267120361328
},
{
"dpo_loss": 0.5396389365196228,
"epoch": 2.182333490788852,
"grad_norm": 13686.597971217428,
"learning_rate": 6.913417161825449e-07,
"logits": -1.3148149251937866,
"logps": -82.22266387939453,
"loss": 66.4186,
"objective": 71.55656433105469,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.49166667461395264,
"ranking_simple": 0.574999988079071,
"regularize": 0.15875324606895447,
"step": 770,
"wo_beta": 14.279667854309082
},
{
"dpo_loss": 0.5356777906417847,
"epoch": 2.196504487482286,
"grad_norm": 13109.133649943296,
"learning_rate": 6.867514911152806e-07,
"logits": -1.279820203781128,
"logps": -82.98641204833984,
"loss": 62.1208,
"objective": 65.08477020263672,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.5791666507720947,
"ranking_simple": 0.625,
"regularize": 0.14647550880908966,
"step": 775,
"wo_beta": 17.69573211669922
},
{
"dpo_loss": 0.5467700362205505,
"epoch": 2.21067548417572,
"grad_norm": 13977.878251046886,
"learning_rate": 6.821429010908971e-07,
"logits": -1.2058584690093994,
"logps": -82.53013610839844,
"loss": 63.1931,
"objective": 62.46464538574219,
"ranking_idealized": 0.512499988079071,
"ranking_idealized_expo": 0.44583332538604736,
"ranking_simple": 0.5041666626930237,
"regularize": 0.13306237757205963,
"step": 780,
"wo_beta": 15.349116325378418
},
{
"dpo_loss": 0.5252477526664734,
"epoch": 2.2248464808691546,
"grad_norm": 13522.027705329157,
"learning_rate": 6.775163993135842e-07,
"logits": -1.20766019821167,
"logps": -81.99567413330078,
"loss": 66.8492,
"objective": 59.73252487182617,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.5708333253860474,
"regularize": 0.13541431725025177,
"step": 785,
"wo_beta": 15.272583961486816
},
{
"dpo_loss": 0.5247431993484497,
"epoch": 2.2390174775625886,
"grad_norm": 12425.328833284808,
"learning_rate": 6.728724407489553e-07,
"logits": -1.205735445022583,
"logps": -82.88821411132812,
"loss": 66.8893,
"objective": 59.76982498168945,
"ranking_idealized": 0.637499988079071,
"ranking_idealized_expo": 0.5625,
"ranking_simple": 0.637499988079071,
"regularize": 0.14265631139278412,
"step": 790,
"wo_beta": 15.509627342224121
},
{
"dpo_loss": 0.5296502113342285,
"epoch": 2.2531884742560226,
"grad_norm": 11978.127680414538,
"learning_rate": 6.682114820793074e-07,
"logits": -1.2859066724777222,
"logps": -84.08002471923828,
"loss": 63.7577,
"objective": 59.34935760498047,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.612500011920929,
"regularize": 0.14149998128414154,
"step": 795,
"wo_beta": 14.346338272094727
},
{
"dpo_loss": 0.5199058651924133,
"epoch": 2.2673594709494567,
"grad_norm": 12421.855115848897,
"learning_rate": 6.635339816587108e-07,
"logits": -1.3125,
"logps": -83.2691879272461,
"loss": 66.0375,
"objective": 66.00747680664062,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.6041666865348816,
"regularize": 0.14774902164936066,
"step": 800,
"wo_beta": 14.81782341003418
},
{
"epoch": 2.2673594709494567,
"eval_dpo_loss": 0.6780735850334167,
"eval_logits": -1.3137409687042236,
"eval_logps": -88.58743286132812,
"eval_loss": 186.72210693359375,
"eval_objective": 181.93551635742188,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.533643901348114,
"eval_regularize": 0.4137285053730011,
"eval_runtime": 481.1373,
"eval_samples_per_second": 12.034,
"eval_steps_per_second": 1.004,
"eval_wo_beta": 15.987866401672363,
"step": 800
},
{
"dpo_loss": 0.5357646942138672,
"epoch": 2.2815304676428907,
"grad_norm": 12458.299460461743,
"learning_rate": 6.588403994679354e-07,
"logits": -1.319643497467041,
"logps": -81.99591827392578,
"loss": 60.8943,
"objective": 64.13407135009766,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.574999988079071,
"regularize": 0.13611546158790588,
"step": 805,
"wo_beta": 16.0935001373291
},
{
"dpo_loss": 0.5283416509628296,
"epoch": 2.295701464336325,
"grad_norm": 12943.281420533918,
"learning_rate": 6.541311970692162e-07,
"logits": -1.4129080772399902,
"logps": -81.64440155029297,
"loss": 61.2974,
"objective": 61.06173324584961,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.6499999761581421,
"regularize": 0.13648824393749237,
"step": 810,
"wo_beta": 14.963865280151367
},
{
"dpo_loss": 0.5351440906524658,
"epoch": 2.309872461029759,
"grad_norm": 12894.991014128658,
"learning_rate": 6.494068375608646e-07,
"logits": -1.352980136871338,
"logps": -83.23399353027344,
"loss": 60.8069,
"objective": 63.9875602722168,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.47083333134651184,
"ranking_simple": 0.5791666507720947,
"regularize": 0.13894489407539368,
"step": 815,
"wo_beta": 15.326094627380371
},
{
"dpo_loss": 0.5325611233711243,
"epoch": 2.324043457723193,
"grad_norm": 12345.798302601574,
"learning_rate": 6.446677855317264e-07,
"logits": -1.2916339635849,
"logps": -81.8837661743164,
"loss": 59.9305,
"objective": 55.95283126831055,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5708333253860474,
"regularize": 0.12038219720125198,
"step": 820,
"wo_beta": 15.182144165039062
},
{
"dpo_loss": 0.5271125435829163,
"epoch": 2.3382144544166272,
"grad_norm": 12783.217599288302,
"learning_rate": 6.39914507015496e-07,
"logits": -1.3013333082199097,
"logps": -81.13337707519531,
"loss": 58.233,
"objective": 62.38441467285156,
"ranking_idealized": 0.625,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.612500011920929,
"regularize": 0.14106927812099457,
"step": 825,
"wo_beta": 16.586782455444336
},
{
"dpo_loss": 0.5309893488883972,
"epoch": 2.3523854511100613,
"grad_norm": 14368.93982814313,
"learning_rate": 6.351474694448864e-07,
"logits": -1.2905962467193604,
"logps": -83.69612121582031,
"loss": 59.3517,
"objective": 62.03671646118164,
"ranking_idealized": 0.6416666507720947,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.6416666507720947,
"regularize": 0.13450145721435547,
"step": 830,
"wo_beta": 16.384456634521484
},
{
"dpo_loss": 0.5386961102485657,
"epoch": 2.3665564478034957,
"grad_norm": 12278.034874198123,
"learning_rate": 6.303671416056621e-07,
"logits": -1.2532858848571777,
"logps": -83.63367462158203,
"loss": 63.5605,
"objective": 61.1205940246582,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.6041666865348816,
"regularize": 0.1340387463569641,
"step": 835,
"wo_beta": 15.07408618927002
},
{
"dpo_loss": 0.5518457293510437,
"epoch": 2.3807274444969297,
"grad_norm": 12325.077561512098,
"learning_rate": 6.255739935905395e-07,
"logits": -1.222998023033142,
"logps": -83.31403350830078,
"loss": 56.4779,
"objective": 54.8234977722168,
"ranking_idealized": 0.5249999761581421,
"ranking_idealized_expo": 0.4749999940395355,
"ranking_simple": 0.5041666626930237,
"regularize": 0.12345383316278458,
"step": 840,
"wo_beta": 15.817675590515137
},
{
"dpo_loss": 0.5455399751663208,
"epoch": 2.3948984411903638,
"grad_norm": 14534.352470484577,
"learning_rate": 6.207684967529592e-07,
"logits": -1.2789770364761353,
"logps": -84.17676544189453,
"loss": 61.3501,
"objective": 56.92399978637695,
"ranking_idealized": 0.5958333611488342,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5958333611488342,
"regularize": 0.12513183057308197,
"step": 845,
"wo_beta": 16.274921417236328
},
{
"dpo_loss": 0.5384249091148376,
"epoch": 2.409069437883798,
"grad_norm": 11975.067630184618,
"learning_rate": 6.159511236607315e-07,
"logits": -1.3067547082901,
"logps": -81.92616271972656,
"loss": 55.6773,
"objective": 53.89519500732422,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.49166667461395264,
"ranking_simple": 0.5541666746139526,
"regularize": 0.12293127924203873,
"step": 850,
"wo_beta": 15.953167915344238
},
{
"epoch": 2.409069437883798,
"eval_dpo_loss": 0.680902361869812,
"eval_logits": -1.311160922050476,
"eval_logps": -88.26885986328125,
"eval_loss": 189.5397491455078,
"eval_objective": 185.2095947265625,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5300207138061523,
"eval_regularize": 0.42031434178352356,
"eval_runtime": 490.419,
"eval_samples_per_second": 11.806,
"eval_steps_per_second": 0.985,
"eval_wo_beta": 15.931052207946777,
"step": 850
},
{
"dpo_loss": 0.5426651239395142,
"epoch": 2.423240434577232,
"grad_norm": 13056.278516188751,
"learning_rate": 6.111223480495671e-07,
"logits": -1.3305928707122803,
"logps": -80.8778076171875,
"loss": 60.7771,
"objective": 57.707275390625,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5916666388511658,
"regularize": 0.1298539638519287,
"step": 855,
"wo_beta": 15.123750686645508
},
{
"dpo_loss": 0.537179172039032,
"epoch": 2.4374114312706663,
"grad_norm": 13276.37666715339,
"learning_rate": 6.062826447764883e-07,
"logits": -1.2815066576004028,
"logps": -82.55672454833984,
"loss": 55.8238,
"objective": 53.87760925292969,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.6000000238418579,
"regularize": 0.12577569484710693,
"step": 860,
"wo_beta": 16.197458267211914
},
{
"dpo_loss": 0.53245609998703,
"epoch": 2.4515824279641003,
"grad_norm": 13115.296464572477,
"learning_rate": 6.014324897731333e-07,
"logits": -1.305693507194519,
"logps": -81.65880584716797,
"loss": 57.2162,
"objective": 57.622314453125,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5708333253860474,
"regularize": 0.12618619203567505,
"step": 865,
"wo_beta": 16.600849151611328
},
{
"dpo_loss": 0.53475421667099,
"epoch": 2.4657534246575343,
"grad_norm": 13057.72282671728,
"learning_rate": 5.965723599989528e-07,
"logits": -1.347506046295166,
"logps": -82.02439880371094,
"loss": 59.1596,
"objective": 58.05669403076172,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5916666388511658,
"regularize": 0.12966732680797577,
"step": 870,
"wo_beta": 15.612308502197266
},
{
"dpo_loss": 0.5284960865974426,
"epoch": 2.4799244213509684,
"grad_norm": 13136.725552830958,
"learning_rate": 5.917027333943072e-07,
"logits": -1.2931278944015503,
"logps": -82.28563690185547,
"loss": 52.9771,
"objective": 52.34040069580078,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5708333253860474,
"ranking_simple": 0.6041666865348816,
"regularize": 0.12045804411172867,
"step": 875,
"wo_beta": 17.299848556518555
},
{
"dpo_loss": 0.5465295910835266,
"epoch": 2.4940954180444024,
"grad_norm": 12177.118012490373,
"learning_rate": 5.868240888334652e-07,
"logits": -1.206485390663147,
"logps": -82.52658081054688,
"loss": 59.4905,
"objective": 58.06962203979492,
"ranking_idealized": 0.5625,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.574999988079071,
"regularize": 0.13045351207256317,
"step": 880,
"wo_beta": 17.634618759155273
},
{
"dpo_loss": 0.5378908514976501,
"epoch": 2.5082664147378364,
"grad_norm": 12653.821371026783,
"learning_rate": 5.819369060775124e-07,
"logits": -1.3703595399856567,
"logps": -81.24169921875,
"loss": 54.0377,
"objective": 55.50392150878906,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.6000000238418579,
"regularize": 0.1243302971124649,
"step": 885,
"wo_beta": 16.991498947143555
},
{
"dpo_loss": 0.521662712097168,
"epoch": 2.5224374114312704,
"grad_norm": 13224.96582542829,
"learning_rate": 5.770416657271728e-07,
"logits": -1.2803348302841187,
"logps": -80.2920913696289,
"loss": 54.9019,
"objective": 55.66249084472656,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5833333134651184,
"regularize": 0.12915806472301483,
"step": 890,
"wo_beta": 14.390849113464355
},
{
"dpo_loss": 0.538814902305603,
"epoch": 2.536608408124705,
"grad_norm": 13679.562551953088,
"learning_rate": 5.721388491755455e-07,
"logits": -1.2745685577392578,
"logps": -82.53682708740234,
"loss": 55.8587,
"objective": 53.00823211669922,
"ranking_idealized": 0.5458333492279053,
"ranking_idealized_expo": 0.49166667461395264,
"ranking_simple": 0.5458333492279053,
"regularize": 0.12104254215955734,
"step": 895,
"wo_beta": 16.952863693237305
},
{
"dpo_loss": 0.5534180998802185,
"epoch": 2.550779404818139,
"grad_norm": 12973.031921366075,
"learning_rate": 5.67228938560766e-07,
"logits": -1.2396929264068604,
"logps": -81.03583526611328,
"loss": 54.3682,
"objective": 53.294551849365234,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.6000000238418579,
"regularize": 0.12195997685194016,
"step": 900,
"wo_beta": 15.316643714904785
},
{
"epoch": 2.550779404818139,
"eval_dpo_loss": 0.6793311238288879,
"eval_logits": -1.3258877992630005,
"eval_logps": -88.36107635498047,
"eval_loss": 188.23812866210938,
"eval_objective": 184.16783142089844,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5310559272766113,
"eval_regularize": 0.41672980785369873,
"eval_runtime": 486.377,
"eval_samples_per_second": 11.904,
"eval_steps_per_second": 0.993,
"eval_wo_beta": 15.968037605285645,
"step": 900
},
{
"dpo_loss": 0.5381408929824829,
"epoch": 2.564950401511573,
"grad_norm": 11810.259224351357,
"learning_rate": 5.623124167185929e-07,
"logits": -1.3189753293991089,
"logps": -81.03609466552734,
"loss": 51.9527,
"objective": 49.01388931274414,
"ranking_idealized": 0.5708333253860474,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5541666746139526,
"regularize": 0.11513598263263702,
"step": 905,
"wo_beta": 15.316691398620605
},
{
"dpo_loss": 0.5291991829872131,
"epoch": 2.579121398205007,
"grad_norm": 12343.801160156707,
"learning_rate": 5.573897671349268e-07,
"logits": -1.2955931425094604,
"logps": -83.91735076904297,
"loss": 55.8812,
"objective": 63.70806121826172,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5958333611488342,
"regularize": 0.13904932141304016,
"step": 910,
"wo_beta": 16.40995216369629
},
{
"dpo_loss": 0.5379226803779602,
"epoch": 2.593292394898441,
"grad_norm": 12490.177742860027,
"learning_rate": 5.524614738982637e-07,
"logits": -1.4045764207839966,
"logps": -82.5849838256836,
"loss": 55.5769,
"objective": 54.98591613769531,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.612500011920929,
"regularize": 0.12587417662143707,
"step": 915,
"wo_beta": 15.309656143188477
},
{
"dpo_loss": 0.5216780304908752,
"epoch": 2.6074633915918755,
"grad_norm": 12017.347028460124,
"learning_rate": 5.475280216520912e-07,
"logits": -1.2480995655059814,
"logps": -82.11782836914062,
"loss": 56.8294,
"objective": 57.75908660888672,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.6041666865348816,
"regularize": 0.12090341001749039,
"step": 920,
"wo_beta": 16.191049575805664
},
{
"dpo_loss": 0.5298858284950256,
"epoch": 2.6216343882853095,
"grad_norm": 14009.68291839978,
"learning_rate": 5.42589895547229e-07,
"logits": -1.280160665512085,
"logps": -82.20765686035156,
"loss": 53.1774,
"objective": 55.67765426635742,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.4749999940395355,
"ranking_simple": 0.5625,
"regularize": 0.12424833327531815,
"step": 925,
"wo_beta": 16.476573944091797
},
{
"dpo_loss": 0.5387442111968994,
"epoch": 2.6358053849787435,
"grad_norm": 12640.001047074344,
"learning_rate": 5.376475811941191e-07,
"logits": -1.2655282020568848,
"logps": -82.08385467529297,
"loss": 52.6196,
"objective": 55.54609680175781,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.44583332538604736,
"ranking_simple": 0.5333333611488342,
"regularize": 0.12455514818429947,
"step": 930,
"wo_beta": 16.72053337097168
},
{
"dpo_loss": 0.5488451719284058,
"epoch": 2.6499763816721775,
"grad_norm": 12698.751364257567,
"learning_rate": 5.327015646150716e-07,
"logits": -1.2632043361663818,
"logps": -81.3023910522461,
"loss": 50.4175,
"objective": 51.81110763549805,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.574999988079071,
"ranking_simple": 0.6291666626930237,
"regularize": 0.1139976978302002,
"step": 935,
"wo_beta": 16.381933212280273
},
{
"dpo_loss": 0.518785834312439,
"epoch": 2.6641473783656116,
"grad_norm": 14057.06029309221,
"learning_rate": 5.277523321964701e-07,
"logits": -1.3097693920135498,
"logps": -84.61360931396484,
"loss": 52.2129,
"objective": 56.00838088989258,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.6041666865348816,
"regularize": 0.11512833088636398,
"step": 940,
"wo_beta": 17.616283416748047
},
{
"dpo_loss": 0.5271897912025452,
"epoch": 2.678318375059046,
"grad_norm": 13084.001689574132,
"learning_rate": 5.228003706409409e-07,
"logits": -1.3481143712997437,
"logps": -83.27128601074219,
"loss": 49.6737,
"objective": 52.79602813720703,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.5541666746139526,
"regularize": 0.11426286399364471,
"step": 945,
"wo_beta": 16.029043197631836
},
{
"dpo_loss": 0.5474939942359924,
"epoch": 2.69248937175248,
"grad_norm": 13821.932425093552,
"learning_rate": 5.178461669194903e-07,
"logits": -1.2337779998779297,
"logps": -83.05430603027344,
"loss": 50.3775,
"objective": 45.27042007446289,
"ranking_idealized": 0.637499988079071,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.625,
"regularize": 0.10929083079099655,
"step": 950,
"wo_beta": 15.533432006835938
},
{
"epoch": 2.69248937175248,
"eval_dpo_loss": 0.6802442669868469,
"eval_logits": -1.3090835809707642,
"eval_logps": -88.80048370361328,
"eval_loss": 189.54185485839844,
"eval_objective": 185.00436401367188,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5331262946128845,
"eval_regularize": 0.418261855840683,
"eval_runtime": 491.779,
"eval_samples_per_second": 11.774,
"eval_steps_per_second": 0.982,
"eval_wo_beta": 15.998626708984375,
"step": 950
},
{
"dpo_loss": 0.5236973166465759,
"epoch": 2.706660368445914,
"grad_norm": 13266.227245283348,
"learning_rate": 5.128902082236175e-07,
"logits": -1.319283127784729,
"logps": -82.27372741699219,
"loss": 46.7135,
"objective": 43.35396194458008,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.4749999940395355,
"ranking_simple": 0.574999988079071,
"regularize": 0.10942530632019043,
"step": 955,
"wo_beta": 14.039530754089355
},
{
"dpo_loss": 0.5411895513534546,
"epoch": 2.720831365139348,
"grad_norm": 13668.800292035428,
"learning_rate": 5.07932981917404e-07,
"logits": -1.2875874042510986,
"logps": -81.88396453857422,
"loss": 53.1799,
"objective": 54.5617561340332,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.49166667461395264,
"ranking_simple": 0.5791666507720947,
"regularize": 0.11944962292909622,
"step": 960,
"wo_beta": 16.39274787902832
},
{
"dpo_loss": 0.5236133933067322,
"epoch": 2.735002361832782,
"grad_norm": 12761.114664799663,
"learning_rate": 5.029749754895868e-07,
"logits": -1.306726098060608,
"logps": -82.27013397216797,
"loss": 49.2644,
"objective": 47.3409309387207,
"ranking_idealized": 0.6416666507720947,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.6291666626930237,
"regularize": 0.10921643674373627,
"step": 965,
"wo_beta": 15.65440559387207
},
{
"dpo_loss": 0.5498754382133484,
"epoch": 2.7491733585262166,
"grad_norm": 12565.339155193906,
"learning_rate": 4.980166765056193e-07,
"logits": -1.3193691968917847,
"logps": -83.46347045898438,
"loss": 52.7234,
"objective": 56.7745246887207,
"ranking_idealized": 0.6208333373069763,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.6083333492279053,
"regularize": 0.13472451269626617,
"step": 970,
"wo_beta": 15.647109031677246
},
{
"dpo_loss": 0.5260103344917297,
"epoch": 2.7633443552196506,
"grad_norm": 13363.677196616523,
"learning_rate": 4.930585725597247e-07,
"logits": -1.240022897720337,
"logps": -81.51500701904297,
"loss": 50.997,
"objective": 53.95423889160156,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.625,
"regularize": 0.12574762105941772,
"step": 975,
"wo_beta": 16.371328353881836
},
{
"dpo_loss": 0.5399420261383057,
"epoch": 2.7775153519130846,
"grad_norm": 13969.44472204385,
"learning_rate": 4.881011512269463e-07,
"logits": -1.35780930519104,
"logps": -81.2794189453125,
"loss": 51.6737,
"objective": 55.6290283203125,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5666666626930237,
"regularize": 0.12999171018600464,
"step": 980,
"wo_beta": 14.558424949645996
},
{
"dpo_loss": 0.5281099677085876,
"epoch": 2.7916863486065187,
"grad_norm": 11586.92970672364,
"learning_rate": 4.831449000151997e-07,
"logits": -1.205262303352356,
"logps": -79.56948852539062,
"loss": 49.5107,
"objective": 46.61149597167969,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.5791666507720947,
"regularize": 0.10813379287719727,
"step": 985,
"wo_beta": 14.642525672912598
},
{
"dpo_loss": 0.5295001268386841,
"epoch": 2.8058573452999527,
"grad_norm": 12278.903797254565,
"learning_rate": 4.78190306317332e-07,
"logits": -1.268909215927124,
"logps": -82.44329071044922,
"loss": 47.3581,
"objective": 51.39979553222656,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.4749999940395355,
"ranking_simple": 0.5708333253860474,
"regularize": 0.11149868369102478,
"step": 990,
"wo_beta": 16.07427406311035
},
{
"dpo_loss": 0.5399483442306519,
"epoch": 2.820028341993387,
"grad_norm": 12982.312529844054,
"learning_rate": 4.732378573631924e-07,
"logits": -1.3312995433807373,
"logps": -80.66969299316406,
"loss": 49.758,
"objective": 55.4227409362793,
"ranking_idealized": 0.6208333373069763,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.6166666746139526,
"regularize": 0.12711945176124573,
"step": 995,
"wo_beta": 16.746198654174805
},
{
"dpo_loss": 0.526489794254303,
"epoch": 2.8341993386868207,
"grad_norm": 12173.86125870911,
"learning_rate": 4.682880401717177e-07,
"logits": -1.271032691001892,
"logps": -79.56470489501953,
"loss": 45.9449,
"objective": 40.13682174682617,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.6166666746139526,
"regularize": 0.09338556975126266,
"step": 1000,
"wo_beta": 15.067657470703125
},
{
"epoch": 2.8341993386868207,
"eval_dpo_loss": 0.6791692972183228,
"eval_logits": -1.2989623546600342,
"eval_logps": -87.81481170654297,
"eval_loss": 187.70785522460938,
"eval_objective": 183.56761169433594,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5300207138061523,
"eval_regularize": 0.4161270260810852,
"eval_runtime": 491.2083,
"eval_samples_per_second": 11.787,
"eval_steps_per_second": 0.983,
"eval_wo_beta": 15.995977401733398,
"step": 1000
},
{
"dpo_loss": 0.5403110384941101,
"epoch": 2.848370335380255,
"grad_norm": 13425.378037887134,
"learning_rate": 4.633413415030401e-07,
"logits": -1.2654575109481812,
"logps": -80.49606323242188,
"loss": 48.7319,
"objective": 47.16264724731445,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.6000000238418579,
"regularize": 0.10651734471321106,
"step": 1005,
"wo_beta": 16.28557586669922
},
{
"dpo_loss": 0.5306838750839233,
"epoch": 2.862541332073689,
"grad_norm": 13143.964606052063,
"learning_rate": 4.5839824781061886e-07,
"logits": -1.32563316822052,
"logps": -81.29505920410156,
"loss": 51.8292,
"objective": 49.8996467590332,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.6083333492279053,
"regularize": 0.11315880715847015,
"step": 1010,
"wo_beta": 15.957425117492676
},
{
"dpo_loss": 0.5331242084503174,
"epoch": 2.8767123287671232,
"grad_norm": 12600.166168740529,
"learning_rate": 4.53459245193404e-07,
"logits": -1.2467234134674072,
"logps": -80.21656799316406,
"loss": 44.7609,
"objective": 42.55329895019531,
"ranking_idealized": 0.6541666388511658,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.6333333253860474,
"regularize": 0.09937479346990585,
"step": 1015,
"wo_beta": 15.586889266967773
},
{
"dpo_loss": 0.5346752405166626,
"epoch": 2.8908833254605573,
"grad_norm": 14111.243992297606,
"learning_rate": 4.4852481934803277e-07,
"logits": -1.2140835523605347,
"logps": -82.13688659667969,
"loss": 46.0337,
"objective": 43.36848831176758,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5708333253860474,
"regularize": 0.09756989777088165,
"step": 1020,
"wo_beta": 18.553333282470703
},
{
"dpo_loss": 0.5420379042625427,
"epoch": 2.9050543221539913,
"grad_norm": 12276.868793163067,
"learning_rate": 4.435954555210676e-07,
"logits": -1.3084660768508911,
"logps": -81.93505096435547,
"loss": 46.0381,
"objective": 48.77103042602539,
"ranking_idealized": 0.6208333373069763,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.612500011920929,
"regularize": 0.10658075660467148,
"step": 1025,
"wo_beta": 15.018412590026855
},
{
"dpo_loss": 0.5342170000076294,
"epoch": 2.9192253188474258,
"grad_norm": 12677.814826562366,
"learning_rate": 4.3867163846127674e-07,
"logits": -1.3350425958633423,
"logps": -81.84678649902344,
"loss": 47.2693,
"objective": 41.97852325439453,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.5458333492279053,
"regularize": 0.09485388547182083,
"step": 1030,
"wo_beta": 16.526702880859375
},
{
"dpo_loss": 0.5289677977561951,
"epoch": 2.9333963155408598,
"grad_norm": 13399.69328236257,
"learning_rate": 4.3375385237196507e-07,
"logits": -1.3010871410369873,
"logps": -82.80349731445312,
"loss": 43.5011,
"objective": 41.88113784790039,
"ranking_idealized": 0.625,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.6083333492279053,
"regularize": 0.09509758651256561,
"step": 1035,
"wo_beta": 14.980511665344238
},
{
"dpo_loss": 0.5463218688964844,
"epoch": 2.947567312234294,
"grad_norm": 12072.270375502065,
"learning_rate": 4.2884258086335745e-07,
"logits": -1.2975058555603027,
"logps": -82.66610717773438,
"loss": 45.0537,
"objective": 48.81401062011719,
"ranking_idealized": 0.5708333253860474,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.5583333373069763,
"regularize": 0.10974690318107605,
"step": 1040,
"wo_beta": 16.447132110595703
},
{
"dpo_loss": 0.5381548404693604,
"epoch": 2.961738308927728,
"grad_norm": 13887.433179664138,
"learning_rate": 4.2393830690504165e-07,
"logits": -1.2503575086593628,
"logps": -84.04967498779297,
"loss": 44.8665,
"objective": 42.995948791503906,
"ranking_idealized": 0.6208333373069763,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.6208333373069763,
"regularize": 0.09885497391223907,
"step": 1045,
"wo_beta": 18.053199768066406
},
{
"dpo_loss": 0.5348830819129944,
"epoch": 2.975909305621162,
"grad_norm": 13502.021630049758,
"learning_rate": 4.1904151277847305e-07,
"logits": -1.2968212366104126,
"logps": -79.87500762939453,
"loss": 49.0003,
"objective": 50.04111862182617,
"ranking_idealized": 0.6541666388511658,
"ranking_idealized_expo": 0.5541666746139526,
"ranking_simple": 0.6458333134651184,
"regularize": 0.11395598948001862,
"step": 1050,
"wo_beta": 15.20615291595459
},
{
"epoch": 2.975909305621162,
"eval_dpo_loss": 0.6791855692863464,
"eval_logits": -1.2778165340423584,
"eval_logps": -88.30037689208984,
"eval_loss": 188.00396728515625,
"eval_objective": 184.00155639648438,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5341615080833435,
"eval_regularize": 0.41730284690856934,
"eval_runtime": 486.4753,
"eval_samples_per_second": 11.902,
"eval_steps_per_second": 0.993,
"eval_wo_beta": 16.040319442749023,
"step": 1050
},
{
"dpo_loss": 0.5416039824485779,
"epoch": 2.9900803023145963,
"grad_norm": 13186.167879544177,
"learning_rate": 4.141526800295481e-07,
"logits": -1.2704575061798096,
"logps": -81.0667724609375,
"loss": 43.7316,
"objective": 46.92390441894531,
"ranking_idealized": 0.6083333492279053,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.6041666865348816,
"regularize": 0.11228723078966141,
"step": 1055,
"wo_beta": 15.320064544677734
},
{
"dpo_loss": 0.5175911784172058,
"epoch": 3.0042512990080303,
"grad_norm": 11979.18084085825,
"learning_rate": 4.092722894212487e-07,
"logits": -1.291445255279541,
"logps": -82.69534301757812,
"loss": 44.4026,
"objective": 47.78953552246094,
"ranking_idealized": 0.6083333492279053,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.612500011920929,
"regularize": 0.10625550150871277,
"step": 1060,
"wo_beta": 15.794866561889648
},
{
"dpo_loss": 0.523690938949585,
"epoch": 3.0184222957014644,
"grad_norm": 12600.45509733284,
"learning_rate": 4.0440082088636546e-07,
"logits": -1.3265612125396729,
"logps": -84.14775848388672,
"loss": 41.3718,
"objective": 38.99584197998047,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5958333611488342,
"regularize": 0.09401161223649979,
"step": 1065,
"wo_beta": 16.806358337402344
},
{
"dpo_loss": 0.5429927706718445,
"epoch": 3.0325932923948984,
"grad_norm": 13459.06076930384,
"learning_rate": 3.995387534803005e-07,
"logits": -1.2817329168319702,
"logps": -81.6548080444336,
"loss": 44.6891,
"objective": 43.239158630371094,
"ranking_idealized": 0.637499988079071,
"ranking_idealized_expo": 0.5791666507720947,
"ranking_simple": 0.625,
"regularize": 0.09600695967674255,
"step": 1070,
"wo_beta": 17.19818878173828
},
{
"dpo_loss": 0.5399213433265686,
"epoch": 3.0467642890883324,
"grad_norm": 12451.835928919867,
"learning_rate": 3.9468656533395934e-07,
"logits": -1.2840524911880493,
"logps": -81.64595031738281,
"loss": 38.4816,
"objective": 40.692039489746094,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5833333134651184,
"regularize": 0.09315841645002365,
"step": 1075,
"wo_beta": 15.41653060913086
},
{
"dpo_loss": 0.5243366360664368,
"epoch": 3.0609352857817664,
"grad_norm": 12956.687806008335,
"learning_rate": 3.8984473360672967e-07,
"logits": -1.3753383159637451,
"logps": -82.9805908203125,
"loss": 40.18,
"objective": 39.79288864135742,
"ranking_idealized": 0.6416666507720947,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.6291666626930237,
"regularize": 0.09019829332828522,
"step": 1080,
"wo_beta": 17.60961151123047
},
{
"dpo_loss": 0.5346547365188599,
"epoch": 3.075106282475201,
"grad_norm": 12876.9232360082,
"learning_rate": 3.850137344395598e-07,
"logits": -1.318056344985962,
"logps": -83.30501556396484,
"loss": 39.6664,
"objective": 41.40624237060547,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.49166667461395264,
"ranking_simple": 0.5458333492279053,
"regularize": 0.0875302404165268,
"step": 1085,
"wo_beta": 15.289043426513672
},
{
"dpo_loss": 0.5314586162567139,
"epoch": 3.089277279168635,
"grad_norm": 12423.675708081033,
"learning_rate": 3.801940429081345e-07,
"logits": -1.297440767288208,
"logps": -81.59999084472656,
"loss": 40.7964,
"objective": 42.56759262084961,
"ranking_idealized": 0.6083333492279053,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.6291666626930237,
"regularize": 0.09419377893209457,
"step": 1090,
"wo_beta": 15.919710159301758
},
{
"dpo_loss": 0.5284194946289062,
"epoch": 3.103448275862069,
"grad_norm": 12843.979452626416,
"learning_rate": 3.7538613297615706e-07,
"logits": -1.2590415477752686,
"logps": -83.42412567138672,
"loss": 40.9535,
"objective": 44.701377868652344,
"ranking_idealized": 0.625,
"ranking_idealized_expo": 0.5708333253860474,
"ranking_simple": 0.612500011920929,
"regularize": 0.10055555403232574,
"step": 1095,
"wo_beta": 16.360620498657227
},
{
"dpo_loss": 0.5163142681121826,
"epoch": 3.117619272555503,
"grad_norm": 11098.073660723994,
"learning_rate": 3.7059047744873955e-07,
"logits": -1.2521919012069702,
"logps": -82.35820770263672,
"loss": 40.2428,
"objective": 41.402366638183594,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5833333134651184,
"regularize": 0.08817121386528015,
"step": 1100,
"wo_beta": 16.056493759155273
},
{
"epoch": 3.117619272555503,
"eval_dpo_loss": 0.680143415927887,
"eval_logits": -1.2988417148590088,
"eval_logps": -88.64698028564453,
"eval_loss": 188.7165985107422,
"eval_objective": 184.38153076171875,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.532608687877655,
"eval_regularize": 0.41805195808410645,
"eval_runtime": 486.8996,
"eval_samples_per_second": 11.892,
"eval_steps_per_second": 0.992,
"eval_wo_beta": 15.998079299926758,
"step": 1100
},
{
"dpo_loss": 0.532370924949646,
"epoch": 3.131790269248937,
"grad_norm": 12884.072735206462,
"learning_rate": 3.658075479259087e-07,
"logits": -1.3051170110702515,
"logps": -82.9980239868164,
"loss": 43.5912,
"objective": 42.78650665283203,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5666666626930237,
"regularize": 0.09705787152051926,
"step": 1105,
"wo_beta": 17.55166244506836
},
{
"dpo_loss": 0.5135348439216614,
"epoch": 3.1459612659423715,
"grad_norm": 13710.402810117148,
"learning_rate": 3.6103781475622786e-07,
"logits": -1.2103074789047241,
"logps": -83.2777328491211,
"loss": 35.6812,
"objective": 35.80618667602539,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.6083333492279053,
"regularize": 0.07978586852550507,
"step": 1110,
"wo_beta": 16.995450973510742
},
{
"dpo_loss": 0.5229103565216064,
"epoch": 3.1601322626358055,
"grad_norm": 12411.913045675534,
"learning_rate": 3.562817469905442e-07,
"logits": -1.2619822025299072,
"logps": -82.1358642578125,
"loss": 38.5951,
"objective": 36.70951461791992,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.6083333492279053,
"regularize": 0.08537817001342773,
"step": 1115,
"wo_beta": 16.52168846130371
},
{
"dpo_loss": 0.5169024467468262,
"epoch": 3.1743032593292395,
"grad_norm": 12747.527049209308,
"learning_rate": 3.5153981233586274e-07,
"logits": -1.2052761316299438,
"logps": -80.89930725097656,
"loss": 35.9412,
"objective": 35.01757049560547,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.6083333492279053,
"regularize": 0.08051317185163498,
"step": 1120,
"wo_beta": 15.474043846130371
},
{
"dpo_loss": 0.5393829941749573,
"epoch": 3.1884742560226735,
"grad_norm": 13071.804290926188,
"learning_rate": 3.468124771093519e-07,
"logits": -1.263301134109497,
"logps": -83.0383529663086,
"loss": 37.8478,
"objective": 38.899776458740234,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.6000000238418579,
"regularize": 0.09170109778642654,
"step": 1125,
"wo_beta": 15.088132858276367
},
{
"dpo_loss": 0.5208443999290466,
"epoch": 3.2026452527161076,
"grad_norm": 13439.120791203995,
"learning_rate": 3.421002061924876e-07,
"logits": -1.298660159111023,
"logps": -82.7750473022461,
"loss": 34.6631,
"objective": 33.578922271728516,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5958333611488342,
"regularize": 0.07489284873008728,
"step": 1130,
"wo_beta": 15.427777290344238
},
{
"dpo_loss": 0.5245645642280579,
"epoch": 3.2168162494095416,
"grad_norm": 11480.67381753106,
"learning_rate": 3.374034629853356e-07,
"logits": -1.3043017387390137,
"logps": -80.89866638183594,
"loss": 35.7927,
"objective": 35.20330047607422,
"ranking_idealized": 0.5291666388511658,
"ranking_idealized_expo": 0.4625000059604645,
"ranking_simple": 0.5249999761581421,
"regularize": 0.08528413623571396,
"step": 1135,
"wo_beta": 16.220800399780273
},
{
"dpo_loss": 0.5402042269706726,
"epoch": 3.230987246102976,
"grad_norm": 12946.274800579084,
"learning_rate": 3.327227093609824e-07,
"logits": -1.1506885290145874,
"logps": -81.15502166748047,
"loss": 40.5475,
"objective": 40.8009033203125,
"ranking_idealized": 0.5958333611488342,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5833333134651184,
"regularize": 0.09735415130853653,
"step": 1140,
"wo_beta": 16.101863861083984
},
{
"dpo_loss": 0.5243603587150574,
"epoch": 3.24515824279641,
"grad_norm": 13000.005011572795,
"learning_rate": 3.2805840562011465e-07,
"logits": -1.2146347761154175,
"logps": -83.07351684570312,
"loss": 40.1207,
"objective": 42.64434814453125,
"ranking_idealized": 0.6541666388511658,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.6333333253860474,
"regularize": 0.09260058403015137,
"step": 1145,
"wo_beta": 15.928021430969238
},
{
"dpo_loss": 0.5314944386482239,
"epoch": 3.259329239489844,
"grad_norm": 12558.545529727347,
"learning_rate": 3.234110104457536e-07,
"logits": -1.352626085281372,
"logps": -80.92655181884766,
"loss": 37.177,
"objective": 37.67503356933594,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5874999761581421,
"regularize": 0.08233184367418289,
"step": 1150,
"wo_beta": 14.541799545288086
},
{
"epoch": 3.259329239489844,
"eval_dpo_loss": 0.6804221868515015,
"eval_logits": -1.2842507362365723,
"eval_logps": -87.92387390136719,
"eval_loss": 188.25633239746094,
"eval_objective": 184.33511352539062,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5357142686843872,
"eval_regularize": 0.4183206856250763,
"eval_runtime": 488.1945,
"eval_samples_per_second": 11.86,
"eval_steps_per_second": 0.989,
"eval_wo_beta": 16.0123348236084,
"step": 1150
},
{
"dpo_loss": 0.5527331829071045,
"epoch": 3.273500236183278,
"grad_norm": 12600.213804572502,
"learning_rate": 3.187809808581492e-07,
"logits": -1.225222110748291,
"logps": -80.92967987060547,
"loss": 37.9886,
"objective": 43.58564376831055,
"ranking_idealized": 0.5958333611488342,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5874999761581421,
"regularize": 0.09985193610191345,
"step": 1155,
"wo_beta": 16.76634407043457
},
{
"dpo_loss": 0.5320346355438232,
"epoch": 3.287671232876712,
"grad_norm": 13215.429208773,
"learning_rate": 3.141687721698363e-07,
"logits": -1.287786602973938,
"logps": -83.13336944580078,
"loss": 34.714,
"objective": 32.02961349487305,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.4749999940395355,
"ranking_simple": 0.5416666865348816,
"regularize": 0.07429231703281403,
"step": 1160,
"wo_beta": 17.868885040283203
},
{
"dpo_loss": 0.5377687215805054,
"epoch": 3.3018422295701466,
"grad_norm": 12734.199495358569,
"learning_rate": 3.095748379408603e-07,
"logits": -1.3172459602355957,
"logps": -80.96276092529297,
"loss": 34.2009,
"objective": 33.96812057495117,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.6166666746139526,
"regularize": 0.08176220953464508,
"step": 1165,
"wo_beta": 15.74937629699707
},
{
"dpo_loss": 0.5269596576690674,
"epoch": 3.3160132262635806,
"grad_norm": 14339.996000811438,
"learning_rate": 3.049996299341742e-07,
"logits": -1.267351746559143,
"logps": -82.11973571777344,
"loss": 34.9879,
"objective": 35.85028076171875,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.6041666865348816,
"regularize": 0.08146883547306061,
"step": 1170,
"wo_beta": 15.652009963989258
},
{
"dpo_loss": 0.531762957572937,
"epoch": 3.3301842229570147,
"grad_norm": 12543.440661095656,
"learning_rate": 3.004435980712129e-07,
"logits": -1.257896900177002,
"logps": -82.12284088134766,
"loss": 38.0949,
"objective": 35.93735122680664,
"ranking_idealized": 0.5708333253860474,
"ranking_idealized_expo": 0.4749999940395355,
"ranking_simple": 0.574999988079071,
"regularize": 0.08384241163730621,
"step": 1175,
"wo_beta": 13.72645378112793
},
{
"dpo_loss": 0.5355243682861328,
"epoch": 3.3443552196504487,
"grad_norm": 11718.716469797973,
"learning_rate": 2.959071903876486e-07,
"logits": -1.3486711978912354,
"logps": -82.8729248046875,
"loss": 35.7799,
"objective": 35.360801696777344,
"ranking_idealized": 0.5583333373069763,
"ranking_idealized_expo": 0.49166667461395264,
"ranking_simple": 0.5625,
"regularize": 0.07788892835378647,
"step": 1180,
"wo_beta": 16.274147033691406
},
{
"dpo_loss": 0.5254151225090027,
"epoch": 3.3585262163438827,
"grad_norm": 13437.960403836023,
"learning_rate": 2.913908529893304e-07,
"logits": -1.1963578462600708,
"logps": -83.22509002685547,
"loss": 33.4865,
"objective": 33.50373840332031,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.5833333134651184,
"regularize": 0.07612194865942001,
"step": 1185,
"wo_beta": 15.737934112548828
},
{
"dpo_loss": 0.5395456552505493,
"epoch": 3.372697213037317,
"grad_norm": 12206.27505785514,
"learning_rate": 2.86895030008416e-07,
"logits": -1.3092117309570312,
"logps": -81.93521118164062,
"loss": 33.053,
"objective": 29.232421875,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5874999761581421,
"regularize": 0.07262556999921799,
"step": 1190,
"wo_beta": 15.487491607666016
},
{
"dpo_loss": 0.5137616991996765,
"epoch": 3.386868209730751,
"grad_norm": 11921.58688181337,
"learning_rate": 2.824201635596951e-07,
"logits": -1.2198973894119263,
"logps": -82.35958099365234,
"loss": 29.3695,
"objective": 29.94867706298828,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.5666666626930237,
"regularize": 0.06865646690130234,
"step": 1195,
"wo_beta": 15.531022071838379
},
{
"dpo_loss": 0.5208079814910889,
"epoch": 3.4010392064241852,
"grad_norm": 12726.149489712327,
"learning_rate": 2.779666936971129e-07,
"logits": -1.3937805891036987,
"logps": -82.6730728149414,
"loss": 34.9809,
"objective": 31.1435489654541,
"ranking_idealized": 0.6208333373069763,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.6166666746139526,
"regularize": 0.07431173324584961,
"step": 1200,
"wo_beta": 18.369197845458984
},
{
"epoch": 3.4010392064241852,
"eval_dpo_loss": 0.6805519461631775,
"eval_logits": -1.289951205253601,
"eval_logps": -88.11286926269531,
"eval_loss": 189.17047119140625,
"eval_objective": 184.87181091308594,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.532608687877655,
"eval_regularize": 0.41934508085250854,
"eval_runtime": 498.5381,
"eval_samples_per_second": 11.614,
"eval_steps_per_second": 0.969,
"eval_wo_beta": 15.953052520751953,
"step": 1200
},
{
"dpo_loss": 0.5354551672935486,
"epoch": 3.4152102031176192,
"grad_norm": 12302.298902716244,
"learning_rate": 2.7353505837049583e-07,
"logits": -1.293818712234497,
"logps": -81.88545989990234,
"loss": 33.6714,
"objective": 31.525800704956055,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.6208333373069763,
"regularize": 0.07086090743541718,
"step": 1205,
"wo_beta": 15.15488052368164
},
{
"dpo_loss": 0.5308886170387268,
"epoch": 3.4293811998110533,
"grad_norm": 11960.890598119064,
"learning_rate": 2.6912569338248315e-07,
"logits": -1.300658941268921,
"logps": -83.05274200439453,
"loss": 36.2356,
"objective": 35.77425003051758,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5791666507720947,
"regularize": 0.08412022143602371,
"step": 1210,
"wo_beta": 16.733659744262695
},
{
"dpo_loss": 0.528823733329773,
"epoch": 3.4435521965044873,
"grad_norm": 13078.935439317174,
"learning_rate": 2.64739032345671e-07,
"logits": -1.3109962940216064,
"logps": -84.07682037353516,
"loss": 35.0362,
"objective": 32.51463317871094,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5666666626930237,
"regularize": 0.0771927461028099,
"step": 1215,
"wo_beta": 15.212308883666992
},
{
"dpo_loss": 0.540026843547821,
"epoch": 3.4577231931979218,
"grad_norm": 12256.162682293258,
"learning_rate": 2.603755066399718e-07,
"logits": -1.149971842765808,
"logps": -82.9686508178711,
"loss": 33.1832,
"objective": 32.34642028808594,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5833333134651184,
"regularize": 0.06764715164899826,
"step": 1220,
"wo_beta": 16.678075790405273
},
{
"dpo_loss": 0.524185061454773,
"epoch": 3.471894189891356,
"grad_norm": 12930.685272364457,
"learning_rate": 2.560355453701919e-07,
"logits": -1.302108645439148,
"logps": -82.00885772705078,
"loss": 33.7294,
"objective": 32.768775939941406,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.5541666746139526,
"regularize": 0.0753529891371727,
"step": 1225,
"wo_beta": 16.024269104003906
},
{
"dpo_loss": 0.5251755118370056,
"epoch": 3.48606518658479,
"grad_norm": 12434.433063668528,
"learning_rate": 2.517195753238345e-07,
"logits": -1.325141191482544,
"logps": -82.18378448486328,
"loss": 35.229,
"objective": 33.25638961791992,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.625,
"regularize": 0.0771695226430893,
"step": 1230,
"wo_beta": 16.292001724243164
},
{
"dpo_loss": 0.5132806897163391,
"epoch": 3.500236183278224,
"grad_norm": 13558.533453277203,
"learning_rate": 2.474280209291299e-07,
"logits": -1.245792031288147,
"logps": -81.74018096923828,
"loss": 33.2282,
"objective": 33.390872955322266,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5874999761581421,
"regularize": 0.07453177124261856,
"step": 1235,
"wo_beta": 16.363548278808594
},
{
"dpo_loss": 0.5296925902366638,
"epoch": 3.514407179971658,
"grad_norm": 12949.63094083325,
"learning_rate": 2.4316130421329696e-07,
"logits": -1.238582968711853,
"logps": -82.47282409667969,
"loss": 34.0652,
"objective": 31.30968475341797,
"ranking_idealized": 0.5708333253860474,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5666666626930237,
"regularize": 0.06809426844120026,
"step": 1240,
"wo_beta": 15.522791862487793
},
{
"dpo_loss": 0.5323511362075806,
"epoch": 3.528578176665092,
"grad_norm": 13527.106344889547,
"learning_rate": 2.389198447610418e-07,
"logits": -1.3098766803741455,
"logps": -83.17538452148438,
"loss": 30.2807,
"objective": 31.539880752563477,
"ranking_idealized": 0.6416666507720947,
"ranking_idealized_expo": 0.5541666746139526,
"ranking_simple": 0.625,
"regularize": 0.07134827226400375,
"step": 1245,
"wo_beta": 15.821925163269043
},
{
"dpo_loss": 0.5260218977928162,
"epoch": 3.5427491733585263,
"grad_norm": 13239.929991928584,
"learning_rate": 2.3470405967329604e-07,
"logits": -1.2133029699325562,
"logps": -81.8626480102539,
"loss": 34.073,
"objective": 34.22465515136719,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.625,
"regularize": 0.08072555810213089,
"step": 1250,
"wo_beta": 15.293652534484863
},
{
"epoch": 3.5427491733585263,
"eval_dpo_loss": 0.6802147626876831,
"eval_logits": -1.289227843284607,
"eval_logps": -88.56167602539062,
"eval_loss": 188.2202911376953,
"eval_objective": 184.19659423828125,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.533643901348114,
"eval_regularize": 0.4176720380783081,
"eval_runtime": 501.867,
"eval_samples_per_second": 11.537,
"eval_steps_per_second": 0.962,
"eval_wo_beta": 16.002193450927734,
"step": 1250
},
{
"dpo_loss": 0.5387216806411743,
"epoch": 3.5569201700519604,
"grad_norm": 12534.49899559166,
"learning_rate": 2.3051436352620036e-07,
"logits": -1.2683520317077637,
"logps": -82.32015991210938,
"loss": 36.4025,
"objective": 32.02161407470703,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.5666666626930237,
"regularize": 0.07073788344860077,
"step": 1255,
"wo_beta": 15.795002937316895
},
{
"dpo_loss": 0.530408501625061,
"epoch": 3.5710911667453944,
"grad_norm": 12912.721697415427,
"learning_rate": 2.2635116833033392e-07,
"logits": -1.2373536825180054,
"logps": -81.3061294555664,
"loss": 30.8038,
"objective": 33.21593475341797,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.6208333373069763,
"regularize": 0.07438240200281143,
"step": 1260,
"wo_beta": 16.231142044067383
},
{
"dpo_loss": 0.5247560739517212,
"epoch": 3.5852621634388284,
"grad_norm": 12931.353378545553,
"learning_rate": 2.2221488349019902e-07,
"logits": -1.2455730438232422,
"logps": -80.93061828613281,
"loss": 29.6738,
"objective": 31.222820281982422,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.612500011920929,
"regularize": 0.06749995797872543,
"step": 1265,
"wo_beta": 14.711896896362305
},
{
"dpo_loss": 0.5356096625328064,
"epoch": 3.5994331601322624,
"grad_norm": 13549.0763306813,
"learning_rate": 2.181059157639598e-07,
"logits": -1.3499952554702759,
"logps": -81.31751251220703,
"loss": 30.5338,
"objective": 30.125825881958008,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.5333333611488342,
"regularize": 0.06899719685316086,
"step": 1270,
"wo_beta": 13.772916793823242
},
{
"dpo_loss": 0.5210896134376526,
"epoch": 3.613604156825697,
"grad_norm": 14924.204646126253,
"learning_rate": 2.1402466922344303e-07,
"logits": -1.210523247718811,
"logps": -82.34052276611328,
"loss": 29.82,
"objective": 29.18175506591797,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.49166667461395264,
"ranking_simple": 0.5791666507720947,
"regularize": 0.06544475257396698,
"step": 1275,
"wo_beta": 15.473977088928223
},
{
"dpo_loss": 0.5307682752609253,
"epoch": 3.627775153519131,
"grad_norm": 12824.51476470017,
"learning_rate": 2.0997154521440097e-07,
"logits": -1.2915035486221313,
"logps": -81.79452514648438,
"loss": 30.8024,
"objective": 29.49608612060547,
"ranking_idealized": 0.5541666746139526,
"ranking_idealized_expo": 0.4541666805744171,
"ranking_simple": 0.5375000238418579,
"regularize": 0.06567243486642838,
"step": 1280,
"wo_beta": 15.875335693359375
},
{
"dpo_loss": 0.5249419212341309,
"epoch": 3.641946150212565,
"grad_norm": 13428.13142246555,
"learning_rate": 2.0594694231704373e-07,
"logits": -1.2426308393478394,
"logps": -81.00833892822266,
"loss": 30.3043,
"objective": 30.617321014404297,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5916666388511658,
"regularize": 0.07462318986654282,
"step": 1285,
"wo_beta": 17.443321228027344
},
{
"dpo_loss": 0.5173429250717163,
"epoch": 3.656117146905999,
"grad_norm": 12671.749777744226,
"learning_rate": 2.0195125630684428e-07,
"logits": -1.245200276374817,
"logps": -81.8724594116211,
"loss": 28.4671,
"objective": 27.68103790283203,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.6000000238418579,
"regularize": 0.06590177118778229,
"step": 1290,
"wo_beta": 17.08915138244629
},
{
"dpo_loss": 0.5335291028022766,
"epoch": 3.670288143599433,
"grad_norm": 13021.653293493737,
"learning_rate": 1.979848801156167e-07,
"logits": -1.3040084838867188,
"logps": -81.88176727294922,
"loss": 28.4196,
"objective": 28.575376510620117,
"ranking_idealized": 0.5958333611488342,
"ranking_idealized_expo": 0.49166667461395264,
"ranking_simple": 0.5874999761581421,
"regularize": 0.0632786899805069,
"step": 1295,
"wo_beta": 14.829022407531738
},
{
"dpo_loss": 0.5276142954826355,
"epoch": 3.6844591402928675,
"grad_norm": 11978.937253641576,
"learning_rate": 1.9404820379287672e-07,
"logits": -1.187487244606018,
"logps": -80.9906005859375,
"loss": 28.4565,
"objective": 28.971555709838867,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.6166666746139526,
"regularize": 0.06764063984155655,
"step": 1300,
"wo_beta": 17.09331512451172
},
{
"epoch": 3.6844591402928675,
"eval_dpo_loss": 0.680322527885437,
"eval_logits": -1.2942335605621338,
"eval_logps": -88.08357238769531,
"eval_loss": 188.31890869140625,
"eval_objective": 184.1293182373047,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5331262946128845,
"eval_regularize": 0.4177800714969635,
"eval_runtime": 491.4726,
"eval_samples_per_second": 11.781,
"eval_steps_per_second": 0.983,
"eval_wo_beta": 16.008142471313477,
"step": 1300
},
{
"dpo_loss": 0.5322309732437134,
"epoch": 3.6986301369863015,
"grad_norm": 13019.22557555901,
"learning_rate": 1.9014161446748422e-07,
"logits": -1.2798058986663818,
"logps": -81.99161529541016,
"loss": 30.5992,
"objective": 32.30867004394531,
"ranking_idealized": 0.5458333492279053,
"ranking_idealized_expo": 0.47083333134651184,
"ranking_simple": 0.5416666865348816,
"regularize": 0.0735287144780159,
"step": 1305,
"wo_beta": 15.798765182495117
},
{
"dpo_loss": 0.5227470397949219,
"epoch": 3.7128011336797355,
"grad_norm": 12226.371631865619,
"learning_rate": 1.8626549630957395e-07,
"logits": -1.2566769123077393,
"logps": -81.54576110839844,
"loss": 28.0805,
"objective": 26.042844772338867,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5916666388511658,
"regularize": 0.06227840855717659,
"step": 1310,
"wo_beta": 15.27546501159668
},
{
"dpo_loss": 0.537932813167572,
"epoch": 3.7269721303731695,
"grad_norm": 12444.517818477534,
"learning_rate": 1.8242023049277555e-07,
"logits": -1.2929528951644897,
"logps": -81.47209167480469,
"loss": 30.7473,
"objective": 30.499658584594727,
"ranking_idealized": 0.5625,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5583333373069763,
"regularize": 0.07173587381839752,
"step": 1315,
"wo_beta": 15.575103759765625
},
{
"dpo_loss": 0.5317214131355286,
"epoch": 3.7411431270666036,
"grad_norm": 13300.946248563114,
"learning_rate": 1.7860619515673032e-07,
"logits": -1.3597683906555176,
"logps": -83.02255249023438,
"loss": 29.6239,
"objective": 28.020469665527344,
"ranking_idealized": 0.6541666388511658,
"ranking_idealized_expo": 0.5708333253860474,
"ranking_simple": 0.6541666388511658,
"regularize": 0.06609723716974258,
"step": 1320,
"wo_beta": 16.70941734313965
},
{
"dpo_loss": 0.5467905402183533,
"epoch": 3.755314123760038,
"grad_norm": 11933.522036621489,
"learning_rate": 1.7482376536990474e-07,
"logits": -1.2760491371154785,
"logps": -81.77200317382812,
"loss": 28.58,
"objective": 27.297456741333008,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5791666507720947,
"regularize": 0.061256349086761475,
"step": 1325,
"wo_beta": 14.996780395507812
},
{
"dpo_loss": 0.5155090689659119,
"epoch": 3.769485120453472,
"grad_norm": 12146.906265203044,
"learning_rate": 1.7107331309270684e-07,
"logits": -1.2232296466827393,
"logps": -81.67552185058594,
"loss": 25.7046,
"objective": 24.283742904663086,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.6083333492279053,
"regularize": 0.05803535133600235,
"step": 1330,
"wo_beta": 14.960771560668945
},
{
"dpo_loss": 0.5197141766548157,
"epoch": 3.783656117146906,
"grad_norm": 13269.1257120231,
"learning_rate": 1.6735520714090778e-07,
"logits": -1.3548495769500732,
"logps": -82.88711547851562,
"loss": 25.5411,
"objective": 23.988988876342773,
"ranking_idealized": 0.637499988079071,
"ranking_idealized_expo": 0.5541666746139526,
"ranking_simple": 0.6333333253860474,
"regularize": 0.05831415578722954,
"step": 1335,
"wo_beta": 15.491255760192871
},
{
"dpo_loss": 0.5400987863540649,
"epoch": 3.79782711384034,
"grad_norm": 12222.682651732252,
"learning_rate": 1.6366981314937372e-07,
"logits": -1.3011000156402588,
"logps": -81.44950866699219,
"loss": 26.7414,
"objective": 27.633180618286133,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5541666746139526,
"ranking_simple": 0.612500011920929,
"regularize": 0.06863755732774734,
"step": 1340,
"wo_beta": 15.513628005981445
},
{
"dpo_loss": 0.5207428336143494,
"epoch": 3.811998110533774,
"grad_norm": 13292.031759115218,
"learning_rate": 1.6001749353610815e-07,
"logits": -1.2988460063934326,
"logps": -81.9979019165039,
"loss": 27.5342,
"objective": 26.436460494995117,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.637499988079071,
"regularize": 0.060691170394420624,
"step": 1345,
"wo_beta": 16.72386360168457
},
{
"dpo_loss": 0.5372669100761414,
"epoch": 3.826169107227208,
"grad_norm": 12429.085011694839,
"learning_rate": 1.5639860746661338e-07,
"logits": -1.3200603723526,
"logps": -80.8891830444336,
"loss": 27.4636,
"objective": 27.883655548095703,
"ranking_idealized": 0.5625,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5666666626930237,
"regularize": 0.06708240509033203,
"step": 1350,
"wo_beta": 15.541132926940918
},
{
"epoch": 3.826169107227208,
"eval_dpo_loss": 0.6802567839622498,
"eval_logits": -1.2973301410675049,
"eval_logps": -88.45857238769531,
"eval_loss": 188.3022003173828,
"eval_objective": 184.21910095214844,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5320910811424255,
"eval_regularize": 0.4178454279899597,
"eval_runtime": 484.5227,
"eval_samples_per_second": 11.95,
"eval_steps_per_second": 0.997,
"eval_wo_beta": 15.999577522277832,
"step": 1350
},
{
"dpo_loss": 0.5401098132133484,
"epoch": 3.840340103920642,
"grad_norm": 12546.873988889934,
"learning_rate": 1.5281351081856976e-07,
"logits": -1.3091717958450317,
"logps": -81.95738983154297,
"loss": 24.53,
"objective": 23.978574752807617,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.6000000238418579,
"regularize": 0.05746602639555931,
"step": 1355,
"wo_beta": 15.96954345703125
},
{
"dpo_loss": 0.5322627425193787,
"epoch": 3.8545111006140766,
"grad_norm": 12396.074158573574,
"learning_rate": 1.492625561468393e-07,
"logits": -1.2270203828811646,
"logps": -81.94197082519531,
"loss": 27.8079,
"objective": 25.823699951171875,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5958333611488342,
"regularize": 0.06090990826487541,
"step": 1360,
"wo_beta": 15.92143440246582
},
{
"dpo_loss": 0.5215187668800354,
"epoch": 3.8686820973075107,
"grad_norm": 12924.951740893872,
"learning_rate": 1.4574609264879632e-07,
"logits": -1.2885017395019531,
"logps": -81.9835205078125,
"loss": 24.6244,
"objective": 21.932554244995117,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.612500011920929,
"regularize": 0.05161268636584282,
"step": 1365,
"wo_beta": 15.276032447814941
},
{
"dpo_loss": 0.5322207808494568,
"epoch": 3.8828530940009447,
"grad_norm": 11760.04729219421,
"learning_rate": 1.4226446612998671e-07,
"logits": -1.325412631034851,
"logps": -82.93399810791016,
"loss": 25.2873,
"objective": 22.0572566986084,
"ranking_idealized": 0.6208333373069763,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.637499988079071,
"regularize": 0.05589644983410835,
"step": 1370,
"wo_beta": 16.43442726135254
},
{
"dpo_loss": 0.5177661776542664,
"epoch": 3.8970240906943787,
"grad_norm": 12668.234366032097,
"learning_rate": 1.3881801897012224e-07,
"logits": -1.3054790496826172,
"logps": -81.97600555419922,
"loss": 25.3791,
"objective": 25.463533401489258,
"ranking_idealized": 0.5708333253860474,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.5791666507720947,
"regularize": 0.06239425763487816,
"step": 1375,
"wo_beta": 15.740779876708984
},
{
"dpo_loss": 0.5269008874893188,
"epoch": 3.9111950873878127,
"grad_norm": 11613.901925945589,
"learning_rate": 1.3540709008941147e-07,
"logits": -1.2125933170318604,
"logps": -81.08470153808594,
"loss": 24.5614,
"objective": 27.379404067993164,
"ranking_idealized": 0.6416666507720947,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.6499999761581421,
"regularize": 0.06529100984334946,
"step": 1380,
"wo_beta": 16.324913024902344
},
{
"dpo_loss": 0.5204812288284302,
"epoch": 3.925366084081247,
"grad_norm": 12262.7455062338,
"learning_rate": 1.3203201491523024e-07,
"logits": -1.1872669458389282,
"logps": -82.68800354003906,
"loss": 26.2354,
"objective": 27.383338928222656,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.5708333253860474,
"regularize": 0.06124182417988777,
"step": 1385,
"wo_beta": 17.184247970581055
},
{
"dpo_loss": 0.5293174982070923,
"epoch": 3.9395370807746812,
"grad_norm": 11656.13294817261,
"learning_rate": 1.2869312534913685e-07,
"logits": -1.3625025749206543,
"logps": -81.69257354736328,
"loss": 25.8656,
"objective": 27.87486457824707,
"ranking_idealized": 0.5375000238418579,
"ranking_idealized_expo": 0.4541666805744171,
"ranking_simple": 0.5208333134651184,
"regularize": 0.06757337599992752,
"step": 1390,
"wo_beta": 14.843222618103027
},
{
"dpo_loss": 0.5323649644851685,
"epoch": 3.9537080774681153,
"grad_norm": 12688.563452750986,
"learning_rate": 1.2539074973423204e-07,
"logits": -1.344056487083435,
"logps": -82.50756072998047,
"loss": 25.269,
"objective": 20.71147346496582,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.574999988079071,
"regularize": 0.05190667137503624,
"step": 1395,
"wo_beta": 15.608321189880371
},
{
"dpo_loss": 0.5348060727119446,
"epoch": 3.9678790741615493,
"grad_norm": 13248.732573569929,
"learning_rate": 1.2212521282287093e-07,
"logits": -1.2224748134613037,
"logps": -80.45255279541016,
"loss": 27.3902,
"objective": 28.852842330932617,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5708333253860474,
"regularize": 0.0665024146437645,
"step": 1400,
"wo_beta": 16.69828987121582
},
{
"epoch": 3.9678790741615493,
"eval_dpo_loss": 0.6798388957977295,
"eval_logits": -1.2974461317062378,
"eval_logps": -88.3134765625,
"eval_loss": 187.96913146972656,
"eval_objective": 183.7815704345703,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5320910811424255,
"eval_regularize": 0.4168493151664734,
"eval_runtime": 519.2809,
"eval_samples_per_second": 11.15,
"eval_steps_per_second": 0.93,
"eval_wo_beta": 15.978778839111328,
"step": 1400
},
{
"dpo_loss": 0.5318723320960999,
"epoch": 3.9820500708549833,
"grad_norm": 12626.278495743487,
"learning_rate": 1.1889683574472692e-07,
"logits": -1.2031117677688599,
"logps": -81.43195343017578,
"loss": 25.6619,
"objective": 22.53989028930664,
"ranking_idealized": 0.5958333611488342,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5874999761581421,
"regularize": 0.05257093533873558,
"step": 1405,
"wo_beta": 14.43735122680664
},
{
"dpo_loss": 0.5325983762741089,
"epoch": 3.9962210675484178,
"grad_norm": 12962.865030589033,
"learning_rate": 1.15705935975212e-07,
"logits": -1.2109463214874268,
"logps": -80.95507049560547,
"loss": 25.0327,
"objective": 27.48863410949707,
"ranking_idealized": 0.6541666388511658,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.637499988079071,
"regularize": 0.06891029328107834,
"step": 1410,
"wo_beta": 15.6097993850708
},
{
"dpo_loss": 0.5317092537879944,
"epoch": 4.010392064241851,
"grad_norm": 12833.61434685088,
"learning_rate": 1.1255282730425708e-07,
"logits": -1.2491552829742432,
"logps": -81.32047271728516,
"loss": 22.2145,
"objective": 24.41758155822754,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.5874999761581421,
"regularize": 0.06543368101119995,
"step": 1415,
"wo_beta": 15.283975601196289
},
{
"dpo_loss": 0.5239009261131287,
"epoch": 4.024563060935286,
"grad_norm": 13451.327899072105,
"learning_rate": 1.094378198054533e-07,
"logits": -1.353010654449463,
"logps": -83.2571792602539,
"loss": 23.0966,
"objective": 24.90163230895996,
"ranking_idealized": 0.5291666388511658,
"ranking_idealized_expo": 0.4416666626930237,
"ranking_simple": 0.5166666507720947,
"regularize": 0.0562543049454689,
"step": 1420,
"wo_beta": 16.40116310119629
},
{
"dpo_loss": 0.53034508228302,
"epoch": 4.03873405762872,
"grad_norm": 13582.157317581643,
"learning_rate": 1.063612198055604e-07,
"logits": -1.2672284841537476,
"logps": -82.41036987304688,
"loss": 19.725,
"objective": 18.898433685302734,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5583333373069763,
"regularize": 0.04213841259479523,
"step": 1425,
"wo_beta": 17.573118209838867
},
{
"dpo_loss": 0.5290653109550476,
"epoch": 4.052905054322154,
"grad_norm": 12471.786390228664,
"learning_rate": 1.0332332985438247e-07,
"logits": -1.2409167289733887,
"logps": -82.05091094970703,
"loss": 21.8465,
"objective": 20.57358741760254,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.6083333492279053,
"regularize": 0.05022308602929115,
"step": 1430,
"wo_beta": 17.054475784301758
},
{
"dpo_loss": 0.5352352261543274,
"epoch": 4.067076051015588,
"grad_norm": 12729.012234556472,
"learning_rate": 1.0032444869501577e-07,
"logits": -1.1344469785690308,
"logps": -84.53145599365234,
"loss": 23.6283,
"objective": 21.45845603942871,
"ranking_idealized": 0.5375000238418579,
"ranking_idealized_expo": 0.4749999940395355,
"ranking_simple": 0.550000011920929,
"regularize": 0.046408891677856445,
"step": 1435,
"wo_beta": 17.1253719329834
},
{
"dpo_loss": 0.5163091421127319,
"epoch": 4.081247047709022,
"grad_norm": 12403.62054840324,
"learning_rate": 9.736487123447068e-08,
"logits": -1.3162797689437866,
"logps": -83.0071792602539,
"loss": 18.4912,
"objective": 19.839466094970703,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5916666388511658,
"regularize": 0.04717810079455376,
"step": 1440,
"wo_beta": 16.756040573120117
},
{
"dpo_loss": 0.5394971966743469,
"epoch": 4.095418044402456,
"grad_norm": 13017.070767832263,
"learning_rate": 9.444488851467041e-08,
"logits": -1.2141478061676025,
"logps": -81.8912582397461,
"loss": 22.8616,
"objective": 24.104333877563477,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5916666388511658,
"regularize": 0.05671803280711174,
"step": 1445,
"wo_beta": 15.497802734375
},
{
"dpo_loss": 0.5386175513267517,
"epoch": 4.109589041095891,
"grad_norm": 12319.490850536135,
"learning_rate": 9.156478768383058e-08,
"logits": -1.2780787944793701,
"logps": -82.44509887695312,
"loss": 21.2906,
"objective": 22.363698959350586,
"ranking_idealized": 0.5625,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5666666626930237,
"regularize": 0.05074004456400871,
"step": 1450,
"wo_beta": 16.318647384643555
},
{
"epoch": 4.109589041095891,
"eval_dpo_loss": 0.6796455383300781,
"eval_logits": -1.2975972890853882,
"eval_logps": -88.12124633789062,
"eval_loss": 187.89852905273438,
"eval_objective": 183.65463256835938,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5320910811424255,
"eval_regularize": 0.4164124131202698,
"eval_runtime": 516.4821,
"eval_samples_per_second": 11.21,
"eval_steps_per_second": 0.935,
"eval_wo_beta": 15.985260009765625,
"step": 1450
},
{
"dpo_loss": 0.5313987135887146,
"epoch": 4.123760037789324,
"grad_norm": 12478.853769070673,
"learning_rate": 8.872485196822122e-08,
"logits": -1.2814396619796753,
"logps": -81.72008514404297,
"loss": 22.8821,
"objective": 23.81187629699707,
"ranking_idealized": 0.6416666507720947,
"ranking_idealized_expo": 0.5916666388511658,
"ranking_simple": 0.6458333134651184,
"regularize": 0.05551544576883316,
"step": 1455,
"wo_beta": 18.80474090576172
},
{
"dpo_loss": 0.5285670161247253,
"epoch": 4.137931034482759,
"grad_norm": 13016.324616810654,
"learning_rate": 8.592536064431466e-08,
"logits": -1.3169968128204346,
"logps": -82.27637481689453,
"loss": 21.0762,
"objective": 22.214412689208984,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5708333253860474,
"regularize": 0.05284254625439644,
"step": 1460,
"wo_beta": 16.45089340209961
},
{
"dpo_loss": 0.5275595784187317,
"epoch": 4.1521020311761925,
"grad_norm": 12828.215315021795,
"learning_rate": 8.316658901132163e-08,
"logits": -1.2044638395309448,
"logps": -83.09059143066406,
"loss": 20.1235,
"objective": 19.89800453186035,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5541666746139526,
"ranking_simple": 0.6208333373069763,
"regularize": 0.04856906086206436,
"step": 1465,
"wo_beta": 16.143047332763672
},
{
"dpo_loss": 0.5317350029945374,
"epoch": 4.166273027869627,
"grad_norm": 13452.677353962536,
"learning_rate": 8.044880836411888e-08,
"logits": -1.312625527381897,
"logps": -80.955810546875,
"loss": 18.8621,
"objective": 22.22332000732422,
"ranking_idealized": 0.5583333373069763,
"ranking_idealized_expo": 0.4749999940395355,
"ranking_simple": 0.5583333373069763,
"regularize": 0.05493269860744476,
"step": 1470,
"wo_beta": 14.867803573608398
},
{
"dpo_loss": 0.5068629384040833,
"epoch": 4.1804440245630605,
"grad_norm": 12445.31776981503,
"learning_rate": 7.777228596656993e-08,
"logits": -1.2618132829666138,
"logps": -83.48854064941406,
"loss": 18.8691,
"objective": 17.614728927612305,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5541666746139526,
"ranking_simple": 0.612500011920929,
"regularize": 0.04399799555540085,
"step": 1475,
"wo_beta": 17.06732940673828
},
{
"dpo_loss": 0.5202235579490662,
"epoch": 4.194615021256495,
"grad_norm": 12224.02993997593,
"learning_rate": 7.513728502524286e-08,
"logits": -1.1893463134765625,
"logps": -81.5462417602539,
"loss": 19.5471,
"objective": 21.709897994995117,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5916666388511658,
"regularize": 0.05476529151201248,
"step": 1480,
"wo_beta": 16.902223587036133
},
{
"dpo_loss": 0.528392493724823,
"epoch": 4.2087860179499295,
"grad_norm": 12678.153549499324,
"learning_rate": 7.25440646635268e-08,
"logits": -1.3054612874984741,
"logps": -80.2231674194336,
"loss": 19.6042,
"objective": 19.114337921142578,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.574999988079071,
"regularize": 0.04563932120800018,
"step": 1485,
"wo_beta": 16.017080307006836
},
{
"dpo_loss": 0.5332812070846558,
"epoch": 4.222957014643363,
"grad_norm": 12851.091233563351,
"learning_rate": 6.999287989614971e-08,
"logits": -1.368248462677002,
"logps": -81.43551635742188,
"loss": 19.3664,
"objective": 18.39341926574707,
"ranking_idealized": 0.5583333373069763,
"ranking_idealized_expo": 0.47083333134651184,
"ranking_simple": 0.5625,
"regularize": 0.0449262373149395,
"step": 1490,
"wo_beta": 14.998396873474121
},
{
"dpo_loss": 0.5162668824195862,
"epoch": 4.2371280113367975,
"grad_norm": 13439.750358421123,
"learning_rate": 6.74839816041013e-08,
"logits": -1.3570283651351929,
"logps": -81.74089050292969,
"loss": 16.8521,
"objective": 18.91334342956543,
"ranking_idealized": 0.5958333611488342,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5958333611488342,
"regularize": 0.04384367913007736,
"step": 1495,
"wo_beta": 18.151466369628906
},
{
"dpo_loss": 0.5271181464195251,
"epoch": 4.251299008030231,
"grad_norm": 12462.836104102607,
"learning_rate": 6.501761650996052e-08,
"logits": -1.3143360614776611,
"logps": -83.34208679199219,
"loss": 19.8787,
"objective": 20.79971694946289,
"ranking_idealized": 0.6083333492279053,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5958333611488342,
"regularize": 0.04957110807299614,
"step": 1500,
"wo_beta": 16.131967544555664
},
{
"epoch": 4.251299008030231,
"eval_dpo_loss": 0.679940402507782,
"eval_logits": -1.2942196130752563,
"eval_logps": -88.3078384399414,
"eval_loss": 188.08248901367188,
"eval_objective": 183.8683624267578,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5320910811424255,
"eval_regularize": 0.4168849587440491,
"eval_runtime": 525.9589,
"eval_samples_per_second": 11.008,
"eval_steps_per_second": 0.918,
"eval_wo_beta": 15.983942031860352,
"step": 1500
},
{
"dpo_loss": 0.5348100066184998,
"epoch": 4.2654700047236656,
"grad_norm": 12354.445167507907,
"learning_rate": 6.259402715363394e-08,
"logits": -1.3128606081008911,
"logps": -83.40116119384766,
"loss": 18.971,
"objective": 17.431968688964844,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5666666626930237,
"regularize": 0.0443451851606369,
"step": 1505,
"wo_beta": 15.766800880432129
},
{
"dpo_loss": 0.5173017382621765,
"epoch": 4.2796410014171,
"grad_norm": 13102.659789592512,
"learning_rate": 6.021345186850418e-08,
"logits": -1.2090104818344116,
"logps": -81.23714447021484,
"loss": 21.191,
"objective": 21.426023483276367,
"ranking_idealized": 0.625,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.6166666746139526,
"regularize": 0.04612095281481743,
"step": 1510,
"wo_beta": 15.78390121459961
},
{
"dpo_loss": 0.5307953357696533,
"epoch": 4.293811998110534,
"grad_norm": 12945.706552780925,
"learning_rate": 5.787612475799269e-08,
"logits": -1.367775559425354,
"logps": -82.44042205810547,
"loss": 16.9107,
"objective": 16.15281867980957,
"ranking_idealized": 0.625,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.6291666626930237,
"regularize": 0.03850070759654045,
"step": 1515,
"wo_beta": 16.397567749023438
},
{
"dpo_loss": 0.5217214226722717,
"epoch": 4.307982994803968,
"grad_norm": 12094.58497098056,
"learning_rate": 5.5582275672538316e-08,
"logits": -1.2217297554016113,
"logps": -81.85955047607422,
"loss": 18.4539,
"objective": 19.449350357055664,
"ranking_idealized": 0.6791666746139526,
"ranking_idealized_expo": 0.6041666865348816,
"ranking_simple": 0.6791666746139526,
"regularize": 0.04723352938890457,
"step": 1520,
"wo_beta": 16.819021224975586
},
{
"dpo_loss": 0.5168942809104919,
"epoch": 4.322153991497402,
"grad_norm": 12423.169223430634,
"learning_rate": 5.333213018699356e-08,
"logits": -1.2731564044952393,
"logps": -81.88040161132812,
"loss": 21.872,
"objective": 21.83941078186035,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.6291666626930237,
"regularize": 0.054684512317180634,
"step": 1525,
"wo_beta": 14.882065773010254
},
{
"dpo_loss": 0.5241533517837524,
"epoch": 4.336324988190836,
"grad_norm": 13878.266489791004,
"learning_rate": 5.112590957844232e-08,
"logits": -1.3176230192184448,
"logps": -83.9821548461914,
"loss": 20.6818,
"objective": 16.49356460571289,
"ranking_idealized": 0.625,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.612500011920929,
"regularize": 0.04093782603740692,
"step": 1530,
"wo_beta": 16.301631927490234
},
{
"dpo_loss": 0.5228941440582275,
"epoch": 4.350495984884271,
"grad_norm": 12969.925803784026,
"learning_rate": 4.896383080443933e-08,
"logits": -1.216440200805664,
"logps": -82.53515625,
"loss": 18.6444,
"objective": 18.548452377319336,
"ranking_idealized": 0.5708333253860474,
"ranking_idealized_expo": 0.4625000059604645,
"ranking_simple": 0.5791666507720947,
"regularize": 0.047610316425561905,
"step": 1535,
"wo_beta": 15.176318168640137
},
{
"dpo_loss": 0.5215330123901367,
"epoch": 4.364666981577704,
"grad_norm": 12724.232692363212,
"learning_rate": 4.684610648167503e-08,
"logits": -1.3027079105377197,
"logps": -81.91221618652344,
"loss": 21.6498,
"objective": 21.20092010498047,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5833333134651184,
"regularize": 0.05302129685878754,
"step": 1540,
"wo_beta": 16.298704147338867
},
{
"dpo_loss": 0.5356315970420837,
"epoch": 4.378837978271139,
"grad_norm": 12224.725778808395,
"learning_rate": 4.4772944865067055e-08,
"logits": -1.3303568363189697,
"logps": -83.5517578125,
"loss": 17.8525,
"objective": 20.373811721801758,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.6166666746139526,
"regularize": 0.050720926374197006,
"step": 1545,
"wo_beta": 15.988405227661133
},
{
"dpo_loss": 0.5247156023979187,
"epoch": 4.393008974964572,
"grad_norm": 12442.357612605178,
"learning_rate": 4.274454982728032e-08,
"logits": -1.246690034866333,
"logps": -81.54380798339844,
"loss": 18.4741,
"objective": 19.52410316467285,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5833333134651184,
"regularize": 0.049736883491277695,
"step": 1550,
"wo_beta": 17.08685874938965
},
{
"epoch": 4.393008974964572,
"eval_dpo_loss": 0.6802076697349548,
"eval_logits": -1.2950727939605713,
"eval_logps": -88.48546600341797,
"eval_loss": 188.04074096679688,
"eval_objective": 184.0446319580078,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.532608687877655,
"eval_regularize": 0.4172796308994293,
"eval_runtime": 533.9808,
"eval_samples_per_second": 10.843,
"eval_steps_per_second": 0.905,
"eval_wo_beta": 15.994985580444336,
"step": 1550
},
{
"dpo_loss": 0.5181335210800171,
"epoch": 4.407179971658007,
"grad_norm": 13002.101456533634,
"learning_rate": 4.0761120838678776e-08,
"logits": -1.3068591356277466,
"logps": -81.81246185302734,
"loss": 16.5342,
"objective": 14.914339065551758,
"ranking_idealized": 0.5708333253860474,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.5708333253860474,
"regularize": 0.0403703935444355,
"step": 1555,
"wo_beta": 17.326810836791992
},
{
"dpo_loss": 0.5418220162391663,
"epoch": 4.42135096835144,
"grad_norm": 11889.810698222469,
"learning_rate": 3.882285294770937e-08,
"logits": -1.2680351734161377,
"logps": -80.56555938720703,
"loss": 16.764,
"objective": 17.03957176208496,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5916666388511658,
"regularize": 0.04209714010357857,
"step": 1560,
"wo_beta": 14.395126342773438
},
{
"dpo_loss": 0.5355924963951111,
"epoch": 4.435521965044875,
"grad_norm": 12185.339277571,
"learning_rate": 3.6929936761721403e-08,
"logits": -1.2988630533218384,
"logps": -80.5867919921875,
"loss": 21.4187,
"objective": 21.873271942138672,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5791666507720947,
"regularize": 0.05235178396105766,
"step": 1565,
"wo_beta": 14.826796531677246
},
{
"dpo_loss": 0.5378596782684326,
"epoch": 4.449692961738309,
"grad_norm": 11114.71452911412,
"learning_rate": 3.508255842822255e-08,
"logits": -1.3118114471435547,
"logps": -81.77924346923828,
"loss": 18.6149,
"objective": 20.33370590209961,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5958333611488342,
"regularize": 0.05320237576961517,
"step": 1570,
"wo_beta": 15.688643455505371
},
{
"dpo_loss": 0.5132429599761963,
"epoch": 4.463863958431743,
"grad_norm": 12945.538981188476,
"learning_rate": 3.3280899616572656e-08,
"logits": -1.3532111644744873,
"logps": -84.82633209228516,
"loss": 17.216,
"objective": 17.143177032470703,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.574999988079071,
"regularize": 0.042684536427259445,
"step": 1575,
"wo_beta": 17.00408935546875
},
{
"dpo_loss": 0.5311785340309143,
"epoch": 4.478034955125177,
"grad_norm": 13235.594805356337,
"learning_rate": 3.15251375001192e-08,
"logits": -1.2649219036102295,
"logps": -82.44920349121094,
"loss": 17.9899,
"objective": 17.875553131103516,
"ranking_idealized": 0.5708333253860474,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5708333253860474,
"regularize": 0.043413810431957245,
"step": 1580,
"wo_beta": 17.040142059326172
},
{
"dpo_loss": 0.5295437574386597,
"epoch": 4.492205951818612,
"grad_norm": 13164.392376509253,
"learning_rate": 2.98154447387739e-08,
"logits": -1.318244457244873,
"logps": -81.6868667602539,
"loss": 18.7186,
"objective": 14.95705509185791,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.5958333611488342,
"regularize": 0.03320387750864029,
"step": 1585,
"wo_beta": 17.157299041748047
},
{
"dpo_loss": 0.527228593826294,
"epoch": 4.506376948512045,
"grad_norm": 12124.025371614676,
"learning_rate": 2.8151989462033787e-08,
"logits": -1.1829341650009155,
"logps": -83.83565521240234,
"loss": 18.9673,
"objective": 16.773042678833008,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5833333134651184,
"regularize": 0.042777713388204575,
"step": 1590,
"wo_beta": 16.952783584594727
},
{
"dpo_loss": 0.5242041349411011,
"epoch": 4.52054794520548,
"grad_norm": 11927.935212297323,
"learning_rate": 2.653493525244721e-08,
"logits": -1.2492893934249878,
"logps": -82.36843872070312,
"loss": 17.1521,
"objective": 18.047021865844727,
"ranking_idealized": 0.6208333373069763,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.6041666865348816,
"regularize": 0.04608127102255821,
"step": 1595,
"wo_beta": 15.581862449645996
},
{
"dpo_loss": 0.5243973135948181,
"epoch": 4.534718941898913,
"grad_norm": 12379.266840127142,
"learning_rate": 2.4964441129527335e-08,
"logits": -1.2830615043640137,
"logps": -82.28716278076172,
"loss": 20.4794,
"objective": 17.599641799926758,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.6333333253860474,
"regularize": 0.042389459908008575,
"step": 1600,
"wo_beta": 16.58247184753418
},
{
"epoch": 4.534718941898913,
"eval_dpo_loss": 0.6798632740974426,
"eval_logits": -1.2950247526168823,
"eval_logps": -88.43807983398438,
"eval_loss": 187.9060821533203,
"eval_objective": 183.82763671875,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5331262946128845,
"eval_regularize": 0.4167550504207611,
"eval_runtime": 510.5256,
"eval_samples_per_second": 11.341,
"eval_steps_per_second": 0.946,
"eval_wo_beta": 16.000411987304688,
"step": 1600
},
{
"dpo_loss": 0.5349418520927429,
"epoch": 4.548889938592348,
"grad_norm": 13428.292487446544,
"learning_rate": 2.3440661534114557e-08,
"logits": -1.2768018245697021,
"logps": -83.37641906738281,
"loss": 17.8123,
"objective": 14.984145164489746,
"ranking_idealized": 0.5541666746139526,
"ranking_idealized_expo": 0.4749999940395355,
"ranking_simple": 0.550000011920929,
"regularize": 0.0358855277299881,
"step": 1605,
"wo_beta": 16.76499366760254
},
{
"dpo_loss": 0.5164486169815063,
"epoch": 4.563060935285781,
"grad_norm": 12892.913912379732,
"learning_rate": 2.1963746313188757e-08,
"logits": -1.249220371246338,
"logps": -81.78076171875,
"loss": 17.1832,
"objective": 20.233509063720703,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.47083333134651184,
"ranking_simple": 0.574999988079071,
"regularize": 0.048116158694028854,
"step": 1610,
"wo_beta": 15.82449722290039
},
{
"dpo_loss": 0.5349178314208984,
"epoch": 4.577231931979216,
"grad_norm": 12493.396334435913,
"learning_rate": 2.053384070513353e-08,
"logits": -1.2513455152511597,
"logps": -80.9568862915039,
"loss": 18.7751,
"objective": 20.071449279785156,
"ranking_idealized": 0.5291666388511658,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.5249999761581421,
"regularize": 0.04651705548167229,
"step": 1615,
"wo_beta": 14.514166831970215
},
{
"dpo_loss": 0.5360397100448608,
"epoch": 4.59140292867265,
"grad_norm": 12311.497249141552,
"learning_rate": 1.915108532545351e-08,
"logits": -1.3831831216812134,
"logps": -81.701904296875,
"loss": 16.5863,
"objective": 13.440372467041016,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5791666507720947,
"regularize": 0.03207644820213318,
"step": 1620,
"wo_beta": 16.37172508239746
},
{
"dpo_loss": 0.5228015780448914,
"epoch": 4.605573925366084,
"grad_norm": 12520.657843831757,
"learning_rate": 1.781561615294652e-08,
"logits": -1.3208075761795044,
"logps": -82.14677429199219,
"loss": 17.2643,
"objective": 16.142719268798828,
"ranking_idealized": 0.6791666746139526,
"ranking_idealized_expo": 0.5916666388511658,
"ranking_simple": 0.6875,
"regularize": 0.03792344033718109,
"step": 1625,
"wo_beta": 15.518718719482422
},
{
"dpo_loss": 0.5221564173698425,
"epoch": 4.619744922059518,
"grad_norm": 11926.649260036038,
"learning_rate": 1.6527564516331638e-08,
"logits": -1.1876474618911743,
"logps": -82.74609375,
"loss": 17.5255,
"objective": 16.14875602722168,
"ranking_idealized": 0.6708333492279053,
"ranking_idealized_expo": 0.5874999761581421,
"ranking_simple": 0.6708333492279053,
"regularize": 0.039545025676488876,
"step": 1630,
"wo_beta": 17.103187561035156
},
{
"dpo_loss": 0.5277553796768188,
"epoch": 4.633915918752952,
"grad_norm": 12387.92239266219,
"learning_rate": 1.5287057081333988e-08,
"logits": -1.303261399269104,
"logps": -82.68264770507812,
"loss": 17.5837,
"objective": 18.295978546142578,
"ranking_idealized": 0.625,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.6291666626930237,
"regularize": 0.04383000358939171,
"step": 1635,
"wo_beta": 16.273590087890625
},
{
"dpo_loss": 0.5235089063644409,
"epoch": 4.648086915446386,
"grad_norm": 13550.591286437839,
"learning_rate": 1.4094215838229172e-08,
"logits": -1.3104770183563232,
"logps": -81.95443725585938,
"loss": 16.0714,
"objective": 18.62168312072754,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5958333611488342,
"regularize": 0.044566281139850616,
"step": 1640,
"wo_beta": 14.541909217834473
},
{
"dpo_loss": 0.5459772944450378,
"epoch": 4.662257912139821,
"grad_norm": 12589.25993273719,
"learning_rate": 1.2949158089846368e-08,
"logits": -1.2789607048034668,
"logps": -80.86375427246094,
"loss": 15.9698,
"objective": 15.747620582580566,
"ranking_idealized": 0.5708333253860474,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5708333253860474,
"regularize": 0.03958037868142128,
"step": 1645,
"wo_beta": 16.792747497558594
},
{
"dpo_loss": 0.5279684066772461,
"epoch": 4.6764289088332545,
"grad_norm": 11986.458011152894,
"learning_rate": 1.1851996440033318e-08,
"logits": -1.224802017211914,
"logps": -81.75625610351562,
"loss": 17.2115,
"objective": 18.047420501708984,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5958333611488342,
"regularize": 0.04608708992600441,
"step": 1650,
"wo_beta": 17.34733772277832
},
{
"epoch": 4.6764289088332545,
"eval_dpo_loss": 0.6798492074012756,
"eval_logits": -1.293831467628479,
"eval_logps": -88.41741943359375,
"eval_loss": 187.95040893554688,
"eval_objective": 183.85658264160156,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.532608687877655,
"eval_regularize": 0.416820764541626,
"eval_runtime": 510.4095,
"eval_samples_per_second": 11.344,
"eval_steps_per_second": 0.946,
"eval_wo_beta": 15.994239807128906,
"step": 1650
},
{
"dpo_loss": 0.5183621048927307,
"epoch": 4.690599905526689,
"grad_norm": 12001.298881228338,
"learning_rate": 1.0802838782582535e-08,
"logits": -1.2560440301895142,
"logps": -81.986083984375,
"loss": 18.141,
"objective": 16.23440170288086,
"ranking_idealized": 0.5958333611488342,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.6041666865348816,
"regularize": 0.0418228842318058,
"step": 1655,
"wo_beta": 14.709871292114258
},
{
"dpo_loss": 0.5308786034584045,
"epoch": 4.7047709022201225,
"grad_norm": 12471.919482995943,
"learning_rate": 9.801788290621505e-09,
"logits": -1.242910623550415,
"logps": -82.37290954589844,
"loss": 19.8764,
"objective": 21.41328239440918,
"ranking_idealized": 0.6083333492279053,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.6083333492279053,
"regularize": 0.04805602878332138,
"step": 1660,
"wo_beta": 16.161657333374023
},
{
"dpo_loss": 0.517335832118988,
"epoch": 4.718941898913557,
"grad_norm": 12326.624130987268,
"learning_rate": 8.848943406466468e-09,
"logits": -1.2066967487335205,
"logps": -81.63778686523438,
"loss": 17.9054,
"objective": 18.123321533203125,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.47083333134651184,
"ranking_simple": 0.550000011920929,
"regularize": 0.04272852838039398,
"step": 1665,
"wo_beta": 15.821066856384277
},
{
"dpo_loss": 0.534516453742981,
"epoch": 4.733112895606991,
"grad_norm": 13120.765521158273,
"learning_rate": 7.944397831941951e-09,
"logits": -1.3101601600646973,
"logps": -83.31844329833984,
"loss": 15.3296,
"objective": 14.363126754760742,
"ranking_idealized": 0.5625,
"ranking_idealized_expo": 0.47083333134651184,
"ranking_simple": 0.5541666746139526,
"regularize": 0.0361357256770134,
"step": 1670,
"wo_beta": 15.148748397827148
},
{
"dpo_loss": 0.5110668540000916,
"epoch": 4.747283892300425,
"grad_norm": 12106.475879366208,
"learning_rate": 7.088240519165955e-09,
"logits": -1.2715505361557007,
"logps": -83.65233612060547,
"loss": 18.7232,
"objective": 22.049705505371094,
"ranking_idealized": 0.5708333253860474,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.5583333373069763,
"regularize": 0.04454280436038971,
"step": 1675,
"wo_beta": 16.55459213256836
},
{
"dpo_loss": 0.5210347771644592,
"epoch": 4.7614548889938595,
"grad_norm": 13458.285236730762,
"learning_rate": 6.280555661802856e-09,
"logits": -1.2422146797180176,
"logps": -82.28036499023438,
"loss": 16.7571,
"objective": 16.147016525268555,
"ranking_idealized": 0.637499988079071,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.637499988079071,
"regularize": 0.03472811356186867,
"step": 1680,
"wo_beta": 17.648740768432617
},
{
"dpo_loss": 0.5201699733734131,
"epoch": 4.775625885687293,
"grad_norm": 13687.817133347355,
"learning_rate": 5.521422686783294e-09,
"logits": -1.308603286743164,
"logps": -82.1572265625,
"loss": 17.374,
"objective": 18.0618839263916,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.4583333432674408,
"ranking_simple": 0.5625,
"regularize": 0.039491456001996994,
"step": 1685,
"wo_beta": 14.411401748657227
},
{
"dpo_loss": 0.5319506525993347,
"epoch": 4.7897968823807275,
"grad_norm": 11765.631080020812,
"learning_rate": 4.810916246494157e-09,
"logits": -1.3420146703720093,
"logps": -81.82181549072266,
"loss": 16.2518,
"objective": 15.689167976379395,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.612500011920929,
"regularize": 0.03873560577630997,
"step": 1690,
"wo_beta": 15.535360336303711
},
{
"dpo_loss": 0.5226943492889404,
"epoch": 4.803967879074161,
"grad_norm": 15212.188996211064,
"learning_rate": 4.149106211436659e-09,
"logits": -1.205290675163269,
"logps": -81.14673614501953,
"loss": 15.6316,
"objective": 14.224554061889648,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.5708333253860474,
"ranking_simple": 0.6333333253860474,
"regularize": 0.033357344567775726,
"step": 1695,
"wo_beta": 17.657291412353516
},
{
"dpo_loss": 0.5265616178512573,
"epoch": 4.818138875767596,
"grad_norm": 14041.074803893325,
"learning_rate": 3.5360576633558513e-09,
"logits": -1.3079345226287842,
"logps": -80.5920639038086,
"loss": 16.5799,
"objective": 17.133312225341797,
"ranking_idealized": 0.625,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.6208333373069763,
"regularize": 0.042179401963949203,
"step": 1700,
"wo_beta": 14.612165451049805
},
{
"epoch": 4.818138875767596,
"eval_dpo_loss": 0.6798617839813232,
"eval_logits": -1.2946054935455322,
"eval_logps": -88.42201232910156,
"eval_loss": 187.93597412109375,
"eval_objective": 183.8405303955078,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.532608687877655,
"eval_regularize": 0.4168170392513275,
"eval_runtime": 537.7382,
"eval_samples_per_second": 10.767,
"eval_steps_per_second": 0.898,
"eval_wo_beta": 15.996342658996582,
"step": 1700
},
{
"dpo_loss": 0.5323117971420288,
"epoch": 4.83230987246103,
"grad_norm": 12547.78573915316,
"learning_rate": 2.9718308888401767e-09,
"logits": -1.3183315992355347,
"logps": -81.7763442993164,
"loss": 16.0513,
"objective": 17.510692596435547,
"ranking_idealized": 0.6791666746139526,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.6708333492279053,
"regularize": 0.040996309369802475,
"step": 1705,
"wo_beta": 17.88062858581543
},
{
"dpo_loss": 0.5359232425689697,
"epoch": 4.846480869154464,
"grad_norm": 13302.316035438349,
"learning_rate": 2.4564813733932155e-09,
"logits": -1.316437840461731,
"logps": -81.5803451538086,
"loss": 17.058,
"objective": 15.084990501403809,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5874999761581421,
"regularize": 0.0330289825797081,
"step": 1710,
"wo_beta": 14.95897102355957
},
{
"dpo_loss": 0.5298423171043396,
"epoch": 4.860651865847898,
"grad_norm": 13131.732232168924,
"learning_rate": 1.9900597959770505e-09,
"logits": -1.2239762544631958,
"logps": -80.93972778320312,
"loss": 15.5353,
"objective": 14.398134231567383,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5833333134651184,
"regularize": 0.03392880782485008,
"step": 1715,
"wo_beta": 16.470539093017578
},
{
"dpo_loss": 0.5080859065055847,
"epoch": 4.874822862541333,
"grad_norm": 13218.33236233331,
"learning_rate": 1.5726120240288631e-09,
"logits": -1.2538625001907349,
"logps": -80.96495819091797,
"loss": 16.0016,
"objective": 18.5091552734375,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5874999761581421,
"regularize": 0.045930005609989166,
"step": 1720,
"wo_beta": 17.185333251953125
},
{
"dpo_loss": 0.5118470788002014,
"epoch": 4.888993859234766,
"grad_norm": 12268.487941087904,
"learning_rate": 1.2041791089499875e-09,
"logits": -1.279910683631897,
"logps": -79.85582733154297,
"loss": 13.4289,
"objective": 14.366524696350098,
"ranking_idealized": 0.6416666507720947,
"ranking_idealized_expo": 0.5708333253860474,
"ranking_simple": 0.6416666507720947,
"regularize": 0.03632321581244469,
"step": 1725,
"wo_beta": 17.114274978637695
},
{
"dpo_loss": 0.5286470055580139,
"epoch": 4.903164855928201,
"grad_norm": 11950.336190164535,
"learning_rate": 8.847972820693051e-10,
"logits": -1.2914131879806519,
"logps": -80.19400787353516,
"loss": 16.9458,
"objective": 18.679357528686523,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.4416666626930237,
"ranking_simple": 0.5249999761581421,
"regularize": 0.04655119404196739,
"step": 1730,
"wo_beta": 14.276873588562012
},
{
"dpo_loss": 0.528618574142456,
"epoch": 4.917335852621634,
"grad_norm": 12737.302460928488,
"learning_rate": 6.144979510802062e-10,
"logits": -1.4132698774337769,
"logps": -82.34892272949219,
"loss": 18.3815,
"objective": 18.776357650756836,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.637499988079071,
"regularize": 0.047753263264894485,
"step": 1735,
"wo_beta": 15.833959579467773
},
{
"dpo_loss": 0.5292457938194275,
"epoch": 4.931506849315069,
"grad_norm": 13241.609695831672,
"learning_rate": 3.933076969516724e-10,
"logits": -1.2396830320358276,
"logps": -81.28510284423828,
"loss": 15.2755,
"objective": 15.8608980178833,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5916666388511658,
"regularize": 0.041682373732328415,
"step": 1740,
"wo_beta": 15.47945499420166
},
{
"dpo_loss": 0.5308272838592529,
"epoch": 4.945677846008502,
"grad_norm": 12128.166835896209,
"learning_rate": 2.212482713149222e-10,
"logits": -1.2960669994354248,
"logps": -80.84746551513672,
"loss": 15.3037,
"objective": 12.663678169250488,
"ranking_idealized": 0.6083333492279053,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.6041666865348816,
"regularize": 0.03369910642504692,
"step": 1745,
"wo_beta": 16.19184112548828
},
{
"dpo_loss": 0.5277208089828491,
"epoch": 4.959848842701937,
"grad_norm": 12921.297125323947,
"learning_rate": 9.833659432367803e-11,
"logits": -1.2565745115280151,
"logps": -82.744873046875,
"loss": 16.689,
"objective": 16.856407165527344,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5874999761581421,
"regularize": 0.04240218922495842,
"step": 1750,
"wo_beta": 16.752824783325195
},
{
"epoch": 4.959848842701937,
"eval_dpo_loss": 0.6798657774925232,
"eval_logits": -1.2945247888565063,
"eval_logps": -88.4161605834961,
"eval_loss": 187.94732666015625,
"eval_objective": 183.85096740722656,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.532608687877655,
"eval_regularize": 0.4168415367603302,
"eval_runtime": 526.9139,
"eval_samples_per_second": 10.989,
"eval_steps_per_second": 0.917,
"eval_wo_beta": 15.995292663574219,
"step": 1750
},
{
"dpo_loss": 0.5391930937767029,
"epoch": 4.974019839395371,
"grad_norm": 11466.754753582296,
"learning_rate": 2.4584752990997048e-11,
"logits": -1.29628324508667,
"logps": -82.2157211303711,
"loss": 14.7634,
"objective": 15.14171314239502,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.6041666865348816,
"regularize": 0.04220600798726082,
"step": 1755,
"wo_beta": 16.21957778930664
},
{
"dpo_loss": 0.525145411491394,
"epoch": 4.988190836088805,
"grad_norm": 12278.79483067917,
"learning_rate": 0.0,
"logits": -1.2211812734603882,
"logps": -82.23439025878906,
"loss": 15.7539,
"objective": 15.124394416809082,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.6208333373069763,
"regularize": 0.035570546984672546,
"step": 1760,
"wo_beta": 17.11547088623047
},
{
"epoch": 4.988190836088805,
"step": 1760,
"total_flos": 0.0,
"train_loss": 67.88850653388283,
"train_runtime": 74214.1269,
"train_samples_per_second": 3.423,
"train_steps_per_second": 0.024
}
],
"logging_steps": 5,
"max_steps": 1760,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}