qwen2.5-0.5b-expo-DPO-ES-TRY2 / trainer_state.json
hZzy's picture
Model save
7f977a6 verified
{
"best_metric": 0.5423553586006165,
"best_model_checkpoint": "./qwen2.5-0.5b/qwen2.5-0.5b-expo-DPO-ES-TRY/checkpoint-371",
"epoch": 2.9976381672177608,
"eval_steps": 53,
"global_step": 528,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"dpo_loss": 0.6931471824645996,
"dpo_wo_beta": -0.6931471824645996,
"epoch": 0.005668398677373642,
"grad_norm": 13.433600669124935,
"learning_rate": 9.433962264150944e-08,
"logits": -1.3874311447143555,
"logps": -88.43561553955078,
"loss": 0.6931,
"objective": 0.6931471824645996,
"ranking_idealized": 0.625,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5208333134651184,
"regularize": 0.6931471824645996,
"step": 1
},
{
"dpo_loss": 0.693236768245697,
"dpo_wo_beta": -0.6993356347084045,
"epoch": 0.02834199338686821,
"grad_norm": 13.640653628388394,
"learning_rate": 4.716981132075472e-07,
"logits": -1.4090652465820312,
"logps": -84.34337615966797,
"loss": 0.693,
"objective": 0.693236768245697,
"ranking_idealized": 0.6197916865348816,
"ranking_idealized_expo": 0.546875,
"ranking_simple": 0.546875,
"regularize": 0.693236768245697,
"step": 5
},
{
"dpo_loss": 0.6845630407333374,
"dpo_wo_beta": -0.7111619710922241,
"epoch": 0.05668398677373642,
"grad_norm": 12.626074407134174,
"learning_rate": 9.433962264150944e-07,
"logits": -1.4784893989562988,
"logps": -81.94055938720703,
"loss": 0.6892,
"objective": 0.6845630407333374,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.512499988079071,
"regularize": 0.6845630407333374,
"step": 10
},
{
"dpo_loss": 0.6825469136238098,
"dpo_wo_beta": -0.8259204626083374,
"epoch": 0.08502598016060463,
"grad_norm": 12.374180595083178,
"learning_rate": 1.4150943396226415e-06,
"logits": -1.4932299852371216,
"logps": -81.52880096435547,
"loss": 0.6814,
"objective": 0.6825469136238098,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.512499988079071,
"regularize": 0.6825469136238098,
"step": 15
},
{
"dpo_loss": 0.6950914263725281,
"dpo_wo_beta": -1.2390469312667847,
"epoch": 0.11336797354747284,
"grad_norm": 14.839934392200913,
"learning_rate": 1.8867924528301889e-06,
"logits": -1.5371100902557373,
"logps": -82.72624969482422,
"loss": 0.6711,
"objective": 0.6950914263725281,
"ranking_idealized": 0.625,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5249999761581421,
"regularize": 0.6950914263725281,
"step": 20
},
{
"dpo_loss": 0.6556071043014526,
"dpo_wo_beta": -1.110619068145752,
"epoch": 0.14170996693434104,
"grad_norm": 12.89805052529156,
"learning_rate": 2.358490566037736e-06,
"logits": -1.6399922370910645,
"logps": -81.59695434570312,
"loss": 0.6589,
"objective": 0.6556071043014526,
"ranking_idealized": 0.5958333611488342,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5249999761581421,
"regularize": 0.6556071043014526,
"step": 25
},
{
"dpo_loss": 0.6518108248710632,
"dpo_wo_beta": -1.2506839036941528,
"epoch": 0.17005196032120926,
"grad_norm": 12.64998937636519,
"learning_rate": 2.830188679245283e-06,
"logits": -1.6404598951339722,
"logps": -83.20111846923828,
"loss": 0.6451,
"objective": 0.6518108248710632,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5666666626930237,
"regularize": 0.6518108248710632,
"step": 30
},
{
"dpo_loss": 0.6226770877838135,
"dpo_wo_beta": -1.394917368888855,
"epoch": 0.19839395370807747,
"grad_norm": 13.760162421635227,
"learning_rate": 3.30188679245283e-06,
"logits": -1.6237396001815796,
"logps": -87.80964660644531,
"loss": 0.6189,
"objective": 0.6226770877838135,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5583333373069763,
"regularize": 0.6226770877838135,
"step": 35
},
{
"dpo_loss": 0.5924390554428101,
"dpo_wo_beta": -1.422450304031372,
"epoch": 0.22673594709494568,
"grad_norm": 16.810886476613117,
"learning_rate": 3.7735849056603777e-06,
"logits": -1.620682954788208,
"logps": -91.93690490722656,
"loss": 0.6076,
"objective": 0.5924390554428101,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5708333253860474,
"regularize": 0.5924390554428101,
"step": 40
},
{
"dpo_loss": 0.573756217956543,
"dpo_wo_beta": -1.3691534996032715,
"epoch": 0.25507794048181387,
"grad_norm": 13.798774501924722,
"learning_rate": 4.245283018867925e-06,
"logits": -1.7814558744430542,
"logps": -92.24474334716797,
"loss": 0.5989,
"objective": 0.573756217956543,
"ranking_idealized": 0.6208333373069763,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5666666626930237,
"regularize": 0.573756217956543,
"step": 45
},
{
"dpo_loss": 0.5726417899131775,
"dpo_wo_beta": -1.3605374097824097,
"epoch": 0.2834199338686821,
"grad_norm": 12.568473894025988,
"learning_rate": 4.716981132075472e-06,
"logits": -1.808895468711853,
"logps": -90.65751647949219,
"loss": 0.5954,
"objective": 0.5726417899131775,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.42500001192092896,
"ranking_simple": 0.574999988079071,
"regularize": 0.5726417899131775,
"step": 50
},
{
"epoch": 0.300425129900803,
"eval_dpo_loss": 0.7112604975700378,
"eval_dpo_wo_beta": -2.2659413814544678,
"eval_logits": -1.892814040184021,
"eval_logps": -101.36742401123047,
"eval_loss": 0.6816489100456238,
"eval_objective": 0.7112604975700378,
"eval_ranking_idealized": 0.5888429880142212,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5237603187561035,
"eval_regularize": 0.7112604975700378,
"eval_runtime": 211.6587,
"eval_samples_per_second": 27.355,
"eval_steps_per_second": 1.143,
"step": 53
},
{
"dpo_loss": 0.5827316045761108,
"dpo_wo_beta": -1.6213361024856567,
"epoch": 0.3117619272555503,
"grad_norm": 14.442715913160086,
"learning_rate": 4.999781286194085e-06,
"logits": -1.8762638568878174,
"logps": -93.41423797607422,
"loss": 0.5721,
"objective": 0.5827316045761108,
"ranking_idealized": 0.625,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.6041666865348816,
"regularize": 0.5827316045761108,
"step": 55
},
{
"dpo_loss": 0.5553872585296631,
"dpo_wo_beta": -1.6468366384506226,
"epoch": 0.3401039206424185,
"grad_norm": 13.845514282811145,
"learning_rate": 4.997321195347154e-06,
"logits": -1.8914529085159302,
"logps": -90.59642028808594,
"loss": 0.5756,
"objective": 0.5553872585296631,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5791666507720947,
"regularize": 0.5553872585296631,
"step": 60
},
{
"dpo_loss": 0.5302771329879761,
"dpo_wo_beta": -1.3166770935058594,
"epoch": 0.3684459140292867,
"grad_norm": 10.846857687148022,
"learning_rate": 4.992130320438411e-06,
"logits": -1.8399535417556763,
"logps": -86.60197448730469,
"loss": 0.5586,
"objective": 0.5302771329879761,
"ranking_idealized": 0.5708333253860474,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.6208333373069763,
"regularize": 0.5302771329879761,
"step": 65
},
{
"dpo_loss": 0.5711485743522644,
"dpo_wo_beta": -1.7437169551849365,
"epoch": 0.39678790741615494,
"grad_norm": 13.787840238803502,
"learning_rate": 4.984214337613357e-06,
"logits": -1.8178967237472534,
"logps": -91.10688781738281,
"loss": 0.5701,
"objective": 0.5711485743522644,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5874999761581421,
"regularize": 0.5711485743522644,
"step": 70
},
{
"dpo_loss": 0.523643434047699,
"dpo_wo_beta": -1.669514536857605,
"epoch": 0.42512990080302315,
"grad_norm": 13.192298437287352,
"learning_rate": 4.97358190288299e-06,
"logits": -1.8182169198989868,
"logps": -94.8000717163086,
"loss": 0.5205,
"objective": 0.523643434047699,
"ranking_idealized": 0.625,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.6583333611488342,
"regularize": 0.523643434047699,
"step": 75
},
{
"dpo_loss": 0.51079261302948,
"dpo_wo_beta": -1.7271808385849,
"epoch": 0.45347189418989137,
"grad_norm": 15.151373786996814,
"learning_rate": 4.9602446426585845e-06,
"logits": -1.8920824527740479,
"logps": -93.58238220214844,
"loss": 0.5285,
"objective": 0.51079261302948,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.6458333134651184,
"regularize": 0.51079261302948,
"step": 80
},
{
"dpo_loss": 0.5066012144088745,
"dpo_wo_beta": -1.5956443548202515,
"epoch": 0.4818138875767596,
"grad_norm": 12.328960275584794,
"learning_rate": 4.944217141038379e-06,
"logits": -1.8741406202316284,
"logps": -87.06742858886719,
"loss": 0.5202,
"objective": 0.5066012144088745,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.6208333373069763,
"regularize": 0.5066012144088745,
"step": 85
},
{
"dpo_loss": 0.5358369946479797,
"dpo_wo_beta": -1.9357556104660034,
"epoch": 0.5101558809636277,
"grad_norm": 12.694483590051824,
"learning_rate": 4.925516923860083e-06,
"logits": -1.7968534231185913,
"logps": -86.77802276611328,
"loss": 0.4858,
"objective": 0.5358369946479797,
"ranking_idealized": 0.5458333492279053,
"ranking_idealized_expo": 0.4749999940395355,
"ranking_simple": 0.5874999761581421,
"regularize": 0.5358369946479797,
"step": 90
},
{
"dpo_loss": 0.4783257842063904,
"dpo_wo_beta": -1.9098786115646362,
"epoch": 0.538497874350496,
"grad_norm": 14.474706973531484,
"learning_rate": 4.904164439536626e-06,
"logits": -1.8568389415740967,
"logps": -88.12813568115234,
"loss": 0.4865,
"objective": 0.4783257842063904,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.6916666626930237,
"regularize": 0.4783257842063904,
"step": 95
},
{
"dpo_loss": 0.4654810130596161,
"dpo_wo_beta": -1.9254087209701538,
"epoch": 0.5668398677373642,
"grad_norm": 13.577084707122001,
"learning_rate": 4.880183036696123e-06,
"logits": -1.938937783241272,
"logps": -92.29436492919922,
"loss": 0.5016,
"objective": 0.4654810130596161,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.6875,
"regularize": 0.4654810130596161,
"step": 100
},
{
"dpo_loss": 0.4374677240848541,
"dpo_wo_beta": -1.4267934560775757,
"epoch": 0.5951818611242324,
"grad_norm": 11.14545328639218,
"learning_rate": 4.853598938650487e-06,
"logits": -1.8158982992172241,
"logps": -90.21449279785156,
"loss": 0.4618,
"objective": 0.4374677240848541,
"ranking_idealized": 0.625,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.6666666865348816,
"regularize": 0.4374677240848541,
"step": 105
},
{
"epoch": 0.600850259801606,
"eval_dpo_loss": 0.6936022639274597,
"eval_dpo_wo_beta": -2.462427854537964,
"eval_logits": -1.9007418155670166,
"eval_logps": -94.35714721679688,
"eval_loss": 0.6912521123886108,
"eval_objective": 0.6936022639274597,
"eval_ranking_idealized": 0.5888429880142212,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5351239442825317,
"eval_regularize": 0.6936022639274597,
"eval_runtime": 210.2297,
"eval_samples_per_second": 27.541,
"eval_steps_per_second": 1.151,
"step": 106
},
{
"dpo_loss": 0.47933149337768555,
"dpo_wo_beta": -1.9683055877685547,
"epoch": 0.6235238545111006,
"grad_norm": 12.39392340166307,
"learning_rate": 4.824441214720629e-06,
"logits": -1.9334439039230347,
"logps": -87.35523223876953,
"loss": 0.4633,
"objective": 0.47933149337768555,
"ranking_idealized": 0.5708333253860474,
"ranking_idealized_expo": 0.4541666805744171,
"ranking_simple": 0.6625000238418579,
"regularize": 0.47933149337768555,
"step": 110
},
{
"dpo_loss": 0.4749464690685272,
"dpo_wo_beta": -1.7375919818878174,
"epoch": 0.6518658478979689,
"grad_norm": 12.612865651893962,
"learning_rate": 4.7927417484495756e-06,
"logits": -1.9057692289352417,
"logps": -87.68991088867188,
"loss": 0.4712,
"objective": 0.4749464690685272,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.6333333253860474,
"regularize": 0.4749464690685272,
"step": 115
},
{
"dpo_loss": 0.4848935306072235,
"dpo_wo_beta": -1.9273093938827515,
"epoch": 0.680207841284837,
"grad_norm": 13.836239066838136,
"learning_rate": 4.758535202738287e-06,
"logits": -1.8775906562805176,
"logps": -87.8878173828125,
"loss": 0.4641,
"objective": 0.4848935306072235,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.6625000238418579,
"regularize": 0.4848935306072235,
"step": 120
},
{
"dpo_loss": 0.4785127639770508,
"dpo_wo_beta": -1.814666748046875,
"epoch": 0.7085498346717053,
"grad_norm": 12.105170057238437,
"learning_rate": 4.721858981942284e-06,
"logits": -1.8346068859100342,
"logps": -86.40522766113281,
"loss": 0.4801,
"objective": 0.4785127639770508,
"ranking_idealized": 0.625,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.6875,
"regularize": 0.4785127639770508,
"step": 125
},
{
"dpo_loss": 0.4548089802265167,
"dpo_wo_beta": -1.4164987802505493,
"epoch": 0.7368918280585735,
"grad_norm": 11.895980627109102,
"learning_rate": 4.682753190970533e-06,
"logits": -1.9488608837127686,
"logps": -79.42195129394531,
"loss": 0.4538,
"objective": 0.4548089802265167,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.44999998807907104,
"ranking_simple": 0.6291666626930237,
"regularize": 0.4548089802265167,
"step": 130
},
{
"dpo_loss": 0.49760884046554565,
"dpo_wo_beta": -1.994195818901062,
"epoch": 0.7652338214454416,
"grad_norm": 12.298776298341995,
"learning_rate": 4.641260591431315e-06,
"logits": -1.9813282489776611,
"logps": -82.40634155273438,
"loss": 0.4433,
"objective": 0.49760884046554565,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.6583333611488342,
"regularize": 0.49760884046554565,
"step": 135
},
{
"dpo_loss": 0.41459351778030396,
"dpo_wo_beta": -1.187635064125061,
"epoch": 0.7935758148323099,
"grad_norm": 12.618720178096575,
"learning_rate": 4.597426554873037e-06,
"logits": -1.97609281539917,
"logps": -83.44467163085938,
"loss": 0.4236,
"objective": 0.41459351778030396,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.6791666746139526,
"regularize": 0.41459351778030396,
"step": 140
},
{
"dpo_loss": 0.4073801636695862,
"dpo_wo_beta": -1.311059832572937,
"epoch": 0.821917808219178,
"grad_norm": 14.417917904409194,
"learning_rate": 4.551299013171111e-06,
"logits": -2.0718839168548584,
"logps": -84.2674560546875,
"loss": 0.4215,
"objective": 0.4073801636695862,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.6916666626930237,
"regularize": 0.4073801636695862,
"step": 145
},
{
"dpo_loss": 0.4207518398761749,
"dpo_wo_beta": -1.50857675075531,
"epoch": 0.8502598016060463,
"grad_norm": 11.543599868064442,
"learning_rate": 4.502928406115152e-06,
"logits": -2.0730583667755127,
"logps": -82.68958282470703,
"loss": 0.4276,
"objective": 0.4207518398761749,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.7208333611488342,
"regularize": 0.4207518398761749,
"step": 150
},
{
"dpo_loss": 0.3847941756248474,
"dpo_wo_beta": -1.4449684619903564,
"epoch": 0.8786017949929145,
"grad_norm": 12.08771803065001,
"learning_rate": 4.452367626253805e-06,
"logits": -2.0991933345794678,
"logps": -85.211181640625,
"loss": 0.3986,
"objective": 0.3847941756248474,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.7250000238418579,
"regularize": 0.3847941756248474,
"step": 155
},
{
"epoch": 0.9012753897024091,
"eval_dpo_loss": 0.7214789390563965,
"eval_dpo_wo_beta": -3.1229145526885986,
"eval_logits": -2.1450352668762207,
"eval_logps": -95.60012817382812,
"eval_loss": 0.7013870477676392,
"eval_objective": 0.7214789390563965,
"eval_ranking_idealized": 0.5888429880142212,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5351239442825317,
"eval_regularize": 0.7214789390563965,
"eval_runtime": 210.3593,
"eval_samples_per_second": 27.524,
"eval_steps_per_second": 1.15,
"step": 159
},
{
"dpo_loss": 0.4162478744983673,
"dpo_wo_beta": -1.6461573839187622,
"epoch": 0.9069437883797827,
"grad_norm": 12.82345397067452,
"learning_rate": 4.399671961057523e-06,
"logits": -2.0759384632110596,
"logps": -89.25846862792969,
"loss": 0.4236,
"objective": 0.4162478744983673,
"ranking_idealized": 0.5708333253860474,
"ranking_idealized_expo": 0.49166667461395264,
"ranking_simple": 0.699999988079071,
"regularize": 0.4162478744983673,
"step": 160
},
{
"dpo_loss": 0.41358453035354614,
"dpo_wo_beta": -1.648630976676941,
"epoch": 0.9352857817666509,
"grad_norm": 12.860537676624453,
"learning_rate": 4.3448990324625244e-06,
"logits": -2.024477481842041,
"logps": -88.03329467773438,
"loss": 0.4026,
"objective": 0.41358453035354614,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.7333333492279053,
"regularize": 0.41358453035354614,
"step": 165
},
{
"dpo_loss": 0.378000408411026,
"dpo_wo_beta": -1.2966532707214355,
"epoch": 0.9636277751535192,
"grad_norm": 11.533711130228069,
"learning_rate": 4.288108733862064e-06,
"logits": -2.042527437210083,
"logps": -90.26854705810547,
"loss": 0.3925,
"objective": 0.378000408411026,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.7166666388511658,
"regularize": 0.378000408411026,
"step": 170
},
{
"dpo_loss": 0.3764660954475403,
"dpo_wo_beta": -1.3978971242904663,
"epoch": 0.9919697685403873,
"grad_norm": 12.165192869157089,
"learning_rate": 4.229363164613874e-06,
"logits": -2.0610477924346924,
"logps": -89.8354721069336,
"loss": 0.3793,
"objective": 0.3764660954475403,
"ranking_idealized": 0.6541666388511658,
"ranking_idealized_expo": 0.574999988079071,
"ranking_simple": 0.7916666865348816,
"regularize": 0.3764660954475403,
"step": 175
},
{
"dpo_loss": 0.27626773715019226,
"dpo_wo_beta": -0.8504549860954285,
"epoch": 1.0203117619272555,
"grad_norm": 10.141692447282386,
"learning_rate": 4.168726562135432e-06,
"logits": -2.2514243125915527,
"logps": -90.8476333618164,
"loss": 0.2852,
"objective": 0.27626773715019226,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.8083333373069763,
"regularize": 0.27626773715019226,
"step": 180
},
{
"dpo_loss": 0.23696589469909668,
"dpo_wo_beta": -0.6947117447853088,
"epoch": 1.0486537553141237,
"grad_norm": 13.78702272812957,
"learning_rate": 4.106265231661292e-06,
"logits": -2.158977746963501,
"logps": -95.00120544433594,
"loss": 0.2429,
"objective": 0.23696589469909668,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.49166667461395264,
"ranking_simple": 0.8083333373069763,
"regularize": 0.23696589469909668,
"step": 185
},
{
"dpo_loss": 0.26388806104660034,
"dpo_wo_beta": -0.9112051725387573,
"epoch": 1.076995748700992,
"grad_norm": 14.740228375586371,
"learning_rate": 4.042047473739278e-06,
"logits": -2.1533920764923096,
"logps": -101.71949768066406,
"loss": 0.2517,
"objective": 0.26388806104660034,
"ranking_idealized": 0.625,
"ranking_idealized_expo": 0.5541666746139526,
"ranking_simple": 0.8416666388511658,
"regularize": 0.26388806104660034,
"step": 190
},
{
"dpo_loss": 0.2244579941034317,
"dpo_wo_beta": -0.6430780291557312,
"epoch": 1.10533774208786,
"grad_norm": 10.169064121599527,
"learning_rate": 3.976143509544843e-06,
"logits": -2.1589295864105225,
"logps": -96.5248031616211,
"loss": 0.2467,
"objective": 0.2244579941034317,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.8083333373069763,
"regularize": 0.2244579941034317,
"step": 195
},
{
"dpo_loss": 0.24179764091968536,
"dpo_wo_beta": -0.6332272291183472,
"epoch": 1.1336797354747283,
"grad_norm": 9.444774343787891,
"learning_rate": 3.908625404095242e-06,
"logits": -2.2753493785858154,
"logps": -91.93312072753906,
"loss": 0.2563,
"objective": 0.24179764091968536,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.8458333611488342,
"regularize": 0.24179764091968536,
"step": 200
},
{
"dpo_loss": 0.25683078169822693,
"dpo_wo_beta": -0.8531176447868347,
"epoch": 1.1620217288615966,
"grad_norm": 9.240319326762517,
"learning_rate": 3.839566987447492e-06,
"logits": -2.2432618141174316,
"logps": -91.3159408569336,
"loss": 0.2584,
"objective": 0.25683078169822693,
"ranking_idealized": 0.5708333253860474,
"ranking_idealized_expo": 0.49166667461395264,
"ranking_simple": 0.8166666626930237,
"regularize": 0.25683078169822693,
"step": 205
},
{
"dpo_loss": 0.24292893707752228,
"dpo_wo_beta": -0.8205318450927734,
"epoch": 1.1903637222484649,
"grad_norm": 9.283856100785183,
"learning_rate": 3.7690437739662928e-06,
"logits": -2.2361652851104736,
"logps": -90.6613998413086,
"loss": 0.2551,
"objective": 0.24292893707752228,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.800000011920929,
"regularize": 0.24292893707752228,
"step": 210
},
{
"epoch": 1.201700519603212,
"eval_dpo_loss": 0.7525234222412109,
"eval_dpo_wo_beta": -3.7749528884887695,
"eval_logits": -2.267778158187866,
"eval_logps": -98.14269256591797,
"eval_loss": 0.7350714206695557,
"eval_objective": 0.7525234222412109,
"eval_ranking_idealized": 0.5888429880142212,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5371900796890259,
"eval_regularize": 0.7525234222412109,
"eval_runtime": 210.8898,
"eval_samples_per_second": 27.455,
"eval_steps_per_second": 1.148,
"step": 212
},
{
"dpo_loss": 0.289533793926239,
"dpo_wo_beta": -0.8810125589370728,
"epoch": 1.2187057156353331,
"grad_norm": 10.72372972136692,
"learning_rate": 3.697132879750174e-06,
"logits": -2.1757090091705322,
"logps": -93.64250183105469,
"loss": 0.2578,
"objective": 0.289533793926239,
"ranking_idealized": 0.5583333373069763,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.7875000238418579,
"regularize": 0.289533793926239,
"step": 215
},
{
"dpo_loss": 0.25134381651878357,
"dpo_wo_beta": -0.8703542947769165,
"epoch": 1.2470477090222012,
"grad_norm": 12.940604838816247,
"learning_rate": 3.6239129383061764e-06,
"logits": -2.121750593185425,
"logps": -94.44015502929688,
"loss": 0.2676,
"objective": 0.25134381651878357,
"ranking_idealized": 0.6541666388511658,
"ranking_idealized_expo": 0.6041666865348816,
"ranking_simple": 0.8208333253860474,
"regularize": 0.25134381651878357,
"step": 220
},
{
"dpo_loss": 0.23937886953353882,
"dpo_wo_beta": -0.7396827936172485,
"epoch": 1.2753897024090695,
"grad_norm": 9.645711793319885,
"learning_rate": 3.5494640145652647e-06,
"logits": -2.0901684761047363,
"logps": -94.10260772705078,
"loss": 0.2637,
"objective": 0.23937886953353882,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.8541666865348816,
"regularize": 0.23937886953353882,
"step": 225
},
{
"dpo_loss": 0.2818019688129425,
"dpo_wo_beta": -1.1170729398727417,
"epoch": 1.3037316957959377,
"grad_norm": 8.80210598601974,
"learning_rate": 3.4738675173325008e-06,
"logits": -1.9860222339630127,
"logps": -92.9978256225586,
"loss": 0.2776,
"objective": 0.2818019688129425,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.7749999761581421,
"regularize": 0.2818019688129425,
"step": 230
},
{
"dpo_loss": 0.22621506452560425,
"dpo_wo_beta": -0.35843732953071594,
"epoch": 1.3320736891828058,
"grad_norm": 9.267612473930496,
"learning_rate": 3.397206110267713e-06,
"logits": -2.1131467819213867,
"logps": -87.49403381347656,
"loss": 0.2618,
"objective": 0.22621506452560425,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.8333333134651184,
"regularize": 0.22621506452560425,
"step": 235
},
{
"dpo_loss": 0.23632274568080902,
"dpo_wo_beta": -0.6697984933853149,
"epoch": 1.360415682569674,
"grad_norm": 10.68594080832048,
"learning_rate": 3.3195636214939943e-06,
"logits": -2.130047559738159,
"logps": -91.7619857788086,
"loss": 0.2584,
"objective": 0.23632274568080902,
"ranking_idealized": 0.5541666746139526,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.8125,
"regularize": 0.23632274568080902,
"step": 240
},
{
"dpo_loss": 0.2982023358345032,
"dpo_wo_beta": -1.1124054193496704,
"epoch": 1.3887576759565423,
"grad_norm": 10.330360151122868,
"learning_rate": 3.2410249519328848e-06,
"logits": -2.1718757152557373,
"logps": -93.45353698730469,
"loss": 0.2692,
"objective": 0.2982023358345032,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.8041666746139526,
"regularize": 0.2982023358345032,
"step": 245
},
{
"dpo_loss": 0.2403133064508438,
"dpo_wo_beta": -0.7000442147254944,
"epoch": 1.4170996693434104,
"grad_norm": 13.026578288520353,
"learning_rate": 3.1616759824664543e-06,
"logits": -2.145325183868408,
"logps": -94.18195343017578,
"loss": 0.269,
"objective": 0.2403133064508438,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.47083333134651184,
"ranking_simple": 0.824999988079071,
"regularize": 0.2403133064508438,
"step": 250
},
{
"dpo_loss": 0.23977436125278473,
"dpo_wo_beta": -0.5784927010536194,
"epoch": 1.4454416627302786,
"grad_norm": 10.959901566104394,
"learning_rate": 3.081603480027826e-06,
"logits": -2.108074426651001,
"logps": -94.5383529663086,
"loss": 0.2625,
"objective": 0.23977436125278473,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.8374999761581421,
"regularize": 0.23977436125278473,
"step": 255
},
{
"dpo_loss": 0.25297579169273376,
"dpo_wo_beta": -0.7996426820755005,
"epoch": 1.473783656117147,
"grad_norm": 9.578050078679867,
"learning_rate": 3.0008950027228035e-06,
"logits": -2.1828908920288086,
"logps": -92.77781677246094,
"loss": 0.232,
"objective": 0.25297579169273376,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.8041666746139526,
"regularize": 0.25297579169273376,
"step": 260
},
{
"dpo_loss": 0.2830916941165924,
"dpo_wo_beta": -1.124144434928894,
"epoch": 1.5021256495040152,
"grad_norm": 11.27765707111355,
"learning_rate": 2.9196388040863695e-06,
"logits": -2.1150081157684326,
"logps": -95.04662322998047,
"loss": 0.2623,
"objective": 0.2830916941165924,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.8041666746139526,
"regularize": 0.2830916941165924,
"step": 265
},
{
"epoch": 1.5021256495040152,
"eval_dpo_loss": 0.7739136815071106,
"eval_dpo_wo_beta": -4.163427829742432,
"eval_logits": -2.1478331089019775,
"eval_logps": -100.8313217163086,
"eval_loss": 0.7400166392326355,
"eval_objective": 0.7739136815071106,
"eval_ranking_idealized": 0.5888429880142212,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.53925621509552,
"eval_regularize": 0.7739136815071106,
"eval_runtime": 210.8657,
"eval_samples_per_second": 27.458,
"eval_steps_per_second": 1.148,
"step": 265
},
{
"dpo_loss": 0.2930367887020111,
"dpo_wo_beta": -1.3651045560836792,
"epoch": 1.5304676428908834,
"grad_norm": 11.715215816813723,
"learning_rate": 2.8379237365787426e-06,
"logits": -2.035703182220459,
"logps": -97.7331771850586,
"loss": 0.253,
"objective": 0.2930367887020111,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.7916666865348816,
"regularize": 0.2930367887020111,
"step": 270
},
{
"dpo_loss": 0.24886849522590637,
"dpo_wo_beta": -0.8069366216659546,
"epoch": 1.5588096362777515,
"grad_norm": 8.958944325794365,
"learning_rate": 2.7558391544265127e-06,
"logits": -1.9700883626937866,
"logps": -97.53855895996094,
"loss": 0.2491,
"objective": 0.24886849522590637,
"ranking_idealized": 0.6208333373069763,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.8125,
"regularize": 0.24886849522590637,
"step": 275
},
{
"dpo_loss": 0.22936613857746124,
"dpo_wo_beta": -0.6120084524154663,
"epoch": 1.5871516296646198,
"grad_norm": 10.814739938498821,
"learning_rate": 2.6734748159151104e-06,
"logits": -1.9118597507476807,
"logps": -98.06639099121094,
"loss": 0.2491,
"objective": 0.22936613857746124,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.8374999761581421,
"regularize": 0.22936613857746124,
"step": 280
},
{
"dpo_loss": 0.22401383519172668,
"dpo_wo_beta": -0.5180224776268005,
"epoch": 1.615493623051488,
"grad_norm": 11.270657822712987,
"learning_rate": 2.5909207852394363e-06,
"logits": -1.9585484266281128,
"logps": -100.70836639404297,
"loss": 0.2348,
"objective": 0.22401383519172668,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.8291666507720947,
"regularize": 0.22401383519172668,
"step": 285
},
{
"dpo_loss": 0.2646006941795349,
"dpo_wo_beta": -0.7763135433197021,
"epoch": 1.643835616438356,
"grad_norm": 10.585292794409252,
"learning_rate": 2.508267334019988e-06,
"logits": -1.9566444158554077,
"logps": -97.0122299194336,
"loss": 0.2532,
"objective": 0.2646006941795349,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.5625,
"ranking_simple": 0.8041666746139526,
"regularize": 0.2646006941795349,
"step": 290
},
{
"dpo_loss": 0.23113909363746643,
"dpo_wo_beta": -0.6497251987457275,
"epoch": 1.6721776098252243,
"grad_norm": 11.90240881956814,
"learning_rate": 2.4256048425921693e-06,
"logits": -1.8574607372283936,
"logps": -94.91531372070312,
"loss": 0.2476,
"objective": 0.23113909363746643,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.4749999940395355,
"ranking_simple": 0.8333333134651184,
"regularize": 0.23113909363746643,
"step": 295
},
{
"dpo_loss": 0.22116926312446594,
"dpo_wo_beta": -0.6268281936645508,
"epoch": 1.7005196032120926,
"grad_norm": 11.745161783871675,
"learning_rate": 2.3430237011767166e-06,
"logits": -1.895004153251648,
"logps": -97.79885864257812,
"loss": 0.2266,
"objective": 0.22116926312446594,
"ranking_idealized": 0.6583333611488342,
"ranking_idealized_expo": 0.574999988079071,
"ranking_simple": 0.8333333134651184,
"regularize": 0.22116926312446594,
"step": 300
},
{
"dpo_loss": 0.24756571650505066,
"dpo_wo_beta": -0.9131773114204407,
"epoch": 1.7288615965989607,
"grad_norm": 12.299641904512029,
"learning_rate": 2.2606142110393248e-06,
"logits": -1.8061485290527344,
"logps": -96.69060516357422,
"loss": 0.2379,
"objective": 0.24756571650505066,
"ranking_idealized": 0.5625,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.8291666507720947,
"regularize": 0.24756571650505066,
"step": 305
},
{
"dpo_loss": 0.2321903556585312,
"dpo_wo_beta": -0.6867564916610718,
"epoch": 1.7572035899858292,
"grad_norm": 13.489735935272718,
"learning_rate": 2.1784664857475356e-06,
"logits": -1.8388514518737793,
"logps": -95.04447937011719,
"loss": 0.2456,
"objective": 0.2321903556585312,
"ranking_idealized": 0.6083333492279053,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.824999988079071,
"regularize": 0.2321903556585312,
"step": 310
},
{
"dpo_loss": 0.2901044189929962,
"dpo_wo_beta": -1.1286156177520752,
"epoch": 1.7855455833726972,
"grad_norm": 10.887596324980125,
"learning_rate": 2.096670352632873e-06,
"logits": -1.75984525680542,
"logps": -94.63612365722656,
"loss": 0.2571,
"objective": 0.2901044189929962,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.8083333373069763,
"regularize": 0.2901044189929962,
"step": 315
},
{
"epoch": 1.8025507794048181,
"eval_dpo_loss": 0.7664583325386047,
"eval_dpo_wo_beta": -4.09501838684082,
"eval_logits": -1.9888346195220947,
"eval_logps": -102.3712158203125,
"eval_loss": 0.7400712966918945,
"eval_objective": 0.7664583325386047,
"eval_ranking_idealized": 0.5888429880142212,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.53925621509552,
"eval_regularize": 0.7664583325386047,
"eval_runtime": 210.274,
"eval_samples_per_second": 27.535,
"eval_steps_per_second": 1.151,
"step": 318
},
{
"dpo_loss": 0.2219768464565277,
"dpo_wo_beta": -0.47742757201194763,
"epoch": 1.8138875767595655,
"grad_norm": 11.029480506309918,
"learning_rate": 2.01531525456598e-06,
"logits": -1.9175788164138794,
"logps": -99.74655151367188,
"loss": 0.2404,
"objective": 0.2219768464565277,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.824999988079071,
"regularize": 0.2219768464565277,
"step": 320
},
{
"dpo_loss": 0.24908211827278137,
"dpo_wo_beta": -0.8014059066772461,
"epoch": 1.8422295701464337,
"grad_norm": 12.92850322071669,
"learning_rate": 1.93449015215215e-06,
"logits": -2.0084919929504395,
"logps": -101.09780883789062,
"loss": 0.2586,
"objective": 0.24908211827278137,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.8333333134651184,
"regularize": 0.24908211827278137,
"step": 325
},
{
"dpo_loss": 0.1984507441520691,
"dpo_wo_beta": -0.3766098618507385,
"epoch": 1.8705715635333018,
"grad_norm": 10.415606016359964,
"learning_rate": 1.8542834264542091e-06,
"logits": -1.851909875869751,
"logps": -94.5366439819336,
"loss": 0.2496,
"objective": 0.1984507441520691,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.8291666507720947,
"regularize": 0.1984507441520691,
"step": 330
},
{
"dpo_loss": 0.26707762479782104,
"dpo_wo_beta": -0.9339324831962585,
"epoch": 1.89891355692017,
"grad_norm": 10.078352873471246,
"learning_rate": 1.7747827823491253e-06,
"logits": -1.9827288389205933,
"logps": -94.26249694824219,
"loss": 0.2463,
"objective": 0.26707762479782104,
"ranking_idealized": 0.5958333611488342,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.8125,
"regularize": 0.26707762479782104,
"step": 335
},
{
"dpo_loss": 0.2447831928730011,
"dpo_wo_beta": -0.7387041449546814,
"epoch": 1.9272555503070383,
"grad_norm": 10.88136655004607,
"learning_rate": 1.6960751526240122e-06,
"logits": -1.9671465158462524,
"logps": -98.63937377929688,
"loss": 0.2399,
"objective": 0.2447831928730011,
"ranking_idealized": 0.6416666507720947,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.8583333492279053,
"regularize": 0.2447831928730011,
"step": 340
},
{
"dpo_loss": 0.2123527079820633,
"dpo_wo_beta": -0.5544185638427734,
"epoch": 1.9555975436939064,
"grad_norm": 11.18260747105762,
"learning_rate": 1.6182466029163974e-06,
"logits": -1.9572845697402954,
"logps": -100.18721008300781,
"loss": 0.2211,
"objective": 0.2123527079820633,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.8208333253860474,
"regularize": 0.2123527079820633,
"step": 345
},
{
"dpo_loss": 0.2570362389087677,
"dpo_wo_beta": -0.7474013566970825,
"epoch": 1.9839395370807746,
"grad_norm": 11.061918116138507,
"learning_rate": 1.541382237602721e-06,
"logits": -1.8960832357406616,
"logps": -101.65901947021484,
"loss": 0.2316,
"objective": 0.2570362389087677,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.7916666865348816,
"regularize": 0.2570362389087677,
"step": 350
},
{
"dpo_loss": 0.19961656630039215,
"dpo_wo_beta": -0.5642960667610168,
"epoch": 2.012281530467643,
"grad_norm": 7.569515164252156,
"learning_rate": 1.465566106737942e-06,
"logits": -1.8380100727081299,
"logps": -102.71571350097656,
"loss": 0.2103,
"objective": 0.19961656630039215,
"ranking_idealized": 0.5958333611488342,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.8374999761581421,
"regularize": 0.19961656630039215,
"step": 355
},
{
"dpo_loss": 0.11018560826778412,
"dpo_wo_beta": -0.12253165245056152,
"epoch": 2.040623523854511,
"grad_norm": 6.632276986432463,
"learning_rate": 1.3908811141480408e-06,
"logits": -1.867693543434143,
"logps": -103.06665802001953,
"loss": 0.118,
"objective": 0.11018560826778412,
"ranking_idealized": 0.6625000238418579,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.9041666388511658,
"regularize": 0.11018560826778412,
"step": 360
},
{
"dpo_loss": 0.12077057361602783,
"dpo_wo_beta": -0.197490856051445,
"epoch": 2.0689655172413794,
"grad_norm": 10.213186193965676,
"learning_rate": 1.3174089267758983e-06,
"logits": -1.8255099058151245,
"logps": -110.3724136352539,
"loss": 0.118,
"objective": 0.12077057361602783,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.8666666746139526,
"regularize": 0.12077057361602783,
"step": 365
},
{
"dpo_loss": 0.1337815225124359,
"dpo_wo_beta": -0.27523547410964966,
"epoch": 2.0973075106282475,
"grad_norm": 9.926730675582434,
"learning_rate": 1.245229885379699e-06,
"logits": -1.7588540315628052,
"logps": -111.99506378173828,
"loss": 0.1227,
"objective": 0.1337815225124359,
"ranking_idealized": 0.5708333253860474,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.8916666507720947,
"regularize": 0.1337815225124359,
"step": 370
},
{
"epoch": 2.1029759093056213,
"eval_dpo_loss": 0.9223728179931641,
"eval_dpo_wo_beta": -6.4510064125061035,
"eval_logits": -1.8644566535949707,
"eval_logps": -122.00161743164062,
"eval_loss": 0.8844180107116699,
"eval_objective": 0.9223728179931641,
"eval_ranking_idealized": 0.5888429880142212,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5423553586006165,
"eval_regularize": 0.9223728179931641,
"eval_runtime": 210.7356,
"eval_samples_per_second": 27.475,
"eval_steps_per_second": 1.148,
"step": 371
},
{
"dpo_loss": 0.10664375871419907,
"dpo_wo_beta": -0.2532973289489746,
"epoch": 2.1256495040151155,
"grad_norm": 9.740007111179482,
"learning_rate": 1.1744229166814889e-06,
"logits": -1.696647047996521,
"logps": -118.39366149902344,
"loss": 0.1103,
"objective": 0.10664375871419907,
"ranking_idealized": 0.675000011920929,
"ranking_idealized_expo": 0.5916666388511658,
"ranking_simple": 0.925000011920929,
"regularize": 0.10664375871419907,
"step": 375
},
{
"dpo_loss": 0.12854978442192078,
"dpo_wo_beta": -0.27664583921432495,
"epoch": 2.153991497401984,
"grad_norm": 9.699256456859702,
"learning_rate": 1.1050654470619602e-06,
"logits": -1.700494647026062,
"logps": -114.1063232421875,
"loss": 0.1208,
"objective": 0.12854978442192078,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.8999999761581421,
"regularize": 0.12854978442192078,
"step": 380
},
{
"dpo_loss": 0.10418140888214111,
"dpo_wo_beta": -0.09889766573905945,
"epoch": 2.182333490788852,
"grad_norm": 9.620361843085416,
"learning_rate": 1.0372333178958462e-06,
"logits": -1.8633235692977905,
"logps": -110.55794525146484,
"loss": 0.1244,
"objective": 0.10418140888214111,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.9125000238418579,
"regularize": 0.10418140888214111,
"step": 385
},
{
"dpo_loss": 0.12462247163057327,
"dpo_wo_beta": -0.2658768594264984,
"epoch": 2.21067548417572,
"grad_norm": 11.000881222201947,
"learning_rate": 9.710007026204896e-07,
"logits": -1.7877620458602905,
"logps": -112.08268737792969,
"loss": 0.1204,
"objective": 0.12462247163057327,
"ranking_idealized": 0.5249999761581421,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.8833333253860474,
"regularize": 0.12462247163057327,
"step": 390
},
{
"dpo_loss": 0.11183874309062958,
"dpo_wo_beta": -0.3540593981742859,
"epoch": 2.2390174775625886,
"grad_norm": 8.717110295390793,
"learning_rate": 9.064400256282757e-07,
"logits": -1.8010636568069458,
"logps": -110.48490142822266,
"loss": 0.1248,
"objective": 0.11183874309062958,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.9041666388511658,
"regularize": 0.11183874309062958,
"step": 395
},
{
"dpo_loss": 0.12893003225326538,
"dpo_wo_beta": -0.3680768311023712,
"epoch": 2.2673594709494567,
"grad_norm": 9.562073048936949,
"learning_rate": 8.436218830716259e-07,
"logits": -1.8909595012664795,
"logps": -111.70219421386719,
"loss": 0.1193,
"objective": 0.12893003225326538,
"ranking_idealized": 0.6625000238418579,
"ranking_idealized_expo": 0.5708333253860474,
"ranking_simple": 0.9166666865348816,
"regularize": 0.12893003225326538,
"step": 400
},
{
"dpo_loss": 0.13196416199207306,
"dpo_wo_beta": -0.17852090299129486,
"epoch": 2.295701464336325,
"grad_norm": 9.166021194752298,
"learning_rate": 7.826149656671386e-07,
"logits": -1.9320632219314575,
"logps": -108.1246566772461,
"loss": 0.1267,
"objective": 0.13196416199207306,
"ranking_idealized": 0.6416666507720947,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.8958333134651184,
"regularize": 0.13196416199207306,
"step": 405
},
{
"dpo_loss": 0.11071384698152542,
"dpo_wo_beta": -0.1424117088317871,
"epoch": 2.324043457723193,
"grad_norm": 8.918983804471582,
"learning_rate": 7.234859835833022e-07,
"logits": -1.8304682970046997,
"logps": -111.2301025390625,
"loss": 0.112,
"objective": 0.11071384698152542,
"ranking_idealized": 0.5708333253860474,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.9041666388511658,
"regularize": 0.11071384698152542,
"step": 410
},
{
"dpo_loss": 0.1223960742354393,
"dpo_wo_beta": -0.1956464648246765,
"epoch": 2.3523854511100613,
"grad_norm": 9.386393866562546,
"learning_rate": 6.662995934939007e-07,
"logits": -1.8708041906356812,
"logps": -111.06449890136719,
"loss": 0.1155,
"objective": 0.1223960742354393,
"ranking_idealized": 0.5958333611488342,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.9083333611488342,
"regularize": 0.1223960742354393,
"step": 415
},
{
"dpo_loss": 0.12930770218372345,
"dpo_wo_beta": -0.21560731530189514,
"epoch": 2.3807274444969297,
"grad_norm": 11.0131183307354,
"learning_rate": 6.111183278768956e-07,
"logits": -1.860797643661499,
"logps": -113.08780670166016,
"loss": 0.133,
"objective": 0.12930770218372345,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.4749999940395355,
"ranking_simple": 0.9208333492279053,
"regularize": 0.12930770218372345,
"step": 420
},
{
"epoch": 2.403401039206424,
"eval_dpo_loss": 0.8785684108734131,
"eval_dpo_wo_beta": -5.887755870819092,
"eval_logits": -2.0276894569396973,
"eval_logps": -117.1216812133789,
"eval_loss": 0.8447906374931335,
"eval_objective": 0.8785684108734131,
"eval_ranking_idealized": 0.5888429880142212,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5413222908973694,
"eval_regularize": 0.8785684108734131,
"eval_runtime": 209.8564,
"eval_samples_per_second": 27.59,
"eval_steps_per_second": 1.153,
"step": 424
},
{
"dpo_loss": 0.117975153028965,
"dpo_wo_beta": -0.1884605884552002,
"epoch": 2.413793103448276,
"grad_norm": 11.036168833651558,
"learning_rate": 5.580025266360764e-07,
"logits": -1.7822004556655884,
"logps": -114.43038177490234,
"loss": 0.1465,
"objective": 0.117975153028965,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.5625,
"ranking_simple": 0.9375,
"regularize": 0.117975153028965,
"step": 425
},
{
"dpo_loss": 0.1465020477771759,
"dpo_wo_beta": -0.2595965266227722,
"epoch": 2.442135096835144,
"grad_norm": 10.595070818850646,
"learning_rate": 5.070102711202606e-07,
"logits": -1.8692681789398193,
"logps": -110.2762680053711,
"loss": 0.1276,
"objective": 0.1465020477771759,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.8958333134651184,
"regularize": 0.1465020477771759,
"step": 430
},
{
"dpo_loss": 0.09775053709745407,
"dpo_wo_beta": -0.12755917012691498,
"epoch": 2.4704770902220123,
"grad_norm": 9.393206692367766,
"learning_rate": 4.581973206121948e-07,
"logits": -1.8968538045883179,
"logps": -112.28767395019531,
"loss": 0.1175,
"objective": 0.09775053709745407,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.9083333611488342,
"regularize": 0.09775053709745407,
"step": 435
},
{
"dpo_loss": 0.14228057861328125,
"dpo_wo_beta": -0.3639788329601288,
"epoch": 2.4988190836088804,
"grad_norm": 8.020134663378592,
"learning_rate": 4.116170513565942e-07,
"logits": -1.8666160106658936,
"logps": -109.18843078613281,
"loss": 0.1167,
"objective": 0.14228057861328125,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.875,
"regularize": 0.14228057861328125,
"step": 440
},
{
"dpo_loss": 0.13583588600158691,
"dpo_wo_beta": -0.2074100226163864,
"epoch": 2.527161076995749,
"grad_norm": 9.224367796824264,
"learning_rate": 3.6732039819400686e-07,
"logits": -1.8071045875549316,
"logps": -107.2675552368164,
"loss": 0.1319,
"objective": 0.13583588600158691,
"ranking_idealized": 0.5625,
"ranking_idealized_expo": 0.49166667461395264,
"ranking_simple": 0.8791666626930237,
"regularize": 0.13583588600158691,
"step": 445
},
{
"dpo_loss": 0.17114870250225067,
"dpo_wo_beta": -0.43270742893218994,
"epoch": 2.555503070382617,
"grad_norm": 11.265861710797749,
"learning_rate": 3.253557988643072e-07,
"logits": -1.9256045818328857,
"logps": -111.20384216308594,
"loss": 0.1288,
"objective": 0.17114870250225067,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.862500011920929,
"regularize": 0.17114870250225067,
"step": 450
},
{
"dpo_loss": 0.10827689617872238,
"dpo_wo_beta": -0.1751028150320053,
"epoch": 2.583845063769485,
"grad_norm": 9.605136286662574,
"learning_rate": 2.8576914104074425e-07,
"logits": -1.9289051294326782,
"logps": -109.37706756591797,
"loss": 0.1168,
"objective": 0.10827689617872238,
"ranking_idealized": 0.5958333611488342,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.9125000238418579,
"regularize": 0.10827689617872238,
"step": 455
},
{
"dpo_loss": 0.11124877631664276,
"dpo_wo_beta": -0.28054580092430115,
"epoch": 2.6121870571563535,
"grad_norm": 9.957466667064367,
"learning_rate": 2.486037121524448e-07,
"logits": -1.93342924118042,
"logps": -113.2356948852539,
"loss": 0.1169,
"objective": 0.11124877631664276,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.9375,
"regularize": 0.11124877631664276,
"step": 460
},
{
"dpo_loss": 0.12714476883411407,
"dpo_wo_beta": -0.22146105766296387,
"epoch": 2.6405290505432215,
"grad_norm": 10.04326854921629,
"learning_rate": 2.13900152050239e-07,
"logits": -1.8874350786209106,
"logps": -108.94982147216797,
"loss": 0.1189,
"objective": 0.12714476883411407,
"ranking_idealized": 0.5458333492279053,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.862500011920929,
"regularize": 0.12714476883411407,
"step": 465
},
{
"dpo_loss": 0.12151040881872177,
"dpo_wo_beta": -0.26416900753974915,
"epoch": 2.66887104393009,
"grad_norm": 8.777820527737605,
"learning_rate": 1.8169640856758652e-07,
"logits": -1.9314534664154053,
"logps": -112.75170135498047,
"loss": 0.1254,
"objective": 0.12151040881872177,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.5541666746139526,
"ranking_simple": 0.9083333611488342,
"regularize": 0.12151040881872177,
"step": 470
},
{
"dpo_loss": 0.12749101221561432,
"dpo_wo_beta": -0.2816121280193329,
"epoch": 2.697213037316958,
"grad_norm": 9.221778751171357,
"learning_rate": 1.5202769602517514e-07,
"logits": -1.8307260274887085,
"logps": -109.39693450927734,
"loss": 0.1211,
"objective": 0.12749101221561432,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.8999999761581421,
"regularize": 0.12749101221561432,
"step": 475
},
{
"epoch": 2.708549834671705,
"eval_dpo_loss": 0.8738968372344971,
"eval_dpo_wo_beta": -5.815241813659668,
"eval_logits": -2.0271613597869873,
"eval_logps": -116.42301177978516,
"eval_loss": 0.8371492624282837,
"eval_objective": 0.8738968372344971,
"eval_ranking_idealized": 0.5888429880142212,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5402892827987671,
"eval_regularize": 0.8738968372344971,
"eval_runtime": 211.9437,
"eval_samples_per_second": 27.319,
"eval_steps_per_second": 1.142,
"step": 477
},
{
"dpo_loss": 0.13781045377254486,
"dpo_wo_beta": -0.2485995590686798,
"epoch": 2.725555030703826,
"grad_norm": 10.971551462649595,
"learning_rate": 1.2492645672457838e-07,
"logits": -1.9437103271484375,
"logps": -108.93817901611328,
"loss": 0.1267,
"objective": 0.13781045377254486,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.8999999761581421,
"regularize": 0.13781045377254486,
"step": 480
},
{
"dpo_loss": 0.11082082241773605,
"dpo_wo_beta": -0.10876031965017319,
"epoch": 2.753897024090694,
"grad_norm": 10.884940640535042,
"learning_rate": 1.004223254730749e-07,
"logits": -1.7556992769241333,
"logps": -114.1142807006836,
"loss": 0.1222,
"objective": 0.11082082241773605,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.9041666388511658,
"regularize": 0.11082082241773605,
"step": 485
},
{
"dpo_loss": 0.09154360741376877,
"dpo_wo_beta": -0.05899694189429283,
"epoch": 2.7822390174775626,
"grad_norm": 11.015982469457516,
"learning_rate": 7.854209717842231e-08,
"logits": -1.8848822116851807,
"logps": -110.15470886230469,
"loss": 0.1058,
"objective": 0.09154360741376877,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.47083333134651184,
"ranking_simple": 0.9333333373069763,
"regularize": 0.09154360741376877,
"step": 490
},
{
"dpo_loss": 0.10964310169219971,
"dpo_wo_beta": -0.07648710906505585,
"epoch": 2.8105810108644307,
"grad_norm": 10.079416267782939,
"learning_rate": 5.930969754901844e-08,
"logits": -1.8575230836868286,
"logps": -108.52234649658203,
"loss": 0.1192,
"objective": 0.10964310169219971,
"ranking_idealized": 0.5583333373069763,
"ranking_idealized_expo": 0.4749999940395355,
"ranking_simple": 0.8666666746139526,
"regularize": 0.10964310169219971,
"step": 495
},
{
"dpo_loss": 0.09479068219661713,
"dpo_wo_beta": -0.03411731496453285,
"epoch": 2.838923004251299,
"grad_norm": 9.84080114767598,
"learning_rate": 4.2746156931490756e-08,
"logits": -1.8439643383026123,
"logps": -109.77281188964844,
"loss": 0.1213,
"objective": 0.09479068219661713,
"ranking_idealized": 0.6208333373069763,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.925000011920929,
"regularize": 0.09479068219661713,
"step": 500
},
{
"dpo_loss": 0.12725140154361725,
"dpo_wo_beta": -0.18973813951015472,
"epoch": 2.8672649976381672,
"grad_norm": 9.973754192936779,
"learning_rate": 2.8869587314321324e-08,
"logits": -1.8574442863464355,
"logps": -110.32710266113281,
"loss": 0.132,
"objective": 0.12725140154361725,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.8999999761581421,
"regularize": 0.12725140154361725,
"step": 505
},
{
"dpo_loss": 0.10469380766153336,
"dpo_wo_beta": -0.1985001415014267,
"epoch": 2.8956069910250353,
"grad_norm": 8.936464383287202,
"learning_rate": 1.7695162522652352e-08,
"logits": -1.8629390001296997,
"logps": -113.56767272949219,
"loss": 0.1218,
"objective": 0.10469380766153336,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.8916666507720947,
"regularize": 0.10469380766153336,
"step": 510
},
{
"dpo_loss": 0.11260154843330383,
"dpo_wo_beta": -0.15691885352134705,
"epoch": 2.9239489844119038,
"grad_norm": 9.442300088571939,
"learning_rate": 9.235101625932885e-09,
"logits": -1.946829915046692,
"logps": -108.54016876220703,
"loss": 0.1258,
"objective": 0.11260154843330383,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.925000011920929,
"regularize": 0.11260154843330383,
"step": 515
},
{
"dpo_loss": 0.12230218201875687,
"dpo_wo_beta": -0.10489177703857422,
"epoch": 2.952290977798772,
"grad_norm": 9.279898048101137,
"learning_rate": 3.4986555765434415e-09,
"logits": -1.8482831716537476,
"logps": -114.20655059814453,
"loss": 0.1228,
"objective": 0.12230218201875687,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.8708333373069763,
"regularize": 0.12230218201875687,
"step": 520
},
{
"dpo_loss": 0.13335375487804413,
"dpo_wo_beta": -0.35261282324790955,
"epoch": 2.9806329711856403,
"grad_norm": 11.094809681697281,
"learning_rate": 4.920970940180958e-10,
"logits": -1.876869797706604,
"logps": -111.03084564208984,
"loss": 0.1235,
"objective": 0.13335375487804413,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.8958333134651184,
"regularize": 0.13335375487804413,
"step": 525
},
{
"epoch": 2.9976381672177608,
"step": 528,
"total_flos": 0.0,
"train_loss": 0.023984534440167023,
"train_runtime": 3310.3799,
"train_samples_per_second": 46.039,
"train_steps_per_second": 0.159
}
],
"logging_steps": 5,
"max_steps": 528,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 53,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}