hZzy's picture
Model save
c5a3ab9 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.988190836088805,
"eval_steps": 50,
"global_step": 880,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"dpo_loss": 0.6931471824645996,
"epoch": 0.005668398677373642,
"grad_norm": 13413.608867135807,
"learning_rate": 5.681818181818182e-08,
"logits": -1.3147305250167847,
"logps": -88.0877456665039,
"loss": 0.4113,
"objective": 0.41588976979255676,
"ranking_idealized": 0.5208333134651184,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5208333134651184,
"regularize": 0.41588976979255676,
"step": 1
},
{
"dpo_loss": 1.6281694173812866,
"epoch": 0.02834199338686821,
"grad_norm": 23830.011481895843,
"learning_rate": 2.840909090909091e-07,
"logits": -1.3680096864700317,
"logps": -84.41747283935547,
"loss": 2.7794,
"objective": 2.858431100845337,
"ranking_idealized": 0.546875,
"ranking_idealized_expo": 0.546875,
"ranking_simple": 0.546875,
"regularize": 2.858431100845337,
"step": 5
},
{
"dpo_loss": 3.2700231075286865,
"epoch": 0.05668398677373642,
"grad_norm": 29280.306552066217,
"learning_rate": 5.681818181818182e-07,
"logits": -1.4477864503860474,
"logps": -83.50318908691406,
"loss": 7.354,
"objective": 6.85181999206543,
"ranking_idealized": 0.5083333253860474,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.512499988079071,
"regularize": 6.85181999206543,
"step": 10
},
{
"dpo_loss": 6.557200908660889,
"epoch": 0.08502598016060463,
"grad_norm": 20109.513515072522,
"learning_rate": 8.522727272727273e-07,
"logits": -1.4104349613189697,
"logps": -83.76626586914062,
"loss": 13.7747,
"objective": 13.58159351348877,
"ranking_idealized": 0.5041666626930237,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5083333253860474,
"regularize": 13.58159351348877,
"step": 15
},
{
"dpo_loss": 9.023734092712402,
"epoch": 0.11336797354747284,
"grad_norm": 13139.311441037944,
"learning_rate": 1.1363636363636364e-06,
"logits": -1.3969768285751343,
"logps": -84.27888488769531,
"loss": 19.2285,
"objective": 18.731477737426758,
"ranking_idealized": 0.5375000238418579,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5375000238418579,
"regularize": 18.731477737426758,
"step": 20
},
{
"dpo_loss": 14.174921035766602,
"epoch": 0.14170996693434104,
"grad_norm": 16805.179390769823,
"learning_rate": 1.4204545454545458e-06,
"logits": -1.432785153388977,
"logps": -84.65080261230469,
"loss": 28.6241,
"objective": 28.14487648010254,
"ranking_idealized": 0.4958333373069763,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5,
"regularize": 28.14487648010254,
"step": 25
},
{
"dpo_loss": 16.938854217529297,
"epoch": 0.17005196032120926,
"grad_norm": 15979.803455377341,
"learning_rate": 1.7045454545454546e-06,
"logits": -1.3974343538284302,
"logps": -85.62297058105469,
"loss": 36.5075,
"objective": 37.031803131103516,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.550000011920929,
"regularize": 37.031803131103516,
"step": 30
},
{
"dpo_loss": 23.828941345214844,
"epoch": 0.19839395370807747,
"grad_norm": 17293.601243499223,
"learning_rate": 1.9886363636363638e-06,
"logits": -1.373544692993164,
"logps": -83.43585968017578,
"loss": 46.3083,
"objective": 45.453346252441406,
"ranking_idealized": 0.5041666626930237,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5041666626930237,
"regularize": 45.453346252441406,
"step": 35
},
{
"dpo_loss": 32.33571243286133,
"epoch": 0.22673594709494568,
"grad_norm": 12280.78311184525,
"learning_rate": 2.2727272727272728e-06,
"logits": -1.3559505939483643,
"logps": -83.9026870727539,
"loss": 52.9242,
"objective": 55.41267776489258,
"ranking_idealized": 0.5208333134651184,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5249999761581421,
"regularize": 55.41267776489258,
"step": 40
},
{
"dpo_loss": 34.25632858276367,
"epoch": 0.25507794048181387,
"grad_norm": 13382.907615852768,
"learning_rate": 2.556818181818182e-06,
"logits": -1.4366874694824219,
"logps": -85.12437438964844,
"loss": 65.7855,
"objective": 74.01615142822266,
"ranking_idealized": 0.5375000238418579,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5291666388511658,
"regularize": 74.01615142822266,
"step": 45
},
{
"dpo_loss": 40.2269287109375,
"epoch": 0.2834199338686821,
"grad_norm": 11751.345269013133,
"learning_rate": 2.8409090909090916e-06,
"logits": -1.2930634021759033,
"logps": -83.90473175048828,
"loss": 72.7249,
"objective": 73.77684783935547,
"ranking_idealized": 0.42500001192092896,
"ranking_idealized_expo": 0.42500001192092896,
"ranking_simple": 0.4166666567325592,
"regularize": 73.77684783935547,
"step": 50
},
{
"epoch": 0.2834199338686821,
"eval_dpo_loss": 25.884489059448242,
"eval_logits": -1.3016308546066284,
"eval_logps": -92.81625366210938,
"eval_loss": 49.766300201416016,
"eval_objective": 48.72661209106445,
"eval_ranking_idealized": 0.5092975497245789,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5082644820213318,
"eval_regularize": 48.72661209106445,
"eval_runtime": 261.2926,
"eval_samples_per_second": 22.159,
"eval_steps_per_second": 0.926,
"step": 50
},
{
"dpo_loss": 46.39546585083008,
"epoch": 0.3117619272555503,
"grad_norm": 11714.053294301802,
"learning_rate": 3.125e-06,
"logits": -1.2961366176605225,
"logps": -86.04666137695312,
"loss": 88.1724,
"objective": 85.7418212890625,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5541666746139526,
"regularize": 85.7418212890625,
"step": 55
},
{
"dpo_loss": 51.08903503417969,
"epoch": 0.3401039206424185,
"grad_norm": 11581.047450443415,
"learning_rate": 3.409090909090909e-06,
"logits": -1.233353853225708,
"logps": -83.57112121582031,
"loss": 97.4116,
"objective": 92.88329315185547,
"ranking_idealized": 0.512499988079071,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5,
"regularize": 92.88329315185547,
"step": 60
},
{
"dpo_loss": 47.421348571777344,
"epoch": 0.3684459140292867,
"grad_norm": 10219.552488411706,
"learning_rate": 3.6931818181818186e-06,
"logits": -1.2004388570785522,
"logps": -81.25647735595703,
"loss": 104.1737,
"objective": 105.26258850097656,
"ranking_idealized": 0.5083333253860474,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.48750001192092896,
"regularize": 105.26258850097656,
"step": 65
},
{
"dpo_loss": 62.97288131713867,
"epoch": 0.39678790741615494,
"grad_norm": 10005.80876054057,
"learning_rate": 3.9772727272727275e-06,
"logits": -1.299712061882019,
"logps": -82.07231140136719,
"loss": 114.6226,
"objective": 102.42678833007812,
"ranking_idealized": 0.5166666507720947,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5,
"regularize": 102.42678833007812,
"step": 70
},
{
"dpo_loss": 64.9041976928711,
"epoch": 0.42512990080302315,
"grad_norm": 8729.102244678843,
"learning_rate": 4.2613636363636365e-06,
"logits": -1.2741096019744873,
"logps": -82.21930694580078,
"loss": 120.1568,
"objective": 109.94430541992188,
"ranking_idealized": 0.5458333492279053,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.5583333373069763,
"regularize": 109.94430541992188,
"step": 75
},
{
"dpo_loss": 68.410400390625,
"epoch": 0.45347189418989137,
"grad_norm": 10066.382938514493,
"learning_rate": 4.5454545454545455e-06,
"logits": -1.3132286071777344,
"logps": -81.40987396240234,
"loss": 131.1049,
"objective": 139.6371612548828,
"ranking_idealized": 0.5166666507720947,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5166666507720947,
"regularize": 139.6371612548828,
"step": 80
},
{
"dpo_loss": 61.07379913330078,
"epoch": 0.4818138875767596,
"grad_norm": 8906.640582071686,
"learning_rate": 4.829545454545455e-06,
"logits": -1.2284363508224487,
"logps": -78.84890747070312,
"loss": 139.4462,
"objective": 142.55609130859375,
"ranking_idealized": 0.5249999761581421,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5041666626930237,
"regularize": 142.55609130859375,
"step": 85
},
{
"dpo_loss": 76.96965789794922,
"epoch": 0.5101558809636277,
"grad_norm": 8487.217481710324,
"learning_rate": 4.999921328558333e-06,
"logits": -1.012691855430603,
"logps": -76.95401000976562,
"loss": 144.8047,
"objective": 142.29689025878906,
"ranking_idealized": 0.4791666567325592,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.46666666865348816,
"regularize": 142.29689025878906,
"step": 90
},
{
"dpo_loss": 78.01219177246094,
"epoch": 0.538497874350496,
"grad_norm": 8772.977046324564,
"learning_rate": 4.999036331701828e-06,
"logits": -1.063112735748291,
"logps": -74.44496154785156,
"loss": 151.2955,
"objective": 154.6220703125,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5416666865348816,
"regularize": 154.6220703125,
"step": 95
},
{
"dpo_loss": 78.0488510131836,
"epoch": 0.5668398677373642,
"grad_norm": 8669.676787481652,
"learning_rate": 4.997168347957521e-06,
"logits": -1.1887397766113281,
"logps": -75.71530151367188,
"loss": 152.2211,
"objective": 154.32736206054688,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5291666388511658,
"regularize": 154.32736206054688,
"step": 100
},
{
"epoch": 0.5668398677373642,
"eval_dpo_loss": 74.54129791259766,
"eval_logits": -1.245821237564087,
"eval_logps": -80.67256164550781,
"eval_loss": 146.651123046875,
"eval_objective": 149.05430603027344,
"eval_ranking_idealized": 0.5092975497245789,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5123966932296753,
"eval_regularize": 149.05430603027344,
"eval_runtime": 260.1184,
"eval_samples_per_second": 22.259,
"eval_steps_per_second": 0.93,
"step": 100
},
{
"dpo_loss": 97.11483001708984,
"epoch": 0.5951818611242324,
"grad_norm": 8886.474594959353,
"learning_rate": 4.994318112090048e-06,
"logits": -1.109060525894165,
"logps": -77.18095397949219,
"loss": 162.3808,
"objective": 165.7858428955078,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5,
"regularize": 165.7858428955078,
"step": 105
},
{
"dpo_loss": 84.30198669433594,
"epoch": 0.6235238545111006,
"grad_norm": 8328.506430860918,
"learning_rate": 4.990486745229364e-06,
"logits": -1.1675399541854858,
"logps": -76.15277862548828,
"loss": 160.4772,
"objective": 181.19313049316406,
"ranking_idealized": 0.44999998807907104,
"ranking_idealized_expo": 0.44999998807907104,
"ranking_simple": 0.4416666626930237,
"regularize": 181.19313049316406,
"step": 110
},
{
"dpo_loss": 75.68659210205078,
"epoch": 0.6518658478979689,
"grad_norm": 9699.16568235547,
"learning_rate": 4.985675754429744e-06,
"logits": -1.1235295534133911,
"logps": -77.19483947753906,
"loss": 156.7024,
"objective": 153.15138244628906,
"ranking_idealized": 0.4833333194255829,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.4833333194255829,
"regularize": 153.15138244628906,
"step": 115
},
{
"dpo_loss": 85.8583984375,
"epoch": 0.680207841284837,
"grad_norm": 7593.864278067022,
"learning_rate": 4.9798870320769884e-06,
"logits": -1.034916639328003,
"logps": -78.47518157958984,
"loss": 158.3137,
"objective": 163.73861694335938,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5041666626930237,
"regularize": 163.73861694335938,
"step": 120
},
{
"dpo_loss": 87.04534149169922,
"epoch": 0.7085498346717053,
"grad_norm": 7441.764004781261,
"learning_rate": 4.973122855144066e-06,
"logits": -0.9983721971511841,
"logps": -76.62566375732422,
"loss": 152.8004,
"objective": 162.25894165039062,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.5291666388511658,
"regularize": 162.25894165039062,
"step": 125
},
{
"dpo_loss": 78.63727569580078,
"epoch": 0.7368918280585735,
"grad_norm": 7450.826686003035,
"learning_rate": 4.965385884295467e-06,
"logits": -1.1029576063156128,
"logps": -75.08853912353516,
"loss": 155.3986,
"objective": 160.40919494628906,
"ranking_idealized": 0.44999998807907104,
"ranking_idealized_expo": 0.44999998807907104,
"ranking_simple": 0.44583332538604736,
"regularize": 160.40919494628906,
"step": 130
},
{
"dpo_loss": 77.3191909790039,
"epoch": 0.7652338214454416,
"grad_norm": 7545.4833393494555,
"learning_rate": 4.956679162840646e-06,
"logits": -1.0437390804290771,
"logps": -77.15032196044922,
"loss": 163.0839,
"objective": 150.17971801757812,
"ranking_idealized": 0.5291666388511658,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.4791666567325592,
"regularize": 150.17971801757812,
"step": 135
},
{
"dpo_loss": 77.59061431884766,
"epoch": 0.7935758148323099,
"grad_norm": 7289.370251601989,
"learning_rate": 4.947006115536947e-06,
"logits": -0.9542478322982788,
"logps": -77.21605682373047,
"loss": 157.3087,
"objective": 153.8356475830078,
"ranking_idealized": 0.5041666626930237,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.512499988079071,
"regularize": 153.8356475830078,
"step": 140
},
{
"dpo_loss": 80.91732788085938,
"epoch": 0.821917808219178,
"grad_norm": 7226.1243002610445,
"learning_rate": 4.9363705472424825e-06,
"logits": -0.975356936454773,
"logps": -76.82910919189453,
"loss": 146.9377,
"objective": 150.40052795410156,
"ranking_idealized": 0.4833333194255829,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.47083333134651184,
"regularize": 150.40052795410156,
"step": 145
},
{
"dpo_loss": 60.50048828125,
"epoch": 0.8502598016060463,
"grad_norm": 6968.43374501734,
"learning_rate": 4.924776641419513e-06,
"logits": -0.9192944765090942,
"logps": -75.97390747070312,
"loss": 149.0411,
"objective": 144.81809997558594,
"ranking_idealized": 0.5375000238418579,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5541666746139526,
"regularize": 144.81809997558594,
"step": 150
},
{
"epoch": 0.8502598016060463,
"eval_dpo_loss": 89.5256576538086,
"eval_logits": -0.9510709643363953,
"eval_logps": -81.42581176757812,
"eval_loss": 179.02293395996094,
"eval_objective": 179.47549438476562,
"eval_ranking_idealized": 0.5092975497245789,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5123966932296753,
"eval_regularize": 179.47549438476562,
"eval_runtime": 259.8812,
"eval_samples_per_second": 22.279,
"eval_steps_per_second": 0.931,
"step": 150
},
{
"dpo_loss": 70.23117065429688,
"epoch": 0.8786017949929145,
"grad_norm": 6700.90677088136,
"learning_rate": 4.9122289584888926e-06,
"logits": -0.9173446297645569,
"logps": -75.11918640136719,
"loss": 149.5902,
"objective": 151.32916259765625,
"ranking_idealized": 0.512499988079071,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5291666388511658,
"regularize": 151.32916259765625,
"step": 155
},
{
"dpo_loss": 77.6051025390625,
"epoch": 0.9069437883797827,
"grad_norm": 6795.6302941338045,
"learning_rate": 4.8987324340362445e-06,
"logits": -0.8373637795448303,
"logps": -76.12178802490234,
"loss": 143.2267,
"objective": 138.21070861816406,
"ranking_idealized": 0.4958333373069763,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.4833333194255829,
"regularize": 138.21070861816406,
"step": 160
},
{
"dpo_loss": 82.77120971679688,
"epoch": 0.9352857817666509,
"grad_norm": 6989.765984737861,
"learning_rate": 4.884292376870567e-06,
"logits": -0.7745934724807739,
"logps": -77.40280151367188,
"loss": 149.6674,
"objective": 159.49266052246094,
"ranking_idealized": 0.5208333134651184,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5375000238418579,
"regularize": 159.49266052246094,
"step": 165
},
{
"dpo_loss": 82.72408294677734,
"epoch": 0.9636277751535192,
"grad_norm": 6801.285873992836,
"learning_rate": 4.868914466936038e-06,
"logits": -0.7280222773551941,
"logps": -78.39759063720703,
"loss": 147.4437,
"objective": 154.9546661376953,
"ranking_idealized": 0.5083333253860474,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5291666388511658,
"regularize": 154.9546661376953,
"step": 170
},
{
"dpo_loss": 62.412452697753906,
"epoch": 0.9919697685403873,
"grad_norm": 6950.067011326567,
"learning_rate": 4.8526047530778175e-06,
"logits": -0.6979461908340454,
"logps": -78.83072662353516,
"loss": 148.1536,
"objective": 130.68894958496094,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.574999988079071,
"ranking_simple": 0.550000011920929,
"regularize": 130.68894958496094,
"step": 175
},
{
"dpo_loss": 91.09583282470703,
"epoch": 1.0203117619272555,
"grad_norm": 6720.433078386762,
"learning_rate": 4.835369650662767e-06,
"logits": -0.7362512946128845,
"logps": -77.91545867919922,
"loss": 150.91,
"objective": 157.76901245117188,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5583333373069763,
"regularize": 157.76901245117188,
"step": 180
},
{
"dpo_loss": 92.66343688964844,
"epoch": 1.0486537553141237,
"grad_norm": 6715.629478521071,
"learning_rate": 4.817215939055984e-06,
"logits": -0.6558343172073364,
"logps": -77.34681701660156,
"loss": 138.7646,
"objective": 152.0157012939453,
"ranking_idealized": 0.4833333194255829,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.47083333134651184,
"regularize": 152.0157012939453,
"step": 185
},
{
"dpo_loss": 90.67267608642578,
"epoch": 1.076995748700992,
"grad_norm": 6675.106211711281,
"learning_rate": 4.798150758954164e-06,
"logits": -0.7170895934104919,
"logps": -78.08677673339844,
"loss": 135.6227,
"objective": 155.2058563232422,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.5458333492279053,
"regularize": 155.2058563232422,
"step": 190
},
{
"dpo_loss": 70.37294006347656,
"epoch": 1.10533774208786,
"grad_norm": 6877.835091759628,
"learning_rate": 4.778181609576832e-06,
"logits": -0.7233827114105225,
"logps": -78.22598266601562,
"loss": 141.4543,
"objective": 141.26718139648438,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.5625,
"regularize": 141.26718139648438,
"step": 195
},
{
"dpo_loss": 68.18767547607422,
"epoch": 1.1336797354747283,
"grad_norm": 6695.865097498279,
"learning_rate": 4.757316345716554e-06,
"logits": -0.811337411403656,
"logps": -77.92884063720703,
"loss": 135.6758,
"objective": 136.39527893066406,
"ranking_idealized": 0.5083333253860474,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.512499988079071,
"regularize": 136.39527893066406,
"step": 200
},
{
"epoch": 1.1336797354747283,
"eval_dpo_loss": 98.72966003417969,
"eval_logits": -0.8759555220603943,
"eval_logps": -83.1371078491211,
"eval_loss": 190.77737426757812,
"eval_objective": 195.49459838867188,
"eval_ranking_idealized": 0.5092975497245789,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5082644820213318,
"eval_regularize": 195.49459838867188,
"eval_runtime": 260.3291,
"eval_samples_per_second": 22.241,
"eval_steps_per_second": 0.93,
"step": 200
},
{
"dpo_loss": 61.18910217285156,
"epoch": 1.1620217288615966,
"grad_norm": 6574.968638595831,
"learning_rate": 4.735563174649278e-06,
"logits": -0.83098304271698,
"logps": -78.29829406738281,
"loss": 137.2297,
"objective": 146.6360626220703,
"ranking_idealized": 0.48750001192092896,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.5,
"regularize": 146.6360626220703,
"step": 205
},
{
"dpo_loss": 84.49109649658203,
"epoch": 1.1903637222484649,
"grad_norm": 6368.0146943258505,
"learning_rate": 4.7129306529060415e-06,
"logits": -0.828973114490509,
"logps": -78.18091583251953,
"loss": 136.1199,
"objective": 162.718994140625,
"ranking_idealized": 0.5,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.512499988079071,
"regularize": 162.718994140625,
"step": 210
},
{
"dpo_loss": 75.69349670410156,
"epoch": 1.2187057156353331,
"grad_norm": 6721.076267224635,
"learning_rate": 4.68942768290728e-06,
"logits": -0.7940189242362976,
"logps": -77.39470672607422,
"loss": 132.4868,
"objective": 145.17942810058594,
"ranking_idealized": 0.4833333194255829,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.4791666567325592,
"regularize": 145.17942810058594,
"step": 215
},
{
"dpo_loss": 70.87561798095703,
"epoch": 1.2470477090222012,
"grad_norm": 6782.903860178787,
"learning_rate": 4.665063509461098e-06,
"logits": -0.63108229637146,
"logps": -77.25511932373047,
"loss": 135.8738,
"objective": 132.0025177001953,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.6000000238418579,
"ranking_simple": 0.574999988079071,
"regularize": 132.0025177001953,
"step": 220
},
{
"dpo_loss": 60.317649841308594,
"epoch": 1.2753897024090695,
"grad_norm": 6714.729050270533,
"learning_rate": 4.639847716126855e-06,
"logits": -0.7443896532058716,
"logps": -78.88975524902344,
"loss": 129.2623,
"objective": 123.79737854003906,
"ranking_idealized": 0.5291666388511658,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5249999761581421,
"regularize": 123.79737854003906,
"step": 225
},
{
"dpo_loss": 61.865577697753906,
"epoch": 1.3037316957959377,
"grad_norm": 6667.869735511306,
"learning_rate": 4.613790221445511e-06,
"logits": -0.6796783208847046,
"logps": -78.33611297607422,
"loss": 120.3527,
"objective": 119.14628601074219,
"ranking_idealized": 0.5083333253860474,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5291666388511658,
"regularize": 119.14628601074219,
"step": 230
},
{
"dpo_loss": 59.05632400512695,
"epoch": 1.3320736891828058,
"grad_norm": 6713.042134498338,
"learning_rate": 4.586901275038201e-06,
"logits": -0.7024025321006775,
"logps": -76.42981719970703,
"loss": 129.2294,
"objective": 135.5782928466797,
"ranking_idealized": 0.4833333194255829,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.5249999761581421,
"regularize": 135.5782928466797,
"step": 235
},
{
"dpo_loss": 63.171669006347656,
"epoch": 1.360415682569674,
"grad_norm": 6288.49610479985,
"learning_rate": 4.559191453574582e-06,
"logits": -0.678625226020813,
"logps": -77.7660903930664,
"loss": 125.9373,
"objective": 122.7352066040039,
"ranking_idealized": 0.4791666567325592,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.4791666567325592,
"regularize": 122.7352066040039,
"step": 240
},
{
"dpo_loss": 62.384334564208984,
"epoch": 1.3887576759565423,
"grad_norm": 6361.509384946799,
"learning_rate": 4.530671656612544e-06,
"logits": -0.755474865436554,
"logps": -77.20565032958984,
"loss": 123.4547,
"objective": 123.81192779541016,
"ranking_idealized": 0.5166666507720947,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5,
"regularize": 123.81192779541016,
"step": 245
},
{
"dpo_loss": 64.1493911743164,
"epoch": 1.4170996693434104,
"grad_norm": 6492.257111773256,
"learning_rate": 4.501353102310901e-06,
"logits": -0.761756956577301,
"logps": -76.3807144165039,
"loss": 122.9397,
"objective": 115.62742614746094,
"ranking_idealized": 0.47083333134651184,
"ranking_idealized_expo": 0.47083333134651184,
"ranking_simple": 0.4541666805744171,
"regularize": 115.62742614746094,
"step": 250
},
{
"epoch": 1.4170996693434104,
"eval_dpo_loss": 104.7900161743164,
"eval_logits": -0.8410003781318665,
"eval_logps": -81.18800354003906,
"eval_loss": 204.81558227539062,
"eval_objective": 206.54141235351562,
"eval_ranking_idealized": 0.5092975497245789,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.49896693229675293,
"eval_regularize": 206.54141235351562,
"eval_runtime": 260.0773,
"eval_samples_per_second": 22.263,
"eval_steps_per_second": 0.93,
"step": 250
},
{
"dpo_loss": 58.41355895996094,
"epoch": 1.4454416627302786,
"grad_norm": 6090.22781261977,
"learning_rate": 4.4712473230167775e-06,
"logits": -0.7302901148796082,
"logps": -77.44480895996094,
"loss": 121.9243,
"objective": 119.19187927246094,
"ranking_idealized": 0.5458333492279053,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.5541666746139526,
"regularize": 119.19187927246094,
"step": 255
},
{
"dpo_loss": 62.344058990478516,
"epoch": 1.473783656117147,
"grad_norm": 6061.78193398447,
"learning_rate": 4.440366160729393e-06,
"logits": -0.7257960438728333,
"logps": -78.19904327392578,
"loss": 116.9529,
"objective": 126.45060729980469,
"ranking_idealized": 0.48750001192092896,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.4958333373069763,
"regularize": 126.45060729980469,
"step": 260
},
{
"dpo_loss": 63.532493591308594,
"epoch": 1.5021256495040152,
"grad_norm": 6481.304182744527,
"learning_rate": 4.4087217624420595e-06,
"logits": -0.7170858979225159,
"logps": -76.29792022705078,
"loss": 117.9619,
"objective": 119.87791442871094,
"ranking_idealized": 0.5041666626930237,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5166666507720947,
"regularize": 119.87791442871094,
"step": 265
},
{
"dpo_loss": 62.21635055541992,
"epoch": 1.5304676428908834,
"grad_norm": 6389.335954603194,
"learning_rate": 4.376326575364206e-06,
"logits": -0.6881006360054016,
"logps": -76.92182922363281,
"loss": 112.4777,
"objective": 120.40229034423828,
"ranking_idealized": 0.5041666626930237,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5166666507720947,
"regularize": 120.40229034423828,
"step": 270
},
{
"dpo_loss": 63.73827362060547,
"epoch": 1.5588096362777515,
"grad_norm": 6484.028715201892,
"learning_rate": 4.34319334202531e-06,
"logits": -0.7395693063735962,
"logps": -76.80595397949219,
"loss": 110.8653,
"objective": 114.3071517944336,
"ranking_idealized": 0.5249999761581421,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5333333611488342,
"regularize": 114.3071517944336,
"step": 275
},
{
"dpo_loss": 57.1733512878418,
"epoch": 1.5871516296646198,
"grad_norm": 6228.430728506919,
"learning_rate": 4.309335095262675e-06,
"logits": -0.7416642904281616,
"logps": -74.88420104980469,
"loss": 114.3982,
"objective": 110.38121795654297,
"ranking_idealized": 0.5041666626930237,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.512499988079071,
"regularize": 110.38121795654297,
"step": 280
},
{
"dpo_loss": 54.96518325805664,
"epoch": 1.615493623051488,
"grad_norm": 6537.148002785846,
"learning_rate": 4.274765153095008e-06,
"logits": -0.8007023930549622,
"logps": -76.65270233154297,
"loss": 116.8492,
"objective": 124.57038879394531,
"ranking_idealized": 0.5166666507720947,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5291666388511658,
"regularize": 124.57038879394531,
"step": 285
},
{
"dpo_loss": 56.35616683959961,
"epoch": 1.643835616438356,
"grad_norm": 5801.834071606869,
"learning_rate": 4.239497113483819e-06,
"logits": -0.7784015536308289,
"logps": -75.22969055175781,
"loss": 108.7635,
"objective": 101.91283416748047,
"ranking_idealized": 0.5625,
"ranking_idealized_expo": 0.5625,
"ranking_simple": 0.5541666746139526,
"regularize": 101.91283416748047,
"step": 290
},
{
"dpo_loss": 59.50160598754883,
"epoch": 1.6721776098252243,
"grad_norm": 5633.558615804516,
"learning_rate": 4.203544848984729e-06,
"logits": -0.6548821926116943,
"logps": -74.7501449584961,
"loss": 110.3539,
"objective": 111.20478820800781,
"ranking_idealized": 0.4791666567325592,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.4958333373069763,
"regularize": 111.20478820800781,
"step": 295
},
{
"dpo_loss": 65.51039123535156,
"epoch": 1.7005196032120926,
"grad_norm": 6070.898597461427,
"learning_rate": 4.16692250129073e-06,
"logits": -0.615551233291626,
"logps": -76.81422424316406,
"loss": 109.8686,
"objective": 122.05563354492188,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.574999988079071,
"ranking_simple": 0.5625,
"regularize": 122.05563354492188,
"step": 300
},
{
"epoch": 1.7005196032120926,
"eval_dpo_loss": 109.18816375732422,
"eval_logits": -0.665767252445221,
"eval_logps": -82.2343978881836,
"eval_loss": 216.43338012695312,
"eval_objective": 216.9470672607422,
"eval_ranking_idealized": 0.5092975497245789,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5082644820213318,
"eval_regularize": 216.9470672607422,
"eval_runtime": 260.4112,
"eval_samples_per_second": 22.234,
"eval_steps_per_second": 0.929,
"step": 300
},
{
"dpo_loss": 55.654296875,
"epoch": 1.7288615965989607,
"grad_norm": 5873.367897520137,
"learning_rate": 4.129644475669617e-06,
"logits": -0.5988628268241882,
"logps": -78.57816314697266,
"loss": 107.9981,
"objective": 113.90287780761719,
"ranking_idealized": 0.5041666626930237,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.4833333194255829,
"regularize": 113.90287780761719,
"step": 305
},
{
"dpo_loss": 53.0214729309082,
"epoch": 1.7572035899858292,
"grad_norm": 6172.163624470604,
"learning_rate": 4.091725435297721e-06,
"logits": -0.6676580905914307,
"logps": -76.13163757324219,
"loss": 116.5937,
"objective": 113.61856842041016,
"ranking_idealized": 0.5208333134651184,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5333333611488342,
"regularize": 113.61856842041016,
"step": 310
},
{
"dpo_loss": 57.42729568481445,
"epoch": 1.7855455833726972,
"grad_norm": 6120.23161933891,
"learning_rate": 4.053180295492203e-06,
"logits": -0.6111046075820923,
"logps": -77.60387420654297,
"loss": 109.9605,
"objective": 105.1899642944336,
"ranking_idealized": 0.5083333253860474,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5166666507720947,
"regularize": 105.1899642944336,
"step": 315
},
{
"dpo_loss": 48.950584411621094,
"epoch": 1.8138875767595655,
"grad_norm": 5988.504398453124,
"learning_rate": 4.014024217844167e-06,
"logits": -0.671535313129425,
"logps": -77.83592224121094,
"loss": 102.7624,
"objective": 104.49610137939453,
"ranking_idealized": 0.5041666626930237,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5291666388511658,
"regularize": 104.49610137939453,
"step": 320
},
{
"dpo_loss": 57.7384033203125,
"epoch": 1.8422295701464337,
"grad_norm": 6423.999380292735,
"learning_rate": 3.974272604254906e-06,
"logits": -0.6641644239425659,
"logps": -78.81893920898438,
"loss": 106.3894,
"objective": 106.4183120727539,
"ranking_idealized": 0.5166666507720947,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5416666865348816,
"regularize": 106.4183120727539,
"step": 325
},
{
"dpo_loss": 47.59239959716797,
"epoch": 1.8705715635333018,
"grad_norm": 5787.176377487831,
"learning_rate": 3.933941090877615e-06,
"logits": -0.5340750217437744,
"logps": -75.45977020263672,
"loss": 104.8829,
"objective": 98.918701171875,
"ranking_idealized": 0.47083333134651184,
"ranking_idealized_expo": 0.47083333134651184,
"ranking_simple": 0.4625000059604645,
"regularize": 98.918701171875,
"step": 330
},
{
"dpo_loss": 53.71455001831055,
"epoch": 1.89891355692017,
"grad_norm": 5809.111403677512,
"learning_rate": 3.893045541966975e-06,
"logits": -0.6170223355293274,
"logps": -75.74073028564453,
"loss": 102.0785,
"objective": 106.43668365478516,
"ranking_idealized": 0.4958333373069763,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.4833333194255829,
"regularize": 106.43668365478516,
"step": 335
},
{
"dpo_loss": 48.59009552001953,
"epoch": 1.9272555503070383,
"grad_norm": 6046.947822829327,
"learning_rate": 3.8516020436389945e-06,
"logits": -0.5560640692710876,
"logps": -78.0693130493164,
"loss": 99.5837,
"objective": 97.92900085449219,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5416666865348816,
"regularize": 97.92900085449219,
"step": 340
},
{
"dpo_loss": 39.00655746459961,
"epoch": 1.9555975436939064,
"grad_norm": 6294.646295716598,
"learning_rate": 3.8096268975436045e-06,
"logits": -0.581243097782135,
"logps": -76.83429718017578,
"loss": 98.8101,
"objective": 92.9996566772461,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5333333611488342,
"regularize": 92.9996566772461,
"step": 345
},
{
"dpo_loss": 45.82107162475586,
"epoch": 1.9839395370807746,
"grad_norm": 6285.693550619337,
"learning_rate": 3.767136614452458e-06,
"logits": -0.5315877199172974,
"logps": -77.77294158935547,
"loss": 97.6956,
"objective": 92.99220275878906,
"ranking_idealized": 0.48750001192092896,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.5041666626930237,
"regularize": 92.99220275878906,
"step": 350
},
{
"epoch": 1.9839395370807746,
"eval_dpo_loss": 109.80838012695312,
"eval_logits": -0.6322916746139526,
"eval_logps": -81.08039093017578,
"eval_loss": 218.28872680664062,
"eval_objective": 217.42910766601562,
"eval_ranking_idealized": 0.5092975497245789,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5072314143180847,
"eval_regularize": 217.42910766601562,
"eval_runtime": 260.5078,
"eval_samples_per_second": 22.226,
"eval_steps_per_second": 0.929,
"step": 350
},
{
"dpo_loss": 45.73307800292969,
"epoch": 2.012281530467643,
"grad_norm": 5901.724049272657,
"learning_rate": 3.724147907764478e-06,
"logits": -0.49950748682022095,
"logps": -77.2893295288086,
"loss": 95.374,
"objective": 97.87924194335938,
"ranking_idealized": 0.5083333253860474,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5041666626930237,
"regularize": 97.87924194335938,
"step": 355
},
{
"dpo_loss": 46.2380256652832,
"epoch": 2.040623523854511,
"grad_norm": 5907.7941999877185,
"learning_rate": 3.6806776869317074e-06,
"logits": -0.571615993976593,
"logps": -75.41194152832031,
"loss": 89.9848,
"objective": 88.52743530273438,
"ranking_idealized": 0.5291666388511658,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5333333611488342,
"regularize": 88.52743530273438,
"step": 360
},
{
"dpo_loss": 50.726741790771484,
"epoch": 2.0689655172413794,
"grad_norm": 6275.9056471544,
"learning_rate": 3.6367430508080283e-06,
"logits": -0.6418294310569763,
"logps": -77.9503173828125,
"loss": 93.574,
"objective": 92.23693084716797,
"ranking_idealized": 0.5041666626930237,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5249999761581421,
"regularize": 92.23693084716797,
"step": 365
},
{
"dpo_loss": 44.6575927734375,
"epoch": 2.0973075106282475,
"grad_norm": 5856.945542345439,
"learning_rate": 3.5923612809233987e-06,
"logits": -0.552503228187561,
"logps": -75.52668762207031,
"loss": 89.2864,
"objective": 88.46630096435547,
"ranking_idealized": 0.4833333194255829,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.5083333253860474,
"regularize": 88.46630096435547,
"step": 370
},
{
"dpo_loss": 44.330020904541016,
"epoch": 2.1256495040151155,
"grad_norm": 5715.458499171565,
"learning_rate": 3.547549834686222e-06,
"logits": -0.5592830777168274,
"logps": -77.21271514892578,
"loss": 87.4304,
"objective": 95.40113830566406,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5874999761581421,
"ranking_simple": 0.574999988079071,
"regularize": 95.40113830566406,
"step": 375
},
{
"dpo_loss": 42.92914581298828,
"epoch": 2.153991497401984,
"grad_norm": 5740.849860043716,
"learning_rate": 3.5023263385165346e-06,
"logits": -0.5162667632102966,
"logps": -77.4334945678711,
"loss": 88.8215,
"objective": 89.14612579345703,
"ranking_idealized": 0.5291666388511658,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.512499988079071,
"regularize": 89.14612579345703,
"step": 380
},
{
"dpo_loss": 49.994590759277344,
"epoch": 2.182333490788852,
"grad_norm": 5839.263703953865,
"learning_rate": 3.4567085809127247e-06,
"logits": -0.5519043207168579,
"logps": -80.0301284790039,
"loss": 87.9243,
"objective": 99.98570251464844,
"ranking_idealized": 0.5,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5166666507720947,
"regularize": 99.98570251464844,
"step": 385
},
{
"dpo_loss": 46.78067398071289,
"epoch": 2.21067548417572,
"grad_norm": 5926.180751245502,
"learning_rate": 3.410714505454486e-06,
"logits": -0.4752744138240814,
"logps": -78.86099243164062,
"loss": 88.7389,
"objective": 83.77468872070312,
"ranking_idealized": 0.4833333194255829,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.46666666865348816,
"regularize": 83.77468872070312,
"step": 390
},
{
"dpo_loss": 48.72767639160156,
"epoch": 2.2390174775625886,
"grad_norm": 5736.684602068411,
"learning_rate": 3.364362203744777e-06,
"logits": -0.488779217004776,
"logps": -79.17351531982422,
"loss": 85.6137,
"objective": 89.27422332763672,
"ranking_idealized": 0.5166666507720947,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5,
"regularize": 89.27422332763672,
"step": 395
},
{
"dpo_loss": 46.81766128540039,
"epoch": 2.2673594709494567,
"grad_norm": 6351.520354901001,
"learning_rate": 3.3176699082935546e-06,
"logits": -0.5262924432754517,
"logps": -80.44742584228516,
"loss": 86.0309,
"objective": 90.6736831665039,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.574999988079071,
"ranking_simple": 0.5708333253860474,
"regularize": 90.6736831665039,
"step": 400
},
{
"epoch": 2.2673594709494567,
"eval_dpo_loss": 115.8748779296875,
"eval_logits": -0.5904337763786316,
"eval_logps": -83.60823822021484,
"eval_loss": 221.7113494873047,
"eval_objective": 225.3389129638672,
"eval_ranking_idealized": 0.5092975497245789,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5051652789115906,
"eval_regularize": 225.3389129638672,
"eval_runtime": 260.7013,
"eval_samples_per_second": 22.209,
"eval_steps_per_second": 0.928,
"step": 400
},
{
"dpo_loss": 39.29842758178711,
"epoch": 2.295701464336325,
"grad_norm": 6158.234438206871,
"learning_rate": 3.2706559853460818e-06,
"logits": -0.583505392074585,
"logps": -78.83747100830078,
"loss": 84.181,
"objective": 83.5100326538086,
"ranking_idealized": 0.5583333373069763,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.5708333253860474,
"regularize": 83.5100326538086,
"step": 405
},
{
"dpo_loss": 41.60795974731445,
"epoch": 2.324043457723193,
"grad_norm": 5505.6808601173025,
"learning_rate": 3.2233389276586325e-06,
"logits": -0.5247575640678406,
"logps": -77.18423461914062,
"loss": 81.7469,
"objective": 78.1520004272461,
"ranking_idealized": 0.4749999940395355,
"ranking_idealized_expo": 0.4749999940395355,
"ranking_simple": 0.4749999940395355,
"regularize": 78.1520004272461,
"step": 410
},
{
"dpo_loss": 40.77152633666992,
"epoch": 2.3523854511100613,
"grad_norm": 7217.9049268653225,
"learning_rate": 3.1757373472244324e-06,
"logits": -0.5214927196502686,
"logps": -78.42851257324219,
"loss": 82.4994,
"objective": 76.92170715332031,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5166666507720947,
"regularize": 76.92170715332031,
"step": 415
},
{
"dpo_loss": 39.665164947509766,
"epoch": 2.3807274444969297,
"grad_norm": 6286.310490781002,
"learning_rate": 3.127869967952698e-06,
"logits": -0.5284460783004761,
"logps": -78.95869445800781,
"loss": 81.5421,
"objective": 80.19254302978516,
"ranking_idealized": 0.47083333134651184,
"ranking_idealized_expo": 0.47083333134651184,
"ranking_simple": 0.4749999940395355,
"regularize": 80.19254302978516,
"step": 420
},
{
"dpo_loss": 35.95686340332031,
"epoch": 2.409069437883798,
"grad_norm": 6025.61452707402,
"learning_rate": 3.0797556183036582e-06,
"logits": -0.5814462304115295,
"logps": -77.8717269897461,
"loss": 78.4787,
"objective": 80.24657440185547,
"ranking_idealized": 0.4958333373069763,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5041666626930237,
"regularize": 80.24657440185547,
"step": 425
},
{
"dpo_loss": 42.524593353271484,
"epoch": 2.4374114312706663,
"grad_norm": 5711.500524222879,
"learning_rate": 3.0314132238824416e-06,
"logits": -0.6198355555534363,
"logps": -78.72447204589844,
"loss": 79.9018,
"objective": 80.83815002441406,
"ranking_idealized": 0.5291666388511658,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5333333611488342,
"regularize": 80.83815002441406,
"step": 430
},
{
"dpo_loss": 41.12130355834961,
"epoch": 2.4657534246575343,
"grad_norm": 5723.1684727847205,
"learning_rate": 2.9828617999947647e-06,
"logits": -0.6906354427337646,
"logps": -76.77250671386719,
"loss": 78.7991,
"objective": 82.95913696289062,
"ranking_idealized": 0.5083333253860474,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5166666507720947,
"regularize": 82.95913696289062,
"step": 435
},
{
"dpo_loss": 41.736507415771484,
"epoch": 2.4940954180444024,
"grad_norm": 6162.556759828204,
"learning_rate": 2.9341204441673267e-06,
"logits": -0.5858648419380188,
"logps": -77.20186614990234,
"loss": 79.0761,
"objective": 77.8648910522461,
"ranking_idealized": 0.5375000238418579,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5291666388511658,
"regularize": 77.8648910522461,
"step": 440
},
{
"dpo_loss": 38.95100784301758,
"epoch": 2.5224374114312704,
"grad_norm": 5599.536160663401,
"learning_rate": 2.8852083286358647e-06,
"logits": -0.5441535711288452,
"logps": -75.47164154052734,
"loss": 75.4247,
"objective": 74.47901916503906,
"ranking_idealized": 0.5166666507720947,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5333333611488342,
"regularize": 74.47901916503906,
"step": 445
},
{
"dpo_loss": 42.42906188964844,
"epoch": 2.550779404818139,
"grad_norm": 5927.201776955767,
"learning_rate": 2.8361446928038298e-06,
"logits": -0.5917781591415405,
"logps": -77.22566986083984,
"loss": 78.4362,
"objective": 75.78580474853516,
"ranking_idealized": 0.5,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.49166667461395264,
"regularize": 75.78580474853516,
"step": 450
},
{
"epoch": 2.550779404818139,
"eval_dpo_loss": 116.21166229248047,
"eval_logits": -0.6173169016838074,
"eval_logps": -82.07426452636719,
"eval_loss": 221.37322998046875,
"eval_objective": 224.48391723632812,
"eval_ranking_idealized": 0.5092975497245789,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5113636255264282,
"eval_regularize": 224.48391723632812,
"eval_runtime": 259.6525,
"eval_samples_per_second": 22.299,
"eval_steps_per_second": 0.932,
"step": 450
},
{
"dpo_loss": 42.16978073120117,
"epoch": 2.579121398205007,
"grad_norm": 5821.790965127779,
"learning_rate": 2.7869488356746344e-06,
"logits": -0.5925208926200867,
"logps": -78.49901580810547,
"loss": 76.2254,
"objective": 73.7408447265625,
"ranking_idealized": 0.5,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.48750001192092896,
"regularize": 73.7408447265625,
"step": 455
},
{
"dpo_loss": 35.18735885620117,
"epoch": 2.6074633915918755,
"grad_norm": 5721.627507359908,
"learning_rate": 2.7376401082604563e-06,
"logits": -0.648517370223999,
"logps": -77.49411010742188,
"loss": 72.6796,
"objective": 75.32291412353516,
"ranking_idealized": 0.512499988079071,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5083333253860474,
"regularize": 75.32291412353516,
"step": 460
},
{
"dpo_loss": 39.74345779418945,
"epoch": 2.6358053849787435,
"grad_norm": 5692.925017991515,
"learning_rate": 2.6882379059705953e-06,
"logits": -0.5508330464363098,
"logps": -77.89730834960938,
"loss": 71.4232,
"objective": 72.29166412353516,
"ranking_idealized": 0.4958333373069763,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.49166667461395264,
"regularize": 72.29166412353516,
"step": 465
},
{
"dpo_loss": 38.005611419677734,
"epoch": 2.6641473783656116,
"grad_norm": 5846.922593898297,
"learning_rate": 2.6387616609823506e-06,
"logits": -0.5707463622093201,
"logps": -77.01284790039062,
"loss": 70.6383,
"objective": 67.4544906616211,
"ranking_idealized": 0.5541666746139526,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.5291666388511658,
"regularize": 67.4544906616211,
"step": 470
},
{
"dpo_loss": 28.328413009643555,
"epoch": 2.69248937175248,
"grad_norm": 5857.504945194897,
"learning_rate": 2.5892308345974517e-06,
"logits": -0.6008989810943604,
"logps": -76.99757385253906,
"loss": 68.5943,
"objective": 67.81741333007812,
"ranking_idealized": 0.4958333373069763,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.49166667461395264,
"regularize": 67.81741333007812,
"step": 475
},
{
"dpo_loss": 32.999122619628906,
"epoch": 2.720831365139348,
"grad_norm": 6073.696561883967,
"learning_rate": 2.53966490958702e-06,
"logits": -0.6349701285362244,
"logps": -76.78397369384766,
"loss": 67.1153,
"objective": 68.42402648925781,
"ranking_idealized": 0.5,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.512499988079071,
"regularize": 68.42402648925781,
"step": 480
},
{
"dpo_loss": 34.57282257080078,
"epoch": 2.7491733585262166,
"grad_norm": 5933.827803950185,
"learning_rate": 2.490083382528097e-06,
"logits": -0.5782140493392944,
"logps": -79.55359649658203,
"loss": 69.0218,
"objective": 67.90946960449219,
"ranking_idealized": 0.5208333134651184,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5166666507720947,
"regularize": 67.90946960449219,
"step": 485
},
{
"dpo_loss": 37.92434310913086,
"epoch": 2.7775153519130846,
"grad_norm": 6156.113425613259,
"learning_rate": 2.440505756134732e-06,
"logits": -0.5393855571746826,
"logps": -77.88603210449219,
"loss": 67.9056,
"objective": 68.96742248535156,
"ranking_idealized": 0.47083333134651184,
"ranking_idealized_expo": 0.47083333134651184,
"ranking_simple": 0.46666666865348816,
"regularize": 68.96742248535156,
"step": 490
},
{
"dpo_loss": 43.407958984375,
"epoch": 2.8058573452999527,
"grad_norm": 5739.304263530335,
"learning_rate": 2.3909515315866606e-06,
"logits": -0.5840901136398315,
"logps": -77.01483917236328,
"loss": 65.3189,
"objective": 70.57030487060547,
"ranking_idealized": 0.4791666567325592,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.4375,
"regularize": 70.57030487060547,
"step": 495
},
{
"dpo_loss": 34.67405700683594,
"epoch": 2.8341993386868207,
"grad_norm": 5760.189667227515,
"learning_rate": 2.341440200858589e-06,
"logits": -0.6246147155761719,
"logps": -76.3143310546875,
"loss": 65.179,
"objective": 62.59866714477539,
"ranking_idealized": 0.5208333134651184,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5,
"regularize": 62.59866714477539,
"step": 500
},
{
"epoch": 2.8341993386868207,
"eval_dpo_loss": 114.98714447021484,
"eval_logits": -0.6892295479774475,
"eval_logps": -82.34246826171875,
"eval_loss": 223.8012237548828,
"eval_objective": 227.17547607421875,
"eval_ranking_idealized": 0.5092975497245789,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5082644820213318,
"eval_regularize": 227.17547607421875,
"eval_runtime": 259.8207,
"eval_samples_per_second": 22.285,
"eval_steps_per_second": 0.931,
"step": 500
},
{
"dpo_loss": 29.432533264160156,
"epoch": 2.862541332073689,
"grad_norm": 5869.553889606324,
"learning_rate": 2.2919912390530945e-06,
"logits": -0.6314287185668945,
"logps": -77.34867095947266,
"loss": 62.3763,
"objective": 57.663639068603516,
"ranking_idealized": 0.5083333253860474,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5249999761581421,
"regularize": 57.663639068603516,
"step": 505
},
{
"dpo_loss": 32.43122100830078,
"epoch": 2.8908833254605573,
"grad_norm": 5536.552957965224,
"learning_rate": 2.242624096740164e-06,
"logits": -0.6147390604019165,
"logps": -78.36116027832031,
"loss": 64.5354,
"objective": 59.26633834838867,
"ranking_idealized": 0.5249999761581421,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5416666865348816,
"regularize": 59.26633834838867,
"step": 510
},
{
"dpo_loss": 28.44099235534668,
"epoch": 2.9192253188474258,
"grad_norm": 5901.455292936029,
"learning_rate": 2.193358192306384e-06,
"logits": -0.6631244421005249,
"logps": -77.92379760742188,
"loss": 61.9083,
"objective": 54.49189758300781,
"ranking_idealized": 0.4791666567325592,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.48750001192092896,
"regularize": 54.49189758300781,
"step": 515
},
{
"dpo_loss": 30.10361671447754,
"epoch": 2.947567312234294,
"grad_norm": 6161.871560570766,
"learning_rate": 2.1442129043167877e-06,
"logits": -0.6410778760910034,
"logps": -79.62260437011719,
"loss": 63.1757,
"objective": 61.97486877441406,
"ranking_idealized": 0.5083333253860474,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5208333134651184,
"regularize": 61.97486877441406,
"step": 520
},
{
"dpo_loss": 30.742000579833984,
"epoch": 2.975909305621162,
"grad_norm": 6354.821007890038,
"learning_rate": 2.0952075638923656e-06,
"logits": -0.6085123419761658,
"logps": -78.72110748291016,
"loss": 61.2353,
"objective": 57.58578109741211,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.5416666865348816,
"regularize": 57.58578109741211,
"step": 525
},
{
"dpo_loss": 35.809959411621094,
"epoch": 3.0042512990080303,
"grad_norm": 5662.677867628538,
"learning_rate": 2.046361447106244e-06,
"logits": -0.6243312358856201,
"logps": -77.60399627685547,
"loss": 61.3834,
"objective": 55.699283599853516,
"ranking_idealized": 0.512499988079071,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.512499988079071,
"regularize": 55.699283599853516,
"step": 530
},
{
"dpo_loss": 28.718589782714844,
"epoch": 3.0325932923948984,
"grad_norm": 5725.286339309564,
"learning_rate": 1.997693767401503e-06,
"logits": -0.6556482315063477,
"logps": -78.42686462402344,
"loss": 56.5612,
"objective": 56.667293548583984,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5416666865348816,
"regularize": 56.667293548583984,
"step": 535
},
{
"dpo_loss": 29.884973526000977,
"epoch": 3.0609352857817664,
"grad_norm": 6203.03849936551,
"learning_rate": 1.9492236680336486e-06,
"logits": -0.6453068256378174,
"logps": -77.5386734008789,
"loss": 55.8428,
"objective": 58.181007385253906,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.550000011920929,
"regularize": 58.181007385253906,
"step": 540
},
{
"dpo_loss": 25.884765625,
"epoch": 3.089277279168635,
"grad_norm": 5980.787430652505,
"learning_rate": 1.9009702145406728e-06,
"logits": -0.6111847758293152,
"logps": -78.12004852294922,
"loss": 53.4125,
"objective": 54.29548263549805,
"ranking_idealized": 0.49166667461395264,
"ranking_idealized_expo": 0.49166667461395264,
"ranking_simple": 0.4958333373069763,
"regularize": 54.29548263549805,
"step": 545
},
{
"dpo_loss": 27.113332748413086,
"epoch": 3.117619272555503,
"grad_norm": 5749.494417530868,
"learning_rate": 1.852952387243698e-06,
"logits": -0.5344541668891907,
"logps": -78.76824188232422,
"loss": 52.3116,
"objective": 55.897151947021484,
"ranking_idealized": 0.5375000238418579,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5249999761581421,
"regularize": 55.897151947021484,
"step": 550
},
{
"epoch": 3.117619272555503,
"eval_dpo_loss": 114.9251937866211,
"eval_logits": -0.6290253400802612,
"eval_logps": -81.84333038330078,
"eval_loss": 223.6770477294922,
"eval_objective": 226.75909423828125,
"eval_ranking_idealized": 0.5092975497245789,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5103305578231812,
"eval_regularize": 226.75909423828125,
"eval_runtime": 260.203,
"eval_samples_per_second": 22.252,
"eval_steps_per_second": 0.93,
"step": 550
},
{
"dpo_loss": 27.520069122314453,
"epoch": 3.1459612659423715,
"grad_norm": 5785.031733960155,
"learning_rate": 1.8051890737811395e-06,
"logits": -0.49687737226486206,
"logps": -78.77640533447266,
"loss": 51.2223,
"objective": 51.64868927001953,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.5541666746139526,
"regularize": 51.64868927001953,
"step": 555
},
{
"dpo_loss": 24.614133834838867,
"epoch": 3.1743032593292395,
"grad_norm": 5927.440129812864,
"learning_rate": 1.7576990616793139e-06,
"logits": -0.5366522073745728,
"logps": -75.75458526611328,
"loss": 52.4275,
"objective": 50.823753356933594,
"ranking_idealized": 0.5041666626930237,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5083333253860474,
"regularize": 50.823753356933594,
"step": 560
},
{
"dpo_loss": 26.020414352416992,
"epoch": 3.2026452527161076,
"grad_norm": 6153.770849149074,
"learning_rate": 1.7105010309624381e-06,
"logits": -0.5409637689590454,
"logps": -77.39885711669922,
"loss": 51.519,
"objective": 49.933555603027344,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5541666746139526,
"regularize": 49.933555603027344,
"step": 565
},
{
"dpo_loss": 26.2557373046875,
"epoch": 3.230987246102976,
"grad_norm": 5301.823299812263,
"learning_rate": 1.6636135468049122e-06,
"logits": -0.5224726796150208,
"logps": -76.94538116455078,
"loss": 49.5781,
"objective": 46.85703659057617,
"ranking_idealized": 0.4791666567325592,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.46666666865348816,
"regularize": 46.85703659057617,
"step": 570
},
{
"dpo_loss": 30.09845733642578,
"epoch": 3.259329239489844,
"grad_norm": 5768.637134430283,
"learning_rate": 1.617055052228768e-06,
"logits": -0.5873778462409973,
"logps": -77.48201751708984,
"loss": 48.8665,
"objective": 49.90784454345703,
"ranking_idealized": 0.5,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.4749999940395355,
"regularize": 49.90784454345703,
"step": 575
},
{
"dpo_loss": 25.3740234375,
"epoch": 3.287671232876712,
"grad_norm": 5535.872847536237,
"learning_rate": 1.5708438608491816e-06,
"logits": -0.6106985211372375,
"logps": -78.49124145507812,
"loss": 47.4222,
"objective": 46.31576156616211,
"ranking_idealized": 0.5541666746139526,
"ranking_idealized_expo": 0.5541666746139526,
"ranking_simple": 0.5583333373069763,
"regularize": 46.31576156616211,
"step": 580
},
{
"dpo_loss": 26.91870880126953,
"epoch": 3.3160132262635806,
"grad_norm": 6071.893217545125,
"learning_rate": 1.524998149670871e-06,
"logits": -0.611818253993988,
"logps": -78.98985290527344,
"loss": 49.9675,
"objective": 49.87598419189453,
"ranking_idealized": 0.5291666388511658,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5208333134651184,
"regularize": 49.87598419189453,
"step": 585
},
{
"dpo_loss": 25.645116806030273,
"epoch": 3.3443552196504487,
"grad_norm": 5940.058676849357,
"learning_rate": 1.479535951938243e-06,
"logits": -0.6204649209976196,
"logps": -79.12604522705078,
"loss": 47.5629,
"objective": 50.466861724853516,
"ranking_idealized": 0.48750001192092896,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.49166667461395264,
"regularize": 50.466861724853516,
"step": 590
},
{
"dpo_loss": 23.5716552734375,
"epoch": 3.372697213037317,
"grad_norm": 5635.743149288216,
"learning_rate": 1.43447515004208e-06,
"logits": -0.5404379367828369,
"logps": -78.58365631103516,
"loss": 45.2162,
"objective": 44.304752349853516,
"ranking_idealized": 0.5249999761581421,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5375000238418579,
"regularize": 44.304752349853516,
"step": 595
},
{
"dpo_loss": 21.746015548706055,
"epoch": 3.4010392064241852,
"grad_norm": 5566.716122386338,
"learning_rate": 1.3898334684855647e-06,
"logits": -0.5577505826950073,
"logps": -77.53295135498047,
"loss": 45.9426,
"objective": 44.46009826660156,
"ranking_idealized": 0.5041666626930237,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.4958333373069763,
"regularize": 44.46009826660156,
"step": 600
},
{
"epoch": 3.4010392064241852,
"eval_dpo_loss": 113.63313293457031,
"eval_logits": -0.6182904839515686,
"eval_logps": -81.3167724609375,
"eval_loss": 222.4720458984375,
"eval_objective": 223.18727111816406,
"eval_ranking_idealized": 0.5092975497245789,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5072314143180847,
"eval_regularize": 223.18727111816406,
"eval_runtime": 260.7691,
"eval_samples_per_second": 22.204,
"eval_steps_per_second": 0.928,
"step": 600
},
{
"dpo_loss": 20.865568161010742,
"epoch": 3.4293811998110533,
"grad_norm": 6078.906522590126,
"learning_rate": 1.3456284669124159e-06,
"logits": -0.5672589540481567,
"logps": -79.59375762939453,
"loss": 44.9405,
"objective": 43.26637268066406,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5333333611488342,
"regularize": 43.26637268066406,
"step": 605
},
{
"dpo_loss": 23.76979637145996,
"epoch": 3.4577231931979218,
"grad_norm": 6106.495672204209,
"learning_rate": 1.301877533199859e-06,
"logits": -0.6027007699012756,
"logps": -77.29557800292969,
"loss": 43.4191,
"objective": 46.19691848754883,
"ranking_idealized": 0.5166666507720947,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.512499988079071,
"regularize": 46.19691848754883,
"step": 610
},
{
"dpo_loss": 20.621265411376953,
"epoch": 3.48606518658479,
"grad_norm": 5469.445552736854,
"learning_rate": 1.2585978766191726e-06,
"logits": -0.6229711771011353,
"logps": -77.44082641601562,
"loss": 41.5198,
"objective": 40.210323333740234,
"ranking_idealized": 0.46666666865348816,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.47083333134651184,
"regularize": 40.210323333740234,
"step": 615
},
{
"dpo_loss": 20.698007583618164,
"epoch": 3.514407179971658,
"grad_norm": 5640.751427601147,
"learning_rate": 1.2158065210664848e-06,
"logits": -0.5536880493164062,
"logps": -76.66329193115234,
"loss": 42.3006,
"objective": 39.67619323730469,
"ranking_idealized": 0.4958333373069763,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.4833333194255829,
"regularize": 39.67619323730469,
"step": 620
},
{
"dpo_loss": 19.59212875366211,
"epoch": 3.5427491733585263,
"grad_norm": 5586.621015909176,
"learning_rate": 1.1735202983664803e-06,
"logits": -0.5632150769233704,
"logps": -75.98930358886719,
"loss": 42.7824,
"objective": 40.66218948364258,
"ranking_idealized": 0.5208333134651184,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5291666388511658,
"regularize": 40.66218948364258,
"step": 625
},
{
"dpo_loss": 20.836034774780273,
"epoch": 3.5710911667453944,
"grad_norm": 5970.049888723431,
"learning_rate": 1.1317558416516696e-06,
"logits": -0.6033153533935547,
"logps": -75.92536163330078,
"loss": 41.207,
"objective": 38.50748062133789,
"ranking_idealized": 0.512499988079071,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.512499988079071,
"regularize": 38.50748062133789,
"step": 630
},
{
"dpo_loss": 18.94291877746582,
"epoch": 3.5994331601322624,
"grad_norm": 5671.302619310192,
"learning_rate": 1.0905295788197993e-06,
"logits": -0.5977668762207031,
"logps": -76.6431655883789,
"loss": 38.9416,
"objective": 40.45100402832031,
"ranking_idealized": 0.48750001192092896,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.48750001192092896,
"regularize": 40.45100402832031,
"step": 635
},
{
"dpo_loss": 21.708051681518555,
"epoch": 3.627775153519131,
"grad_norm": 5604.363744970696,
"learning_rate": 1.049857726072005e-06,
"logits": -0.5701695680618286,
"logps": -77.40218353271484,
"loss": 38.2866,
"objective": 42.04179382324219,
"ranking_idealized": 0.4625000059604645,
"ranking_idealized_expo": 0.4625000059604645,
"ranking_simple": 0.4749999940395355,
"regularize": 42.04179382324219,
"step": 640
},
{
"dpo_loss": 20.66307830810547,
"epoch": 3.656117146905999,
"grad_norm": 5446.269996729428,
"learning_rate": 1.0097562815342215e-06,
"logits": -0.5814236998558044,
"logps": -76.69013214111328,
"loss": 38.1396,
"objective": 35.1287841796875,
"ranking_idealized": 0.5249999761581421,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5249999761581421,
"regularize": 35.1287841796875,
"step": 645
},
{
"dpo_loss": 17.27460479736328,
"epoch": 3.6844591402928675,
"grad_norm": 5568.286359889519,
"learning_rate": 9.702410189643838e-07,
"logits": -0.5777478814125061,
"logps": -77.85608673095703,
"loss": 37.3789,
"objective": 35.844581604003906,
"ranking_idealized": 0.5041666626930237,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5083333253860474,
"regularize": 35.844581604003906,
"step": 650
},
{
"epoch": 3.6844591402928675,
"eval_dpo_loss": 114.61026000976562,
"eval_logits": -0.6355183720588684,
"eval_logps": -81.70125579833984,
"eval_loss": 223.4119110107422,
"eval_objective": 225.21566772460938,
"eval_ranking_idealized": 0.5092975497245789,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5072314143180847,
"eval_regularize": 225.21566772460938,
"eval_runtime": 259.96,
"eval_samples_per_second": 22.273,
"eval_steps_per_second": 0.931,
"step": 650
},
{
"dpo_loss": 15.926929473876953,
"epoch": 3.7128011336797355,
"grad_norm": 5550.664905496162,
"learning_rate": 9.313274815478698e-07,
"logits": -0.5537225008010864,
"logps": -76.9358901977539,
"loss": 36.4745,
"objective": 33.71458053588867,
"ranking_idealized": 0.5208333134651184,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5208333134651184,
"regularize": 33.71458053588867,
"step": 655
},
{
"dpo_loss": 19.080156326293945,
"epoch": 3.7411431270666036,
"grad_norm": 5480.303347312512,
"learning_rate": 8.930309757836517e-07,
"logits": -0.5778761506080627,
"logps": -78.07957458496094,
"loss": 36.4648,
"objective": 39.15421676635742,
"ranking_idealized": 0.5541666746139526,
"ranking_idealized_expo": 0.5541666746139526,
"ranking_simple": 0.5583333373069763,
"regularize": 39.15421676635742,
"step": 660
},
{
"dpo_loss": 16.046911239624023,
"epoch": 3.769485120453472,
"grad_norm": 5637.549638771613,
"learning_rate": 8.553665654635343e-07,
"logits": -0.5516543984413147,
"logps": -78.4363784790039,
"loss": 35.2475,
"objective": 35.23891830444336,
"ranking_idealized": 0.5166666507720947,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5083333253860474,
"regularize": 35.23891830444336,
"step": 665
},
{
"dpo_loss": 14.368963241577148,
"epoch": 3.79782711384034,
"grad_norm": 5529.55552176345,
"learning_rate": 8.183490657468687e-07,
"logits": -0.5940511226654053,
"logps": -78.2576675415039,
"loss": 33.6192,
"objective": 31.81795883178711,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.5916666388511658,
"ranking_simple": 0.5958333611488342,
"regularize": 31.81795883178711,
"step": 670
},
{
"dpo_loss": 14.198540687561035,
"epoch": 3.826169107227208,
"grad_norm": 6007.275340904618,
"learning_rate": 7.819930373330669e-07,
"logits": -0.5869444608688354,
"logps": -77.28413391113281,
"loss": 34.2996,
"objective": 32.41952133178711,
"ranking_idealized": 0.5375000238418579,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5458333492279053,
"regularize": 32.41952133178711,
"step": 675
},
{
"dpo_loss": 17.151575088500977,
"epoch": 3.8545111006140766,
"grad_norm": 5553.071492491405,
"learning_rate": 7.463127807341966e-07,
"logits": -0.5520313382148743,
"logps": -77.69143676757812,
"loss": 32.8529,
"objective": 37.0202522277832,
"ranking_idealized": 0.5083333253860474,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5083333253860474,
"regularize": 37.0202522277832,
"step": 680
},
{
"dpo_loss": 17.791221618652344,
"epoch": 3.8828530940009447,
"grad_norm": 5648.54220107531,
"learning_rate": 7.113223306499336e-07,
"logits": -0.5711807608604431,
"logps": -77.1575698852539,
"loss": 32.524,
"objective": 31.409448623657227,
"ranking_idealized": 0.5166666507720947,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5291666388511658,
"regularize": 31.409448623657227,
"step": 685
},
{
"dpo_loss": 13.461447715759277,
"epoch": 3.9111950873878127,
"grad_norm": 5401.846772194243,
"learning_rate": 6.770354504470575e-07,
"logits": -0.6161063313484192,
"logps": -77.0300064086914,
"loss": 30.9577,
"objective": 27.045875549316406,
"ranking_idealized": 0.5458333492279053,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.5541666746139526,
"regularize": 27.045875549316406,
"step": 690
},
{
"dpo_loss": 14.924878120422363,
"epoch": 3.9395370807746812,
"grad_norm": 5461.186181284553,
"learning_rate": 6.434656267456843e-07,
"logits": -0.5763762593269348,
"logps": -78.13215637207031,
"loss": 30.6986,
"objective": 32.01544189453125,
"ranking_idealized": 0.46666666865348816,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.4749999940395355,
"regularize": 32.01544189453125,
"step": 695
},
{
"dpo_loss": 18.97331428527832,
"epoch": 3.9678790741615493,
"grad_norm": 5818.6857506377755,
"learning_rate": 6.106260641143547e-07,
"logits": -0.5926896333694458,
"logps": -77.89541625976562,
"loss": 32.7043,
"objective": 33.83491516113281,
"ranking_idealized": 0.4833333194255829,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.4791666567325592,
"regularize": 33.83491516113281,
"step": 700
},
{
"epoch": 3.9678790741615493,
"eval_dpo_loss": 114.2601547241211,
"eval_logits": -0.6585275530815125,
"eval_logps": -81.83432006835938,
"eval_loss": 223.54994201660156,
"eval_objective": 224.45416259765625,
"eval_ranking_idealized": 0.5092975497245789,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5061983466148376,
"eval_regularize": 224.45416259765625,
"eval_runtime": 260.3584,
"eval_samples_per_second": 22.239,
"eval_steps_per_second": 0.929,
"step": 700
},
{
"dpo_loss": 13.038025856018066,
"epoch": 3.9962210675484178,
"grad_norm": 5581.426984911725,
"learning_rate": 5.785296798760601e-07,
"logits": -0.5679181218147278,
"logps": -76.54711151123047,
"loss": 29.9148,
"objective": 30.134431838989258,
"ranking_idealized": 0.5249999761581421,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5333333611488342,
"regularize": 30.134431838989258,
"step": 705
},
{
"dpo_loss": 12.296858787536621,
"epoch": 4.024563060935286,
"grad_norm": 5718.960380456749,
"learning_rate": 5.471890990272666e-07,
"logits": -0.6013753414154053,
"logps": -78.08429718017578,
"loss": 26.5384,
"objective": 26.26165771484375,
"ranking_idealized": 0.4791666567325592,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.4791666567325592,
"regularize": 26.26165771484375,
"step": 710
},
{
"dpo_loss": 10.493599891662598,
"epoch": 4.052905054322154,
"grad_norm": 5570.51042110099,
"learning_rate": 5.166166492719124e-07,
"logits": -0.585443377494812,
"logps": -77.34422302246094,
"loss": 25.5029,
"objective": 24.800411224365234,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5375000238418579,
"regularize": 24.800411224365234,
"step": 715
},
{
"dpo_loss": 11.174727439880371,
"epoch": 4.081247047709022,
"grad_norm": 5676.732061923856,
"learning_rate": 4.868243561723535e-07,
"logits": -0.5442956686019897,
"logps": -79.65494537353516,
"loss": 25.0424,
"objective": 26.295820236206055,
"ranking_idealized": 0.5208333134651184,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5249999761581421,
"regularize": 26.295820236206055,
"step": 720
},
{
"dpo_loss": 10.66411304473877,
"epoch": 4.109589041095891,
"grad_norm": 6033.326375340949,
"learning_rate": 4.57823938419153e-07,
"logits": -0.5671114325523376,
"logps": -77.33932495117188,
"loss": 24.0471,
"objective": 21.140954971313477,
"ranking_idealized": 0.5291666388511658,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5291666388511658,
"regularize": 21.140954971313477,
"step": 725
},
{
"dpo_loss": 14.05311107635498,
"epoch": 4.137931034482759,
"grad_norm": 5855.037678063073,
"learning_rate": 4.2962680322157335e-07,
"logits": -0.6375981569290161,
"logps": -77.6299057006836,
"loss": 24.9993,
"objective": 25.476118087768555,
"ranking_idealized": 0.5375000238418579,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5416666865348816,
"regularize": 25.476118087768555,
"step": 730
},
{
"dpo_loss": 9.336868286132812,
"epoch": 4.166273027869627,
"grad_norm": 5559.618972518497,
"learning_rate": 4.0224404182059443e-07,
"logits": -0.5607864856719971,
"logps": -78.882568359375,
"loss": 22.8143,
"objective": 21.191997528076172,
"ranking_idealized": 0.5249999761581421,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5166666507720947,
"regularize": 21.191997528076172,
"step": 735
},
{
"dpo_loss": 10.657824516296387,
"epoch": 4.194615021256495,
"grad_norm": 5999.26275088513,
"learning_rate": 3.756864251262143e-07,
"logits": -0.6011705994606018,
"logps": -78.2365951538086,
"loss": 23.5575,
"objective": 21.66937255859375,
"ranking_idealized": 0.5208333134651184,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5249999761581421,
"regularize": 21.66937255859375,
"step": 740
},
{
"dpo_loss": 13.124934196472168,
"epoch": 4.222957014643363,
"grad_norm": 5474.613445401946,
"learning_rate": 3.499643994807486e-07,
"logits": -0.6319831013679504,
"logps": -75.88219451904297,
"loss": 22.84,
"objective": 24.67308807373047,
"ranking_idealized": 0.48750001192092896,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.49166667461395264,
"regularize": 24.67308807373047,
"step": 745
},
{
"dpo_loss": 8.683289527893066,
"epoch": 4.251299008030231,
"grad_norm": 5367.021912836146,
"learning_rate": 3.250880825498026e-07,
"logits": -0.6556726098060608,
"logps": -77.68559265136719,
"loss": 22.8627,
"objective": 24.22634506225586,
"ranking_idealized": 0.5249999761581421,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5291666388511658,
"regularize": 24.22634506225586,
"step": 750
},
{
"epoch": 4.251299008030231,
"eval_dpo_loss": 114.44987487792969,
"eval_logits": -0.656375527381897,
"eval_logps": -81.75474548339844,
"eval_loss": 223.7742462158203,
"eval_objective": 224.67481994628906,
"eval_ranking_idealized": 0.5092975497245789,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5072314143180847,
"eval_regularize": 224.67481994628906,
"eval_runtime": 260.8308,
"eval_samples_per_second": 22.198,
"eval_steps_per_second": 0.928,
"step": 750
},
{
"dpo_loss": 10.33426284790039,
"epoch": 4.2796410014171,
"grad_norm": 5558.083809420797,
"learning_rate": 3.0106725934252095e-07,
"logits": -0.620187520980835,
"logps": -77.5008544921875,
"loss": 23.8011,
"objective": 21.08031463623047,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5333333611488342,
"regularize": 21.08031463623047,
"step": 755
},
{
"dpo_loss": 10.207574844360352,
"epoch": 4.307982994803968,
"grad_norm": 5471.852981397453,
"learning_rate": 2.779113783626916e-07,
"logits": -0.6027387976646423,
"logps": -77.6553726196289,
"loss": 20.5752,
"objective": 20.16022300720215,
"ranking_idealized": 0.5625,
"ranking_idealized_expo": 0.5625,
"ranking_simple": 0.5583333373069763,
"regularize": 20.16022300720215,
"step": 760
},
{
"dpo_loss": 11.417025566101074,
"epoch": 4.336324988190836,
"grad_norm": 5508.4710027728315,
"learning_rate": 2.5562954789221164e-07,
"logits": -0.6078615784645081,
"logps": -78.18927764892578,
"loss": 23.2097,
"objective": 23.003475189208984,
"ranking_idealized": 0.5249999761581421,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5208333134651184,
"regularize": 23.003475189208984,
"step": 765
},
{
"dpo_loss": 12.027268409729004,
"epoch": 4.364666981577704,
"grad_norm": 5910.519266120715,
"learning_rate": 2.3423053240837518e-07,
"logits": -0.5967121720314026,
"logps": -76.82184600830078,
"loss": 22.881,
"objective": 21.95987892150879,
"ranking_idealized": 0.5041666626930237,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5083333253860474,
"regularize": 21.95987892150879,
"step": 770
},
{
"dpo_loss": 8.871806144714355,
"epoch": 4.393008974964572,
"grad_norm": 5570.02355891466,
"learning_rate": 2.137227491364016e-07,
"logits": -0.5998777747154236,
"logps": -77.76221466064453,
"loss": 21.4575,
"objective": 21.222782135009766,
"ranking_idealized": 0.5083333253860474,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.512499988079071,
"regularize": 21.222782135009766,
"step": 775
},
{
"dpo_loss": 9.251056671142578,
"epoch": 4.42135096835144,
"grad_norm": 5696.323750788966,
"learning_rate": 1.941142647385469e-07,
"logits": -0.6099433898925781,
"logps": -76.77149963378906,
"loss": 21.1917,
"objective": 19.60472869873047,
"ranking_idealized": 0.5,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5083333253860474,
"regularize": 19.60472869873047,
"step": 780
},
{
"dpo_loss": 13.425146102905273,
"epoch": 4.449692961738309,
"grad_norm": 5399.943749207063,
"learning_rate": 1.7541279214111277e-07,
"logits": -0.6504854559898376,
"logps": -76.7619400024414,
"loss": 20.4983,
"objective": 22.567527770996094,
"ranking_idealized": 0.5291666388511658,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5249999761581421,
"regularize": 22.567527770996094,
"step": 785
},
{
"dpo_loss": 8.888577461242676,
"epoch": 4.478034955125177,
"grad_norm": 5693.14721540096,
"learning_rate": 1.5762568750059604e-07,
"logits": -0.631601095199585,
"logps": -79.10377502441406,
"loss": 20.8686,
"objective": 19.034799575805664,
"ranking_idealized": 0.5541666746139526,
"ranking_idealized_expo": 0.5541666746139526,
"ranking_simple": 0.5583333373069763,
"regularize": 19.034799575805664,
"step": 790
},
{
"dpo_loss": 9.370686531066895,
"epoch": 4.506376948512045,
"grad_norm": 5419.310940845333,
"learning_rate": 1.4075994731016895e-07,
"logits": -0.5613911747932434,
"logps": -78.7647705078125,
"loss": 19.2852,
"objective": 19.674354553222656,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.550000011920929,
"regularize": 19.674354553222656,
"step": 795
},
{
"dpo_loss": 9.124998092651367,
"epoch": 4.534718941898913,
"grad_norm": 5936.415800302871,
"learning_rate": 1.2482220564763669e-07,
"logits": -0.5293439626693726,
"logps": -77.9144287109375,
"loss": 19.3618,
"objective": 20.374727249145508,
"ranking_idealized": 0.5083333253860474,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5166666507720947,
"regularize": 20.374727249145508,
"step": 800
},
{
"epoch": 4.534718941898913,
"eval_dpo_loss": 114.34851837158203,
"eval_logits": -0.6540291905403137,
"eval_logps": -81.88977813720703,
"eval_loss": 223.28860473632812,
"eval_objective": 224.43710327148438,
"eval_ranking_idealized": 0.5092975497245789,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5082644820213318,
"eval_regularize": 224.43710327148438,
"eval_runtime": 261.2589,
"eval_samples_per_second": 22.162,
"eval_steps_per_second": 0.926,
"step": 800
},
{
"dpo_loss": 8.316792488098145,
"epoch": 4.563060935285781,
"grad_norm": 6027.940037587719,
"learning_rate": 1.0981873156594381e-07,
"logits": -0.5915284752845764,
"logps": -77.64116668701172,
"loss": 18.7882,
"objective": 17.984209060668945,
"ranking_idealized": 0.4583333432674408,
"ranking_idealized_expo": 0.4583333432674408,
"ranking_simple": 0.4541666805744171,
"regularize": 17.984209060668945,
"step": 805
},
{
"dpo_loss": 10.590494155883789,
"epoch": 4.59140292867265,
"grad_norm": 5522.739351644112,
"learning_rate": 9.575542662726756e-08,
"logits": -0.615004301071167,
"logps": -76.7275161743164,
"loss": 18.881,
"objective": 19.299503326416016,
"ranking_idealized": 0.4583333432674408,
"ranking_idealized_expo": 0.4583333432674408,
"ranking_simple": 0.4625000059604645,
"regularize": 19.299503326416016,
"step": 810
},
{
"dpo_loss": 10.131912231445312,
"epoch": 4.619744922059518,
"grad_norm": 5480.52996485649,
"learning_rate": 8.26378225816582e-08,
"logits": -0.5336829423904419,
"logps": -78.38713836669922,
"loss": 20.3217,
"objective": 20.68436050415039,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5874999761581421,
"ranking_simple": 0.5874999761581421,
"regularize": 20.68436050415039,
"step": 815
},
{
"dpo_loss": 9.877272605895996,
"epoch": 4.648086915446386,
"grad_norm": 5772.91774545367,
"learning_rate": 7.047107919114588e-08,
"logits": -0.5947645306587219,
"logps": -77.6579360961914,
"loss": 18.9691,
"objective": 20.117984771728516,
"ranking_idealized": 0.5625,
"ranking_idealized_expo": 0.5625,
"ranking_simple": 0.5541666746139526,
"regularize": 20.117984771728516,
"step": 820
},
{
"dpo_loss": 9.382747650146484,
"epoch": 4.6764289088332545,
"grad_norm": 5315.332110164652,
"learning_rate": 5.92599822001666e-08,
"logits": -0.5714208483695984,
"logps": -76.10657501220703,
"loss": 18.8755,
"objective": 17.507164001464844,
"ranking_idealized": 0.5,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.4958333373069763,
"regularize": 17.507164001464844,
"step": 825
},
{
"dpo_loss": 12.066271781921387,
"epoch": 4.7047709022201225,
"grad_norm": 5546.435431499101,
"learning_rate": 4.9008941453107527e-08,
"logits": -0.6190983653068542,
"logps": -78.2457046508789,
"loss": 19.5002,
"objective": 20.513113021850586,
"ranking_idealized": 0.512499988079071,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5291666388511658,
"regularize": 20.513113021850586,
"step": 830
},
{
"dpo_loss": 9.90359115600586,
"epoch": 4.733112895606991,
"grad_norm": 5332.26085581772,
"learning_rate": 3.972198915970976e-08,
"logits": -0.5777944922447205,
"logps": -77.54198455810547,
"loss": 20.0274,
"objective": 21.012075424194336,
"ranking_idealized": 0.48750001192092896,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.4958333373069763,
"regularize": 21.012075424194336,
"step": 835
},
{
"dpo_loss": 9.880459785461426,
"epoch": 4.7614548889938595,
"grad_norm": 5541.828579858096,
"learning_rate": 3.1402778309014284e-08,
"logits": -0.5807673931121826,
"logps": -77.97594451904297,
"loss": 18.1421,
"objective": 20.538494110107422,
"ranking_idealized": 0.5541666746139526,
"ranking_idealized_expo": 0.5541666746139526,
"ranking_simple": 0.5541666746139526,
"regularize": 20.538494110107422,
"step": 840
},
{
"dpo_loss": 8.26816177368164,
"epoch": 4.7897968823807275,
"grad_norm": 5943.885633928928,
"learning_rate": 2.4054581232470785e-08,
"logits": -0.5882014036178589,
"logps": -78.15058135986328,
"loss": 18.3375,
"objective": 17.440135955810547,
"ranking_idealized": 0.4541666805744171,
"ranking_idealized_expo": 0.4541666805744171,
"ranking_simple": 0.4583333432674408,
"regularize": 17.440135955810547,
"step": 845
},
{
"dpo_loss": 10.461382865905762,
"epoch": 4.818138875767596,
"grad_norm": 5496.982876639107,
"learning_rate": 1.768028831677926e-08,
"logits": -0.6040247678756714,
"logps": -76.82685852050781,
"loss": 18.3796,
"objective": 16.339303970336914,
"ranking_idealized": 0.5291666388511658,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5249999761581421,
"regularize": 16.339303970336914,
"step": 850
},
{
"epoch": 4.818138875767596,
"eval_dpo_loss": 114.2867202758789,
"eval_logits": -0.6522302627563477,
"eval_logps": -81.8523941040039,
"eval_loss": 223.19020080566406,
"eval_objective": 224.42820739746094,
"eval_ranking_idealized": 0.5092975497245789,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5082644820213318,
"eval_regularize": 224.42820739746094,
"eval_runtime": 260.7184,
"eval_samples_per_second": 22.208,
"eval_steps_per_second": 0.928,
"step": 850
},
{
"dpo_loss": 10.172054290771484,
"epoch": 4.846480869154464,
"grad_norm": 5801.469775352937,
"learning_rate": 1.2282406866966078e-08,
"logits": -0.565997302532196,
"logps": -76.97755432128906,
"loss": 18.5829,
"objective": 20.51591682434082,
"ranking_idealized": 0.5375000238418579,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5458333492279053,
"regularize": 20.51591682434082,
"step": 855
},
{
"dpo_loss": 8.222423553466797,
"epoch": 4.874822862541333,
"grad_norm": 5724.160408470258,
"learning_rate": 7.863060120144316e-09,
"logits": -0.5622321963310242,
"logps": -77.1912612915039,
"loss": 19.2154,
"objective": 18.061235427856445,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.5458333492279053,
"regularize": 18.061235427856445,
"step": 860
},
{
"dpo_loss": 9.016589164733887,
"epoch": 4.903164855928201,
"grad_norm": 5747.678123234768,
"learning_rate": 4.423986410346526e-09,
"logits": -0.5972442030906677,
"logps": -76.20024108886719,
"loss": 17.5709,
"objective": 17.75177574157715,
"ranking_idealized": 0.4958333373069763,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.4833333194255829,
"regularize": 17.75177574157715,
"step": 865
},
{
"dpo_loss": 8.523500442504883,
"epoch": 4.931506849315069,
"grad_norm": 5849.97189887377,
"learning_rate": 1.9665384847583622e-09,
"logits": -0.6018223166465759,
"logps": -77.82225036621094,
"loss": 17.7554,
"objective": 17.821632385253906,
"ranking_idealized": 0.5083333253860474,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.512499988079071,
"regularize": 17.821632385253906,
"step": 870
},
{
"dpo_loss": 9.60958480834961,
"epoch": 4.959848842701937,
"grad_norm": 5608.648575998045,
"learning_rate": 4.916829716183901e-10,
"logits": -0.5770124197006226,
"logps": -77.4490966796875,
"loss": 18.6192,
"objective": 19.857257843017578,
"ranking_idealized": 0.5458333492279053,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.5541666746139526,
"regularize": 19.857257843017578,
"step": 875
},
{
"dpo_loss": 10.620950698852539,
"epoch": 4.988190836088805,
"grad_norm": 5634.2530385257105,
"learning_rate": 0.0,
"logits": -0.6300147175788879,
"logps": -78.3343734741211,
"loss": 18.2825,
"objective": 19.312227249145508,
"ranking_idealized": 0.5291666388511658,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5291666388511658,
"regularize": 19.312227249145508,
"step": 880
},
{
"epoch": 4.988190836088805,
"step": 880,
"total_flos": 0.0,
"train_loss": 74.13437041504817,
"train_runtime": 35293.9194,
"train_samples_per_second": 7.197,
"train_steps_per_second": 0.025
}
],
"logging_steps": 5,
"max_steps": 880,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}