qwen2.5-0.5b-expo-L2EXPO-ES-100 / trainer_state.json
hZzy's picture
Model save
d13ca91 verified
{
"best_metric": 14.046432495117188,
"best_model_checkpoint": "./qwen2.5-0.5b/qwen2.5-0.5b-expo-L2EXPO-ES-100/checkpoint-550",
"epoch": 2.9806329711856403,
"eval_steps": 50,
"global_step": 1050,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"dpo_loss": 0.6931471824645996,
"epoch": 0.002834199338686821,
"grad_norm": 36884.87916049903,
"learning_rate": 2.840909090909091e-08,
"logits": -1.359458565711975,
"logps": -84.69721221923828,
"loss": 0.3913,
"objective": 0.3618059456348419,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.5833333134651184,
"regularize": 0.3618059456348419,
"step": 1,
"wo_beta": 14.830931663513184
},
{
"dpo_loss": 20.195844650268555,
"epoch": 0.14170996693434104,
"grad_norm": 35665.35173471636,
"learning_rate": 1.4204545454545458e-06,
"logits": -1.4575351476669312,
"logps": -84.27513122558594,
"loss": 43.2587,
"objective": 41.916500091552734,
"ranking_idealized": 0.5221088528633118,
"ranking_idealized_expo": 0.5216836929321289,
"ranking_simple": 0.5216836929321289,
"regularize": 41.916500091552734,
"step": 50,
"wo_beta": 15.635692596435547
},
{
"epoch": 0.14170996693434104,
"eval_dpo_loss": 26.447525024414062,
"eval_logits": -1.4447709321975708,
"eval_logps": -90.52921295166016,
"eval_loss": 52.66217041015625,
"eval_objective": 53.697696685791016,
"eval_ranking_idealized": 0.5212215185165405,
"eval_ranking_idealized_expo": 0.5212215185165405,
"eval_ranking_simple": 0.5263975262641907,
"eval_regularize": 53.697696685791016,
"eval_runtime": 309.407,
"eval_samples_per_second": 18.713,
"eval_steps_per_second": 1.561,
"eval_wo_beta": 16.170011520385742,
"step": 50
},
{
"dpo_loss": 89.97029876708984,
"epoch": 0.2834199338686821,
"grad_norm": 28235.60144716246,
"learning_rate": 2.8409090909090916e-06,
"logits": -1.398974061012268,
"logps": -82.89569091796875,
"loss": 169.8852,
"objective": 172.36553955078125,
"ranking_idealized": 0.5137500166893005,
"ranking_idealized_expo": 0.5137500166893005,
"ranking_simple": 0.51583331823349,
"regularize": 172.36553955078125,
"step": 100,
"wo_beta": 15.30754566192627
},
{
"epoch": 0.2834199338686821,
"eval_dpo_loss": 85.763916015625,
"eval_logits": -1.3620884418487549,
"eval_logps": -85.2786636352539,
"eval_loss": 173.986083984375,
"eval_objective": 172.1890869140625,
"eval_ranking_idealized": 0.5212215185165405,
"eval_ranking_idealized_expo": 0.5212215185165405,
"eval_ranking_simple": 0.5243270993232727,
"eval_regularize": 172.1890869140625,
"eval_runtime": 308.6643,
"eval_samples_per_second": 18.758,
"eval_steps_per_second": 1.565,
"eval_wo_beta": 15.439105033874512,
"step": 100
},
{
"dpo_loss": 149.43614196777344,
"epoch": 0.42512990080302315,
"grad_norm": 21173.096618846714,
"learning_rate": 4.2613636363636365e-06,
"logits": -1.213483214378357,
"logps": -78.55652618408203,
"loss": 285.0432,
"objective": 280.7228088378906,
"ranking_idealized": 0.527916669845581,
"ranking_idealized_expo": 0.527916669845581,
"ranking_simple": 0.5229166746139526,
"regularize": 280.7228088378906,
"step": 150,
"wo_beta": 15.080223083496094
},
{
"epoch": 0.42512990080302315,
"eval_dpo_loss": 143.02996826171875,
"eval_logits": -1.1693531274795532,
"eval_logps": -83.2181167602539,
"eval_loss": 291.4833679199219,
"eval_objective": 293.4403991699219,
"eval_ranking_idealized": 0.5212215185165405,
"eval_ranking_idealized_expo": 0.5212215185165405,
"eval_ranking_simple": 0.5279502868652344,
"eval_regularize": 293.4403991699219,
"eval_runtime": 311.5234,
"eval_samples_per_second": 18.586,
"eval_steps_per_second": 1.55,
"eval_wo_beta": 15.222454071044922,
"step": 150
},
{
"dpo_loss": 181.66571044921875,
"epoch": 0.5668398677373642,
"grad_norm": 20619.52737873687,
"learning_rate": 4.997168347957521e-06,
"logits": -0.9392554759979248,
"logps": -78.61503601074219,
"loss": 355.4066,
"objective": 356.2313232421875,
"ranking_idealized": 0.51541668176651,
"ranking_idealized_expo": 0.51541668176651,
"ranking_simple": 0.5191666483879089,
"regularize": 356.2313232421875,
"step": 200,
"wo_beta": 15.349950790405273
},
{
"epoch": 0.5668398677373642,
"eval_dpo_loss": 189.846923828125,
"eval_logits": -0.9273601174354553,
"eval_logps": -84.03199768066406,
"eval_loss": 372.7905578613281,
"eval_objective": 365.21240234375,
"eval_ranking_idealized": 0.5212215185165405,
"eval_ranking_idealized_expo": 0.5212215185165405,
"eval_ranking_simple": 0.5232919454574585,
"eval_regularize": 365.21240234375,
"eval_runtime": 308.8911,
"eval_samples_per_second": 18.744,
"eval_steps_per_second": 1.564,
"eval_wo_beta": 14.868427276611328,
"step": 200
},
{
"dpo_loss": 186.671142578125,
"epoch": 0.7085498346717053,
"grad_norm": 17849.874243754268,
"learning_rate": 4.973122855144066e-06,
"logits": -0.822012186050415,
"logps": -76.92431640625,
"loss": 368.9811,
"objective": 366.2552490234375,
"ranking_idealized": 0.5162500143051147,
"ranking_idealized_expo": 0.5162500143051147,
"ranking_simple": 0.5074999928474426,
"regularize": 366.2552490234375,
"step": 250,
"wo_beta": 15.5939359664917
},
{
"epoch": 0.7085498346717053,
"eval_dpo_loss": 216.4584197998047,
"eval_logits": -0.7745867967605591,
"eval_logps": -81.50496673583984,
"eval_loss": 446.6966247558594,
"eval_objective": 442.3320617675781,
"eval_ranking_idealized": 0.5212215185165405,
"eval_ranking_idealized_expo": 0.5212215185165405,
"eval_ranking_simple": 0.5258799195289612,
"eval_regularize": 442.3320617675781,
"eval_runtime": 308.996,
"eval_samples_per_second": 18.738,
"eval_steps_per_second": 1.563,
"eval_wo_beta": 14.479047775268555,
"step": 250
},
{
"dpo_loss": 182.8459930419922,
"epoch": 0.8502598016060463,
"grad_norm": 16846.286343055544,
"learning_rate": 4.924776641419513e-06,
"logits": -0.6342157125473022,
"logps": -78.78164672851562,
"loss": 360.5868,
"objective": 363.29473876953125,
"ranking_idealized": 0.4950000047683716,
"ranking_idealized_expo": 0.4950000047683716,
"ranking_simple": 0.4970833361148834,
"regularize": 363.29473876953125,
"step": 300,
"wo_beta": 15.358329772949219
},
{
"epoch": 0.8502598016060463,
"eval_dpo_loss": 222.88400268554688,
"eval_logits": -0.5983948707580566,
"eval_logps": -82.20111083984375,
"eval_loss": 448.9505920410156,
"eval_objective": 443.9051208496094,
"eval_ranking_idealized": 0.5212215185165405,
"eval_ranking_idealized_expo": 0.5212215185165405,
"eval_ranking_simple": 0.5248447060585022,
"eval_regularize": 443.9051208496094,
"eval_runtime": 309.9292,
"eval_samples_per_second": 18.682,
"eval_steps_per_second": 1.558,
"eval_wo_beta": 14.392961502075195,
"step": 300
},
{
"dpo_loss": 171.55615234375,
"epoch": 0.9919697685403873,
"grad_norm": 16864.304890654712,
"learning_rate": 4.8526047530778175e-06,
"logits": -0.6237902641296387,
"logps": -79.1826400756836,
"loss": 338.3987,
"objective": 335.6865234375,
"ranking_idealized": 0.5254166722297668,
"ranking_idealized_expo": 0.5254166722297668,
"ranking_simple": 0.5216666460037231,
"regularize": 335.6865234375,
"step": 350,
"wo_beta": 15.20045280456543
},
{
"epoch": 0.9919697685403873,
"eval_dpo_loss": 232.93649291992188,
"eval_logits": -0.78554368019104,
"eval_logps": -84.16381072998047,
"eval_loss": 462.19232177734375,
"eval_objective": 461.2073059082031,
"eval_ranking_idealized": 0.5212215185165405,
"eval_ranking_idealized_expo": 0.5212215185165405,
"eval_ranking_simple": 0.5269151329994202,
"eval_regularize": 461.2073059082031,
"eval_runtime": 308.3435,
"eval_samples_per_second": 18.778,
"eval_steps_per_second": 1.566,
"eval_wo_beta": 14.297853469848633,
"step": 350
},
{
"dpo_loss": 158.00930786132812,
"epoch": 1.1336797354747283,
"grad_norm": 16419.225936790586,
"learning_rate": 4.757316345716554e-06,
"logits": -0.689696192741394,
"logps": -80.24182891845703,
"loss": 309.1712,
"objective": 306.0483703613281,
"ranking_idealized": 0.5320833325386047,
"ranking_idealized_expo": 0.5320833325386047,
"ranking_simple": 0.5229166746139526,
"regularize": 306.0483703613281,
"step": 400,
"wo_beta": 15.245408058166504
},
{
"epoch": 1.1336797354747283,
"eval_dpo_loss": 248.07177734375,
"eval_logits": -0.6413922905921936,
"eval_logps": -82.49343872070312,
"eval_loss": 480.5964660644531,
"eval_objective": 478.7404479980469,
"eval_ranking_idealized": 0.5212215185165405,
"eval_ranking_idealized_expo": 0.5212215185165405,
"eval_ranking_simple": 0.5253623127937317,
"eval_regularize": 478.7404479980469,
"eval_runtime": 308.667,
"eval_samples_per_second": 18.758,
"eval_steps_per_second": 1.565,
"eval_wo_beta": 14.387246131896973,
"step": 400
},
{
"dpo_loss": 142.9551239013672,
"epoch": 1.2753897024090695,
"grad_norm": 16215.238701636586,
"learning_rate": 4.639847716126855e-06,
"logits": -0.5957368612289429,
"logps": -78.66122436523438,
"loss": 298.1424,
"objective": 298.3353271484375,
"ranking_idealized": 0.5191666483879089,
"ranking_idealized_expo": 0.5191666483879089,
"ranking_simple": 0.5204166769981384,
"regularize": 298.3353271484375,
"step": 450,
"wo_beta": 15.736668586730957
},
{
"epoch": 1.2753897024090695,
"eval_dpo_loss": 247.8721923828125,
"eval_logits": -0.7014132142066956,
"eval_logps": -82.14649200439453,
"eval_loss": 480.3255615234375,
"eval_objective": 482.1766052246094,
"eval_ranking_idealized": 0.5212215185165405,
"eval_ranking_idealized_expo": 0.5212215185165405,
"eval_ranking_simple": 0.523809552192688,
"eval_regularize": 482.1766052246094,
"eval_runtime": 309.6985,
"eval_samples_per_second": 18.696,
"eval_steps_per_second": 1.56,
"eval_wo_beta": 14.369455337524414,
"step": 450
},
{
"dpo_loss": 135.57347106933594,
"epoch": 1.4170996693434104,
"grad_norm": 15944.03549179383,
"learning_rate": 4.501353102310901e-06,
"logits": -0.5253962278366089,
"logps": -78.11959075927734,
"loss": 282.4504,
"objective": 278.3994445800781,
"ranking_idealized": 0.4970833361148834,
"ranking_idealized_expo": 0.4970833361148834,
"ranking_simple": 0.49958333373069763,
"regularize": 278.3994445800781,
"step": 500,
"wo_beta": 15.295467376708984
},
{
"epoch": 1.4170996693434104,
"eval_dpo_loss": 252.20928955078125,
"eval_logits": -0.45775941014289856,
"eval_logps": -83.41010284423828,
"eval_loss": 493.7484436035156,
"eval_objective": 495.763916015625,
"eval_ranking_idealized": 0.5212215185165405,
"eval_ranking_idealized_expo": 0.5212215185165405,
"eval_ranking_simple": 0.5248447060585022,
"eval_regularize": 495.763916015625,
"eval_runtime": 308.7418,
"eval_samples_per_second": 18.754,
"eval_steps_per_second": 1.564,
"eval_wo_beta": 14.174327850341797,
"step": 500
},
{
"dpo_loss": 138.2200164794922,
"epoch": 1.5588096362777515,
"grad_norm": 14777.775900953873,
"learning_rate": 4.34319334202531e-06,
"logits": -0.43370625376701355,
"logps": -79.58238220214844,
"loss": 261.1027,
"objective": 261.7706298828125,
"ranking_idealized": 0.5104166865348816,
"ranking_idealized_expo": 0.5104166865348816,
"ranking_simple": 0.5104166865348816,
"regularize": 261.7706298828125,
"step": 550,
"wo_beta": 15.099705696105957
},
{
"epoch": 1.5588096362777515,
"eval_dpo_loss": 245.87562561035156,
"eval_logits": -0.5435077548027039,
"eval_logps": -82.8267593383789,
"eval_loss": 486.1625671386719,
"eval_objective": 489.7927551269531,
"eval_ranking_idealized": 0.5212215185165405,
"eval_ranking_idealized_expo": 0.5212215185165405,
"eval_ranking_simple": 0.5253623127937317,
"eval_regularize": 489.7927551269531,
"eval_runtime": 313.2811,
"eval_samples_per_second": 18.482,
"eval_steps_per_second": 1.542,
"eval_wo_beta": 14.046432495117188,
"step": 550
},
{
"dpo_loss": 129.54061889648438,
"epoch": 1.7005196032120926,
"grad_norm": 15449.253251441141,
"learning_rate": 4.16692250129073e-06,
"logits": -0.4195112884044647,
"logps": -79.62303924560547,
"loss": 255.9288,
"objective": 256.1207275390625,
"ranking_idealized": 0.5149999856948853,
"ranking_idealized_expo": 0.5149999856948853,
"ranking_simple": 0.5095833539962769,
"regularize": 256.1207275390625,
"step": 600,
"wo_beta": 15.011371612548828
},
{
"epoch": 1.7005196032120926,
"eval_dpo_loss": 251.29342651367188,
"eval_logits": -0.5346657633781433,
"eval_logps": -82.17684173583984,
"eval_loss": 500.38006591796875,
"eval_objective": 502.1727294921875,
"eval_ranking_idealized": 0.5212215185165405,
"eval_ranking_idealized_expo": 0.5212215185165405,
"eval_ranking_simple": 0.5269151329994202,
"eval_regularize": 502.1727294921875,
"eval_runtime": 309.4233,
"eval_samples_per_second": 18.712,
"eval_steps_per_second": 1.561,
"eval_wo_beta": 14.243566513061523,
"step": 600
},
{
"dpo_loss": 130.9433135986328,
"epoch": 1.8422295701464337,
"grad_norm": 15825.93366010816,
"learning_rate": 3.974272604254906e-06,
"logits": -0.4912276566028595,
"logps": -78.56413269042969,
"loss": 248.6787,
"objective": 253.7882843017578,
"ranking_idealized": 0.527916669845581,
"ranking_idealized_expo": 0.527916669845581,
"ranking_simple": 0.5224999785423279,
"regularize": 253.7882843017578,
"step": 650,
"wo_beta": 15.684560775756836
},
{
"epoch": 1.8422295701464337,
"eval_dpo_loss": 254.5959014892578,
"eval_logits": -0.5140498876571655,
"eval_logps": -81.49234771728516,
"eval_loss": 502.3152770996094,
"eval_objective": 504.1581726074219,
"eval_ranking_idealized": 0.5212215185165405,
"eval_ranking_idealized_expo": 0.5212215185165405,
"eval_ranking_simple": 0.5248447060585022,
"eval_regularize": 504.1581726074219,
"eval_runtime": 308.5565,
"eval_samples_per_second": 18.765,
"eval_steps_per_second": 1.565,
"eval_wo_beta": 14.331953048706055,
"step": 650
},
{
"dpo_loss": 112.15733337402344,
"epoch": 1.9839395370807746,
"grad_norm": 16229.838704301123,
"learning_rate": 3.767136614452458e-06,
"logits": -0.4758701026439667,
"logps": -78.98329162597656,
"loss": 226.4676,
"objective": 224.5537567138672,
"ranking_idealized": 0.5112500190734863,
"ranking_idealized_expo": 0.5108333230018616,
"ranking_simple": 0.5087500214576721,
"regularize": 224.5537567138672,
"step": 700,
"wo_beta": 15.5169095993042
},
{
"epoch": 1.9839395370807746,
"eval_dpo_loss": 264.166015625,
"eval_logits": -0.4816124141216278,
"eval_logps": -83.42156219482422,
"eval_loss": 512.698974609375,
"eval_objective": 516.7103271484375,
"eval_ranking_idealized": 0.5212215185165405,
"eval_ranking_idealized_expo": 0.5212215185165405,
"eval_ranking_simple": 0.5253623127937317,
"eval_regularize": 516.7103271484375,
"eval_runtime": 308.9876,
"eval_samples_per_second": 18.739,
"eval_steps_per_second": 1.563,
"eval_wo_beta": 14.08342456817627,
"step": 700
},
{
"dpo_loss": 105.4539566040039,
"epoch": 2.1256495040151155,
"grad_norm": 15281.11762954245,
"learning_rate": 3.547549834686222e-06,
"logits": -0.47171100974082947,
"logps": -79.71944427490234,
"loss": 207.1551,
"objective": 207.78355407714844,
"ranking_idealized": 0.5112500190734863,
"ranking_idealized_expo": 0.5112500190734863,
"ranking_simple": 0.51583331823349,
"regularize": 207.78355407714844,
"step": 750,
"wo_beta": 15.408516883850098
},
{
"epoch": 2.1256495040151155,
"eval_dpo_loss": 259.2528381347656,
"eval_logits": -0.5409926772117615,
"eval_logps": -83.45890045166016,
"eval_loss": 506.4237365722656,
"eval_objective": 510.6129150390625,
"eval_ranking_idealized": 0.5212215185165405,
"eval_ranking_idealized_expo": 0.5212215185165405,
"eval_ranking_simple": 0.523809552192688,
"eval_regularize": 510.6129150390625,
"eval_runtime": 308.6604,
"eval_samples_per_second": 18.758,
"eval_steps_per_second": 1.565,
"eval_wo_beta": 14.129502296447754,
"step": 750
},
{
"dpo_loss": 97.64974975585938,
"epoch": 2.2673594709494567,
"grad_norm": 15212.863475959,
"learning_rate": 3.3176699082935546e-06,
"logits": -0.48046550154685974,
"logps": -81.52581787109375,
"loss": 197.3545,
"objective": 198.5575714111328,
"ranking_idealized": 0.512499988079071,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5083333253860474,
"regularize": 198.5575714111328,
"step": 800,
"wo_beta": 15.000423431396484
},
{
"epoch": 2.2673594709494567,
"eval_dpo_loss": 262.3102111816406,
"eval_logits": -0.5658813118934631,
"eval_logps": -84.87467956542969,
"eval_loss": 513.39794921875,
"eval_objective": 514.31201171875,
"eval_ranking_idealized": 0.5212215185165405,
"eval_ranking_idealized_expo": 0.5212215185165405,
"eval_ranking_simple": 0.522774338722229,
"eval_regularize": 514.31201171875,
"eval_runtime": 310.3348,
"eval_samples_per_second": 18.657,
"eval_steps_per_second": 1.556,
"eval_wo_beta": 14.070388793945312,
"step": 800
},
{
"dpo_loss": 94.56294250488281,
"epoch": 2.413793103448276,
"grad_norm": 14099.488226379159,
"learning_rate": 3.0797556183036582e-06,
"logits": -0.5114213824272156,
"logps": -81.04893493652344,
"loss": 182.3796,
"objective": 182.07008361816406,
"ranking_idealized": 0.5145833492279053,
"ranking_idealized_expo": 0.5137500166893005,
"ranking_simple": 0.5129166841506958,
"regularize": 182.07008361816406,
"step": 850,
"wo_beta": 15.241059303283691
},
{
"epoch": 2.413793103448276,
"eval_dpo_loss": 254.12506103515625,
"eval_logits": -0.5509631037712097,
"eval_logps": -82.86239624023438,
"eval_loss": 501.88311767578125,
"eval_objective": 504.852294921875,
"eval_ranking_idealized": 0.5212215185165405,
"eval_ranking_idealized_expo": 0.5212215185165405,
"eval_ranking_simple": 0.5274327397346497,
"eval_regularize": 504.852294921875,
"eval_runtime": 309.6743,
"eval_samples_per_second": 18.697,
"eval_steps_per_second": 1.56,
"eval_wo_beta": 14.17065143585205,
"step": 850
},
{
"dpo_loss": 92.21830749511719,
"epoch": 2.555503070382617,
"grad_norm": 14214.15214964791,
"learning_rate": 2.8361446928038298e-06,
"logits": -0.49887704849243164,
"logps": -80.69136810302734,
"loss": 176.042,
"objective": 176.71592712402344,
"ranking_idealized": 0.5183333158493042,
"ranking_idealized_expo": 0.5179166793823242,
"ranking_simple": 0.5129166841506958,
"regularize": 176.71592712402344,
"step": 900,
"wo_beta": 15.599862098693848
},
{
"epoch": 2.555503070382617,
"eval_dpo_loss": 263.27996826171875,
"eval_logits": -0.5038771033287048,
"eval_logps": -85.07097625732422,
"eval_loss": 518.1983032226562,
"eval_objective": 519.5007934570312,
"eval_ranking_idealized": 0.5212215185165405,
"eval_ranking_idealized_expo": 0.5212215185165405,
"eval_ranking_simple": 0.523809552192688,
"eval_regularize": 519.5007934570312,
"eval_runtime": 308.5117,
"eval_samples_per_second": 18.768,
"eval_steps_per_second": 1.566,
"eval_wo_beta": 14.112290382385254,
"step": 900
},
{
"dpo_loss": 81.76506805419922,
"epoch": 2.697213037316958,
"grad_norm": 14116.155586290091,
"learning_rate": 2.5892308345974517e-06,
"logits": -0.45734792947769165,
"logps": -81.56855773925781,
"loss": 164.8281,
"objective": 161.75088500976562,
"ranking_idealized": 0.5045833587646484,
"ranking_idealized_expo": 0.5045833587646484,
"ranking_simple": 0.5099999904632568,
"regularize": 161.75088500976562,
"step": 950,
"wo_beta": 15.513051986694336
},
{
"epoch": 2.697213037316958,
"eval_dpo_loss": 262.8074035644531,
"eval_logits": -0.5199795961380005,
"eval_logps": -84.58429718017578,
"eval_loss": 512.1844482421875,
"eval_objective": 512.7650756835938,
"eval_ranking_idealized": 0.5212215185165405,
"eval_ranking_idealized_expo": 0.5212215185165405,
"eval_ranking_simple": 0.523809552192688,
"eval_regularize": 512.7650756835938,
"eval_runtime": 309.6572,
"eval_samples_per_second": 18.698,
"eval_steps_per_second": 1.56,
"eval_wo_beta": 14.164327621459961,
"step": 950
},
{
"dpo_loss": 76.9761962890625,
"epoch": 2.838923004251299,
"grad_norm": 13709.192801586882,
"learning_rate": 2.341440200858589e-06,
"logits": -0.46604040265083313,
"logps": -79.8453598022461,
"loss": 150.0401,
"objective": 150.50038146972656,
"ranking_idealized": 0.5112500190734863,
"ranking_idealized_expo": 0.5112500190734863,
"ranking_simple": 0.5099999904632568,
"regularize": 150.50038146972656,
"step": 1000,
"wo_beta": 15.057799339294434
},
{
"epoch": 2.838923004251299,
"eval_dpo_loss": 263.6168518066406,
"eval_logits": -0.5218656659126282,
"eval_logps": -83.73433685302734,
"eval_loss": 514.70361328125,
"eval_objective": 516.595947265625,
"eval_ranking_idealized": 0.5212215185165405,
"eval_ranking_idealized_expo": 0.5212215185165405,
"eval_ranking_simple": 0.5258799195289612,
"eval_regularize": 516.595947265625,
"eval_runtime": 308.5762,
"eval_samples_per_second": 18.764,
"eval_steps_per_second": 1.565,
"eval_wo_beta": 14.180042266845703,
"step": 1000
},
{
"dpo_loss": 69.53437805175781,
"epoch": 2.9806329711856403,
"grad_norm": 14565.063000853466,
"learning_rate": 2.0952075638923656e-06,
"logits": -0.4686031639575958,
"logps": -80.63922882080078,
"loss": 141.0317,
"objective": 136.92088317871094,
"ranking_idealized": 0.5174999833106995,
"ranking_idealized_expo": 0.5174999833106995,
"ranking_simple": 0.5262500047683716,
"regularize": 136.92088317871094,
"step": 1050,
"wo_beta": 15.512243270874023
},
{
"epoch": 2.9806329711856403,
"eval_dpo_loss": 266.94525146484375,
"eval_logits": -0.49528759717941284,
"eval_logps": -84.26761627197266,
"eval_loss": 519.2467041015625,
"eval_objective": 521.8153076171875,
"eval_ranking_idealized": 0.5212215185165405,
"eval_ranking_idealized_expo": 0.5212215185165405,
"eval_ranking_simple": 0.5263975262641907,
"eval_regularize": 521.8153076171875,
"eval_runtime": 308.2205,
"eval_samples_per_second": 18.785,
"eval_steps_per_second": 1.567,
"eval_wo_beta": 14.257741928100586,
"step": 1050
},
{
"epoch": 2.9806329711856403,
"step": 1050,
"total_flos": 0.0,
"train_loss": 38.77721726190476,
"train_runtime": 6898.2426,
"train_samples_per_second": 36.822,
"train_steps_per_second": 0.255
}
],
"logging_steps": 50,
"max_steps": 1760,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 50,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}