|
{ |
|
"best_metric": 14.046432495117188, |
|
"best_model_checkpoint": "./qwen2.5-0.5b/qwen2.5-0.5b-expo-L2EXPO-ES-100/checkpoint-550", |
|
"epoch": 2.9806329711856403, |
|
"eval_steps": 50, |
|
"global_step": 1050, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"dpo_loss": 0.6931471824645996, |
|
"epoch": 0.002834199338686821, |
|
"grad_norm": 36884.87916049903, |
|
"learning_rate": 2.840909090909091e-08, |
|
"logits": -1.359458565711975, |
|
"logps": -84.69721221923828, |
|
"loss": 0.3913, |
|
"objective": 0.3618059456348419, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.3618059456348419, |
|
"step": 1, |
|
"wo_beta": 14.830931663513184 |
|
}, |
|
{ |
|
"dpo_loss": 20.195844650268555, |
|
"epoch": 0.14170996693434104, |
|
"grad_norm": 35665.35173471636, |
|
"learning_rate": 1.4204545454545458e-06, |
|
"logits": -1.4575351476669312, |
|
"logps": -84.27513122558594, |
|
"loss": 43.2587, |
|
"objective": 41.916500091552734, |
|
"ranking_idealized": 0.5221088528633118, |
|
"ranking_idealized_expo": 0.5216836929321289, |
|
"ranking_simple": 0.5216836929321289, |
|
"regularize": 41.916500091552734, |
|
"step": 50, |
|
"wo_beta": 15.635692596435547 |
|
}, |
|
{ |
|
"epoch": 0.14170996693434104, |
|
"eval_dpo_loss": 26.447525024414062, |
|
"eval_logits": -1.4447709321975708, |
|
"eval_logps": -90.52921295166016, |
|
"eval_loss": 52.66217041015625, |
|
"eval_objective": 53.697696685791016, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5263975262641907, |
|
"eval_regularize": 53.697696685791016, |
|
"eval_runtime": 309.407, |
|
"eval_samples_per_second": 18.713, |
|
"eval_steps_per_second": 1.561, |
|
"eval_wo_beta": 16.170011520385742, |
|
"step": 50 |
|
}, |
|
{ |
|
"dpo_loss": 89.97029876708984, |
|
"epoch": 0.2834199338686821, |
|
"grad_norm": 28235.60144716246, |
|
"learning_rate": 2.8409090909090916e-06, |
|
"logits": -1.398974061012268, |
|
"logps": -82.89569091796875, |
|
"loss": 169.8852, |
|
"objective": 172.36553955078125, |
|
"ranking_idealized": 0.5137500166893005, |
|
"ranking_idealized_expo": 0.5137500166893005, |
|
"ranking_simple": 0.51583331823349, |
|
"regularize": 172.36553955078125, |
|
"step": 100, |
|
"wo_beta": 15.30754566192627 |
|
}, |
|
{ |
|
"epoch": 0.2834199338686821, |
|
"eval_dpo_loss": 85.763916015625, |
|
"eval_logits": -1.3620884418487549, |
|
"eval_logps": -85.2786636352539, |
|
"eval_loss": 173.986083984375, |
|
"eval_objective": 172.1890869140625, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5243270993232727, |
|
"eval_regularize": 172.1890869140625, |
|
"eval_runtime": 308.6643, |
|
"eval_samples_per_second": 18.758, |
|
"eval_steps_per_second": 1.565, |
|
"eval_wo_beta": 15.439105033874512, |
|
"step": 100 |
|
}, |
|
{ |
|
"dpo_loss": 149.43614196777344, |
|
"epoch": 0.42512990080302315, |
|
"grad_norm": 21173.096618846714, |
|
"learning_rate": 4.2613636363636365e-06, |
|
"logits": -1.213483214378357, |
|
"logps": -78.55652618408203, |
|
"loss": 285.0432, |
|
"objective": 280.7228088378906, |
|
"ranking_idealized": 0.527916669845581, |
|
"ranking_idealized_expo": 0.527916669845581, |
|
"ranking_simple": 0.5229166746139526, |
|
"regularize": 280.7228088378906, |
|
"step": 150, |
|
"wo_beta": 15.080223083496094 |
|
}, |
|
{ |
|
"epoch": 0.42512990080302315, |
|
"eval_dpo_loss": 143.02996826171875, |
|
"eval_logits": -1.1693531274795532, |
|
"eval_logps": -83.2181167602539, |
|
"eval_loss": 291.4833679199219, |
|
"eval_objective": 293.4403991699219, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5279502868652344, |
|
"eval_regularize": 293.4403991699219, |
|
"eval_runtime": 311.5234, |
|
"eval_samples_per_second": 18.586, |
|
"eval_steps_per_second": 1.55, |
|
"eval_wo_beta": 15.222454071044922, |
|
"step": 150 |
|
}, |
|
{ |
|
"dpo_loss": 181.66571044921875, |
|
"epoch": 0.5668398677373642, |
|
"grad_norm": 20619.52737873687, |
|
"learning_rate": 4.997168347957521e-06, |
|
"logits": -0.9392554759979248, |
|
"logps": -78.61503601074219, |
|
"loss": 355.4066, |
|
"objective": 356.2313232421875, |
|
"ranking_idealized": 0.51541668176651, |
|
"ranking_idealized_expo": 0.51541668176651, |
|
"ranking_simple": 0.5191666483879089, |
|
"regularize": 356.2313232421875, |
|
"step": 200, |
|
"wo_beta": 15.349950790405273 |
|
}, |
|
{ |
|
"epoch": 0.5668398677373642, |
|
"eval_dpo_loss": 189.846923828125, |
|
"eval_logits": -0.9273601174354553, |
|
"eval_logps": -84.03199768066406, |
|
"eval_loss": 372.7905578613281, |
|
"eval_objective": 365.21240234375, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5232919454574585, |
|
"eval_regularize": 365.21240234375, |
|
"eval_runtime": 308.8911, |
|
"eval_samples_per_second": 18.744, |
|
"eval_steps_per_second": 1.564, |
|
"eval_wo_beta": 14.868427276611328, |
|
"step": 200 |
|
}, |
|
{ |
|
"dpo_loss": 186.671142578125, |
|
"epoch": 0.7085498346717053, |
|
"grad_norm": 17849.874243754268, |
|
"learning_rate": 4.973122855144066e-06, |
|
"logits": -0.822012186050415, |
|
"logps": -76.92431640625, |
|
"loss": 368.9811, |
|
"objective": 366.2552490234375, |
|
"ranking_idealized": 0.5162500143051147, |
|
"ranking_idealized_expo": 0.5162500143051147, |
|
"ranking_simple": 0.5074999928474426, |
|
"regularize": 366.2552490234375, |
|
"step": 250, |
|
"wo_beta": 15.5939359664917 |
|
}, |
|
{ |
|
"epoch": 0.7085498346717053, |
|
"eval_dpo_loss": 216.4584197998047, |
|
"eval_logits": -0.7745867967605591, |
|
"eval_logps": -81.50496673583984, |
|
"eval_loss": 446.6966247558594, |
|
"eval_objective": 442.3320617675781, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5258799195289612, |
|
"eval_regularize": 442.3320617675781, |
|
"eval_runtime": 308.996, |
|
"eval_samples_per_second": 18.738, |
|
"eval_steps_per_second": 1.563, |
|
"eval_wo_beta": 14.479047775268555, |
|
"step": 250 |
|
}, |
|
{ |
|
"dpo_loss": 182.8459930419922, |
|
"epoch": 0.8502598016060463, |
|
"grad_norm": 16846.286343055544, |
|
"learning_rate": 4.924776641419513e-06, |
|
"logits": -0.6342157125473022, |
|
"logps": -78.78164672851562, |
|
"loss": 360.5868, |
|
"objective": 363.29473876953125, |
|
"ranking_idealized": 0.4950000047683716, |
|
"ranking_idealized_expo": 0.4950000047683716, |
|
"ranking_simple": 0.4970833361148834, |
|
"regularize": 363.29473876953125, |
|
"step": 300, |
|
"wo_beta": 15.358329772949219 |
|
}, |
|
{ |
|
"epoch": 0.8502598016060463, |
|
"eval_dpo_loss": 222.88400268554688, |
|
"eval_logits": -0.5983948707580566, |
|
"eval_logps": -82.20111083984375, |
|
"eval_loss": 448.9505920410156, |
|
"eval_objective": 443.9051208496094, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5248447060585022, |
|
"eval_regularize": 443.9051208496094, |
|
"eval_runtime": 309.9292, |
|
"eval_samples_per_second": 18.682, |
|
"eval_steps_per_second": 1.558, |
|
"eval_wo_beta": 14.392961502075195, |
|
"step": 300 |
|
}, |
|
{ |
|
"dpo_loss": 171.55615234375, |
|
"epoch": 0.9919697685403873, |
|
"grad_norm": 16864.304890654712, |
|
"learning_rate": 4.8526047530778175e-06, |
|
"logits": -0.6237902641296387, |
|
"logps": -79.1826400756836, |
|
"loss": 338.3987, |
|
"objective": 335.6865234375, |
|
"ranking_idealized": 0.5254166722297668, |
|
"ranking_idealized_expo": 0.5254166722297668, |
|
"ranking_simple": 0.5216666460037231, |
|
"regularize": 335.6865234375, |
|
"step": 350, |
|
"wo_beta": 15.20045280456543 |
|
}, |
|
{ |
|
"epoch": 0.9919697685403873, |
|
"eval_dpo_loss": 232.93649291992188, |
|
"eval_logits": -0.78554368019104, |
|
"eval_logps": -84.16381072998047, |
|
"eval_loss": 462.19232177734375, |
|
"eval_objective": 461.2073059082031, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5269151329994202, |
|
"eval_regularize": 461.2073059082031, |
|
"eval_runtime": 308.3435, |
|
"eval_samples_per_second": 18.778, |
|
"eval_steps_per_second": 1.566, |
|
"eval_wo_beta": 14.297853469848633, |
|
"step": 350 |
|
}, |
|
{ |
|
"dpo_loss": 158.00930786132812, |
|
"epoch": 1.1336797354747283, |
|
"grad_norm": 16419.225936790586, |
|
"learning_rate": 4.757316345716554e-06, |
|
"logits": -0.689696192741394, |
|
"logps": -80.24182891845703, |
|
"loss": 309.1712, |
|
"objective": 306.0483703613281, |
|
"ranking_idealized": 0.5320833325386047, |
|
"ranking_idealized_expo": 0.5320833325386047, |
|
"ranking_simple": 0.5229166746139526, |
|
"regularize": 306.0483703613281, |
|
"step": 400, |
|
"wo_beta": 15.245408058166504 |
|
}, |
|
{ |
|
"epoch": 1.1336797354747283, |
|
"eval_dpo_loss": 248.07177734375, |
|
"eval_logits": -0.6413922905921936, |
|
"eval_logps": -82.49343872070312, |
|
"eval_loss": 480.5964660644531, |
|
"eval_objective": 478.7404479980469, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5253623127937317, |
|
"eval_regularize": 478.7404479980469, |
|
"eval_runtime": 308.667, |
|
"eval_samples_per_second": 18.758, |
|
"eval_steps_per_second": 1.565, |
|
"eval_wo_beta": 14.387246131896973, |
|
"step": 400 |
|
}, |
|
{ |
|
"dpo_loss": 142.9551239013672, |
|
"epoch": 1.2753897024090695, |
|
"grad_norm": 16215.238701636586, |
|
"learning_rate": 4.639847716126855e-06, |
|
"logits": -0.5957368612289429, |
|
"logps": -78.66122436523438, |
|
"loss": 298.1424, |
|
"objective": 298.3353271484375, |
|
"ranking_idealized": 0.5191666483879089, |
|
"ranking_idealized_expo": 0.5191666483879089, |
|
"ranking_simple": 0.5204166769981384, |
|
"regularize": 298.3353271484375, |
|
"step": 450, |
|
"wo_beta": 15.736668586730957 |
|
}, |
|
{ |
|
"epoch": 1.2753897024090695, |
|
"eval_dpo_loss": 247.8721923828125, |
|
"eval_logits": -0.7014132142066956, |
|
"eval_logps": -82.14649200439453, |
|
"eval_loss": 480.3255615234375, |
|
"eval_objective": 482.1766052246094, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.523809552192688, |
|
"eval_regularize": 482.1766052246094, |
|
"eval_runtime": 309.6985, |
|
"eval_samples_per_second": 18.696, |
|
"eval_steps_per_second": 1.56, |
|
"eval_wo_beta": 14.369455337524414, |
|
"step": 450 |
|
}, |
|
{ |
|
"dpo_loss": 135.57347106933594, |
|
"epoch": 1.4170996693434104, |
|
"grad_norm": 15944.03549179383, |
|
"learning_rate": 4.501353102310901e-06, |
|
"logits": -0.5253962278366089, |
|
"logps": -78.11959075927734, |
|
"loss": 282.4504, |
|
"objective": 278.3994445800781, |
|
"ranking_idealized": 0.4970833361148834, |
|
"ranking_idealized_expo": 0.4970833361148834, |
|
"ranking_simple": 0.49958333373069763, |
|
"regularize": 278.3994445800781, |
|
"step": 500, |
|
"wo_beta": 15.295467376708984 |
|
}, |
|
{ |
|
"epoch": 1.4170996693434104, |
|
"eval_dpo_loss": 252.20928955078125, |
|
"eval_logits": -0.45775941014289856, |
|
"eval_logps": -83.41010284423828, |
|
"eval_loss": 493.7484436035156, |
|
"eval_objective": 495.763916015625, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5248447060585022, |
|
"eval_regularize": 495.763916015625, |
|
"eval_runtime": 308.7418, |
|
"eval_samples_per_second": 18.754, |
|
"eval_steps_per_second": 1.564, |
|
"eval_wo_beta": 14.174327850341797, |
|
"step": 500 |
|
}, |
|
{ |
|
"dpo_loss": 138.2200164794922, |
|
"epoch": 1.5588096362777515, |
|
"grad_norm": 14777.775900953873, |
|
"learning_rate": 4.34319334202531e-06, |
|
"logits": -0.43370625376701355, |
|
"logps": -79.58238220214844, |
|
"loss": 261.1027, |
|
"objective": 261.7706298828125, |
|
"ranking_idealized": 0.5104166865348816, |
|
"ranking_idealized_expo": 0.5104166865348816, |
|
"ranking_simple": 0.5104166865348816, |
|
"regularize": 261.7706298828125, |
|
"step": 550, |
|
"wo_beta": 15.099705696105957 |
|
}, |
|
{ |
|
"epoch": 1.5588096362777515, |
|
"eval_dpo_loss": 245.87562561035156, |
|
"eval_logits": -0.5435077548027039, |
|
"eval_logps": -82.8267593383789, |
|
"eval_loss": 486.1625671386719, |
|
"eval_objective": 489.7927551269531, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5253623127937317, |
|
"eval_regularize": 489.7927551269531, |
|
"eval_runtime": 313.2811, |
|
"eval_samples_per_second": 18.482, |
|
"eval_steps_per_second": 1.542, |
|
"eval_wo_beta": 14.046432495117188, |
|
"step": 550 |
|
}, |
|
{ |
|
"dpo_loss": 129.54061889648438, |
|
"epoch": 1.7005196032120926, |
|
"grad_norm": 15449.253251441141, |
|
"learning_rate": 4.16692250129073e-06, |
|
"logits": -0.4195112884044647, |
|
"logps": -79.62303924560547, |
|
"loss": 255.9288, |
|
"objective": 256.1207275390625, |
|
"ranking_idealized": 0.5149999856948853, |
|
"ranking_idealized_expo": 0.5149999856948853, |
|
"ranking_simple": 0.5095833539962769, |
|
"regularize": 256.1207275390625, |
|
"step": 600, |
|
"wo_beta": 15.011371612548828 |
|
}, |
|
{ |
|
"epoch": 1.7005196032120926, |
|
"eval_dpo_loss": 251.29342651367188, |
|
"eval_logits": -0.5346657633781433, |
|
"eval_logps": -82.17684173583984, |
|
"eval_loss": 500.38006591796875, |
|
"eval_objective": 502.1727294921875, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5269151329994202, |
|
"eval_regularize": 502.1727294921875, |
|
"eval_runtime": 309.4233, |
|
"eval_samples_per_second": 18.712, |
|
"eval_steps_per_second": 1.561, |
|
"eval_wo_beta": 14.243566513061523, |
|
"step": 600 |
|
}, |
|
{ |
|
"dpo_loss": 130.9433135986328, |
|
"epoch": 1.8422295701464337, |
|
"grad_norm": 15825.93366010816, |
|
"learning_rate": 3.974272604254906e-06, |
|
"logits": -0.4912276566028595, |
|
"logps": -78.56413269042969, |
|
"loss": 248.6787, |
|
"objective": 253.7882843017578, |
|
"ranking_idealized": 0.527916669845581, |
|
"ranking_idealized_expo": 0.527916669845581, |
|
"ranking_simple": 0.5224999785423279, |
|
"regularize": 253.7882843017578, |
|
"step": 650, |
|
"wo_beta": 15.684560775756836 |
|
}, |
|
{ |
|
"epoch": 1.8422295701464337, |
|
"eval_dpo_loss": 254.5959014892578, |
|
"eval_logits": -0.5140498876571655, |
|
"eval_logps": -81.49234771728516, |
|
"eval_loss": 502.3152770996094, |
|
"eval_objective": 504.1581726074219, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5248447060585022, |
|
"eval_regularize": 504.1581726074219, |
|
"eval_runtime": 308.5565, |
|
"eval_samples_per_second": 18.765, |
|
"eval_steps_per_second": 1.565, |
|
"eval_wo_beta": 14.331953048706055, |
|
"step": 650 |
|
}, |
|
{ |
|
"dpo_loss": 112.15733337402344, |
|
"epoch": 1.9839395370807746, |
|
"grad_norm": 16229.838704301123, |
|
"learning_rate": 3.767136614452458e-06, |
|
"logits": -0.4758701026439667, |
|
"logps": -78.98329162597656, |
|
"loss": 226.4676, |
|
"objective": 224.5537567138672, |
|
"ranking_idealized": 0.5112500190734863, |
|
"ranking_idealized_expo": 0.5108333230018616, |
|
"ranking_simple": 0.5087500214576721, |
|
"regularize": 224.5537567138672, |
|
"step": 700, |
|
"wo_beta": 15.5169095993042 |
|
}, |
|
{ |
|
"epoch": 1.9839395370807746, |
|
"eval_dpo_loss": 264.166015625, |
|
"eval_logits": -0.4816124141216278, |
|
"eval_logps": -83.42156219482422, |
|
"eval_loss": 512.698974609375, |
|
"eval_objective": 516.7103271484375, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5253623127937317, |
|
"eval_regularize": 516.7103271484375, |
|
"eval_runtime": 308.9876, |
|
"eval_samples_per_second": 18.739, |
|
"eval_steps_per_second": 1.563, |
|
"eval_wo_beta": 14.08342456817627, |
|
"step": 700 |
|
}, |
|
{ |
|
"dpo_loss": 105.4539566040039, |
|
"epoch": 2.1256495040151155, |
|
"grad_norm": 15281.11762954245, |
|
"learning_rate": 3.547549834686222e-06, |
|
"logits": -0.47171100974082947, |
|
"logps": -79.71944427490234, |
|
"loss": 207.1551, |
|
"objective": 207.78355407714844, |
|
"ranking_idealized": 0.5112500190734863, |
|
"ranking_idealized_expo": 0.5112500190734863, |
|
"ranking_simple": 0.51583331823349, |
|
"regularize": 207.78355407714844, |
|
"step": 750, |
|
"wo_beta": 15.408516883850098 |
|
}, |
|
{ |
|
"epoch": 2.1256495040151155, |
|
"eval_dpo_loss": 259.2528381347656, |
|
"eval_logits": -0.5409926772117615, |
|
"eval_logps": -83.45890045166016, |
|
"eval_loss": 506.4237365722656, |
|
"eval_objective": 510.6129150390625, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.523809552192688, |
|
"eval_regularize": 510.6129150390625, |
|
"eval_runtime": 308.6604, |
|
"eval_samples_per_second": 18.758, |
|
"eval_steps_per_second": 1.565, |
|
"eval_wo_beta": 14.129502296447754, |
|
"step": 750 |
|
}, |
|
{ |
|
"dpo_loss": 97.64974975585938, |
|
"epoch": 2.2673594709494567, |
|
"grad_norm": 15212.863475959, |
|
"learning_rate": 3.3176699082935546e-06, |
|
"logits": -0.48046550154685974, |
|
"logps": -81.52581787109375, |
|
"loss": 197.3545, |
|
"objective": 198.5575714111328, |
|
"ranking_idealized": 0.512499988079071, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 198.5575714111328, |
|
"step": 800, |
|
"wo_beta": 15.000423431396484 |
|
}, |
|
{ |
|
"epoch": 2.2673594709494567, |
|
"eval_dpo_loss": 262.3102111816406, |
|
"eval_logits": -0.5658813118934631, |
|
"eval_logps": -84.87467956542969, |
|
"eval_loss": 513.39794921875, |
|
"eval_objective": 514.31201171875, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.522774338722229, |
|
"eval_regularize": 514.31201171875, |
|
"eval_runtime": 310.3348, |
|
"eval_samples_per_second": 18.657, |
|
"eval_steps_per_second": 1.556, |
|
"eval_wo_beta": 14.070388793945312, |
|
"step": 800 |
|
}, |
|
{ |
|
"dpo_loss": 94.56294250488281, |
|
"epoch": 2.413793103448276, |
|
"grad_norm": 14099.488226379159, |
|
"learning_rate": 3.0797556183036582e-06, |
|
"logits": -0.5114213824272156, |
|
"logps": -81.04893493652344, |
|
"loss": 182.3796, |
|
"objective": 182.07008361816406, |
|
"ranking_idealized": 0.5145833492279053, |
|
"ranking_idealized_expo": 0.5137500166893005, |
|
"ranking_simple": 0.5129166841506958, |
|
"regularize": 182.07008361816406, |
|
"step": 850, |
|
"wo_beta": 15.241059303283691 |
|
}, |
|
{ |
|
"epoch": 2.413793103448276, |
|
"eval_dpo_loss": 254.12506103515625, |
|
"eval_logits": -0.5509631037712097, |
|
"eval_logps": -82.86239624023438, |
|
"eval_loss": 501.88311767578125, |
|
"eval_objective": 504.852294921875, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5274327397346497, |
|
"eval_regularize": 504.852294921875, |
|
"eval_runtime": 309.6743, |
|
"eval_samples_per_second": 18.697, |
|
"eval_steps_per_second": 1.56, |
|
"eval_wo_beta": 14.17065143585205, |
|
"step": 850 |
|
}, |
|
{ |
|
"dpo_loss": 92.21830749511719, |
|
"epoch": 2.555503070382617, |
|
"grad_norm": 14214.15214964791, |
|
"learning_rate": 2.8361446928038298e-06, |
|
"logits": -0.49887704849243164, |
|
"logps": -80.69136810302734, |
|
"loss": 176.042, |
|
"objective": 176.71592712402344, |
|
"ranking_idealized": 0.5183333158493042, |
|
"ranking_idealized_expo": 0.5179166793823242, |
|
"ranking_simple": 0.5129166841506958, |
|
"regularize": 176.71592712402344, |
|
"step": 900, |
|
"wo_beta": 15.599862098693848 |
|
}, |
|
{ |
|
"epoch": 2.555503070382617, |
|
"eval_dpo_loss": 263.27996826171875, |
|
"eval_logits": -0.5038771033287048, |
|
"eval_logps": -85.07097625732422, |
|
"eval_loss": 518.1983032226562, |
|
"eval_objective": 519.5007934570312, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.523809552192688, |
|
"eval_regularize": 519.5007934570312, |
|
"eval_runtime": 308.5117, |
|
"eval_samples_per_second": 18.768, |
|
"eval_steps_per_second": 1.566, |
|
"eval_wo_beta": 14.112290382385254, |
|
"step": 900 |
|
}, |
|
{ |
|
"dpo_loss": 81.76506805419922, |
|
"epoch": 2.697213037316958, |
|
"grad_norm": 14116.155586290091, |
|
"learning_rate": 2.5892308345974517e-06, |
|
"logits": -0.45734792947769165, |
|
"logps": -81.56855773925781, |
|
"loss": 164.8281, |
|
"objective": 161.75088500976562, |
|
"ranking_idealized": 0.5045833587646484, |
|
"ranking_idealized_expo": 0.5045833587646484, |
|
"ranking_simple": 0.5099999904632568, |
|
"regularize": 161.75088500976562, |
|
"step": 950, |
|
"wo_beta": 15.513051986694336 |
|
}, |
|
{ |
|
"epoch": 2.697213037316958, |
|
"eval_dpo_loss": 262.8074035644531, |
|
"eval_logits": -0.5199795961380005, |
|
"eval_logps": -84.58429718017578, |
|
"eval_loss": 512.1844482421875, |
|
"eval_objective": 512.7650756835938, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.523809552192688, |
|
"eval_regularize": 512.7650756835938, |
|
"eval_runtime": 309.6572, |
|
"eval_samples_per_second": 18.698, |
|
"eval_steps_per_second": 1.56, |
|
"eval_wo_beta": 14.164327621459961, |
|
"step": 950 |
|
}, |
|
{ |
|
"dpo_loss": 76.9761962890625, |
|
"epoch": 2.838923004251299, |
|
"grad_norm": 13709.192801586882, |
|
"learning_rate": 2.341440200858589e-06, |
|
"logits": -0.46604040265083313, |
|
"logps": -79.8453598022461, |
|
"loss": 150.0401, |
|
"objective": 150.50038146972656, |
|
"ranking_idealized": 0.5112500190734863, |
|
"ranking_idealized_expo": 0.5112500190734863, |
|
"ranking_simple": 0.5099999904632568, |
|
"regularize": 150.50038146972656, |
|
"step": 1000, |
|
"wo_beta": 15.057799339294434 |
|
}, |
|
{ |
|
"epoch": 2.838923004251299, |
|
"eval_dpo_loss": 263.6168518066406, |
|
"eval_logits": -0.5218656659126282, |
|
"eval_logps": -83.73433685302734, |
|
"eval_loss": 514.70361328125, |
|
"eval_objective": 516.595947265625, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5258799195289612, |
|
"eval_regularize": 516.595947265625, |
|
"eval_runtime": 308.5762, |
|
"eval_samples_per_second": 18.764, |
|
"eval_steps_per_second": 1.565, |
|
"eval_wo_beta": 14.180042266845703, |
|
"step": 1000 |
|
}, |
|
{ |
|
"dpo_loss": 69.53437805175781, |
|
"epoch": 2.9806329711856403, |
|
"grad_norm": 14565.063000853466, |
|
"learning_rate": 2.0952075638923656e-06, |
|
"logits": -0.4686031639575958, |
|
"logps": -80.63922882080078, |
|
"loss": 141.0317, |
|
"objective": 136.92088317871094, |
|
"ranking_idealized": 0.5174999833106995, |
|
"ranking_idealized_expo": 0.5174999833106995, |
|
"ranking_simple": 0.5262500047683716, |
|
"regularize": 136.92088317871094, |
|
"step": 1050, |
|
"wo_beta": 15.512243270874023 |
|
}, |
|
{ |
|
"epoch": 2.9806329711856403, |
|
"eval_dpo_loss": 266.94525146484375, |
|
"eval_logits": -0.49528759717941284, |
|
"eval_logps": -84.26761627197266, |
|
"eval_loss": 519.2467041015625, |
|
"eval_objective": 521.8153076171875, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5263975262641907, |
|
"eval_regularize": 521.8153076171875, |
|
"eval_runtime": 308.2205, |
|
"eval_samples_per_second": 18.785, |
|
"eval_steps_per_second": 1.567, |
|
"eval_wo_beta": 14.257741928100586, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 2.9806329711856403, |
|
"step": 1050, |
|
"total_flos": 0.0, |
|
"train_loss": 38.77721726190476, |
|
"train_runtime": 6898.2426, |
|
"train_samples_per_second": 36.822, |
|
"train_steps_per_second": 0.255 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 1760, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|