|
{ |
|
"best_metric": 14.316285133361816, |
|
"best_model_checkpoint": "./qwen2.5-0.5b/qwen2.5-0.5b-expo-L2EXPO-ES-0.1/checkpoint-800", |
|
"epoch": 2.975909305621162, |
|
"eval_steps": 50, |
|
"global_step": 1050, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"dpo_loss": 0.6931471824645996, |
|
"epoch": 0.002834199338686821, |
|
"grad_norm": 36.88507599678088, |
|
"learning_rate": 2.840909090909091e-08, |
|
"logits": -1.359458565711975, |
|
"logps": -84.69721221923828, |
|
"loss": 0.3913, |
|
"objective": 0.3618059456348419, |
|
"ranking_idealized": 0.6458333134651184, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.3618059456348419, |
|
"step": 1, |
|
"wo_beta": 14.830931663513184 |
|
}, |
|
{ |
|
"dpo_loss": 0.6800611615180969, |
|
"epoch": 0.14170996693434104, |
|
"grad_norm": 34.65452784204521, |
|
"learning_rate": 1.4204545454545458e-06, |
|
"logits": -1.481619119644165, |
|
"logps": -83.80532836914062, |
|
"loss": 0.4017, |
|
"objective": 0.4062296152114868, |
|
"ranking_idealized": 0.608418345451355, |
|
"ranking_idealized_expo": 0.5229591727256775, |
|
"ranking_simple": 0.5250850319862366, |
|
"regularize": 0.4062296152114868, |
|
"step": 50, |
|
"wo_beta": 15.713354110717773 |
|
}, |
|
{ |
|
"epoch": 0.14170996693434104, |
|
"eval_dpo_loss": 0.6868039965629578, |
|
"eval_logits": -1.5024017095565796, |
|
"eval_logps": -93.17259979248047, |
|
"eval_loss": 0.4164615273475647, |
|
"eval_objective": 0.41487643122673035, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.5258799195289612, |
|
"eval_regularize": 0.41487643122673035, |
|
"eval_runtime": 308.9601, |
|
"eval_samples_per_second": 18.74, |
|
"eval_steps_per_second": 1.563, |
|
"eval_wo_beta": 16.426729202270508, |
|
"step": 50 |
|
}, |
|
{ |
|
"dpo_loss": 0.6331456303596497, |
|
"epoch": 0.2834199338686821, |
|
"grad_norm": 29.48988078429707, |
|
"learning_rate": 2.8409090909090916e-06, |
|
"logits": -1.514231562614441, |
|
"logps": -84.27212524414062, |
|
"loss": 0.3777, |
|
"objective": 0.38673925399780273, |
|
"ranking_idealized": 0.6016666889190674, |
|
"ranking_idealized_expo": 0.5141666531562805, |
|
"ranking_simple": 0.5520833134651184, |
|
"regularize": 0.38673925399780273, |
|
"step": 100, |
|
"wo_beta": 15.562942504882812 |
|
}, |
|
{ |
|
"epoch": 0.2834199338686821, |
|
"eval_dpo_loss": 0.6818161606788635, |
|
"eval_logits": -1.4774748086929321, |
|
"eval_logps": -92.86526489257812, |
|
"eval_loss": 0.4359625577926636, |
|
"eval_objective": 0.4269382953643799, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.531573474407196, |
|
"eval_regularize": 0.4269382953643799, |
|
"eval_runtime": 308.6402, |
|
"eval_samples_per_second": 18.76, |
|
"eval_steps_per_second": 1.565, |
|
"eval_wo_beta": 16.243934631347656, |
|
"step": 100 |
|
}, |
|
{ |
|
"dpo_loss": 0.6157870292663574, |
|
"epoch": 0.42512990080302315, |
|
"grad_norm": 23.64962148842917, |
|
"learning_rate": 4.2613636363636365e-06, |
|
"logits": -1.363812804222107, |
|
"logps": -83.03893280029297, |
|
"loss": 0.4057, |
|
"objective": 0.4109911620616913, |
|
"ranking_idealized": 0.6066666841506958, |
|
"ranking_idealized_expo": 0.5287500023841858, |
|
"ranking_simple": 0.5679166913032532, |
|
"regularize": 0.4109911620616913, |
|
"step": 150, |
|
"wo_beta": 15.67545223236084 |
|
}, |
|
{ |
|
"epoch": 0.42512990080302315, |
|
"eval_dpo_loss": 0.6897013783454895, |
|
"eval_logits": -1.2946008443832397, |
|
"eval_logps": -84.17744445800781, |
|
"eval_loss": 0.49110475182533264, |
|
"eval_objective": 0.48045814037323, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.5383023023605347, |
|
"eval_regularize": 0.48045814037323, |
|
"eval_runtime": 312.8899, |
|
"eval_samples_per_second": 18.505, |
|
"eval_steps_per_second": 1.544, |
|
"eval_wo_beta": 15.630563735961914, |
|
"step": 150 |
|
}, |
|
{ |
|
"dpo_loss": 0.6037490963935852, |
|
"epoch": 0.5668398677373642, |
|
"grad_norm": 20.792422526564724, |
|
"learning_rate": 4.997168347957521e-06, |
|
"logits": -1.1515488624572754, |
|
"logps": -78.54210662841797, |
|
"loss": 0.4475, |
|
"objective": 0.4344017505645752, |
|
"ranking_idealized": 0.5924999713897705, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5537499785423279, |
|
"regularize": 0.4344017505645752, |
|
"step": 200, |
|
"wo_beta": 15.705690383911133 |
|
}, |
|
{ |
|
"epoch": 0.5668398677373642, |
|
"eval_dpo_loss": 0.7102847695350647, |
|
"eval_logits": -0.9896814823150635, |
|
"eval_logps": -89.7341537475586, |
|
"eval_loss": 0.5660186409950256, |
|
"eval_objective": 0.5515478253364563, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.531573474407196, |
|
"eval_regularize": 0.5515478253364563, |
|
"eval_runtime": 307.7447, |
|
"eval_samples_per_second": 18.814, |
|
"eval_steps_per_second": 1.569, |
|
"eval_wo_beta": 15.128002166748047, |
|
"step": 200 |
|
}, |
|
{ |
|
"dpo_loss": 0.6019502878189087, |
|
"epoch": 0.7085498346717053, |
|
"grad_norm": 19.149849902460005, |
|
"learning_rate": 4.973122855144066e-06, |
|
"logits": -0.9062835574150085, |
|
"logps": -77.80750274658203, |
|
"loss": 0.455, |
|
"objective": 0.4496636390686035, |
|
"ranking_idealized": 0.5991666913032532, |
|
"ranking_idealized_expo": 0.5170833468437195, |
|
"ranking_simple": 0.5724999904632568, |
|
"regularize": 0.4496636390686035, |
|
"step": 250, |
|
"wo_beta": 16.00295639038086 |
|
}, |
|
{ |
|
"epoch": 0.7085498346717053, |
|
"eval_dpo_loss": 0.7171492576599121, |
|
"eval_logits": -1.0032674074172974, |
|
"eval_logps": -78.19169616699219, |
|
"eval_loss": 0.5978298783302307, |
|
"eval_objective": 0.5822399258613586, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.5310559272766113, |
|
"eval_regularize": 0.5822399258613586, |
|
"eval_runtime": 309.2921, |
|
"eval_samples_per_second": 18.72, |
|
"eval_steps_per_second": 1.562, |
|
"eval_wo_beta": 14.676263809204102, |
|
"step": 250 |
|
}, |
|
{ |
|
"dpo_loss": 0.5993608236312866, |
|
"epoch": 0.8502598016060463, |
|
"grad_norm": 17.89888160897824, |
|
"learning_rate": 4.924776641419513e-06, |
|
"logits": -0.8504629731178284, |
|
"logps": -74.18943786621094, |
|
"loss": 0.4337, |
|
"objective": 0.42672449350357056, |
|
"ranking_idealized": 0.5799999833106995, |
|
"ranking_idealized_expo": 0.4970833361148834, |
|
"ranking_simple": 0.5529166460037231, |
|
"regularize": 0.42672449350357056, |
|
"step": 300, |
|
"wo_beta": 15.642317771911621 |
|
}, |
|
{ |
|
"epoch": 0.8502598016060463, |
|
"eval_dpo_loss": 0.710507333278656, |
|
"eval_logits": -0.6760910153388977, |
|
"eval_logps": -78.8918228149414, |
|
"eval_loss": 0.5993344783782959, |
|
"eval_objective": 0.5779486894607544, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.5300207138061523, |
|
"eval_regularize": 0.5779486894607544, |
|
"eval_runtime": 308.8688, |
|
"eval_samples_per_second": 18.746, |
|
"eval_steps_per_second": 1.564, |
|
"eval_wo_beta": 14.919622421264648, |
|
"step": 300 |
|
}, |
|
{ |
|
"dpo_loss": 0.5866905450820923, |
|
"epoch": 0.9919697685403873, |
|
"grad_norm": 18.680005245459032, |
|
"learning_rate": 4.8526047530778175e-06, |
|
"logits": -0.6557392477989197, |
|
"logps": -72.1249008178711, |
|
"loss": 0.4039, |
|
"objective": 0.3963530361652374, |
|
"ranking_idealized": 0.60916668176651, |
|
"ranking_idealized_expo": 0.5270833373069763, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.3963530361652374, |
|
"step": 350, |
|
"wo_beta": 15.802534103393555 |
|
}, |
|
{ |
|
"epoch": 0.9919697685403873, |
|
"eval_dpo_loss": 0.707767128944397, |
|
"eval_logits": -0.7968087792396545, |
|
"eval_logps": -75.1519775390625, |
|
"eval_loss": 0.5977659821510315, |
|
"eval_objective": 0.5765314102172852, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.5289855003356934, |
|
"eval_regularize": 0.5765314102172852, |
|
"eval_runtime": 309.2048, |
|
"eval_samples_per_second": 18.725, |
|
"eval_steps_per_second": 1.562, |
|
"eval_wo_beta": 14.653112411499023, |
|
"step": 350 |
|
}, |
|
{ |
|
"dpo_loss": 0.571822464466095, |
|
"epoch": 1.1336797354747283, |
|
"grad_norm": 17.758722724655755, |
|
"learning_rate": 4.757316345716554e-06, |
|
"logits": -0.6229808926582336, |
|
"logps": -71.7779541015625, |
|
"loss": 0.3729, |
|
"objective": 0.38071343302726746, |
|
"ranking_idealized": 0.6087499856948853, |
|
"ranking_idealized_expo": 0.5337499976158142, |
|
"ranking_simple": 0.596666693687439, |
|
"regularize": 0.38071343302726746, |
|
"step": 400, |
|
"wo_beta": 15.872475624084473 |
|
}, |
|
{ |
|
"epoch": 1.1336797354747283, |
|
"eval_dpo_loss": 0.7153333425521851, |
|
"eval_logits": -0.5569362044334412, |
|
"eval_logps": -75.14326477050781, |
|
"eval_loss": 0.6180254817008972, |
|
"eval_objective": 0.6000439524650574, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.522774338722229, |
|
"eval_regularize": 0.6000439524650574, |
|
"eval_runtime": 308.681, |
|
"eval_samples_per_second": 18.757, |
|
"eval_steps_per_second": 1.565, |
|
"eval_wo_beta": 14.647075653076172, |
|
"step": 400 |
|
}, |
|
{ |
|
"dpo_loss": 0.5686503052711487, |
|
"epoch": 1.2753897024090695, |
|
"grad_norm": 17.208572847816768, |
|
"learning_rate": 4.639847716126855e-06, |
|
"logits": -0.558698296546936, |
|
"logps": -71.48841094970703, |
|
"loss": 0.3454, |
|
"objective": 0.34283894300460815, |
|
"ranking_idealized": 0.5975000262260437, |
|
"ranking_idealized_expo": 0.5199999809265137, |
|
"ranking_simple": 0.59375, |
|
"regularize": 0.34283894300460815, |
|
"step": 450, |
|
"wo_beta": 16.339805603027344 |
|
}, |
|
{ |
|
"epoch": 1.2753897024090695, |
|
"eval_dpo_loss": 0.716464638710022, |
|
"eval_logits": -0.6214241981506348, |
|
"eval_logps": -76.22888946533203, |
|
"eval_loss": 0.6315773725509644, |
|
"eval_objective": 0.6131163239479065, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.533643901348114, |
|
"eval_regularize": 0.6131163239479065, |
|
"eval_runtime": 309.544, |
|
"eval_samples_per_second": 18.705, |
|
"eval_steps_per_second": 1.56, |
|
"eval_wo_beta": 14.503443717956543, |
|
"step": 450 |
|
}, |
|
{ |
|
"dpo_loss": 0.5579800605773926, |
|
"epoch": 1.4170996693434104, |
|
"grad_norm": 17.247428143304518, |
|
"learning_rate": 4.501353102310901e-06, |
|
"logits": -0.5506080985069275, |
|
"logps": -72.05484008789062, |
|
"loss": 0.3226, |
|
"objective": 0.321167916059494, |
|
"ranking_idealized": 0.57833331823349, |
|
"ranking_idealized_expo": 0.4983333349227905, |
|
"ranking_simple": 0.5724999904632568, |
|
"regularize": 0.321167916059494, |
|
"step": 500, |
|
"wo_beta": 15.88575553894043 |
|
}, |
|
{ |
|
"epoch": 1.4170996693434104, |
|
"eval_dpo_loss": 0.7203696370124817, |
|
"eval_logits": -0.5608097910881042, |
|
"eval_logps": -77.60398864746094, |
|
"eval_loss": 0.6255373358726501, |
|
"eval_objective": 0.6084341406822205, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.5284678936004639, |
|
"eval_regularize": 0.6084341406822205, |
|
"eval_runtime": 308.7073, |
|
"eval_samples_per_second": 18.756, |
|
"eval_steps_per_second": 1.565, |
|
"eval_wo_beta": 14.499795913696289, |
|
"step": 500 |
|
}, |
|
{ |
|
"dpo_loss": 0.5586966872215271, |
|
"epoch": 1.5588096362777515, |
|
"grad_norm": 16.23813175362919, |
|
"learning_rate": 4.34319334202531e-06, |
|
"logits": -0.5342339873313904, |
|
"logps": -74.90955352783203, |
|
"loss": 0.3133, |
|
"objective": 0.31819403171539307, |
|
"ranking_idealized": 0.5945833325386047, |
|
"ranking_idealized_expo": 0.5116666555404663, |
|
"ranking_simple": 0.5854166746139526, |
|
"regularize": 0.31819403171539307, |
|
"step": 550, |
|
"wo_beta": 15.694311141967773 |
|
}, |
|
{ |
|
"epoch": 1.5588096362777515, |
|
"eval_dpo_loss": 0.7138590216636658, |
|
"eval_logits": -0.6736307144165039, |
|
"eval_logps": -78.62907409667969, |
|
"eval_loss": 0.6281688809394836, |
|
"eval_objective": 0.613820493221283, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.533643901348114, |
|
"eval_regularize": 0.613820493221283, |
|
"eval_runtime": 309.2677, |
|
"eval_samples_per_second": 18.722, |
|
"eval_steps_per_second": 1.562, |
|
"eval_wo_beta": 14.406906127929688, |
|
"step": 550 |
|
}, |
|
{ |
|
"dpo_loss": 0.5538429021835327, |
|
"epoch": 1.7005196032120926, |
|
"grad_norm": 16.284668340767176, |
|
"learning_rate": 4.16692250129073e-06, |
|
"logits": -0.5140345692634583, |
|
"logps": -74.2342758178711, |
|
"loss": 0.2944, |
|
"objective": 0.29470422863960266, |
|
"ranking_idealized": 0.6004166603088379, |
|
"ranking_idealized_expo": 0.51583331823349, |
|
"ranking_simple": 0.5895833373069763, |
|
"regularize": 0.29470422863960266, |
|
"step": 600, |
|
"wo_beta": 15.488865852355957 |
|
}, |
|
{ |
|
"epoch": 1.7005196032120926, |
|
"eval_dpo_loss": 0.717461884021759, |
|
"eval_logits": -0.5620033740997314, |
|
"eval_logps": -78.91792297363281, |
|
"eval_loss": 0.6321352124214172, |
|
"eval_objective": 0.6138916015625, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.5357142686843872, |
|
"eval_regularize": 0.6138916015625, |
|
"eval_runtime": 308.6792, |
|
"eval_samples_per_second": 18.757, |
|
"eval_steps_per_second": 1.565, |
|
"eval_wo_beta": 14.614200592041016, |
|
"step": 600 |
|
}, |
|
{ |
|
"dpo_loss": 0.5554340481758118, |
|
"epoch": 1.8422295701464337, |
|
"grad_norm": 15.91791899711329, |
|
"learning_rate": 3.974272604254906e-06, |
|
"logits": -0.5311375260353088, |
|
"logps": -74.39502716064453, |
|
"loss": 0.2915, |
|
"objective": 0.28872814774513245, |
|
"ranking_idealized": 0.6058333516120911, |
|
"ranking_idealized_expo": 0.5295833349227905, |
|
"ranking_simple": 0.5991666913032532, |
|
"regularize": 0.28872814774513245, |
|
"step": 650, |
|
"wo_beta": 16.528623580932617 |
|
}, |
|
{ |
|
"epoch": 1.8422295701464337, |
|
"eval_dpo_loss": 0.7137619256973267, |
|
"eval_logits": -0.702060341835022, |
|
"eval_logps": -77.44371795654297, |
|
"eval_loss": 0.6321162581443787, |
|
"eval_objective": 0.6157041788101196, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.5367494821548462, |
|
"eval_regularize": 0.6157041788101196, |
|
"eval_runtime": 309.0389, |
|
"eval_samples_per_second": 18.736, |
|
"eval_steps_per_second": 1.563, |
|
"eval_wo_beta": 14.385796546936035, |
|
"step": 650 |
|
}, |
|
{ |
|
"dpo_loss": 0.5520748496055603, |
|
"epoch": 1.9839395370807746, |
|
"grad_norm": 15.323602142583136, |
|
"learning_rate": 3.767136614452458e-06, |
|
"logits": -0.5264750719070435, |
|
"logps": -75.2638931274414, |
|
"loss": 0.2675, |
|
"objective": 0.2728944420814514, |
|
"ranking_idealized": 0.5954166650772095, |
|
"ranking_idealized_expo": 0.5129166841506958, |
|
"ranking_simple": 0.5883333086967468, |
|
"regularize": 0.2728944420814514, |
|
"step": 700, |
|
"wo_beta": 15.999488830566406 |
|
}, |
|
{ |
|
"epoch": 1.9839395370807746, |
|
"eval_dpo_loss": 0.7185091972351074, |
|
"eval_logits": -0.561150312423706, |
|
"eval_logps": -79.35997009277344, |
|
"eval_loss": 0.6386255621910095, |
|
"eval_objective": 0.6233482956886292, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.5289855003356934, |
|
"eval_regularize": 0.6233482956886292, |
|
"eval_runtime": 309.5574, |
|
"eval_samples_per_second": 18.704, |
|
"eval_steps_per_second": 1.56, |
|
"eval_wo_beta": 14.317137718200684, |
|
"step": 700 |
|
}, |
|
{ |
|
"dpo_loss": 0.5413146615028381, |
|
"epoch": 2.1256495040151155, |
|
"grad_norm": 14.916489496350042, |
|
"learning_rate": 3.547549834686222e-06, |
|
"logits": -0.5209631323814392, |
|
"logps": -75.42438507080078, |
|
"loss": 0.2415, |
|
"objective": 0.23713654279708862, |
|
"ranking_idealized": 0.6066666841506958, |
|
"ranking_idealized_expo": 0.5133333206176758, |
|
"ranking_simple": 0.597083330154419, |
|
"regularize": 0.23713654279708862, |
|
"step": 750, |
|
"wo_beta": 15.934895515441895 |
|
}, |
|
{ |
|
"epoch": 2.1256495040151155, |
|
"eval_dpo_loss": 0.7177355885505676, |
|
"eval_logits": -0.6173678636550903, |
|
"eval_logps": -80.0989761352539, |
|
"eval_loss": 0.6405187249183655, |
|
"eval_objective": 0.6263132095336914, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.534679114818573, |
|
"eval_regularize": 0.6263132095336914, |
|
"eval_runtime": 309.3835, |
|
"eval_samples_per_second": 18.715, |
|
"eval_steps_per_second": 1.561, |
|
"eval_wo_beta": 14.430180549621582, |
|
"step": 750 |
|
}, |
|
{ |
|
"dpo_loss": 0.5419275760650635, |
|
"epoch": 2.2673594709494567, |
|
"grad_norm": 15.335797694469315, |
|
"learning_rate": 3.3176699082935546e-06, |
|
"logits": -0.5584273338317871, |
|
"logps": -76.04695129394531, |
|
"loss": 0.2263, |
|
"objective": 0.223616361618042, |
|
"ranking_idealized": 0.5941666960716248, |
|
"ranking_idealized_expo": 0.5129166841506958, |
|
"ranking_simple": 0.5866666436195374, |
|
"regularize": 0.223616361618042, |
|
"step": 800, |
|
"wo_beta": 15.526464462280273 |
|
}, |
|
{ |
|
"epoch": 2.2673594709494567, |
|
"eval_dpo_loss": 0.7205542922019958, |
|
"eval_logits": -0.5665243268013, |
|
"eval_logps": -79.37840270996094, |
|
"eval_loss": 0.6457626223564148, |
|
"eval_objective": 0.6297247409820557, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.534679114818573, |
|
"eval_regularize": 0.6297247409820557, |
|
"eval_runtime": 308.5045, |
|
"eval_samples_per_second": 18.768, |
|
"eval_steps_per_second": 1.566, |
|
"eval_wo_beta": 14.316285133361816, |
|
"step": 800 |
|
}, |
|
{ |
|
"dpo_loss": 0.539085865020752, |
|
"epoch": 2.409069437883798, |
|
"grad_norm": 15.300324456571563, |
|
"learning_rate": 3.0797556183036582e-06, |
|
"logits": -0.5494623780250549, |
|
"logps": -75.91812133789062, |
|
"loss": 0.2148, |
|
"objective": 0.2133045643568039, |
|
"ranking_idealized": 0.5979166626930237, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5879166722297668, |
|
"regularize": 0.2133045643568039, |
|
"step": 850, |
|
"wo_beta": 15.76329231262207 |
|
}, |
|
{ |
|
"epoch": 2.409069437883798, |
|
"eval_dpo_loss": 0.7192490100860596, |
|
"eval_logits": -0.5793017148971558, |
|
"eval_logps": -79.08055114746094, |
|
"eval_loss": 0.6435712575912476, |
|
"eval_objective": 0.6275891661643982, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.5362318754196167, |
|
"eval_regularize": 0.6275891661643982, |
|
"eval_runtime": 312.2924, |
|
"eval_samples_per_second": 18.54, |
|
"eval_steps_per_second": 1.547, |
|
"eval_wo_beta": 14.426342010498047, |
|
"step": 850 |
|
}, |
|
{ |
|
"dpo_loss": 0.5431251525878906, |
|
"epoch": 2.550779404818139, |
|
"grad_norm": 15.744993696603856, |
|
"learning_rate": 2.8361446928038298e-06, |
|
"logits": -0.5151351690292358, |
|
"logps": -75.99987030029297, |
|
"loss": 0.1993, |
|
"objective": 0.1999633014202118, |
|
"ranking_idealized": 0.5887500047683716, |
|
"ranking_idealized_expo": 0.518750011920929, |
|
"ranking_simple": 0.5899999737739563, |
|
"regularize": 0.1999633014202118, |
|
"step": 900, |
|
"wo_beta": 16.303361892700195 |
|
}, |
|
{ |
|
"epoch": 2.550779404818139, |
|
"eval_dpo_loss": 0.7217252850532532, |
|
"eval_logits": -0.5620540976524353, |
|
"eval_logps": -80.38152313232422, |
|
"eval_loss": 0.6453951001167297, |
|
"eval_objective": 0.6301912665367126, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.5341615080833435, |
|
"eval_regularize": 0.6301912665367126, |
|
"eval_runtime": 308.7307, |
|
"eval_samples_per_second": 18.754, |
|
"eval_steps_per_second": 1.564, |
|
"eval_wo_beta": 14.44913387298584, |
|
"step": 900 |
|
}, |
|
{ |
|
"dpo_loss": 0.5340785980224609, |
|
"epoch": 2.69248937175248, |
|
"grad_norm": 15.888338408049977, |
|
"learning_rate": 2.5892308345974517e-06, |
|
"logits": -0.5341619253158569, |
|
"logps": -76.2621841430664, |
|
"loss": 0.1887, |
|
"objective": 0.1899857223033905, |
|
"ranking_idealized": 0.5975000262260437, |
|
"ranking_idealized_expo": 0.5087500214576721, |
|
"ranking_simple": 0.5933333039283752, |
|
"regularize": 0.1899857223033905, |
|
"step": 950, |
|
"wo_beta": 16.227678298950195 |
|
}, |
|
{ |
|
"epoch": 2.69248937175248, |
|
"eval_dpo_loss": 0.7204239964485168, |
|
"eval_logits": -0.6216442584991455, |
|
"eval_logps": -79.14459228515625, |
|
"eval_loss": 0.6443176865577698, |
|
"eval_objective": 0.6274449825286865, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.533643901348114, |
|
"eval_regularize": 0.6274449825286865, |
|
"eval_runtime": 309.1927, |
|
"eval_samples_per_second": 18.726, |
|
"eval_steps_per_second": 1.562, |
|
"eval_wo_beta": 14.318567276000977, |
|
"step": 950 |
|
}, |
|
{ |
|
"dpo_loss": 0.5356190204620361, |
|
"epoch": 2.8341993386868207, |
|
"grad_norm": 14.415639648177313, |
|
"learning_rate": 2.341440200858589e-06, |
|
"logits": -0.5420577526092529, |
|
"logps": -74.890869140625, |
|
"loss": 0.1764, |
|
"objective": 0.17968998849391937, |
|
"ranking_idealized": 0.6020833253860474, |
|
"ranking_idealized_expo": 0.5104166865348816, |
|
"ranking_simple": 0.590416669845581, |
|
"regularize": 0.17968998849391937, |
|
"step": 1000, |
|
"wo_beta": 15.519268035888672 |
|
}, |
|
{ |
|
"epoch": 2.8341993386868207, |
|
"eval_dpo_loss": 0.7200449705123901, |
|
"eval_logits": -0.6086606383323669, |
|
"eval_logps": -79.77206420898438, |
|
"eval_loss": 0.639886736869812, |
|
"eval_objective": 0.6246000528335571, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.533643901348114, |
|
"eval_regularize": 0.6246000528335571, |
|
"eval_runtime": 309.9286, |
|
"eval_samples_per_second": 18.682, |
|
"eval_steps_per_second": 1.558, |
|
"eval_wo_beta": 14.450177192687988, |
|
"step": 1000 |
|
}, |
|
{ |
|
"dpo_loss": 0.5345789194107056, |
|
"epoch": 2.975909305621162, |
|
"grad_norm": 14.911688792125858, |
|
"learning_rate": 2.0952075638923656e-06, |
|
"logits": -0.5775164365768433, |
|
"logps": -75.94026184082031, |
|
"loss": 0.163, |
|
"objective": 0.162851020693779, |
|
"ranking_idealized": 0.6075000166893005, |
|
"ranking_idealized_expo": 0.5179166793823242, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.162851020693779, |
|
"step": 1050, |
|
"wo_beta": 16.27153205871582 |
|
}, |
|
{ |
|
"epoch": 2.975909305621162, |
|
"eval_dpo_loss": 0.721105694770813, |
|
"eval_logits": -0.6067584156990051, |
|
"eval_logps": -79.58184814453125, |
|
"eval_loss": 0.6428102850914001, |
|
"eval_objective": 0.6266354322433472, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.531573474407196, |
|
"eval_regularize": 0.6266354322433472, |
|
"eval_runtime": 308.0819, |
|
"eval_samples_per_second": 18.794, |
|
"eval_steps_per_second": 1.568, |
|
"eval_wo_beta": 14.340644836425781, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 2.975909305621162, |
|
"step": 1050, |
|
"total_flos": 0.0, |
|
"train_loss": 0.3115594020343962, |
|
"train_runtime": 28931.7373, |
|
"train_samples_per_second": 8.78, |
|
"train_steps_per_second": 0.061 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 1760, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|