|
{ |
|
"best_metric": 6.68360710144043, |
|
"best_model_checkpoint": "./qwen2.5-0.5b/qwen2.5-0.5b-expo-DPO-ES-1/checkpoint-700", |
|
"epoch": 2.69248937175248, |
|
"eval_steps": 50, |
|
"global_step": 950, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"dpo_loss": 0.6931471824645996, |
|
"epoch": 0.002834199338686821, |
|
"grad_norm": 184.42504161174296, |
|
"learning_rate": 2.840909090909091e-08, |
|
"logits": -1.359458565711975, |
|
"logps": -84.69721221923828, |
|
"loss": 0.6931, |
|
"objective": 0.6931471824645996, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.6931471824645996, |
|
"step": 1, |
|
"wo_beta": 5.271125316619873 |
|
}, |
|
{ |
|
"dpo_loss": 0.7105370759963989, |
|
"epoch": 0.14170996693434104, |
|
"grad_norm": 184.8690438155141, |
|
"learning_rate": 1.4204545454545458e-06, |
|
"logits": -1.464050531387329, |
|
"logps": -84.1650161743164, |
|
"loss": 0.7017, |
|
"objective": 0.7105370759963989, |
|
"ranking_idealized": 0.5289115905761719, |
|
"ranking_idealized_expo": 0.5221088528633118, |
|
"ranking_simple": 0.5225340127944946, |
|
"regularize": 0.7105370759963989, |
|
"step": 50, |
|
"wo_beta": 7.069356918334961 |
|
}, |
|
{ |
|
"epoch": 0.14170996693434104, |
|
"eval_dpo_loss": 0.856997013092041, |
|
"eval_logits": -1.458188533782959, |
|
"eval_logps": -93.02446746826172, |
|
"eval_loss": 0.8469988107681274, |
|
"eval_objective": 0.856997013092041, |
|
"eval_ranking_idealized": 0.5295031070709229, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.523809552192688, |
|
"eval_regularize": 0.856997013092041, |
|
"eval_runtime": 312.0194, |
|
"eval_samples_per_second": 18.557, |
|
"eval_steps_per_second": 1.548, |
|
"eval_wo_beta": 7.850645065307617, |
|
"step": 50 |
|
}, |
|
{ |
|
"dpo_loss": 0.8048794865608215, |
|
"epoch": 0.2834199338686821, |
|
"grad_norm": 165.00318532005144, |
|
"learning_rate": 2.8409090909090916e-06, |
|
"logits": -1.4549062252044678, |
|
"logps": -84.21845245361328, |
|
"loss": 0.8112, |
|
"objective": 0.8048794865608215, |
|
"ranking_idealized": 0.5241666436195374, |
|
"ranking_idealized_expo": 0.5137500166893005, |
|
"ranking_simple": 0.543749988079071, |
|
"regularize": 0.8048794865608215, |
|
"step": 100, |
|
"wo_beta": 6.3764801025390625 |
|
}, |
|
{ |
|
"epoch": 0.2834199338686821, |
|
"eval_dpo_loss": 1.0273518562316895, |
|
"eval_logits": -1.4382514953613281, |
|
"eval_logps": -86.68037414550781, |
|
"eval_loss": 1.0529025793075562, |
|
"eval_objective": 1.0273518562316895, |
|
"eval_ranking_idealized": 0.5295031070709229, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5284678936004639, |
|
"eval_regularize": 1.0273518562316895, |
|
"eval_runtime": 308.916, |
|
"eval_samples_per_second": 18.743, |
|
"eval_steps_per_second": 1.564, |
|
"eval_wo_beta": 7.498167991638184, |
|
"step": 100 |
|
}, |
|
{ |
|
"dpo_loss": 1.0717324018478394, |
|
"epoch": 0.42512990080302315, |
|
"grad_norm": 132.13237608472397, |
|
"learning_rate": 4.2613636363636365e-06, |
|
"logits": -1.3764770030975342, |
|
"logps": -75.389892578125, |
|
"loss": 1.0895, |
|
"objective": 1.0717324018478394, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.527916669845581, |
|
"ranking_simple": 0.5645833611488342, |
|
"regularize": 1.0717324018478394, |
|
"step": 150, |
|
"wo_beta": 6.200186729431152 |
|
}, |
|
{ |
|
"epoch": 0.42512990080302315, |
|
"eval_dpo_loss": 1.4009839296340942, |
|
"eval_logits": -1.2964812517166138, |
|
"eval_logps": -84.43347930908203, |
|
"eval_loss": 1.4497517347335815, |
|
"eval_objective": 1.4009839296340942, |
|
"eval_ranking_idealized": 0.5295031070709229, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5320910811424255, |
|
"eval_regularize": 1.4009839296340942, |
|
"eval_runtime": 308.861, |
|
"eval_samples_per_second": 18.746, |
|
"eval_steps_per_second": 1.564, |
|
"eval_wo_beta": 7.269246578216553, |
|
"step": 150 |
|
}, |
|
{ |
|
"dpo_loss": 1.2378751039505005, |
|
"epoch": 0.5668398677373642, |
|
"grad_norm": 120.88819473175299, |
|
"learning_rate": 4.997168347957521e-06, |
|
"logits": -1.3378355503082275, |
|
"logps": -75.85352325439453, |
|
"loss": 1.2362, |
|
"objective": 1.2378751039505005, |
|
"ranking_idealized": 0.5204166769981384, |
|
"ranking_idealized_expo": 0.51541668176651, |
|
"ranking_simple": 0.5699999928474426, |
|
"regularize": 1.2378751039505005, |
|
"step": 200, |
|
"wo_beta": 6.1314239501953125 |
|
}, |
|
{ |
|
"epoch": 0.5668398677373642, |
|
"eval_dpo_loss": 1.611555576324463, |
|
"eval_logits": -1.2954967021942139, |
|
"eval_logps": -77.71936798095703, |
|
"eval_loss": 1.7034811973571777, |
|
"eval_objective": 1.611555576324463, |
|
"eval_ranking_idealized": 0.5295031070709229, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5320910811424255, |
|
"eval_regularize": 1.611555576324463, |
|
"eval_runtime": 309.5132, |
|
"eval_samples_per_second": 18.707, |
|
"eval_steps_per_second": 1.561, |
|
"eval_wo_beta": 7.226413726806641, |
|
"step": 200 |
|
}, |
|
{ |
|
"dpo_loss": 1.2734363079071045, |
|
"epoch": 0.7085498346717053, |
|
"grad_norm": 126.97448798013814, |
|
"learning_rate": 4.973122855144066e-06, |
|
"logits": -1.1745147705078125, |
|
"logps": -80.53407287597656, |
|
"loss": 1.3151, |
|
"objective": 1.2734363079071045, |
|
"ranking_idealized": 0.5249999761581421, |
|
"ranking_idealized_expo": 0.5162500143051147, |
|
"ranking_simple": 0.5929166674613953, |
|
"regularize": 1.2734363079071045, |
|
"step": 250, |
|
"wo_beta": 6.07757568359375 |
|
}, |
|
{ |
|
"epoch": 0.7085498346717053, |
|
"eval_dpo_loss": 1.831916093826294, |
|
"eval_logits": -1.2565279006958008, |
|
"eval_logps": -92.72237396240234, |
|
"eval_loss": 1.9222025871276855, |
|
"eval_objective": 1.831916093826294, |
|
"eval_ranking_idealized": 0.5295031070709229, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5310559272766113, |
|
"eval_regularize": 1.831916093826294, |
|
"eval_runtime": 308.4533, |
|
"eval_samples_per_second": 18.771, |
|
"eval_steps_per_second": 1.566, |
|
"eval_wo_beta": 7.185470104217529, |
|
"step": 250 |
|
}, |
|
{ |
|
"dpo_loss": 1.1058416366577148, |
|
"epoch": 0.8502598016060463, |
|
"grad_norm": 93.62402691602496, |
|
"learning_rate": 4.924776641419513e-06, |
|
"logits": -1.027516484260559, |
|
"logps": -86.21515655517578, |
|
"loss": 1.1899, |
|
"objective": 1.1058416366577148, |
|
"ranking_idealized": 0.5062500238418579, |
|
"ranking_idealized_expo": 0.4950000047683716, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 1.1058416366577148, |
|
"step": 300, |
|
"wo_beta": 5.9321818351745605 |
|
}, |
|
{ |
|
"epoch": 0.8502598016060463, |
|
"eval_dpo_loss": 1.9587332010269165, |
|
"eval_logits": -0.9785559177398682, |
|
"eval_logps": -90.93506622314453, |
|
"eval_loss": 2.0297279357910156, |
|
"eval_objective": 1.9587332010269165, |
|
"eval_ranking_idealized": 0.5295031070709229, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5367494821548462, |
|
"eval_regularize": 1.9587332010269165, |
|
"eval_runtime": 308.5179, |
|
"eval_samples_per_second": 18.767, |
|
"eval_steps_per_second": 1.566, |
|
"eval_wo_beta": 6.933654308319092, |
|
"step": 300 |
|
}, |
|
{ |
|
"dpo_loss": 1.1039565801620483, |
|
"epoch": 0.9919697685403873, |
|
"grad_norm": 81.61244111449733, |
|
"learning_rate": 4.8526047530778175e-06, |
|
"logits": -0.8597685694694519, |
|
"logps": -83.09705352783203, |
|
"loss": 1.1441, |
|
"objective": 1.1039565801620483, |
|
"ranking_idealized": 0.5354166626930237, |
|
"ranking_idealized_expo": 0.5254166722297668, |
|
"ranking_simple": 0.6141666769981384, |
|
"regularize": 1.1039565801620483, |
|
"step": 350, |
|
"wo_beta": 5.234945297241211 |
|
}, |
|
{ |
|
"epoch": 0.9919697685403873, |
|
"eval_dpo_loss": 2.054466485977173, |
|
"eval_logits": -1.0211108922958374, |
|
"eval_logps": -82.12905883789062, |
|
"eval_loss": 2.1653144359588623, |
|
"eval_objective": 2.054466485977173, |
|
"eval_ranking_idealized": 0.5295031070709229, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.542443037033081, |
|
"eval_regularize": 2.054466485977173, |
|
"eval_runtime": 308.5833, |
|
"eval_samples_per_second": 18.763, |
|
"eval_steps_per_second": 1.565, |
|
"eval_wo_beta": 7.001650810241699, |
|
"step": 350 |
|
}, |
|
{ |
|
"dpo_loss": 0.7012167572975159, |
|
"epoch": 1.1336797354747283, |
|
"grad_norm": 72.06842141832568, |
|
"learning_rate": 4.757316345716554e-06, |
|
"logits": -0.8224175572395325, |
|
"logps": -77.92687225341797, |
|
"loss": 0.725, |
|
"objective": 0.7012167572975159, |
|
"ranking_idealized": 0.5412499904632568, |
|
"ranking_idealized_expo": 0.5320833325386047, |
|
"ranking_simple": 0.6483333110809326, |
|
"regularize": 0.7012167572975159, |
|
"step": 400, |
|
"wo_beta": 4.492737770080566 |
|
}, |
|
{ |
|
"epoch": 1.1336797354747283, |
|
"eval_dpo_loss": 2.2359888553619385, |
|
"eval_logits": -0.7528972029685974, |
|
"eval_logps": -84.34579467773438, |
|
"eval_loss": 2.288560152053833, |
|
"eval_objective": 2.2359888553619385, |
|
"eval_ranking_idealized": 0.5295031070709229, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5331262946128845, |
|
"eval_regularize": 2.2359888553619385, |
|
"eval_runtime": 308.7822, |
|
"eval_samples_per_second": 18.751, |
|
"eval_steps_per_second": 1.564, |
|
"eval_wo_beta": 7.154054164886475, |
|
"step": 400 |
|
}, |
|
{ |
|
"dpo_loss": 0.78798508644104, |
|
"epoch": 1.2753897024090695, |
|
"grad_norm": 67.40926541067125, |
|
"learning_rate": 4.639847716126855e-06, |
|
"logits": -0.7312514185905457, |
|
"logps": -80.1772232055664, |
|
"loss": 0.7626, |
|
"objective": 0.78798508644104, |
|
"ranking_idealized": 0.5245833396911621, |
|
"ranking_idealized_expo": 0.5191666483879089, |
|
"ranking_simple": 0.6483333110809326, |
|
"regularize": 0.78798508644104, |
|
"step": 450, |
|
"wo_beta": 5.0193963050842285 |
|
}, |
|
{ |
|
"epoch": 1.2753897024090695, |
|
"eval_dpo_loss": 2.0657246112823486, |
|
"eval_logits": -0.8863243460655212, |
|
"eval_logps": -80.59545135498047, |
|
"eval_loss": 2.1594510078430176, |
|
"eval_objective": 2.0657246112823486, |
|
"eval_ranking_idealized": 0.5295031070709229, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.532608687877655, |
|
"eval_regularize": 2.0657246112823486, |
|
"eval_runtime": 314.8953, |
|
"eval_samples_per_second": 18.387, |
|
"eval_steps_per_second": 1.534, |
|
"eval_wo_beta": 6.7939372062683105, |
|
"step": 450 |
|
}, |
|
{ |
|
"dpo_loss": 0.7955495119094849, |
|
"epoch": 1.4170996693434104, |
|
"grad_norm": 80.21112193307313, |
|
"learning_rate": 4.501353102310901e-06, |
|
"logits": -0.717734694480896, |
|
"logps": -76.27667999267578, |
|
"loss": 0.8048, |
|
"objective": 0.7955495119094849, |
|
"ranking_idealized": 0.5054166913032532, |
|
"ranking_idealized_expo": 0.4970833361148834, |
|
"ranking_simple": 0.6424999833106995, |
|
"regularize": 0.7955495119094849, |
|
"step": 500, |
|
"wo_beta": 4.868187427520752 |
|
}, |
|
{ |
|
"epoch": 1.4170996693434104, |
|
"eval_dpo_loss": 2.0975162982940674, |
|
"eval_logits": -0.7432325482368469, |
|
"eval_logps": -82.348876953125, |
|
"eval_loss": 2.2134060859680176, |
|
"eval_objective": 2.0975162982940674, |
|
"eval_ranking_idealized": 0.5295031070709229, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5341615080833435, |
|
"eval_regularize": 2.0975162982940674, |
|
"eval_runtime": 308.5741, |
|
"eval_samples_per_second": 18.764, |
|
"eval_steps_per_second": 1.565, |
|
"eval_wo_beta": 6.79838752746582, |
|
"step": 500 |
|
}, |
|
{ |
|
"dpo_loss": 0.733691394329071, |
|
"epoch": 1.5588096362777515, |
|
"grad_norm": 73.44558837421432, |
|
"learning_rate": 4.34319334202531e-06, |
|
"logits": -0.6085548996925354, |
|
"logps": -79.48100280761719, |
|
"loss": 0.7106, |
|
"objective": 0.733691394329071, |
|
"ranking_idealized": 0.5199999809265137, |
|
"ranking_idealized_expo": 0.5108333230018616, |
|
"ranking_simple": 0.6579166650772095, |
|
"regularize": 0.733691394329071, |
|
"step": 550, |
|
"wo_beta": 4.594529151916504 |
|
}, |
|
{ |
|
"epoch": 1.5588096362777515, |
|
"eval_dpo_loss": 2.069565534591675, |
|
"eval_logits": -0.6665427088737488, |
|
"eval_logps": -85.06731414794922, |
|
"eval_loss": 2.1704676151275635, |
|
"eval_objective": 2.069565534591675, |
|
"eval_ranking_idealized": 0.5295031070709229, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5320910811424255, |
|
"eval_regularize": 2.069565534591675, |
|
"eval_runtime": 308.5, |
|
"eval_samples_per_second": 18.768, |
|
"eval_steps_per_second": 1.566, |
|
"eval_wo_beta": 6.861428260803223, |
|
"step": 550 |
|
}, |
|
{ |
|
"dpo_loss": 0.6737102270126343, |
|
"epoch": 1.7005196032120926, |
|
"grad_norm": 58.923373544475695, |
|
"learning_rate": 4.16692250129073e-06, |
|
"logits": -0.5712339282035828, |
|
"logps": -79.50873565673828, |
|
"loss": 0.6934, |
|
"objective": 0.6737102270126343, |
|
"ranking_idealized": 0.5220833420753479, |
|
"ranking_idealized_expo": 0.5149999856948853, |
|
"ranking_simple": 0.6537500023841858, |
|
"regularize": 0.6737102270126343, |
|
"step": 600, |
|
"wo_beta": 4.632404804229736 |
|
}, |
|
{ |
|
"epoch": 1.7005196032120926, |
|
"eval_dpo_loss": 2.0693418979644775, |
|
"eval_logits": -0.7358241081237793, |
|
"eval_logps": -81.67726135253906, |
|
"eval_loss": 2.2126805782318115, |
|
"eval_objective": 2.0693418979644775, |
|
"eval_ranking_idealized": 0.5295031070709229, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5362318754196167, |
|
"eval_regularize": 2.0693418979644775, |
|
"eval_runtime": 308.5887, |
|
"eval_samples_per_second": 18.763, |
|
"eval_steps_per_second": 1.565, |
|
"eval_wo_beta": 6.726527690887451, |
|
"step": 600 |
|
}, |
|
{ |
|
"dpo_loss": 0.6718880534172058, |
|
"epoch": 1.8422295701464337, |
|
"grad_norm": 71.22185869553977, |
|
"learning_rate": 3.974272604254906e-06, |
|
"logits": -0.6514729261398315, |
|
"logps": -79.11984252929688, |
|
"loss": 0.6885, |
|
"objective": 0.6718880534172058, |
|
"ranking_idealized": 0.5408333539962769, |
|
"ranking_idealized_expo": 0.527916669845581, |
|
"ranking_simple": 0.6583333611488342, |
|
"regularize": 0.6718880534172058, |
|
"step": 650, |
|
"wo_beta": 4.779858112335205 |
|
}, |
|
{ |
|
"epoch": 1.8422295701464337, |
|
"eval_dpo_loss": 2.1432294845581055, |
|
"eval_logits": -0.6786962747573853, |
|
"eval_logps": -82.88699340820312, |
|
"eval_loss": 2.2198147773742676, |
|
"eval_objective": 2.1432294845581055, |
|
"eval_ranking_idealized": 0.5295031070709229, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5362318754196167, |
|
"eval_regularize": 2.1432294845581055, |
|
"eval_runtime": 308.8348, |
|
"eval_samples_per_second": 18.748, |
|
"eval_steps_per_second": 1.564, |
|
"eval_wo_beta": 6.820231914520264, |
|
"step": 650 |
|
}, |
|
{ |
|
"dpo_loss": 0.6824830770492554, |
|
"epoch": 1.9839395370807746, |
|
"grad_norm": 69.2974675476568, |
|
"learning_rate": 3.767136614452458e-06, |
|
"logits": -0.5030444860458374, |
|
"logps": -78.93128967285156, |
|
"loss": 0.6477, |
|
"objective": 0.6824830770492554, |
|
"ranking_idealized": 0.5170833468437195, |
|
"ranking_idealized_expo": 0.5112500190734863, |
|
"ranking_simple": 0.6433333158493042, |
|
"regularize": 0.6824830770492554, |
|
"step": 700, |
|
"wo_beta": 4.807421684265137 |
|
}, |
|
{ |
|
"epoch": 1.9839395370807746, |
|
"eval_dpo_loss": 2.2591824531555176, |
|
"eval_logits": -0.659694254398346, |
|
"eval_logps": -83.41045379638672, |
|
"eval_loss": 2.3420462608337402, |
|
"eval_objective": 2.2591824531555176, |
|
"eval_ranking_idealized": 0.5295031070709229, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5403726696968079, |
|
"eval_regularize": 2.2591824531555176, |
|
"eval_runtime": 308.6048, |
|
"eval_samples_per_second": 18.762, |
|
"eval_steps_per_second": 1.565, |
|
"eval_wo_beta": 6.68360710144043, |
|
"step": 700 |
|
}, |
|
{ |
|
"dpo_loss": 0.38009390234947205, |
|
"epoch": 2.1256495040151155, |
|
"grad_norm": 45.305368238738886, |
|
"learning_rate": 3.547549834686222e-06, |
|
"logits": -0.5769148468971252, |
|
"logps": -79.54972076416016, |
|
"loss": 0.3785, |
|
"objective": 0.38009390234947205, |
|
"ranking_idealized": 0.5216666460037231, |
|
"ranking_idealized_expo": 0.5112500190734863, |
|
"ranking_simple": 0.6787499785423279, |
|
"regularize": 0.38009390234947205, |
|
"step": 750, |
|
"wo_beta": 4.319232940673828 |
|
}, |
|
{ |
|
"epoch": 2.1256495040151155, |
|
"eval_dpo_loss": 2.2005364894866943, |
|
"eval_logits": -0.7841140627861023, |
|
"eval_logps": -84.03689575195312, |
|
"eval_loss": 2.2919445037841797, |
|
"eval_objective": 2.2005364894866943, |
|
"eval_ranking_idealized": 0.5295031070709229, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.54347825050354, |
|
"eval_regularize": 2.2005364894866943, |
|
"eval_runtime": 311.4559, |
|
"eval_samples_per_second": 18.59, |
|
"eval_steps_per_second": 1.551, |
|
"eval_wo_beta": 6.8514323234558105, |
|
"step": 750 |
|
}, |
|
{ |
|
"dpo_loss": 0.33919599652290344, |
|
"epoch": 2.2673594709494567, |
|
"grad_norm": 40.098481963513144, |
|
"learning_rate": 3.3176699082935546e-06, |
|
"logits": -0.6165332198143005, |
|
"logps": -80.33509826660156, |
|
"loss": 0.3316, |
|
"objective": 0.33919599652290344, |
|
"ranking_idealized": 0.5195833444595337, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.6795833110809326, |
|
"regularize": 0.33919599652290344, |
|
"step": 800, |
|
"wo_beta": 3.9735937118530273 |
|
}, |
|
{ |
|
"epoch": 2.2673594709494567, |
|
"eval_dpo_loss": 2.1123316287994385, |
|
"eval_logits": -0.6766893267631531, |
|
"eval_logps": -84.29901123046875, |
|
"eval_loss": 2.2220041751861572, |
|
"eval_objective": 2.1123316287994385, |
|
"eval_ranking_idealized": 0.5295031070709229, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5408902764320374, |
|
"eval_regularize": 2.1123316287994385, |
|
"eval_runtime": 309.0664, |
|
"eval_samples_per_second": 18.734, |
|
"eval_steps_per_second": 1.563, |
|
"eval_wo_beta": 6.766322612762451, |
|
"step": 800 |
|
}, |
|
{ |
|
"dpo_loss": 0.34950682520866394, |
|
"epoch": 2.409069437883798, |
|
"grad_norm": 35.45933765377846, |
|
"learning_rate": 3.0797556183036582e-06, |
|
"logits": -0.6031672954559326, |
|
"logps": -80.82646942138672, |
|
"loss": 0.3283, |
|
"objective": 0.34950682520866394, |
|
"ranking_idealized": 0.5254166722297668, |
|
"ranking_idealized_expo": 0.5141666531562805, |
|
"ranking_simple": 0.6779166460037231, |
|
"regularize": 0.34950682520866394, |
|
"step": 850, |
|
"wo_beta": 4.280696392059326 |
|
}, |
|
{ |
|
"epoch": 2.409069437883798, |
|
"eval_dpo_loss": 2.221233606338501, |
|
"eval_logits": -0.6537899374961853, |
|
"eval_logps": -85.08342742919922, |
|
"eval_loss": 2.3019895553588867, |
|
"eval_objective": 2.221233606338501, |
|
"eval_ranking_idealized": 0.5295031070709229, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5408902764320374, |
|
"eval_regularize": 2.221233606338501, |
|
"eval_runtime": 308.3167, |
|
"eval_samples_per_second": 18.779, |
|
"eval_steps_per_second": 1.567, |
|
"eval_wo_beta": 6.777287006378174, |
|
"step": 850 |
|
}, |
|
{ |
|
"dpo_loss": 0.35370326042175293, |
|
"epoch": 2.550779404818139, |
|
"grad_norm": 63.704496862923826, |
|
"learning_rate": 2.8361446928038298e-06, |
|
"logits": -0.600394606590271, |
|
"logps": -79.94818115234375, |
|
"loss": 0.3516, |
|
"objective": 0.35370326042175293, |
|
"ranking_idealized": 0.5249999761581421, |
|
"ranking_idealized_expo": 0.5183333158493042, |
|
"ranking_simple": 0.6729166507720947, |
|
"regularize": 0.35370326042175293, |
|
"step": 900, |
|
"wo_beta": 4.2681732177734375 |
|
}, |
|
{ |
|
"epoch": 2.550779404818139, |
|
"eval_dpo_loss": 2.1911425590515137, |
|
"eval_logits": -0.6225402355194092, |
|
"eval_logps": -84.7564468383789, |
|
"eval_loss": 2.272311210632324, |
|
"eval_objective": 2.1911425590515137, |
|
"eval_ranking_idealized": 0.5295031070709229, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5362318754196167, |
|
"eval_regularize": 2.1911425590515137, |
|
"eval_runtime": 308.2356, |
|
"eval_samples_per_second": 18.784, |
|
"eval_steps_per_second": 1.567, |
|
"eval_wo_beta": 6.816178798675537, |
|
"step": 900 |
|
}, |
|
{ |
|
"dpo_loss": 0.38168859481811523, |
|
"epoch": 2.69248937175248, |
|
"grad_norm": 42.960212326170364, |
|
"learning_rate": 2.5892308345974517e-06, |
|
"logits": -0.5876100659370422, |
|
"logps": -80.23540496826172, |
|
"loss": 0.3245, |
|
"objective": 0.38168859481811523, |
|
"ranking_idealized": 0.5162500143051147, |
|
"ranking_idealized_expo": 0.5058333277702332, |
|
"ranking_simple": 0.6866666674613953, |
|
"regularize": 0.38168859481811523, |
|
"step": 950, |
|
"wo_beta": 4.207254409790039 |
|
}, |
|
{ |
|
"epoch": 2.69248937175248, |
|
"eval_dpo_loss": 2.2523224353790283, |
|
"eval_logits": -0.7128884196281433, |
|
"eval_logps": -83.6421127319336, |
|
"eval_loss": 2.3303723335266113, |
|
"eval_objective": 2.2523224353790283, |
|
"eval_ranking_idealized": 0.5295031070709229, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.533643901348114, |
|
"eval_regularize": 2.2523224353790283, |
|
"eval_runtime": 308.3878, |
|
"eval_samples_per_second": 18.775, |
|
"eval_steps_per_second": 1.566, |
|
"eval_wo_beta": 6.89421272277832, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.69248937175248, |
|
"step": 950, |
|
"total_flos": 0.0, |
|
"train_loss": 0.7491915085441188, |
|
"train_runtime": 26152.6935, |
|
"train_samples_per_second": 9.713, |
|
"train_steps_per_second": 0.067 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 1760, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|