|
{ |
|
"best_metric": 7.4231791496276855, |
|
"best_model_checkpoint": "./qwen2.5-0.5b/qwen2.5-0.5b-expo-DPO-ES-0.1/checkpoint-300", |
|
"epoch": 1.5588096362777515, |
|
"eval_steps": 50, |
|
"global_step": 550, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"dpo_loss": 0.6931471824645996, |
|
"epoch": 0.002834199338686821, |
|
"grad_norm": 18.44253347826331, |
|
"learning_rate": 2.840909090909091e-08, |
|
"logits": -1.359458565711975, |
|
"logps": -84.69721221923828, |
|
"loss": 0.6931, |
|
"objective": 0.6931471824645996, |
|
"ranking_idealized": 0.6458333134651184, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.6931471824645996, |
|
"step": 1, |
|
"wo_beta": 5.271125316619873 |
|
}, |
|
{ |
|
"dpo_loss": 0.6822353601455688, |
|
"epoch": 0.14170996693434104, |
|
"grad_norm": 18.641365531241362, |
|
"learning_rate": 1.4204545454545458e-06, |
|
"logits": -1.508646011352539, |
|
"logps": -82.822021484375, |
|
"loss": 0.6785, |
|
"objective": 0.6822353601455688, |
|
"ranking_idealized": 0.608418345451355, |
|
"ranking_idealized_expo": 0.5229591727256775, |
|
"ranking_simple": 0.5267857313156128, |
|
"regularize": 0.6822353601455688, |
|
"step": 50, |
|
"wo_beta": 7.122643947601318 |
|
}, |
|
{ |
|
"epoch": 0.14170996693434104, |
|
"eval_dpo_loss": 0.6842507719993591, |
|
"eval_logits": -1.6022367477416992, |
|
"eval_logps": -90.87205505371094, |
|
"eval_loss": 0.6813501715660095, |
|
"eval_objective": 0.6842507719993591, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.5258799195289612, |
|
"eval_regularize": 0.6842507719993591, |
|
"eval_runtime": 308.2905, |
|
"eval_samples_per_second": 18.781, |
|
"eval_steps_per_second": 1.567, |
|
"eval_wo_beta": 7.874889850616455, |
|
"step": 50 |
|
}, |
|
{ |
|
"dpo_loss": 0.61030513048172, |
|
"epoch": 0.2834199338686821, |
|
"grad_norm": 19.29211957833529, |
|
"learning_rate": 2.8409090909090916e-06, |
|
"logits": -1.6967747211456299, |
|
"logps": -88.03068542480469, |
|
"loss": 0.618, |
|
"objective": 0.61030513048172, |
|
"ranking_idealized": 0.6016666889190674, |
|
"ranking_idealized_expo": 0.5141666531562805, |
|
"ranking_simple": 0.5729166865348816, |
|
"regularize": 0.61030513048172, |
|
"step": 100, |
|
"wo_beta": 6.357723236083984 |
|
}, |
|
{ |
|
"epoch": 0.2834199338686821, |
|
"eval_dpo_loss": 0.6765881776809692, |
|
"eval_logits": -1.7799152135849, |
|
"eval_logps": -98.88995361328125, |
|
"eval_loss": 0.6732921004295349, |
|
"eval_objective": 0.6765881776809692, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.5398550629615784, |
|
"eval_regularize": 0.6765881776809692, |
|
"eval_runtime": 307.7771, |
|
"eval_samples_per_second": 18.812, |
|
"eval_steps_per_second": 1.569, |
|
"eval_wo_beta": 7.7840118408203125, |
|
"step": 100 |
|
}, |
|
{ |
|
"dpo_loss": 0.5696364045143127, |
|
"epoch": 0.42512990080302315, |
|
"grad_norm": 18.142654654279198, |
|
"learning_rate": 4.2613636363636365e-06, |
|
"logits": -1.7447518110275269, |
|
"logps": -90.71894073486328, |
|
"loss": 0.5667, |
|
"objective": 0.5696364045143127, |
|
"ranking_idealized": 0.6066666841506958, |
|
"ranking_idealized_expo": 0.5287500023841858, |
|
"ranking_simple": 0.6016666889190674, |
|
"regularize": 0.5696364045143127, |
|
"step": 150, |
|
"wo_beta": 5.971243858337402 |
|
}, |
|
{ |
|
"epoch": 0.42512990080302315, |
|
"eval_dpo_loss": 0.6829443573951721, |
|
"eval_logits": -1.807220697402954, |
|
"eval_logps": -99.12174987792969, |
|
"eval_loss": 0.686660647392273, |
|
"eval_objective": 0.6829443573951721, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.5408902764320374, |
|
"eval_regularize": 0.6829443573951721, |
|
"eval_runtime": 308.0058, |
|
"eval_samples_per_second": 18.798, |
|
"eval_steps_per_second": 1.568, |
|
"eval_wo_beta": 7.853672981262207, |
|
"step": 150 |
|
}, |
|
{ |
|
"dpo_loss": 0.5278546214103699, |
|
"epoch": 0.5668398677373642, |
|
"grad_norm": 15.355172339669656, |
|
"learning_rate": 4.997168347957521e-06, |
|
"logits": -1.9084746837615967, |
|
"logps": -92.50672149658203, |
|
"loss": 0.5214, |
|
"objective": 0.5278546214103699, |
|
"ranking_idealized": 0.5924999713897705, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.6362500190734863, |
|
"regularize": 0.5278546214103699, |
|
"step": 200, |
|
"wo_beta": 5.543394565582275 |
|
}, |
|
{ |
|
"epoch": 0.5668398677373642, |
|
"eval_dpo_loss": 0.69049072265625, |
|
"eval_logits": -1.8895256519317627, |
|
"eval_logps": -99.51531982421875, |
|
"eval_loss": 0.6901801824569702, |
|
"eval_objective": 0.69049072265625, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.544513463973999, |
|
"eval_regularize": 0.69049072265625, |
|
"eval_runtime": 308.0169, |
|
"eval_samples_per_second": 18.798, |
|
"eval_steps_per_second": 1.568, |
|
"eval_wo_beta": 7.701313495635986, |
|
"step": 200 |
|
}, |
|
{ |
|
"dpo_loss": 0.4935472011566162, |
|
"epoch": 0.7085498346717053, |
|
"grad_norm": 14.324676993767012, |
|
"learning_rate": 4.973122855144066e-06, |
|
"logits": -1.9081355333328247, |
|
"logps": -88.3556137084961, |
|
"loss": 0.4922, |
|
"objective": 0.4935472011566162, |
|
"ranking_idealized": 0.5991666913032532, |
|
"ranking_idealized_expo": 0.5170833468437195, |
|
"ranking_simple": 0.6608333587646484, |
|
"regularize": 0.4935472011566162, |
|
"step": 250, |
|
"wo_beta": 5.325418472290039 |
|
}, |
|
{ |
|
"epoch": 0.7085498346717053, |
|
"eval_dpo_loss": 0.6914450526237488, |
|
"eval_logits": -1.9887231588363647, |
|
"eval_logps": -82.8383560180664, |
|
"eval_loss": 0.6975522041320801, |
|
"eval_objective": 0.6914450526237488, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.5481366515159607, |
|
"eval_regularize": 0.6914450526237488, |
|
"eval_runtime": 307.641, |
|
"eval_samples_per_second": 18.821, |
|
"eval_steps_per_second": 1.57, |
|
"eval_wo_beta": 7.878448486328125, |
|
"step": 250 |
|
}, |
|
{ |
|
"dpo_loss": 0.4521400034427643, |
|
"epoch": 0.8502598016060463, |
|
"grad_norm": 13.428528764338076, |
|
"learning_rate": 4.924776641419513e-06, |
|
"logits": -2.0954272747039795, |
|
"logps": -81.39044952392578, |
|
"loss": 0.4535, |
|
"objective": 0.4521400034427643, |
|
"ranking_idealized": 0.5799999833106995, |
|
"ranking_idealized_expo": 0.4970833361148834, |
|
"ranking_simple": 0.6837499737739563, |
|
"regularize": 0.4521400034427643, |
|
"step": 300, |
|
"wo_beta": 5.026640892028809 |
|
}, |
|
{ |
|
"epoch": 0.8502598016060463, |
|
"eval_dpo_loss": 0.6893911957740784, |
|
"eval_logits": -2.1208713054656982, |
|
"eval_logps": -90.9490966796875, |
|
"eval_loss": 0.6922824382781982, |
|
"eval_objective": 0.6893911957740784, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.556418240070343, |
|
"eval_regularize": 0.6893911957740784, |
|
"eval_runtime": 307.9716, |
|
"eval_samples_per_second": 18.8, |
|
"eval_steps_per_second": 1.568, |
|
"eval_wo_beta": 7.4231791496276855, |
|
"step": 300 |
|
}, |
|
{ |
|
"dpo_loss": 0.40847164392471313, |
|
"epoch": 0.9919697685403873, |
|
"grad_norm": 15.041732370189118, |
|
"learning_rate": 4.8526047530778175e-06, |
|
"logits": -1.9279303550720215, |
|
"logps": -84.23763275146484, |
|
"loss": 0.4228, |
|
"objective": 0.40847164392471313, |
|
"ranking_idealized": 0.60916668176651, |
|
"ranking_idealized_expo": 0.5270833373069763, |
|
"ranking_simple": 0.7191666960716248, |
|
"regularize": 0.40847164392471313, |
|
"step": 350, |
|
"wo_beta": 3.9990389347076416 |
|
}, |
|
{ |
|
"epoch": 0.9919697685403873, |
|
"eval_dpo_loss": 0.6968410611152649, |
|
"eval_logits": -1.9802873134613037, |
|
"eval_logps": -87.72307586669922, |
|
"eval_loss": 0.7063526511192322, |
|
"eval_objective": 0.6968410611152649, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.5538302063941956, |
|
"eval_regularize": 0.6968410611152649, |
|
"eval_runtime": 307.977, |
|
"eval_samples_per_second": 18.8, |
|
"eval_steps_per_second": 1.568, |
|
"eval_wo_beta": 8.025344848632812, |
|
"step": 350 |
|
}, |
|
{ |
|
"dpo_loss": 0.28143200278282166, |
|
"epoch": 1.1336797354747283, |
|
"grad_norm": 12.112116162894052, |
|
"learning_rate": 4.757316345716554e-06, |
|
"logits": -2.0127880573272705, |
|
"logps": -91.47932434082031, |
|
"loss": 0.2845, |
|
"objective": 0.28143200278282166, |
|
"ranking_idealized": 0.6087499856948853, |
|
"ranking_idealized_expo": 0.5337499976158142, |
|
"ranking_simple": 0.8141666650772095, |
|
"regularize": 0.28143200278282166, |
|
"step": 400, |
|
"wo_beta": 2.649115562438965 |
|
}, |
|
{ |
|
"epoch": 1.1336797354747283, |
|
"eval_dpo_loss": 0.7269378900527954, |
|
"eval_logits": -2.080526828765869, |
|
"eval_logps": -101.31802368164062, |
|
"eval_loss": 0.7304782867431641, |
|
"eval_objective": 0.7269378900527954, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.5429606437683105, |
|
"eval_regularize": 0.7269378900527954, |
|
"eval_runtime": 308.0503, |
|
"eval_samples_per_second": 18.796, |
|
"eval_steps_per_second": 1.568, |
|
"eval_wo_beta": 8.616350173950195, |
|
"step": 400 |
|
}, |
|
{ |
|
"dpo_loss": 0.3042532503604889, |
|
"epoch": 1.2753897024090695, |
|
"grad_norm": 12.171625264502326, |
|
"learning_rate": 4.639847716126855e-06, |
|
"logits": -1.9084649085998535, |
|
"logps": -92.91566467285156, |
|
"loss": 0.2989, |
|
"objective": 0.3042532503604889, |
|
"ranking_idealized": 0.5975000262260437, |
|
"ranking_idealized_expo": 0.5199999809265137, |
|
"ranking_simple": 0.8075000047683716, |
|
"regularize": 0.3042532503604889, |
|
"step": 450, |
|
"wo_beta": 3.341869592666626 |
|
}, |
|
{ |
|
"epoch": 1.2753897024090695, |
|
"eval_dpo_loss": 0.6973706483840942, |
|
"eval_logits": -1.8646337985992432, |
|
"eval_logps": -93.1955337524414, |
|
"eval_loss": 0.7005103826522827, |
|
"eval_objective": 0.6973706483840942, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.5605590343475342, |
|
"eval_regularize": 0.6973706483840942, |
|
"eval_runtime": 308.4071, |
|
"eval_samples_per_second": 18.774, |
|
"eval_steps_per_second": 1.566, |
|
"eval_wo_beta": 8.238639831542969, |
|
"step": 450 |
|
}, |
|
{ |
|
"dpo_loss": 0.29817140102386475, |
|
"epoch": 1.4170996693434104, |
|
"grad_norm": 15.895661459470155, |
|
"learning_rate": 4.501353102310901e-06, |
|
"logits": -1.9325114488601685, |
|
"logps": -91.53684997558594, |
|
"loss": 0.3065, |
|
"objective": 0.29817140102386475, |
|
"ranking_idealized": 0.57833331823349, |
|
"ranking_idealized_expo": 0.4983333349227905, |
|
"ranking_simple": 0.7975000143051147, |
|
"regularize": 0.29817140102386475, |
|
"step": 500, |
|
"wo_beta": 3.1243510246276855 |
|
}, |
|
{ |
|
"epoch": 1.4170996693434104, |
|
"eval_dpo_loss": 0.7146824598312378, |
|
"eval_logits": -1.9982556104660034, |
|
"eval_logps": -97.01371002197266, |
|
"eval_loss": 0.717850923538208, |
|
"eval_objective": 0.7146824598312378, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.5548654198646545, |
|
"eval_regularize": 0.7146824598312378, |
|
"eval_runtime": 308.8392, |
|
"eval_samples_per_second": 18.748, |
|
"eval_steps_per_second": 1.564, |
|
"eval_wo_beta": 8.27602767944336, |
|
"step": 500 |
|
}, |
|
{ |
|
"dpo_loss": 0.2845906615257263, |
|
"epoch": 1.5588096362777515, |
|
"grad_norm": 10.66218965165015, |
|
"learning_rate": 4.34319334202531e-06, |
|
"logits": -1.9555292129516602, |
|
"logps": -97.59473419189453, |
|
"loss": 0.2885, |
|
"objective": 0.2845906615257263, |
|
"ranking_idealized": 0.5945833325386047, |
|
"ranking_idealized_expo": 0.5116666555404663, |
|
"ranking_simple": 0.8041666746139526, |
|
"regularize": 0.2845906615257263, |
|
"step": 550, |
|
"wo_beta": 2.6915340423583984 |
|
}, |
|
{ |
|
"epoch": 1.5588096362777515, |
|
"eval_dpo_loss": 0.7133627533912659, |
|
"eval_logits": -1.9041162729263306, |
|
"eval_logps": -107.96095275878906, |
|
"eval_loss": 0.7091230750083923, |
|
"eval_objective": 0.7133627533912659, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.5615941882133484, |
|
"eval_regularize": 0.7133627533912659, |
|
"eval_runtime": 308.0507, |
|
"eval_samples_per_second": 18.796, |
|
"eval_steps_per_second": 1.568, |
|
"eval_wo_beta": 8.196797370910645, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.5588096362777515, |
|
"step": 550, |
|
"total_flos": 0.0, |
|
"train_loss": 0.4483427975394509, |
|
"train_runtime": 15087.4424, |
|
"train_samples_per_second": 16.836, |
|
"train_steps_per_second": 0.117 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 1760, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|