qwen2.5-0.5b-expo-DPO-ES-0.1 / trainer_state.json
hZzy's picture
Model save
88a4513 verified
raw
history blame
14.7 kB
{
"best_metric": 7.4231791496276855,
"best_model_checkpoint": "./qwen2.5-0.5b/qwen2.5-0.5b-expo-DPO-ES-0.1/checkpoint-300",
"epoch": 1.5588096362777515,
"eval_steps": 50,
"global_step": 550,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"dpo_loss": 0.6931471824645996,
"epoch": 0.002834199338686821,
"grad_norm": 18.44253347826331,
"learning_rate": 2.840909090909091e-08,
"logits": -1.359458565711975,
"logps": -84.69721221923828,
"loss": 0.6931,
"objective": 0.6931471824645996,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.5833333134651184,
"regularize": 0.6931471824645996,
"step": 1,
"wo_beta": 5.271125316619873
},
{
"dpo_loss": 0.6822353601455688,
"epoch": 0.14170996693434104,
"grad_norm": 18.641365531241362,
"learning_rate": 1.4204545454545458e-06,
"logits": -1.508646011352539,
"logps": -82.822021484375,
"loss": 0.6785,
"objective": 0.6822353601455688,
"ranking_idealized": 0.608418345451355,
"ranking_idealized_expo": 0.5229591727256775,
"ranking_simple": 0.5267857313156128,
"regularize": 0.6822353601455688,
"step": 50,
"wo_beta": 7.122643947601318
},
{
"epoch": 0.14170996693434104,
"eval_dpo_loss": 0.6842507719993591,
"eval_logits": -1.6022367477416992,
"eval_logps": -90.87205505371094,
"eval_loss": 0.6813501715660095,
"eval_objective": 0.6842507719993591,
"eval_ranking_idealized": 0.6030020713806152,
"eval_ranking_idealized_expo": 0.5222567319869995,
"eval_ranking_simple": 0.5258799195289612,
"eval_regularize": 0.6842507719993591,
"eval_runtime": 308.2905,
"eval_samples_per_second": 18.781,
"eval_steps_per_second": 1.567,
"eval_wo_beta": 7.874889850616455,
"step": 50
},
{
"dpo_loss": 0.61030513048172,
"epoch": 0.2834199338686821,
"grad_norm": 19.29211957833529,
"learning_rate": 2.8409090909090916e-06,
"logits": -1.6967747211456299,
"logps": -88.03068542480469,
"loss": 0.618,
"objective": 0.61030513048172,
"ranking_idealized": 0.6016666889190674,
"ranking_idealized_expo": 0.5141666531562805,
"ranking_simple": 0.5729166865348816,
"regularize": 0.61030513048172,
"step": 100,
"wo_beta": 6.357723236083984
},
{
"epoch": 0.2834199338686821,
"eval_dpo_loss": 0.6765881776809692,
"eval_logits": -1.7799152135849,
"eval_logps": -98.88995361328125,
"eval_loss": 0.6732921004295349,
"eval_objective": 0.6765881776809692,
"eval_ranking_idealized": 0.6030020713806152,
"eval_ranking_idealized_expo": 0.5222567319869995,
"eval_ranking_simple": 0.5398550629615784,
"eval_regularize": 0.6765881776809692,
"eval_runtime": 307.7771,
"eval_samples_per_second": 18.812,
"eval_steps_per_second": 1.569,
"eval_wo_beta": 7.7840118408203125,
"step": 100
},
{
"dpo_loss": 0.5696364045143127,
"epoch": 0.42512990080302315,
"grad_norm": 18.142654654279198,
"learning_rate": 4.2613636363636365e-06,
"logits": -1.7447518110275269,
"logps": -90.71894073486328,
"loss": 0.5667,
"objective": 0.5696364045143127,
"ranking_idealized": 0.6066666841506958,
"ranking_idealized_expo": 0.5287500023841858,
"ranking_simple": 0.6016666889190674,
"regularize": 0.5696364045143127,
"step": 150,
"wo_beta": 5.971243858337402
},
{
"epoch": 0.42512990080302315,
"eval_dpo_loss": 0.6829443573951721,
"eval_logits": -1.807220697402954,
"eval_logps": -99.12174987792969,
"eval_loss": 0.686660647392273,
"eval_objective": 0.6829443573951721,
"eval_ranking_idealized": 0.6030020713806152,
"eval_ranking_idealized_expo": 0.5222567319869995,
"eval_ranking_simple": 0.5408902764320374,
"eval_regularize": 0.6829443573951721,
"eval_runtime": 308.0058,
"eval_samples_per_second": 18.798,
"eval_steps_per_second": 1.568,
"eval_wo_beta": 7.853672981262207,
"step": 150
},
{
"dpo_loss": 0.5278546214103699,
"epoch": 0.5668398677373642,
"grad_norm": 15.355172339669656,
"learning_rate": 4.997168347957521e-06,
"logits": -1.9084746837615967,
"logps": -92.50672149658203,
"loss": 0.5214,
"objective": 0.5278546214103699,
"ranking_idealized": 0.5924999713897705,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.6362500190734863,
"regularize": 0.5278546214103699,
"step": 200,
"wo_beta": 5.543394565582275
},
{
"epoch": 0.5668398677373642,
"eval_dpo_loss": 0.69049072265625,
"eval_logits": -1.8895256519317627,
"eval_logps": -99.51531982421875,
"eval_loss": 0.6901801824569702,
"eval_objective": 0.69049072265625,
"eval_ranking_idealized": 0.6030020713806152,
"eval_ranking_idealized_expo": 0.5222567319869995,
"eval_ranking_simple": 0.544513463973999,
"eval_regularize": 0.69049072265625,
"eval_runtime": 308.0169,
"eval_samples_per_second": 18.798,
"eval_steps_per_second": 1.568,
"eval_wo_beta": 7.701313495635986,
"step": 200
},
{
"dpo_loss": 0.4935472011566162,
"epoch": 0.7085498346717053,
"grad_norm": 14.324676993767012,
"learning_rate": 4.973122855144066e-06,
"logits": -1.9081355333328247,
"logps": -88.3556137084961,
"loss": 0.4922,
"objective": 0.4935472011566162,
"ranking_idealized": 0.5991666913032532,
"ranking_idealized_expo": 0.5170833468437195,
"ranking_simple": 0.6608333587646484,
"regularize": 0.4935472011566162,
"step": 250,
"wo_beta": 5.325418472290039
},
{
"epoch": 0.7085498346717053,
"eval_dpo_loss": 0.6914450526237488,
"eval_logits": -1.9887231588363647,
"eval_logps": -82.8383560180664,
"eval_loss": 0.6975522041320801,
"eval_objective": 0.6914450526237488,
"eval_ranking_idealized": 0.6030020713806152,
"eval_ranking_idealized_expo": 0.5222567319869995,
"eval_ranking_simple": 0.5481366515159607,
"eval_regularize": 0.6914450526237488,
"eval_runtime": 307.641,
"eval_samples_per_second": 18.821,
"eval_steps_per_second": 1.57,
"eval_wo_beta": 7.878448486328125,
"step": 250
},
{
"dpo_loss": 0.4521400034427643,
"epoch": 0.8502598016060463,
"grad_norm": 13.428528764338076,
"learning_rate": 4.924776641419513e-06,
"logits": -2.0954272747039795,
"logps": -81.39044952392578,
"loss": 0.4535,
"objective": 0.4521400034427643,
"ranking_idealized": 0.5799999833106995,
"ranking_idealized_expo": 0.4970833361148834,
"ranking_simple": 0.6837499737739563,
"regularize": 0.4521400034427643,
"step": 300,
"wo_beta": 5.026640892028809
},
{
"epoch": 0.8502598016060463,
"eval_dpo_loss": 0.6893911957740784,
"eval_logits": -2.1208713054656982,
"eval_logps": -90.9490966796875,
"eval_loss": 0.6922824382781982,
"eval_objective": 0.6893911957740784,
"eval_ranking_idealized": 0.6030020713806152,
"eval_ranking_idealized_expo": 0.5222567319869995,
"eval_ranking_simple": 0.556418240070343,
"eval_regularize": 0.6893911957740784,
"eval_runtime": 307.9716,
"eval_samples_per_second": 18.8,
"eval_steps_per_second": 1.568,
"eval_wo_beta": 7.4231791496276855,
"step": 300
},
{
"dpo_loss": 0.40847164392471313,
"epoch": 0.9919697685403873,
"grad_norm": 15.041732370189118,
"learning_rate": 4.8526047530778175e-06,
"logits": -1.9279303550720215,
"logps": -84.23763275146484,
"loss": 0.4228,
"objective": 0.40847164392471313,
"ranking_idealized": 0.60916668176651,
"ranking_idealized_expo": 0.5270833373069763,
"ranking_simple": 0.7191666960716248,
"regularize": 0.40847164392471313,
"step": 350,
"wo_beta": 3.9990389347076416
},
{
"epoch": 0.9919697685403873,
"eval_dpo_loss": 0.6968410611152649,
"eval_logits": -1.9802873134613037,
"eval_logps": -87.72307586669922,
"eval_loss": 0.7063526511192322,
"eval_objective": 0.6968410611152649,
"eval_ranking_idealized": 0.6030020713806152,
"eval_ranking_idealized_expo": 0.5222567319869995,
"eval_ranking_simple": 0.5538302063941956,
"eval_regularize": 0.6968410611152649,
"eval_runtime": 307.977,
"eval_samples_per_second": 18.8,
"eval_steps_per_second": 1.568,
"eval_wo_beta": 8.025344848632812,
"step": 350
},
{
"dpo_loss": 0.28143200278282166,
"epoch": 1.1336797354747283,
"grad_norm": 12.112116162894052,
"learning_rate": 4.757316345716554e-06,
"logits": -2.0127880573272705,
"logps": -91.47932434082031,
"loss": 0.2845,
"objective": 0.28143200278282166,
"ranking_idealized": 0.6087499856948853,
"ranking_idealized_expo": 0.5337499976158142,
"ranking_simple": 0.8141666650772095,
"regularize": 0.28143200278282166,
"step": 400,
"wo_beta": 2.649115562438965
},
{
"epoch": 1.1336797354747283,
"eval_dpo_loss": 0.7269378900527954,
"eval_logits": -2.080526828765869,
"eval_logps": -101.31802368164062,
"eval_loss": 0.7304782867431641,
"eval_objective": 0.7269378900527954,
"eval_ranking_idealized": 0.6030020713806152,
"eval_ranking_idealized_expo": 0.5222567319869995,
"eval_ranking_simple": 0.5429606437683105,
"eval_regularize": 0.7269378900527954,
"eval_runtime": 308.0503,
"eval_samples_per_second": 18.796,
"eval_steps_per_second": 1.568,
"eval_wo_beta": 8.616350173950195,
"step": 400
},
{
"dpo_loss": 0.3042532503604889,
"epoch": 1.2753897024090695,
"grad_norm": 12.171625264502326,
"learning_rate": 4.639847716126855e-06,
"logits": -1.9084649085998535,
"logps": -92.91566467285156,
"loss": 0.2989,
"objective": 0.3042532503604889,
"ranking_idealized": 0.5975000262260437,
"ranking_idealized_expo": 0.5199999809265137,
"ranking_simple": 0.8075000047683716,
"regularize": 0.3042532503604889,
"step": 450,
"wo_beta": 3.341869592666626
},
{
"epoch": 1.2753897024090695,
"eval_dpo_loss": 0.6973706483840942,
"eval_logits": -1.8646337985992432,
"eval_logps": -93.1955337524414,
"eval_loss": 0.7005103826522827,
"eval_objective": 0.6973706483840942,
"eval_ranking_idealized": 0.6030020713806152,
"eval_ranking_idealized_expo": 0.5222567319869995,
"eval_ranking_simple": 0.5605590343475342,
"eval_regularize": 0.6973706483840942,
"eval_runtime": 308.4071,
"eval_samples_per_second": 18.774,
"eval_steps_per_second": 1.566,
"eval_wo_beta": 8.238639831542969,
"step": 450
},
{
"dpo_loss": 0.29817140102386475,
"epoch": 1.4170996693434104,
"grad_norm": 15.895661459470155,
"learning_rate": 4.501353102310901e-06,
"logits": -1.9325114488601685,
"logps": -91.53684997558594,
"loss": 0.3065,
"objective": 0.29817140102386475,
"ranking_idealized": 0.57833331823349,
"ranking_idealized_expo": 0.4983333349227905,
"ranking_simple": 0.7975000143051147,
"regularize": 0.29817140102386475,
"step": 500,
"wo_beta": 3.1243510246276855
},
{
"epoch": 1.4170996693434104,
"eval_dpo_loss": 0.7146824598312378,
"eval_logits": -1.9982556104660034,
"eval_logps": -97.01371002197266,
"eval_loss": 0.717850923538208,
"eval_objective": 0.7146824598312378,
"eval_ranking_idealized": 0.6030020713806152,
"eval_ranking_idealized_expo": 0.5222567319869995,
"eval_ranking_simple": 0.5548654198646545,
"eval_regularize": 0.7146824598312378,
"eval_runtime": 308.8392,
"eval_samples_per_second": 18.748,
"eval_steps_per_second": 1.564,
"eval_wo_beta": 8.27602767944336,
"step": 500
},
{
"dpo_loss": 0.2845906615257263,
"epoch": 1.5588096362777515,
"grad_norm": 10.66218965165015,
"learning_rate": 4.34319334202531e-06,
"logits": -1.9555292129516602,
"logps": -97.59473419189453,
"loss": 0.2885,
"objective": 0.2845906615257263,
"ranking_idealized": 0.5945833325386047,
"ranking_idealized_expo": 0.5116666555404663,
"ranking_simple": 0.8041666746139526,
"regularize": 0.2845906615257263,
"step": 550,
"wo_beta": 2.6915340423583984
},
{
"epoch": 1.5588096362777515,
"eval_dpo_loss": 0.7133627533912659,
"eval_logits": -1.9041162729263306,
"eval_logps": -107.96095275878906,
"eval_loss": 0.7091230750083923,
"eval_objective": 0.7133627533912659,
"eval_ranking_idealized": 0.6030020713806152,
"eval_ranking_idealized_expo": 0.5222567319869995,
"eval_ranking_simple": 0.5615941882133484,
"eval_regularize": 0.7133627533912659,
"eval_runtime": 308.0507,
"eval_samples_per_second": 18.796,
"eval_steps_per_second": 1.568,
"eval_wo_beta": 8.196797370910645,
"step": 550
},
{
"epoch": 1.5588096362777515,
"step": 550,
"total_flos": 0.0,
"train_loss": 0.4483427975394509,
"train_runtime": 15087.4424,
"train_samples_per_second": 16.836,
"train_steps_per_second": 0.117
}
],
"logging_steps": 50,
"max_steps": 1760,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 50,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}