{ "best_metric": 7.4231791496276855, "best_model_checkpoint": "./qwen2.5-0.5b/qwen2.5-0.5b-expo-DPO-ES-0.1/checkpoint-300", "epoch": 1.5588096362777515, "eval_steps": 50, "global_step": 550, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "dpo_loss": 0.6931471824645996, "epoch": 0.002834199338686821, "grad_norm": 18.44253347826331, "learning_rate": 2.840909090909091e-08, "logits": -1.359458565711975, "logps": -84.69721221923828, "loss": 0.6931, "objective": 0.6931471824645996, "ranking_idealized": 0.6458333134651184, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.5833333134651184, "regularize": 0.6931471824645996, "step": 1, "wo_beta": 5.271125316619873 }, { "dpo_loss": 0.6822353601455688, "epoch": 0.14170996693434104, "grad_norm": 18.641365531241362, "learning_rate": 1.4204545454545458e-06, "logits": -1.508646011352539, "logps": -82.822021484375, "loss": 0.6785, "objective": 0.6822353601455688, "ranking_idealized": 0.608418345451355, "ranking_idealized_expo": 0.5229591727256775, "ranking_simple": 0.5267857313156128, "regularize": 0.6822353601455688, "step": 50, "wo_beta": 7.122643947601318 }, { "epoch": 0.14170996693434104, "eval_dpo_loss": 0.6842507719993591, "eval_logits": -1.6022367477416992, "eval_logps": -90.87205505371094, "eval_loss": 0.6813501715660095, "eval_objective": 0.6842507719993591, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5258799195289612, "eval_regularize": 0.6842507719993591, "eval_runtime": 308.2905, "eval_samples_per_second": 18.781, "eval_steps_per_second": 1.567, "eval_wo_beta": 7.874889850616455, "step": 50 }, { "dpo_loss": 0.61030513048172, "epoch": 0.2834199338686821, "grad_norm": 19.29211957833529, "learning_rate": 2.8409090909090916e-06, "logits": -1.6967747211456299, "logps": -88.03068542480469, "loss": 0.618, "objective": 0.61030513048172, "ranking_idealized": 0.6016666889190674, "ranking_idealized_expo": 0.5141666531562805, "ranking_simple": 0.5729166865348816, "regularize": 0.61030513048172, "step": 100, "wo_beta": 6.357723236083984 }, { "epoch": 0.2834199338686821, "eval_dpo_loss": 0.6765881776809692, "eval_logits": -1.7799152135849, "eval_logps": -98.88995361328125, "eval_loss": 0.6732921004295349, "eval_objective": 0.6765881776809692, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5398550629615784, "eval_regularize": 0.6765881776809692, "eval_runtime": 307.7771, "eval_samples_per_second": 18.812, "eval_steps_per_second": 1.569, "eval_wo_beta": 7.7840118408203125, "step": 100 }, { "dpo_loss": 0.5696364045143127, "epoch": 0.42512990080302315, "grad_norm": 18.142654654279198, "learning_rate": 4.2613636363636365e-06, "logits": -1.7447518110275269, "logps": -90.71894073486328, "loss": 0.5667, "objective": 0.5696364045143127, "ranking_idealized": 0.6066666841506958, "ranking_idealized_expo": 0.5287500023841858, "ranking_simple": 0.6016666889190674, "regularize": 0.5696364045143127, "step": 150, "wo_beta": 5.971243858337402 }, { "epoch": 0.42512990080302315, "eval_dpo_loss": 0.6829443573951721, "eval_logits": -1.807220697402954, "eval_logps": -99.12174987792969, "eval_loss": 0.686660647392273, "eval_objective": 0.6829443573951721, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5408902764320374, "eval_regularize": 0.6829443573951721, "eval_runtime": 308.0058, "eval_samples_per_second": 18.798, "eval_steps_per_second": 1.568, "eval_wo_beta": 7.853672981262207, "step": 150 }, { "dpo_loss": 0.5278546214103699, "epoch": 0.5668398677373642, "grad_norm": 15.355172339669656, "learning_rate": 4.997168347957521e-06, "logits": -1.9084746837615967, "logps": -92.50672149658203, "loss": 0.5214, "objective": 0.5278546214103699, "ranking_idealized": 0.5924999713897705, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.6362500190734863, "regularize": 0.5278546214103699, "step": 200, "wo_beta": 5.543394565582275 }, { "epoch": 0.5668398677373642, "eval_dpo_loss": 0.69049072265625, "eval_logits": -1.8895256519317627, "eval_logps": -99.51531982421875, "eval_loss": 0.6901801824569702, "eval_objective": 0.69049072265625, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.544513463973999, "eval_regularize": 0.69049072265625, "eval_runtime": 308.0169, "eval_samples_per_second": 18.798, "eval_steps_per_second": 1.568, "eval_wo_beta": 7.701313495635986, "step": 200 }, { "dpo_loss": 0.4935472011566162, "epoch": 0.7085498346717053, "grad_norm": 14.324676993767012, "learning_rate": 4.973122855144066e-06, "logits": -1.9081355333328247, "logps": -88.3556137084961, "loss": 0.4922, "objective": 0.4935472011566162, "ranking_idealized": 0.5991666913032532, "ranking_idealized_expo": 0.5170833468437195, "ranking_simple": 0.6608333587646484, "regularize": 0.4935472011566162, "step": 250, "wo_beta": 5.325418472290039 }, { "epoch": 0.7085498346717053, "eval_dpo_loss": 0.6914450526237488, "eval_logits": -1.9887231588363647, "eval_logps": -82.8383560180664, "eval_loss": 0.6975522041320801, "eval_objective": 0.6914450526237488, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5481366515159607, "eval_regularize": 0.6914450526237488, "eval_runtime": 307.641, "eval_samples_per_second": 18.821, "eval_steps_per_second": 1.57, "eval_wo_beta": 7.878448486328125, "step": 250 }, { "dpo_loss": 0.4521400034427643, "epoch": 0.8502598016060463, "grad_norm": 13.428528764338076, "learning_rate": 4.924776641419513e-06, "logits": -2.0954272747039795, "logps": -81.39044952392578, "loss": 0.4535, "objective": 0.4521400034427643, "ranking_idealized": 0.5799999833106995, "ranking_idealized_expo": 0.4970833361148834, "ranking_simple": 0.6837499737739563, "regularize": 0.4521400034427643, "step": 300, "wo_beta": 5.026640892028809 }, { "epoch": 0.8502598016060463, "eval_dpo_loss": 0.6893911957740784, "eval_logits": -2.1208713054656982, "eval_logps": -90.9490966796875, "eval_loss": 0.6922824382781982, "eval_objective": 0.6893911957740784, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.556418240070343, "eval_regularize": 0.6893911957740784, "eval_runtime": 307.9716, "eval_samples_per_second": 18.8, "eval_steps_per_second": 1.568, "eval_wo_beta": 7.4231791496276855, "step": 300 }, { "dpo_loss": 0.40847164392471313, "epoch": 0.9919697685403873, "grad_norm": 15.041732370189118, "learning_rate": 4.8526047530778175e-06, "logits": -1.9279303550720215, "logps": -84.23763275146484, "loss": 0.4228, "objective": 0.40847164392471313, "ranking_idealized": 0.60916668176651, "ranking_idealized_expo": 0.5270833373069763, "ranking_simple": 0.7191666960716248, "regularize": 0.40847164392471313, "step": 350, "wo_beta": 3.9990389347076416 }, { "epoch": 0.9919697685403873, "eval_dpo_loss": 0.6968410611152649, "eval_logits": -1.9802873134613037, "eval_logps": -87.72307586669922, "eval_loss": 0.7063526511192322, "eval_objective": 0.6968410611152649, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5538302063941956, "eval_regularize": 0.6968410611152649, "eval_runtime": 307.977, "eval_samples_per_second": 18.8, "eval_steps_per_second": 1.568, "eval_wo_beta": 8.025344848632812, "step": 350 }, { "dpo_loss": 0.28143200278282166, "epoch": 1.1336797354747283, "grad_norm": 12.112116162894052, "learning_rate": 4.757316345716554e-06, "logits": -2.0127880573272705, "logps": -91.47932434082031, "loss": 0.2845, "objective": 0.28143200278282166, "ranking_idealized": 0.6087499856948853, "ranking_idealized_expo": 0.5337499976158142, "ranking_simple": 0.8141666650772095, "regularize": 0.28143200278282166, "step": 400, "wo_beta": 2.649115562438965 }, { "epoch": 1.1336797354747283, "eval_dpo_loss": 0.7269378900527954, "eval_logits": -2.080526828765869, "eval_logps": -101.31802368164062, "eval_loss": 0.7304782867431641, "eval_objective": 0.7269378900527954, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5429606437683105, "eval_regularize": 0.7269378900527954, "eval_runtime": 308.0503, "eval_samples_per_second": 18.796, "eval_steps_per_second": 1.568, "eval_wo_beta": 8.616350173950195, "step": 400 }, { "dpo_loss": 0.3042532503604889, "epoch": 1.2753897024090695, "grad_norm": 12.171625264502326, "learning_rate": 4.639847716126855e-06, "logits": -1.9084649085998535, "logps": -92.91566467285156, "loss": 0.2989, "objective": 0.3042532503604889, "ranking_idealized": 0.5975000262260437, "ranking_idealized_expo": 0.5199999809265137, "ranking_simple": 0.8075000047683716, "regularize": 0.3042532503604889, "step": 450, "wo_beta": 3.341869592666626 }, { "epoch": 1.2753897024090695, "eval_dpo_loss": 0.6973706483840942, "eval_logits": -1.8646337985992432, "eval_logps": -93.1955337524414, "eval_loss": 0.7005103826522827, "eval_objective": 0.6973706483840942, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5605590343475342, "eval_regularize": 0.6973706483840942, "eval_runtime": 308.4071, "eval_samples_per_second": 18.774, "eval_steps_per_second": 1.566, "eval_wo_beta": 8.238639831542969, "step": 450 }, { "dpo_loss": 0.29817140102386475, "epoch": 1.4170996693434104, "grad_norm": 15.895661459470155, "learning_rate": 4.501353102310901e-06, "logits": -1.9325114488601685, "logps": -91.53684997558594, "loss": 0.3065, "objective": 0.29817140102386475, "ranking_idealized": 0.57833331823349, "ranking_idealized_expo": 0.4983333349227905, "ranking_simple": 0.7975000143051147, "regularize": 0.29817140102386475, "step": 500, "wo_beta": 3.1243510246276855 }, { "epoch": 1.4170996693434104, "eval_dpo_loss": 0.7146824598312378, "eval_logits": -1.9982556104660034, "eval_logps": -97.01371002197266, "eval_loss": 0.717850923538208, "eval_objective": 0.7146824598312378, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5548654198646545, "eval_regularize": 0.7146824598312378, "eval_runtime": 308.8392, "eval_samples_per_second": 18.748, "eval_steps_per_second": 1.564, "eval_wo_beta": 8.27602767944336, "step": 500 }, { "dpo_loss": 0.2845906615257263, "epoch": 1.5588096362777515, "grad_norm": 10.66218965165015, "learning_rate": 4.34319334202531e-06, "logits": -1.9555292129516602, "logps": -97.59473419189453, "loss": 0.2885, "objective": 0.2845906615257263, "ranking_idealized": 0.5945833325386047, "ranking_idealized_expo": 0.5116666555404663, "ranking_simple": 0.8041666746139526, "regularize": 0.2845906615257263, "step": 550, "wo_beta": 2.6915340423583984 }, { "epoch": 1.5588096362777515, "eval_dpo_loss": 0.7133627533912659, "eval_logits": -1.9041162729263306, "eval_logps": -107.96095275878906, "eval_loss": 0.7091230750083923, "eval_objective": 0.7133627533912659, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5615941882133484, "eval_regularize": 0.7133627533912659, "eval_runtime": 308.0507, "eval_samples_per_second": 18.796, "eval_steps_per_second": 1.568, "eval_wo_beta": 8.196797370910645, "step": 550 }, { "epoch": 1.5588096362777515, "step": 550, "total_flos": 0.0, "train_loss": 0.4483427975394509, "train_runtime": 15087.4424, "train_samples_per_second": 16.836, "train_steps_per_second": 0.117 } ], "logging_steps": 50, "max_steps": 1760, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }