{ "best_metric": 7.426205635070801, "best_model_checkpoint": "./qwen2.5-0.5b/qwen2.5-0.5b-expo-DPO-ES-0.1/checkpoint-300", "epoch": 1.5588096362777515, "eval_steps": 50, "global_step": 550, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "dpo_loss": 0.6931471824645996, "epoch": 0.002834199338686821, "grad_norm": 18.442536934850562, "learning_rate": 2.840909090909091e-08, "logits": -1.359458565711975, "logps": -84.69721221923828, "loss": 0.6931, "objective": 0.6931471824645996, "ranking_idealized": 0.6458333134651184, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.5833333134651184, "regularize": 0.6931471824645996, "step": 1, "wo_beta": 5.271125316619873 }, { "dpo_loss": 0.6822353601455688, "epoch": 0.14170996693434104, "grad_norm": 18.641661833444882, "learning_rate": 1.4204545454545458e-06, "logits": -1.5086464881896973, "logps": -82.8218765258789, "loss": 0.6785, "objective": 0.6822353601455688, "ranking_idealized": 0.608418345451355, "ranking_idealized_expo": 0.5229591727256775, "ranking_simple": 0.5267857313156128, "regularize": 0.6822353601455688, "step": 50, "wo_beta": 7.122647285461426 }, { "epoch": 0.14170996693434104, "eval_dpo_loss": 0.6842505931854248, "eval_logits": -1.6022353172302246, "eval_logps": -90.87158203125, "eval_loss": 0.6813499927520752, "eval_objective": 0.6842505931854248, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5258799195289612, "eval_regularize": 0.6842505931854248, "eval_runtime": 309.4614, "eval_samples_per_second": 18.71, "eval_steps_per_second": 1.561, "eval_wo_beta": 7.874892711639404, "step": 50 }, { "dpo_loss": 0.6103044152259827, "epoch": 0.2834199338686821, "grad_norm": 19.292131977363915, "learning_rate": 2.8409090909090916e-06, "logits": -1.696779727935791, "logps": -88.03015899658203, "loss": 0.618, "objective": 0.6103044152259827, "ranking_idealized": 0.6016666889190674, "ranking_idealized_expo": 0.5141666531562805, "ranking_simple": 0.5729166865348816, "regularize": 0.6103044152259827, "step": 100, "wo_beta": 6.357714653015137 }, { "epoch": 0.2834199338686821, "eval_dpo_loss": 0.6765866279602051, "eval_logits": -1.7799253463745117, "eval_logps": -98.88992309570312, "eval_loss": 0.6732903122901917, "eval_objective": 0.6765866279602051, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5398550629615784, "eval_regularize": 0.6765866279602051, "eval_runtime": 308.813, "eval_samples_per_second": 18.749, "eval_steps_per_second": 1.564, "eval_wo_beta": 7.784023284912109, "step": 100 }, { "dpo_loss": 0.5696373581886292, "epoch": 0.42512990080302315, "grad_norm": 18.14389066821128, "learning_rate": 4.2613636363636365e-06, "logits": -1.744734287261963, "logps": -90.7203140258789, "loss": 0.5667, "objective": 0.5696373581886292, "ranking_idealized": 0.6066666841506958, "ranking_idealized_expo": 0.5287500023841858, "ranking_simple": 0.6016666889190674, "regularize": 0.5696373581886292, "step": 150, "wo_beta": 5.971276760101318 }, { "epoch": 0.42512990080302315, "eval_dpo_loss": 0.6829108595848083, "eval_logits": -1.8072086572647095, "eval_logps": -99.12300109863281, "eval_loss": 0.6866306066513062, "eval_objective": 0.6829108595848083, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5408902764320374, "eval_regularize": 0.6829108595848083, "eval_runtime": 312.2547, "eval_samples_per_second": 18.543, "eval_steps_per_second": 1.547, "eval_wo_beta": 7.8532514572143555, "step": 150 }, { "dpo_loss": 0.5278292298316956, "epoch": 0.5668398677373642, "grad_norm": 15.350860718764396, "learning_rate": 4.997168347957521e-06, "logits": -1.908250093460083, "logps": -92.51087951660156, "loss": 0.5214, "objective": 0.5278292298316956, "ranking_idealized": 0.5924999713897705, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.6362500190734863, "regularize": 0.5278292298316956, "step": 200, "wo_beta": 5.543264389038086 }, { "epoch": 0.5668398677373642, "eval_dpo_loss": 0.6904094815254211, "eval_logits": -1.8893996477127075, "eval_logps": -99.53878784179688, "eval_loss": 0.6900797486305237, "eval_objective": 0.6904094815254211, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.544513463973999, "eval_regularize": 0.6904094815254211, "eval_runtime": 315.3971, "eval_samples_per_second": 18.358, "eval_steps_per_second": 1.531, "eval_wo_beta": 7.699478626251221, "step": 200 }, { "dpo_loss": 0.49355897307395935, "epoch": 0.7085498346717053, "grad_norm": 14.313286499637714, "learning_rate": 4.973122855144066e-06, "logits": -1.9076462984085083, "logps": -88.3504867553711, "loss": 0.4922, "objective": 0.49355897307395935, "ranking_idealized": 0.5991666913032532, "ranking_idealized_expo": 0.5170833468437195, "ranking_simple": 0.6608333587646484, "regularize": 0.49355897307395935, "step": 250, "wo_beta": 5.325013637542725 }, { "epoch": 0.7085498346717053, "eval_dpo_loss": 0.6915506720542908, "eval_logits": -1.9879554510116577, "eval_logps": -82.79730224609375, "eval_loss": 0.6976116895675659, "eval_objective": 0.6915506720542908, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5476190447807312, "eval_regularize": 0.6915506720542908, "eval_runtime": 308.7255, "eval_samples_per_second": 18.755, "eval_steps_per_second": 1.564, "eval_wo_beta": 7.87898588180542, "step": 250 }, { "dpo_loss": 0.4521573483943939, "epoch": 0.8502598016060463, "grad_norm": 13.44014162581437, "learning_rate": 4.924776641419513e-06, "logits": -2.09318470954895, "logps": -81.41643524169922, "loss": 0.4535, "objective": 0.4521573483943939, "ranking_idealized": 0.5799999833106995, "ranking_idealized_expo": 0.4970833361148834, "ranking_simple": 0.6837499737739563, "regularize": 0.4521573483943939, "step": 300, "wo_beta": 5.026339054107666 }, { "epoch": 0.8502598016060463, "eval_dpo_loss": 0.6892624497413635, "eval_logits": -2.120311975479126, "eval_logps": -91.03094482421875, "eval_loss": 0.6920701265335083, "eval_objective": 0.6892624497413635, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5559006333351135, "eval_regularize": 0.6892624497413635, "eval_runtime": 308.4714, "eval_samples_per_second": 18.77, "eval_steps_per_second": 1.566, "eval_wo_beta": 7.426205635070801, "step": 300 }, { "dpo_loss": 0.40864402055740356, "epoch": 0.9919697685403873, "grad_norm": 15.102301026818012, "learning_rate": 4.8526047530778175e-06, "logits": -1.9232014417648315, "logps": -84.39765930175781, "loss": 0.423, "objective": 0.40864402055740356, "ranking_idealized": 0.60916668176651, "ranking_idealized_expo": 0.5270833373069763, "ranking_simple": 0.7174999713897705, "regularize": 0.40864402055740356, "step": 350, "wo_beta": 3.9979019165039062 }, { "epoch": 0.9919697685403873, "eval_dpo_loss": 0.6959461569786072, "eval_logits": -1.987973690032959, "eval_logps": -88.1614990234375, "eval_loss": 0.7056758403778076, "eval_objective": 0.6959461569786072, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5548654198646545, "eval_regularize": 0.6959461569786072, "eval_runtime": 309.9548, "eval_samples_per_second": 18.68, "eval_steps_per_second": 1.558, "eval_wo_beta": 7.997907638549805, "step": 350 }, { "dpo_loss": 0.28186026215553284, "epoch": 1.1336797354747283, "grad_norm": 12.122355016988976, "learning_rate": 4.757316345716554e-06, "logits": -2.0241637229919434, "logps": -91.86678314208984, "loss": 0.2847, "objective": 0.28186026215553284, "ranking_idealized": 0.6087499856948853, "ranking_idealized_expo": 0.5337499976158142, "ranking_simple": 0.8145833611488342, "regularize": 0.28186026215553284, "step": 400, "wo_beta": 2.646965980529785 }, { "epoch": 1.1336797354747283, "eval_dpo_loss": 0.7281294465065002, "eval_logits": -2.0862255096435547, "eval_logps": -101.69258117675781, "eval_loss": 0.7314654588699341, "eval_objective": 0.7281294465065002, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.542443037033081, "eval_regularize": 0.7281294465065002, "eval_runtime": 308.7063, "eval_samples_per_second": 18.756, "eval_steps_per_second": 1.565, "eval_wo_beta": 8.632596969604492, "step": 400 }, { "dpo_loss": 0.30417078733444214, "epoch": 1.2753897024090695, "grad_norm": 12.255120909837279, "learning_rate": 4.639847716126855e-06, "logits": -1.9048844575881958, "logps": -92.96432495117188, "loss": 0.2991, "objective": 0.30417078733444214, "ranking_idealized": 0.5975000262260437, "ranking_idealized_expo": 0.5199999809265137, "ranking_simple": 0.8087499737739563, "regularize": 0.30417078733444214, "step": 450, "wo_beta": 3.335141658782959 }, { "epoch": 1.2753897024090695, "eval_dpo_loss": 0.6979546546936035, "eval_logits": -1.8470289707183838, "eval_logps": -92.79419708251953, "eval_loss": 0.7008146047592163, "eval_objective": 0.6979546546936035, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5621117949485779, "eval_regularize": 0.6979546546936035, "eval_runtime": 308.7789, "eval_samples_per_second": 18.751, "eval_steps_per_second": 1.564, "eval_wo_beta": 8.258440017700195, "step": 450 }, { "dpo_loss": 0.29814377427101135, "epoch": 1.4170996693434104, "grad_norm": 15.647218082922008, "learning_rate": 4.501353102310901e-06, "logits": -1.9365119934082031, "logps": -90.89854431152344, "loss": 0.3065, "objective": 0.29814377427101135, "ranking_idealized": 0.57833331823349, "ranking_idealized_expo": 0.4983333349227905, "ranking_simple": 0.7991666793823242, "regularize": 0.29814377427101135, "step": 500, "wo_beta": 3.127906560897827 }, { "epoch": 1.4170996693434104, "eval_dpo_loss": 0.7147387266159058, "eval_logits": -2.006500005722046, "eval_logps": -96.67472076416016, "eval_loss": 0.7179672122001648, "eval_objective": 0.7147387266159058, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.555383026599884, "eval_regularize": 0.7147387266159058, "eval_runtime": 309.3319, "eval_samples_per_second": 18.718, "eval_steps_per_second": 1.561, "eval_wo_beta": 8.252218246459961, "step": 500 }, { "dpo_loss": 0.2861484885215759, "epoch": 1.5588096362777515, "grad_norm": 10.885590323378537, "learning_rate": 4.34319334202531e-06, "logits": -1.9361701011657715, "logps": -95.84064483642578, "loss": 0.2895, "objective": 0.2861484885215759, "ranking_idealized": 0.5945833325386047, "ranking_idealized_expo": 0.5116666555404663, "ranking_simple": 0.8066666722297668, "regularize": 0.2861484885215759, "step": 550, "wo_beta": 2.692445993423462 }, { "epoch": 1.5588096362777515, "eval_dpo_loss": 0.7076632976531982, "eval_logits": -1.887025237083435, "eval_logps": -104.24694061279297, "eval_loss": 0.7044315338134766, "eval_objective": 0.7076632976531982, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5652173757553101, "eval_regularize": 0.7076632976531982, "eval_runtime": 309.8676, "eval_samples_per_second": 18.685, "eval_steps_per_second": 1.559, "eval_wo_beta": 8.194681167602539, "step": 550 }, { "epoch": 1.5588096362777515, "step": 550, "total_flos": 0.0, "train_loss": 0.4484944924441251, "train_runtime": 15200.6621, "train_samples_per_second": 16.71, "train_steps_per_second": 0.116 } ], "logging_steps": 50, "max_steps": 1760, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }