|
{ |
|
"best_metric": 7.426205635070801, |
|
"best_model_checkpoint": "./qwen2.5-0.5b/qwen2.5-0.5b-expo-DPO-ES-0.1/checkpoint-300", |
|
"epoch": 1.5588096362777515, |
|
"eval_steps": 50, |
|
"global_step": 550, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"dpo_loss": 0.6931471824645996, |
|
"epoch": 0.002834199338686821, |
|
"grad_norm": 18.442536934850562, |
|
"learning_rate": 2.840909090909091e-08, |
|
"logits": -1.359458565711975, |
|
"logps": -84.69721221923828, |
|
"loss": 0.6931, |
|
"objective": 0.6931471824645996, |
|
"ranking_idealized": 0.6458333134651184, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.6931471824645996, |
|
"step": 1, |
|
"wo_beta": 5.271125316619873 |
|
}, |
|
{ |
|
"dpo_loss": 0.6822353601455688, |
|
"epoch": 0.14170996693434104, |
|
"grad_norm": 18.641661833444882, |
|
"learning_rate": 1.4204545454545458e-06, |
|
"logits": -1.5086464881896973, |
|
"logps": -82.8218765258789, |
|
"loss": 0.6785, |
|
"objective": 0.6822353601455688, |
|
"ranking_idealized": 0.608418345451355, |
|
"ranking_idealized_expo": 0.5229591727256775, |
|
"ranking_simple": 0.5267857313156128, |
|
"regularize": 0.6822353601455688, |
|
"step": 50, |
|
"wo_beta": 7.122647285461426 |
|
}, |
|
{ |
|
"epoch": 0.14170996693434104, |
|
"eval_dpo_loss": 0.6842505931854248, |
|
"eval_logits": -1.6022353172302246, |
|
"eval_logps": -90.87158203125, |
|
"eval_loss": 0.6813499927520752, |
|
"eval_objective": 0.6842505931854248, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.5258799195289612, |
|
"eval_regularize": 0.6842505931854248, |
|
"eval_runtime": 309.4614, |
|
"eval_samples_per_second": 18.71, |
|
"eval_steps_per_second": 1.561, |
|
"eval_wo_beta": 7.874892711639404, |
|
"step": 50 |
|
}, |
|
{ |
|
"dpo_loss": 0.6103044152259827, |
|
"epoch": 0.2834199338686821, |
|
"grad_norm": 19.292131977363915, |
|
"learning_rate": 2.8409090909090916e-06, |
|
"logits": -1.696779727935791, |
|
"logps": -88.03015899658203, |
|
"loss": 0.618, |
|
"objective": 0.6103044152259827, |
|
"ranking_idealized": 0.6016666889190674, |
|
"ranking_idealized_expo": 0.5141666531562805, |
|
"ranking_simple": 0.5729166865348816, |
|
"regularize": 0.6103044152259827, |
|
"step": 100, |
|
"wo_beta": 6.357714653015137 |
|
}, |
|
{ |
|
"epoch": 0.2834199338686821, |
|
"eval_dpo_loss": 0.6765866279602051, |
|
"eval_logits": -1.7799253463745117, |
|
"eval_logps": -98.88992309570312, |
|
"eval_loss": 0.6732903122901917, |
|
"eval_objective": 0.6765866279602051, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.5398550629615784, |
|
"eval_regularize": 0.6765866279602051, |
|
"eval_runtime": 308.813, |
|
"eval_samples_per_second": 18.749, |
|
"eval_steps_per_second": 1.564, |
|
"eval_wo_beta": 7.784023284912109, |
|
"step": 100 |
|
}, |
|
{ |
|
"dpo_loss": 0.5696373581886292, |
|
"epoch": 0.42512990080302315, |
|
"grad_norm": 18.14389066821128, |
|
"learning_rate": 4.2613636363636365e-06, |
|
"logits": -1.744734287261963, |
|
"logps": -90.7203140258789, |
|
"loss": 0.5667, |
|
"objective": 0.5696373581886292, |
|
"ranking_idealized": 0.6066666841506958, |
|
"ranking_idealized_expo": 0.5287500023841858, |
|
"ranking_simple": 0.6016666889190674, |
|
"regularize": 0.5696373581886292, |
|
"step": 150, |
|
"wo_beta": 5.971276760101318 |
|
}, |
|
{ |
|
"epoch": 0.42512990080302315, |
|
"eval_dpo_loss": 0.6829108595848083, |
|
"eval_logits": -1.8072086572647095, |
|
"eval_logps": -99.12300109863281, |
|
"eval_loss": 0.6866306066513062, |
|
"eval_objective": 0.6829108595848083, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.5408902764320374, |
|
"eval_regularize": 0.6829108595848083, |
|
"eval_runtime": 312.2547, |
|
"eval_samples_per_second": 18.543, |
|
"eval_steps_per_second": 1.547, |
|
"eval_wo_beta": 7.8532514572143555, |
|
"step": 150 |
|
}, |
|
{ |
|
"dpo_loss": 0.5278292298316956, |
|
"epoch": 0.5668398677373642, |
|
"grad_norm": 15.350860718764396, |
|
"learning_rate": 4.997168347957521e-06, |
|
"logits": -1.908250093460083, |
|
"logps": -92.51087951660156, |
|
"loss": 0.5214, |
|
"objective": 0.5278292298316956, |
|
"ranking_idealized": 0.5924999713897705, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.6362500190734863, |
|
"regularize": 0.5278292298316956, |
|
"step": 200, |
|
"wo_beta": 5.543264389038086 |
|
}, |
|
{ |
|
"epoch": 0.5668398677373642, |
|
"eval_dpo_loss": 0.6904094815254211, |
|
"eval_logits": -1.8893996477127075, |
|
"eval_logps": -99.53878784179688, |
|
"eval_loss": 0.6900797486305237, |
|
"eval_objective": 0.6904094815254211, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.544513463973999, |
|
"eval_regularize": 0.6904094815254211, |
|
"eval_runtime": 315.3971, |
|
"eval_samples_per_second": 18.358, |
|
"eval_steps_per_second": 1.531, |
|
"eval_wo_beta": 7.699478626251221, |
|
"step": 200 |
|
}, |
|
{ |
|
"dpo_loss": 0.49355897307395935, |
|
"epoch": 0.7085498346717053, |
|
"grad_norm": 14.313286499637714, |
|
"learning_rate": 4.973122855144066e-06, |
|
"logits": -1.9076462984085083, |
|
"logps": -88.3504867553711, |
|
"loss": 0.4922, |
|
"objective": 0.49355897307395935, |
|
"ranking_idealized": 0.5991666913032532, |
|
"ranking_idealized_expo": 0.5170833468437195, |
|
"ranking_simple": 0.6608333587646484, |
|
"regularize": 0.49355897307395935, |
|
"step": 250, |
|
"wo_beta": 5.325013637542725 |
|
}, |
|
{ |
|
"epoch": 0.7085498346717053, |
|
"eval_dpo_loss": 0.6915506720542908, |
|
"eval_logits": -1.9879554510116577, |
|
"eval_logps": -82.79730224609375, |
|
"eval_loss": 0.6976116895675659, |
|
"eval_objective": 0.6915506720542908, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.5476190447807312, |
|
"eval_regularize": 0.6915506720542908, |
|
"eval_runtime": 308.7255, |
|
"eval_samples_per_second": 18.755, |
|
"eval_steps_per_second": 1.564, |
|
"eval_wo_beta": 7.87898588180542, |
|
"step": 250 |
|
}, |
|
{ |
|
"dpo_loss": 0.4521573483943939, |
|
"epoch": 0.8502598016060463, |
|
"grad_norm": 13.44014162581437, |
|
"learning_rate": 4.924776641419513e-06, |
|
"logits": -2.09318470954895, |
|
"logps": -81.41643524169922, |
|
"loss": 0.4535, |
|
"objective": 0.4521573483943939, |
|
"ranking_idealized": 0.5799999833106995, |
|
"ranking_idealized_expo": 0.4970833361148834, |
|
"ranking_simple": 0.6837499737739563, |
|
"regularize": 0.4521573483943939, |
|
"step": 300, |
|
"wo_beta": 5.026339054107666 |
|
}, |
|
{ |
|
"epoch": 0.8502598016060463, |
|
"eval_dpo_loss": 0.6892624497413635, |
|
"eval_logits": -2.120311975479126, |
|
"eval_logps": -91.03094482421875, |
|
"eval_loss": 0.6920701265335083, |
|
"eval_objective": 0.6892624497413635, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.5559006333351135, |
|
"eval_regularize": 0.6892624497413635, |
|
"eval_runtime": 308.4714, |
|
"eval_samples_per_second": 18.77, |
|
"eval_steps_per_second": 1.566, |
|
"eval_wo_beta": 7.426205635070801, |
|
"step": 300 |
|
}, |
|
{ |
|
"dpo_loss": 0.40864402055740356, |
|
"epoch": 0.9919697685403873, |
|
"grad_norm": 15.102301026818012, |
|
"learning_rate": 4.8526047530778175e-06, |
|
"logits": -1.9232014417648315, |
|
"logps": -84.39765930175781, |
|
"loss": 0.423, |
|
"objective": 0.40864402055740356, |
|
"ranking_idealized": 0.60916668176651, |
|
"ranking_idealized_expo": 0.5270833373069763, |
|
"ranking_simple": 0.7174999713897705, |
|
"regularize": 0.40864402055740356, |
|
"step": 350, |
|
"wo_beta": 3.9979019165039062 |
|
}, |
|
{ |
|
"epoch": 0.9919697685403873, |
|
"eval_dpo_loss": 0.6959461569786072, |
|
"eval_logits": -1.987973690032959, |
|
"eval_logps": -88.1614990234375, |
|
"eval_loss": 0.7056758403778076, |
|
"eval_objective": 0.6959461569786072, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.5548654198646545, |
|
"eval_regularize": 0.6959461569786072, |
|
"eval_runtime": 309.9548, |
|
"eval_samples_per_second": 18.68, |
|
"eval_steps_per_second": 1.558, |
|
"eval_wo_beta": 7.997907638549805, |
|
"step": 350 |
|
}, |
|
{ |
|
"dpo_loss": 0.28186026215553284, |
|
"epoch": 1.1336797354747283, |
|
"grad_norm": 12.122355016988976, |
|
"learning_rate": 4.757316345716554e-06, |
|
"logits": -2.0241637229919434, |
|
"logps": -91.86678314208984, |
|
"loss": 0.2847, |
|
"objective": 0.28186026215553284, |
|
"ranking_idealized": 0.6087499856948853, |
|
"ranking_idealized_expo": 0.5337499976158142, |
|
"ranking_simple": 0.8145833611488342, |
|
"regularize": 0.28186026215553284, |
|
"step": 400, |
|
"wo_beta": 2.646965980529785 |
|
}, |
|
{ |
|
"epoch": 1.1336797354747283, |
|
"eval_dpo_loss": 0.7281294465065002, |
|
"eval_logits": -2.0862255096435547, |
|
"eval_logps": -101.69258117675781, |
|
"eval_loss": 0.7314654588699341, |
|
"eval_objective": 0.7281294465065002, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.542443037033081, |
|
"eval_regularize": 0.7281294465065002, |
|
"eval_runtime": 308.7063, |
|
"eval_samples_per_second": 18.756, |
|
"eval_steps_per_second": 1.565, |
|
"eval_wo_beta": 8.632596969604492, |
|
"step": 400 |
|
}, |
|
{ |
|
"dpo_loss": 0.30417078733444214, |
|
"epoch": 1.2753897024090695, |
|
"grad_norm": 12.255120909837279, |
|
"learning_rate": 4.639847716126855e-06, |
|
"logits": -1.9048844575881958, |
|
"logps": -92.96432495117188, |
|
"loss": 0.2991, |
|
"objective": 0.30417078733444214, |
|
"ranking_idealized": 0.5975000262260437, |
|
"ranking_idealized_expo": 0.5199999809265137, |
|
"ranking_simple": 0.8087499737739563, |
|
"regularize": 0.30417078733444214, |
|
"step": 450, |
|
"wo_beta": 3.335141658782959 |
|
}, |
|
{ |
|
"epoch": 1.2753897024090695, |
|
"eval_dpo_loss": 0.6979546546936035, |
|
"eval_logits": -1.8470289707183838, |
|
"eval_logps": -92.79419708251953, |
|
"eval_loss": 0.7008146047592163, |
|
"eval_objective": 0.6979546546936035, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.5621117949485779, |
|
"eval_regularize": 0.6979546546936035, |
|
"eval_runtime": 308.7789, |
|
"eval_samples_per_second": 18.751, |
|
"eval_steps_per_second": 1.564, |
|
"eval_wo_beta": 8.258440017700195, |
|
"step": 450 |
|
}, |
|
{ |
|
"dpo_loss": 0.29814377427101135, |
|
"epoch": 1.4170996693434104, |
|
"grad_norm": 15.647218082922008, |
|
"learning_rate": 4.501353102310901e-06, |
|
"logits": -1.9365119934082031, |
|
"logps": -90.89854431152344, |
|
"loss": 0.3065, |
|
"objective": 0.29814377427101135, |
|
"ranking_idealized": 0.57833331823349, |
|
"ranking_idealized_expo": 0.4983333349227905, |
|
"ranking_simple": 0.7991666793823242, |
|
"regularize": 0.29814377427101135, |
|
"step": 500, |
|
"wo_beta": 3.127906560897827 |
|
}, |
|
{ |
|
"epoch": 1.4170996693434104, |
|
"eval_dpo_loss": 0.7147387266159058, |
|
"eval_logits": -2.006500005722046, |
|
"eval_logps": -96.67472076416016, |
|
"eval_loss": 0.7179672122001648, |
|
"eval_objective": 0.7147387266159058, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.555383026599884, |
|
"eval_regularize": 0.7147387266159058, |
|
"eval_runtime": 309.3319, |
|
"eval_samples_per_second": 18.718, |
|
"eval_steps_per_second": 1.561, |
|
"eval_wo_beta": 8.252218246459961, |
|
"step": 500 |
|
}, |
|
{ |
|
"dpo_loss": 0.2861484885215759, |
|
"epoch": 1.5588096362777515, |
|
"grad_norm": 10.885590323378537, |
|
"learning_rate": 4.34319334202531e-06, |
|
"logits": -1.9361701011657715, |
|
"logps": -95.84064483642578, |
|
"loss": 0.2895, |
|
"objective": 0.2861484885215759, |
|
"ranking_idealized": 0.5945833325386047, |
|
"ranking_idealized_expo": 0.5116666555404663, |
|
"ranking_simple": 0.8066666722297668, |
|
"regularize": 0.2861484885215759, |
|
"step": 550, |
|
"wo_beta": 2.692445993423462 |
|
}, |
|
{ |
|
"epoch": 1.5588096362777515, |
|
"eval_dpo_loss": 0.7076632976531982, |
|
"eval_logits": -1.887025237083435, |
|
"eval_logps": -104.24694061279297, |
|
"eval_loss": 0.7044315338134766, |
|
"eval_objective": 0.7076632976531982, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.5652173757553101, |
|
"eval_regularize": 0.7076632976531982, |
|
"eval_runtime": 309.8676, |
|
"eval_samples_per_second": 18.685, |
|
"eval_steps_per_second": 1.559, |
|
"eval_wo_beta": 8.194681167602539, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.5588096362777515, |
|
"step": 550, |
|
"total_flos": 0.0, |
|
"train_loss": 0.4484944924441251, |
|
"train_runtime": 15200.6621, |
|
"train_samples_per_second": 16.71, |
|
"train_steps_per_second": 0.116 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 1760, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|