qwen2.5-0.5b-expo-DPO-ES-0.1 / trainer_state.json
hZzy's picture
Model save
17e9f1f verified
{
"best_metric": 7.426205635070801,
"best_model_checkpoint": "./qwen2.5-0.5b/qwen2.5-0.5b-expo-DPO-ES-0.1/checkpoint-300",
"epoch": 1.5588096362777515,
"eval_steps": 50,
"global_step": 550,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"dpo_loss": 0.6931471824645996,
"epoch": 0.002834199338686821,
"grad_norm": 18.442536934850562,
"learning_rate": 2.840909090909091e-08,
"logits": -1.359458565711975,
"logps": -84.69721221923828,
"loss": 0.6931,
"objective": 0.6931471824645996,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.5833333134651184,
"regularize": 0.6931471824645996,
"step": 1,
"wo_beta": 5.271125316619873
},
{
"dpo_loss": 0.6822353601455688,
"epoch": 0.14170996693434104,
"grad_norm": 18.641661833444882,
"learning_rate": 1.4204545454545458e-06,
"logits": -1.5086464881896973,
"logps": -82.8218765258789,
"loss": 0.6785,
"objective": 0.6822353601455688,
"ranking_idealized": 0.608418345451355,
"ranking_idealized_expo": 0.5229591727256775,
"ranking_simple": 0.5267857313156128,
"regularize": 0.6822353601455688,
"step": 50,
"wo_beta": 7.122647285461426
},
{
"epoch": 0.14170996693434104,
"eval_dpo_loss": 0.6842505931854248,
"eval_logits": -1.6022353172302246,
"eval_logps": -90.87158203125,
"eval_loss": 0.6813499927520752,
"eval_objective": 0.6842505931854248,
"eval_ranking_idealized": 0.6030020713806152,
"eval_ranking_idealized_expo": 0.5222567319869995,
"eval_ranking_simple": 0.5258799195289612,
"eval_regularize": 0.6842505931854248,
"eval_runtime": 309.4614,
"eval_samples_per_second": 18.71,
"eval_steps_per_second": 1.561,
"eval_wo_beta": 7.874892711639404,
"step": 50
},
{
"dpo_loss": 0.6103044152259827,
"epoch": 0.2834199338686821,
"grad_norm": 19.292131977363915,
"learning_rate": 2.8409090909090916e-06,
"logits": -1.696779727935791,
"logps": -88.03015899658203,
"loss": 0.618,
"objective": 0.6103044152259827,
"ranking_idealized": 0.6016666889190674,
"ranking_idealized_expo": 0.5141666531562805,
"ranking_simple": 0.5729166865348816,
"regularize": 0.6103044152259827,
"step": 100,
"wo_beta": 6.357714653015137
},
{
"epoch": 0.2834199338686821,
"eval_dpo_loss": 0.6765866279602051,
"eval_logits": -1.7799253463745117,
"eval_logps": -98.88992309570312,
"eval_loss": 0.6732903122901917,
"eval_objective": 0.6765866279602051,
"eval_ranking_idealized": 0.6030020713806152,
"eval_ranking_idealized_expo": 0.5222567319869995,
"eval_ranking_simple": 0.5398550629615784,
"eval_regularize": 0.6765866279602051,
"eval_runtime": 308.813,
"eval_samples_per_second": 18.749,
"eval_steps_per_second": 1.564,
"eval_wo_beta": 7.784023284912109,
"step": 100
},
{
"dpo_loss": 0.5696373581886292,
"epoch": 0.42512990080302315,
"grad_norm": 18.14389066821128,
"learning_rate": 4.2613636363636365e-06,
"logits": -1.744734287261963,
"logps": -90.7203140258789,
"loss": 0.5667,
"objective": 0.5696373581886292,
"ranking_idealized": 0.6066666841506958,
"ranking_idealized_expo": 0.5287500023841858,
"ranking_simple": 0.6016666889190674,
"regularize": 0.5696373581886292,
"step": 150,
"wo_beta": 5.971276760101318
},
{
"epoch": 0.42512990080302315,
"eval_dpo_loss": 0.6829108595848083,
"eval_logits": -1.8072086572647095,
"eval_logps": -99.12300109863281,
"eval_loss": 0.6866306066513062,
"eval_objective": 0.6829108595848083,
"eval_ranking_idealized": 0.6030020713806152,
"eval_ranking_idealized_expo": 0.5222567319869995,
"eval_ranking_simple": 0.5408902764320374,
"eval_regularize": 0.6829108595848083,
"eval_runtime": 312.2547,
"eval_samples_per_second": 18.543,
"eval_steps_per_second": 1.547,
"eval_wo_beta": 7.8532514572143555,
"step": 150
},
{
"dpo_loss": 0.5278292298316956,
"epoch": 0.5668398677373642,
"grad_norm": 15.350860718764396,
"learning_rate": 4.997168347957521e-06,
"logits": -1.908250093460083,
"logps": -92.51087951660156,
"loss": 0.5214,
"objective": 0.5278292298316956,
"ranking_idealized": 0.5924999713897705,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.6362500190734863,
"regularize": 0.5278292298316956,
"step": 200,
"wo_beta": 5.543264389038086
},
{
"epoch": 0.5668398677373642,
"eval_dpo_loss": 0.6904094815254211,
"eval_logits": -1.8893996477127075,
"eval_logps": -99.53878784179688,
"eval_loss": 0.6900797486305237,
"eval_objective": 0.6904094815254211,
"eval_ranking_idealized": 0.6030020713806152,
"eval_ranking_idealized_expo": 0.5222567319869995,
"eval_ranking_simple": 0.544513463973999,
"eval_regularize": 0.6904094815254211,
"eval_runtime": 315.3971,
"eval_samples_per_second": 18.358,
"eval_steps_per_second": 1.531,
"eval_wo_beta": 7.699478626251221,
"step": 200
},
{
"dpo_loss": 0.49355897307395935,
"epoch": 0.7085498346717053,
"grad_norm": 14.313286499637714,
"learning_rate": 4.973122855144066e-06,
"logits": -1.9076462984085083,
"logps": -88.3504867553711,
"loss": 0.4922,
"objective": 0.49355897307395935,
"ranking_idealized": 0.5991666913032532,
"ranking_idealized_expo": 0.5170833468437195,
"ranking_simple": 0.6608333587646484,
"regularize": 0.49355897307395935,
"step": 250,
"wo_beta": 5.325013637542725
},
{
"epoch": 0.7085498346717053,
"eval_dpo_loss": 0.6915506720542908,
"eval_logits": -1.9879554510116577,
"eval_logps": -82.79730224609375,
"eval_loss": 0.6976116895675659,
"eval_objective": 0.6915506720542908,
"eval_ranking_idealized": 0.6030020713806152,
"eval_ranking_idealized_expo": 0.5222567319869995,
"eval_ranking_simple": 0.5476190447807312,
"eval_regularize": 0.6915506720542908,
"eval_runtime": 308.7255,
"eval_samples_per_second": 18.755,
"eval_steps_per_second": 1.564,
"eval_wo_beta": 7.87898588180542,
"step": 250
},
{
"dpo_loss": 0.4521573483943939,
"epoch": 0.8502598016060463,
"grad_norm": 13.44014162581437,
"learning_rate": 4.924776641419513e-06,
"logits": -2.09318470954895,
"logps": -81.41643524169922,
"loss": 0.4535,
"objective": 0.4521573483943939,
"ranking_idealized": 0.5799999833106995,
"ranking_idealized_expo": 0.4970833361148834,
"ranking_simple": 0.6837499737739563,
"regularize": 0.4521573483943939,
"step": 300,
"wo_beta": 5.026339054107666
},
{
"epoch": 0.8502598016060463,
"eval_dpo_loss": 0.6892624497413635,
"eval_logits": -2.120311975479126,
"eval_logps": -91.03094482421875,
"eval_loss": 0.6920701265335083,
"eval_objective": 0.6892624497413635,
"eval_ranking_idealized": 0.6030020713806152,
"eval_ranking_idealized_expo": 0.5222567319869995,
"eval_ranking_simple": 0.5559006333351135,
"eval_regularize": 0.6892624497413635,
"eval_runtime": 308.4714,
"eval_samples_per_second": 18.77,
"eval_steps_per_second": 1.566,
"eval_wo_beta": 7.426205635070801,
"step": 300
},
{
"dpo_loss": 0.40864402055740356,
"epoch": 0.9919697685403873,
"grad_norm": 15.102301026818012,
"learning_rate": 4.8526047530778175e-06,
"logits": -1.9232014417648315,
"logps": -84.39765930175781,
"loss": 0.423,
"objective": 0.40864402055740356,
"ranking_idealized": 0.60916668176651,
"ranking_idealized_expo": 0.5270833373069763,
"ranking_simple": 0.7174999713897705,
"regularize": 0.40864402055740356,
"step": 350,
"wo_beta": 3.9979019165039062
},
{
"epoch": 0.9919697685403873,
"eval_dpo_loss": 0.6959461569786072,
"eval_logits": -1.987973690032959,
"eval_logps": -88.1614990234375,
"eval_loss": 0.7056758403778076,
"eval_objective": 0.6959461569786072,
"eval_ranking_idealized": 0.6030020713806152,
"eval_ranking_idealized_expo": 0.5222567319869995,
"eval_ranking_simple": 0.5548654198646545,
"eval_regularize": 0.6959461569786072,
"eval_runtime": 309.9548,
"eval_samples_per_second": 18.68,
"eval_steps_per_second": 1.558,
"eval_wo_beta": 7.997907638549805,
"step": 350
},
{
"dpo_loss": 0.28186026215553284,
"epoch": 1.1336797354747283,
"grad_norm": 12.122355016988976,
"learning_rate": 4.757316345716554e-06,
"logits": -2.0241637229919434,
"logps": -91.86678314208984,
"loss": 0.2847,
"objective": 0.28186026215553284,
"ranking_idealized": 0.6087499856948853,
"ranking_idealized_expo": 0.5337499976158142,
"ranking_simple": 0.8145833611488342,
"regularize": 0.28186026215553284,
"step": 400,
"wo_beta": 2.646965980529785
},
{
"epoch": 1.1336797354747283,
"eval_dpo_loss": 0.7281294465065002,
"eval_logits": -2.0862255096435547,
"eval_logps": -101.69258117675781,
"eval_loss": 0.7314654588699341,
"eval_objective": 0.7281294465065002,
"eval_ranking_idealized": 0.6030020713806152,
"eval_ranking_idealized_expo": 0.5222567319869995,
"eval_ranking_simple": 0.542443037033081,
"eval_regularize": 0.7281294465065002,
"eval_runtime": 308.7063,
"eval_samples_per_second": 18.756,
"eval_steps_per_second": 1.565,
"eval_wo_beta": 8.632596969604492,
"step": 400
},
{
"dpo_loss": 0.30417078733444214,
"epoch": 1.2753897024090695,
"grad_norm": 12.255120909837279,
"learning_rate": 4.639847716126855e-06,
"logits": -1.9048844575881958,
"logps": -92.96432495117188,
"loss": 0.2991,
"objective": 0.30417078733444214,
"ranking_idealized": 0.5975000262260437,
"ranking_idealized_expo": 0.5199999809265137,
"ranking_simple": 0.8087499737739563,
"regularize": 0.30417078733444214,
"step": 450,
"wo_beta": 3.335141658782959
},
{
"epoch": 1.2753897024090695,
"eval_dpo_loss": 0.6979546546936035,
"eval_logits": -1.8470289707183838,
"eval_logps": -92.79419708251953,
"eval_loss": 0.7008146047592163,
"eval_objective": 0.6979546546936035,
"eval_ranking_idealized": 0.6030020713806152,
"eval_ranking_idealized_expo": 0.5222567319869995,
"eval_ranking_simple": 0.5621117949485779,
"eval_regularize": 0.6979546546936035,
"eval_runtime": 308.7789,
"eval_samples_per_second": 18.751,
"eval_steps_per_second": 1.564,
"eval_wo_beta": 8.258440017700195,
"step": 450
},
{
"dpo_loss": 0.29814377427101135,
"epoch": 1.4170996693434104,
"grad_norm": 15.647218082922008,
"learning_rate": 4.501353102310901e-06,
"logits": -1.9365119934082031,
"logps": -90.89854431152344,
"loss": 0.3065,
"objective": 0.29814377427101135,
"ranking_idealized": 0.57833331823349,
"ranking_idealized_expo": 0.4983333349227905,
"ranking_simple": 0.7991666793823242,
"regularize": 0.29814377427101135,
"step": 500,
"wo_beta": 3.127906560897827
},
{
"epoch": 1.4170996693434104,
"eval_dpo_loss": 0.7147387266159058,
"eval_logits": -2.006500005722046,
"eval_logps": -96.67472076416016,
"eval_loss": 0.7179672122001648,
"eval_objective": 0.7147387266159058,
"eval_ranking_idealized": 0.6030020713806152,
"eval_ranking_idealized_expo": 0.5222567319869995,
"eval_ranking_simple": 0.555383026599884,
"eval_regularize": 0.7147387266159058,
"eval_runtime": 309.3319,
"eval_samples_per_second": 18.718,
"eval_steps_per_second": 1.561,
"eval_wo_beta": 8.252218246459961,
"step": 500
},
{
"dpo_loss": 0.2861484885215759,
"epoch": 1.5588096362777515,
"grad_norm": 10.885590323378537,
"learning_rate": 4.34319334202531e-06,
"logits": -1.9361701011657715,
"logps": -95.84064483642578,
"loss": 0.2895,
"objective": 0.2861484885215759,
"ranking_idealized": 0.5945833325386047,
"ranking_idealized_expo": 0.5116666555404663,
"ranking_simple": 0.8066666722297668,
"regularize": 0.2861484885215759,
"step": 550,
"wo_beta": 2.692445993423462
},
{
"epoch": 1.5588096362777515,
"eval_dpo_loss": 0.7076632976531982,
"eval_logits": -1.887025237083435,
"eval_logps": -104.24694061279297,
"eval_loss": 0.7044315338134766,
"eval_objective": 0.7076632976531982,
"eval_ranking_idealized": 0.6030020713806152,
"eval_ranking_idealized_expo": 0.5222567319869995,
"eval_ranking_simple": 0.5652173757553101,
"eval_regularize": 0.7076632976531982,
"eval_runtime": 309.8676,
"eval_samples_per_second": 18.685,
"eval_steps_per_second": 1.559,
"eval_wo_beta": 8.194681167602539,
"step": 550
},
{
"epoch": 1.5588096362777515,
"step": 550,
"total_flos": 0.0,
"train_loss": 0.4484944924441251,
"train_runtime": 15200.6621,
"train_samples_per_second": 16.71,
"train_steps_per_second": 0.116
}
],
"logging_steps": 50,
"max_steps": 1760,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 50,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}