qwen2.5-0.5b-expo-DPO-ES-10 / trainer_state.json
hZzy's picture
Model save
04e1e01 verified
{
"best_metric": 6.651296138763428,
"best_model_checkpoint": "./qwen2.5-0.5b/qwen2.5-0.5b-expo-DPO-ES-10/checkpoint-550",
"epoch": 2.2673594709494567,
"eval_steps": 50,
"global_step": 800,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"dpo_loss": 0.6931471824645996,
"epoch": 0.002834199338686821,
"grad_norm": 1844.2532039401294,
"learning_rate": 2.840909090909091e-08,
"logits": -1.359458565711975,
"logps": -84.69721221923828,
"loss": 0.6931,
"objective": 0.6931471824645996,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.5833333134651184,
"regularize": 0.6931471824645996,
"step": 1,
"wo_beta": 5.271125316619873
},
{
"dpo_loss": 1.9794068336486816,
"epoch": 0.14170996693434104,
"grad_norm": 1879.680280823908,
"learning_rate": 1.4204545454545458e-06,
"logits": -1.4480701684951782,
"logps": -84.59326934814453,
"loss": 2.0094,
"objective": 1.9794068336486816,
"ranking_idealized": 0.5225340127944946,
"ranking_idealized_expo": 0.5216836929321289,
"ranking_simple": 0.5250850319862366,
"regularize": 1.9794068336486816,
"step": 50,
"wo_beta": 7.08821439743042
},
{
"epoch": 0.14170996693434104,
"eval_dpo_loss": 3.0980334281921387,
"eval_logits": -1.4591896533966064,
"eval_logps": -90.62417602539062,
"eval_loss": 3.106841564178467,
"eval_objective": 3.0980334281921387,
"eval_ranking_idealized": 0.5212215185165405,
"eval_ranking_idealized_expo": 0.5212215185165405,
"eval_ranking_simple": 0.5258799195289612,
"eval_regularize": 3.0980334281921387,
"eval_runtime": 308.1993,
"eval_samples_per_second": 18.787,
"eval_steps_per_second": 1.567,
"eval_wo_beta": 7.7179274559021,
"step": 50
},
{
"dpo_loss": 6.002392768859863,
"epoch": 0.2834199338686821,
"grad_norm": 1776.9003571892035,
"learning_rate": 2.8409090909090916e-06,
"logits": -1.4501550197601318,
"logps": -81.94203186035156,
"loss": 5.9165,
"objective": 6.002392768859863,
"ranking_idealized": 0.5141666531562805,
"ranking_idealized_expo": 0.5137500166893005,
"ranking_simple": 0.5425000190734863,
"regularize": 6.002392768859863,
"step": 100,
"wo_beta": 6.498049736022949
},
{
"epoch": 0.2834199338686821,
"eval_dpo_loss": 7.139862060546875,
"eval_logits": -1.4642183780670166,
"eval_logps": -82.83346557617188,
"eval_loss": 7.14874792098999,
"eval_objective": 7.139862060546875,
"eval_ranking_idealized": 0.5212215185165405,
"eval_ranking_idealized_expo": 0.5212215185165405,
"eval_ranking_simple": 0.5300207138061523,
"eval_regularize": 7.139862060546875,
"eval_runtime": 309.8222,
"eval_samples_per_second": 18.688,
"eval_steps_per_second": 1.559,
"eval_wo_beta": 7.4498443603515625,
"step": 100
},
{
"dpo_loss": 10.150534629821777,
"epoch": 0.42512990080302315,
"grad_norm": 1372.4824531102197,
"learning_rate": 4.2613636363636365e-06,
"logits": -1.4184441566467285,
"logps": -73.7444076538086,
"loss": 9.9617,
"objective": 10.150534629821777,
"ranking_idealized": 0.5287500023841858,
"ranking_idealized_expo": 0.527916669845581,
"ranking_simple": 0.5641666650772095,
"regularize": 10.150534629821777,
"step": 150,
"wo_beta": 6.36607027053833
},
{
"epoch": 0.42512990080302315,
"eval_dpo_loss": 11.353630065917969,
"eval_logits": -1.3437175750732422,
"eval_logps": -83.07452392578125,
"eval_loss": 11.899770736694336,
"eval_objective": 11.353630065917969,
"eval_ranking_idealized": 0.5212215185165405,
"eval_ranking_idealized_expo": 0.5212215185165405,
"eval_ranking_simple": 0.5305383205413818,
"eval_regularize": 11.353630065917969,
"eval_runtime": 307.9181,
"eval_samples_per_second": 18.804,
"eval_steps_per_second": 1.569,
"eval_wo_beta": 7.260918140411377,
"step": 150
},
{
"dpo_loss": 12.522791862487793,
"epoch": 0.5668398677373642,
"grad_norm": 1378.4122138720427,
"learning_rate": 4.997168347957521e-06,
"logits": -1.3766180276870728,
"logps": -77.5620346069336,
"loss": 12.4724,
"objective": 12.522791862487793,
"ranking_idealized": 0.51583331823349,
"ranking_idealized_expo": 0.51541668176651,
"ranking_simple": 0.5550000071525574,
"regularize": 12.522791862487793,
"step": 200,
"wo_beta": 6.352013111114502
},
{
"epoch": 0.5668398677373642,
"eval_dpo_loss": 16.061721801757812,
"eval_logits": -1.387966513633728,
"eval_logps": -79.93595886230469,
"eval_loss": 17.09868049621582,
"eval_objective": 16.061721801757812,
"eval_ranking_idealized": 0.5212215185165405,
"eval_ranking_idealized_expo": 0.5212215185165405,
"eval_ranking_simple": 0.5300207138061523,
"eval_regularize": 16.061721801757812,
"eval_runtime": 307.8039,
"eval_samples_per_second": 18.811,
"eval_steps_per_second": 1.569,
"eval_wo_beta": 7.228997230529785,
"step": 200
},
{
"dpo_loss": 13.093570709228516,
"epoch": 0.7085498346717053,
"grad_norm": 1274.583157442186,
"learning_rate": 4.973122855144066e-06,
"logits": -1.294631004333496,
"logps": -76.13822174072266,
"loss": 13.2936,
"objective": 13.093570709228516,
"ranking_idealized": 0.5166666507720947,
"ranking_idealized_expo": 0.5162500143051147,
"ranking_simple": 0.5824999809265137,
"regularize": 13.093570709228516,
"step": 250,
"wo_beta": 6.279551982879639
},
{
"epoch": 0.7085498346717053,
"eval_dpo_loss": 17.797138214111328,
"eval_logits": -1.3640648126602173,
"eval_logps": -77.31498718261719,
"eval_loss": 18.5308780670166,
"eval_objective": 17.797138214111328,
"eval_ranking_idealized": 0.5212215185165405,
"eval_ranking_idealized_expo": 0.5212215185165405,
"eval_ranking_simple": 0.5341615080833435,
"eval_regularize": 17.797138214111328,
"eval_runtime": 313.1016,
"eval_samples_per_second": 18.492,
"eval_steps_per_second": 1.543,
"eval_wo_beta": 7.207766532897949,
"step": 250
},
{
"dpo_loss": 10.712362289428711,
"epoch": 0.8502598016060463,
"grad_norm": 982.3462926804266,
"learning_rate": 4.924776641419513e-06,
"logits": -1.090299129486084,
"logps": -70.98873138427734,
"loss": 11.5204,
"objective": 10.712362289428711,
"ranking_idealized": 0.4962500035762787,
"ranking_idealized_expo": 0.4950000047683716,
"ranking_simple": 0.5679166913032532,
"regularize": 10.712362289428711,
"step": 300,
"wo_beta": 6.134185314178467
},
{
"epoch": 0.8502598016060463,
"eval_dpo_loss": 18.701662063598633,
"eval_logits": -0.9941285848617554,
"eval_logps": -76.97978210449219,
"eval_loss": 19.434432983398438,
"eval_objective": 18.701662063598633,
"eval_ranking_idealized": 0.5212215185165405,
"eval_ranking_idealized_expo": 0.5212215185165405,
"eval_ranking_simple": 0.5357142686843872,
"eval_regularize": 18.701662063598633,
"eval_runtime": 307.6602,
"eval_samples_per_second": 18.819,
"eval_steps_per_second": 1.57,
"eval_wo_beta": 7.013552188873291,
"step": 300
},
{
"dpo_loss": 10.92597484588623,
"epoch": 0.9919697685403873,
"grad_norm": 938.8397527375307,
"learning_rate": 4.8526047530778175e-06,
"logits": -0.9006206393241882,
"logps": -72.82616424560547,
"loss": 11.3717,
"objective": 10.92597484588623,
"ranking_idealized": 0.5262500047683716,
"ranking_idealized_expo": 0.5254166722297668,
"ranking_simple": 0.6033333539962769,
"regularize": 10.92597484588623,
"step": 350,
"wo_beta": 5.362515449523926
},
{
"epoch": 0.9919697685403873,
"eval_dpo_loss": 19.039833068847656,
"eval_logits": -1.0426429510116577,
"eval_logps": -76.1622543334961,
"eval_loss": 20.39594841003418,
"eval_objective": 19.039833068847656,
"eval_ranking_idealized": 0.5212215185165405,
"eval_ranking_idealized_expo": 0.5212215185165405,
"eval_ranking_simple": 0.5408902764320374,
"eval_regularize": 19.039833068847656,
"eval_runtime": 307.6502,
"eval_samples_per_second": 18.82,
"eval_steps_per_second": 1.57,
"eval_wo_beta": 7.0260910987854,
"step": 350
},
{
"dpo_loss": 6.708657264709473,
"epoch": 1.1336797354747283,
"grad_norm": 902.5480798954853,
"learning_rate": 4.757316345716554e-06,
"logits": -0.7562137246131897,
"logps": -70.6362075805664,
"loss": 7.0971,
"objective": 6.708657264709473,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.5320833325386047,
"ranking_simple": 0.6329166889190674,
"regularize": 6.708657264709473,
"step": 400,
"wo_beta": 4.74643087387085
},
{
"epoch": 1.1336797354747283,
"eval_dpo_loss": 21.69021987915039,
"eval_logits": -0.6236207485198975,
"eval_logps": -76.14582824707031,
"eval_loss": 21.927854537963867,
"eval_objective": 21.69021987915039,
"eval_ranking_idealized": 0.5212215185165405,
"eval_ranking_idealized_expo": 0.5212215185165405,
"eval_ranking_simple": 0.5388198494911194,
"eval_regularize": 21.69021987915039,
"eval_runtime": 308.4451,
"eval_samples_per_second": 18.772,
"eval_steps_per_second": 1.566,
"eval_wo_beta": 7.122740745544434,
"step": 400
},
{
"dpo_loss": 8.22778606414795,
"epoch": 1.2753897024090695,
"grad_norm": 701.1530681925066,
"learning_rate": 4.639847716126855e-06,
"logits": -0.6124467849731445,
"logps": -71.35508728027344,
"loss": 7.5725,
"objective": 8.22778606414795,
"ranking_idealized": 0.5195833444595337,
"ranking_idealized_expo": 0.5191666483879089,
"ranking_simple": 0.6312500238418579,
"regularize": 8.22778606414795,
"step": 450,
"wo_beta": 5.267808437347412
},
{
"epoch": 1.2753897024090695,
"eval_dpo_loss": 20.385303497314453,
"eval_logits": -0.8352137207984924,
"eval_logps": -76.3924331665039,
"eval_loss": 20.948013305664062,
"eval_objective": 20.385303497314453,
"eval_ranking_idealized": 0.5212215185165405,
"eval_ranking_idealized_expo": 0.5212215185165405,
"eval_ranking_simple": 0.5372670888900757,
"eval_regularize": 20.385303497314453,
"eval_runtime": 307.7791,
"eval_samples_per_second": 18.812,
"eval_steps_per_second": 1.569,
"eval_wo_beta": 6.8499908447265625,
"step": 450
},
{
"dpo_loss": 7.319465160369873,
"epoch": 1.4170996693434104,
"grad_norm": 1027.4391137177338,
"learning_rate": 4.501353102310901e-06,
"logits": -0.7022644877433777,
"logps": -74.45861053466797,
"loss": 7.6466,
"objective": 7.319465160369873,
"ranking_idealized": 0.49791666865348816,
"ranking_idealized_expo": 0.4970833361148834,
"ranking_simple": 0.6225000023841858,
"regularize": 7.319465160369873,
"step": 500,
"wo_beta": 5.078485488891602
},
{
"epoch": 1.4170996693434104,
"eval_dpo_loss": 20.265100479125977,
"eval_logits": -0.7483307123184204,
"eval_logps": -80.78058624267578,
"eval_loss": 20.982105255126953,
"eval_objective": 20.265100479125977,
"eval_ranking_idealized": 0.5212215185165405,
"eval_ranking_idealized_expo": 0.5212215185165405,
"eval_ranking_simple": 0.532608687877655,
"eval_regularize": 20.265100479125977,
"eval_runtime": 307.6224,
"eval_samples_per_second": 18.822,
"eval_steps_per_second": 1.57,
"eval_wo_beta": 6.882425785064697,
"step": 500
},
{
"dpo_loss": 6.880460739135742,
"epoch": 1.5588096362777515,
"grad_norm": 856.3100755197052,
"learning_rate": 4.34319334202531e-06,
"logits": -0.6065574884414673,
"logps": -75.99507141113281,
"loss": 6.9565,
"objective": 6.880460739135742,
"ranking_idealized": 0.5112500190734863,
"ranking_idealized_expo": 0.5104166865348816,
"ranking_simple": 0.6445833444595337,
"regularize": 6.880460739135742,
"step": 550,
"wo_beta": 4.776731967926025
},
{
"epoch": 1.5588096362777515,
"eval_dpo_loss": 20.566144943237305,
"eval_logits": -0.6148493885993958,
"eval_logps": -80.20514678955078,
"eval_loss": 21.350601196289062,
"eval_objective": 20.566144943237305,
"eval_ranking_idealized": 0.5212215185165405,
"eval_ranking_idealized_expo": 0.5212215185165405,
"eval_ranking_simple": 0.5383023023605347,
"eval_regularize": 20.566144943237305,
"eval_runtime": 311.6281,
"eval_samples_per_second": 18.58,
"eval_steps_per_second": 1.55,
"eval_wo_beta": 6.651296138763428,
"step": 550
},
{
"dpo_loss": 6.715544700622559,
"epoch": 1.7005196032120926,
"grad_norm": 710.120603889053,
"learning_rate": 4.16692250129073e-06,
"logits": -0.4755525290966034,
"logps": -75.72002410888672,
"loss": 6.7183,
"objective": 6.715544700622559,
"ranking_idealized": 0.51541668176651,
"ranking_idealized_expo": 0.5149999856948853,
"ranking_simple": 0.637499988079071,
"regularize": 6.715544700622559,
"step": 600,
"wo_beta": 4.843540668487549
},
{
"epoch": 1.7005196032120926,
"eval_dpo_loss": 20.002656936645508,
"eval_logits": -0.606741189956665,
"eval_logps": -78.53438568115234,
"eval_loss": 21.126510620117188,
"eval_objective": 20.002656936645508,
"eval_ranking_idealized": 0.5212215185165405,
"eval_ranking_idealized_expo": 0.5212215185165405,
"eval_ranking_simple": 0.5367494821548462,
"eval_regularize": 20.002656936645508,
"eval_runtime": 307.6088,
"eval_samples_per_second": 18.823,
"eval_steps_per_second": 1.57,
"eval_wo_beta": 6.676760673522949,
"step": 600
},
{
"dpo_loss": 7.323308944702148,
"epoch": 1.8422295701464337,
"grad_norm": 790.4876193704064,
"learning_rate": 3.974272604254906e-06,
"logits": -0.5003318190574646,
"logps": -74.34846496582031,
"loss": 6.9931,
"objective": 7.323308944702148,
"ranking_idealized": 0.5291666388511658,
"ranking_idealized_expo": 0.527916669845581,
"ranking_simple": 0.64083331823349,
"regularize": 7.323308944702148,
"step": 650,
"wo_beta": 5.1112799644470215
},
{
"epoch": 1.8422295701464337,
"eval_dpo_loss": 21.445514678955078,
"eval_logits": -0.5872498154640198,
"eval_logps": -77.65087127685547,
"eval_loss": 22.20830535888672,
"eval_objective": 21.445514678955078,
"eval_ranking_idealized": 0.5212215185165405,
"eval_ranking_idealized_expo": 0.5212215185165405,
"eval_ranking_simple": 0.5383023023605347,
"eval_regularize": 21.445514678955078,
"eval_runtime": 307.7733,
"eval_samples_per_second": 18.813,
"eval_steps_per_second": 1.569,
"eval_wo_beta": 6.819047451019287,
"step": 650
},
{
"dpo_loss": 6.620248317718506,
"epoch": 1.9839395370807746,
"grad_norm": 688.8652687295252,
"learning_rate": 3.767136614452458e-06,
"logits": -0.40135031938552856,
"logps": -73.09497833251953,
"loss": 6.1685,
"objective": 6.620248317718506,
"ranking_idealized": 0.5129166841506958,
"ranking_idealized_expo": 0.5108333230018616,
"ranking_simple": 0.6358333230018616,
"regularize": 6.620248317718506,
"step": 700,
"wo_beta": 5.023129463195801
},
{
"epoch": 1.9839395370807746,
"eval_dpo_loss": 21.55119514465332,
"eval_logits": -0.5436362028121948,
"eval_logps": -77.14934539794922,
"eval_loss": 22.36069679260254,
"eval_objective": 21.55119514465332,
"eval_ranking_idealized": 0.5212215185165405,
"eval_ranking_idealized_expo": 0.5212215185165405,
"eval_ranking_simple": 0.5403726696968079,
"eval_regularize": 21.55119514465332,
"eval_runtime": 310.6834,
"eval_samples_per_second": 18.636,
"eval_steps_per_second": 1.555,
"eval_wo_beta": 6.729911804199219,
"step": 700
},
{
"dpo_loss": 3.5603878498077393,
"epoch": 2.1256495040151155,
"grad_norm": 579.769858214478,
"learning_rate": 3.547549834686222e-06,
"logits": -0.5370141863822937,
"logps": -73.9045639038086,
"loss": 3.4811,
"objective": 3.5603878498077393,
"ranking_idealized": 0.5129166841506958,
"ranking_idealized_expo": 0.5112500190734863,
"ranking_simple": 0.6691666841506958,
"regularize": 3.5603878498077393,
"step": 750,
"wo_beta": 4.534417152404785
},
{
"epoch": 2.1256495040151155,
"eval_dpo_loss": 21.137874603271484,
"eval_logits": -0.7312601804733276,
"eval_logps": -78.93118286132812,
"eval_loss": 21.834890365600586,
"eval_objective": 21.137874603271484,
"eval_ranking_idealized": 0.5212215185165405,
"eval_ranking_idealized_expo": 0.5212215185165405,
"eval_ranking_simple": 0.542443037033081,
"eval_regularize": 21.137874603271484,
"eval_runtime": 307.6064,
"eval_samples_per_second": 18.823,
"eval_steps_per_second": 1.57,
"eval_wo_beta": 6.821295261383057,
"step": 750
},
{
"dpo_loss": 3.4036636352539062,
"epoch": 2.2673594709494567,
"grad_norm": 573.1633009551587,
"learning_rate": 3.3176699082935546e-06,
"logits": -0.5852146148681641,
"logps": -75.8536376953125,
"loss": 3.3995,
"objective": 3.4036636352539062,
"ranking_idealized": 0.512499988079071,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.6625000238418579,
"regularize": 3.4036636352539062,
"step": 800,
"wo_beta": 4.245257377624512
},
{
"epoch": 2.2673594709494567,
"eval_dpo_loss": 20.453168869018555,
"eval_logits": -0.5475257635116577,
"eval_logps": -79.71145629882812,
"eval_loss": 21.353944778442383,
"eval_objective": 20.453168869018555,
"eval_ranking_idealized": 0.5212215185165405,
"eval_ranking_idealized_expo": 0.5212215185165405,
"eval_ranking_simple": 0.5362318754196167,
"eval_regularize": 20.453168869018555,
"eval_runtime": 307.5814,
"eval_samples_per_second": 18.824,
"eval_steps_per_second": 1.57,
"eval_wo_beta": 6.686735153198242,
"step": 800
},
{
"epoch": 2.2673594709494567,
"step": 800,
"total_flos": 0.0,
"train_loss": 7.659533626437187,
"train_runtime": 21973.9049,
"train_samples_per_second": 11.56,
"train_steps_per_second": 0.08
}
],
"logging_steps": 50,
"max_steps": 1760,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 50,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}