qwen2.5-0.5b-expo-DPO-ES-TRY / trainer_state.json
hZzy's picture
Model save
9ff305a verified
raw
history blame
16 kB
{
"best_metric": 0.5734989643096924,
"best_model_checkpoint": "./qwen2.5-0.5b/qwen2.5-0.5b-expo-DPO-ES-TRY/checkpoint-300",
"epoch": 0.8502598016060463,
"eval_steps": 50,
"global_step": 600,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"dpo_loss": 0.6931471824645996,
"dpo_wo_beta": -0.6931471824645996,
"epoch": 0.0014170996693434106,
"grad_norm": 25.66138500619404,
"learning_rate": 2.358490566037736e-08,
"logits": -1.7146095037460327,
"logps": -79.01810455322266,
"loss": 0.6931,
"objective": 0.6931471824645996,
"ranking_idealized": 0.3333333432674408,
"ranking_idealized_expo": 0.3333333432674408,
"ranking_simple": 0.3333333432674408,
"regularize": 0.6931471824645996,
"step": 1
},
{
"dpo_loss": 0.6873584985733032,
"dpo_wo_beta": -0.8693115711212158,
"epoch": 0.07085498346717052,
"grad_norm": 25.93169363980804,
"learning_rate": 1.179245283018868e-06,
"logits": -1.890508770942688,
"logps": -82.390869140625,
"loss": 0.6857,
"objective": 0.6873584985733032,
"ranking_idealized": 0.6258503198623657,
"ranking_idealized_expo": 0.5408163070678711,
"ranking_simple": 0.5408163070678711,
"regularize": 0.6873584985733032,
"step": 50
},
{
"epoch": 0.07085498346717052,
"eval_dpo_loss": 0.6927401423454285,
"eval_dpo_wo_beta": -1.280670404434204,
"eval_logits": -1.960647463798523,
"eval_logps": -88.98406219482422,
"eval_loss": 0.6913570761680603,
"eval_objective": 0.6927401423454285,
"eval_ranking_idealized": 0.6045548915863037,
"eval_ranking_idealized_expo": 0.5279502868652344,
"eval_ranking_simple": 0.5362318754196167,
"eval_regularize": 0.6927401423454285,
"eval_runtime": 318.1168,
"eval_samples_per_second": 18.201,
"eval_steps_per_second": 1.518,
"step": 50
},
{
"dpo_loss": 0.6656978130340576,
"dpo_wo_beta": -1.517911434173584,
"epoch": 0.14170996693434104,
"grad_norm": 29.18536320021953,
"learning_rate": 2.358490566037736e-06,
"logits": -2.088792085647583,
"logps": -82.19146728515625,
"loss": 0.6524,
"objective": 0.6656978130340576,
"ranking_idealized": 0.5950000286102295,
"ranking_idealized_expo": 0.5149999856948853,
"ranking_simple": 0.5233333110809326,
"regularize": 0.6656978130340576,
"step": 100
},
{
"epoch": 0.14170996693434104,
"eval_dpo_loss": 0.7010491490364075,
"eval_dpo_wo_beta": -1.8911339044570923,
"eval_logits": -2.0578620433807373,
"eval_logps": -98.63578033447266,
"eval_loss": 0.6921781897544861,
"eval_objective": 0.7010491490364075,
"eval_ranking_idealized": 0.6045548915863037,
"eval_ranking_idealized_expo": 0.5279502868652344,
"eval_ranking_simple": 0.5269151329994202,
"eval_regularize": 0.7010491490364075,
"eval_runtime": 317.6312,
"eval_samples_per_second": 18.229,
"eval_steps_per_second": 1.521,
"step": 100
},
{
"dpo_loss": 0.6302288770675659,
"dpo_wo_beta": -1.9832934141159058,
"epoch": 0.21256495040151158,
"grad_norm": 31.92092339525771,
"learning_rate": 3.5377358490566038e-06,
"logits": -2.047361135482788,
"logps": -90.97752380371094,
"loss": 0.6123,
"objective": 0.6302288770675659,
"ranking_idealized": 0.6050000190734863,
"ranking_idealized_expo": 0.528333306312561,
"ranking_simple": 0.5799999833106995,
"regularize": 0.6302288770675659,
"step": 150
},
{
"epoch": 0.21256495040151158,
"eval_dpo_loss": 0.7015214562416077,
"eval_dpo_wo_beta": -2.1165764331817627,
"eval_logits": -1.9032589197158813,
"eval_logps": -102.8927001953125,
"eval_loss": 0.6967350840568542,
"eval_objective": 0.7015214562416077,
"eval_ranking_idealized": 0.6045548915863037,
"eval_ranking_idealized_expo": 0.5279502868652344,
"eval_ranking_simple": 0.5279502868652344,
"eval_regularize": 0.7015214562416077,
"eval_runtime": 317.4305,
"eval_samples_per_second": 18.24,
"eval_steps_per_second": 1.522,
"step": 150
},
{
"dpo_loss": 0.5571741461753845,
"dpo_wo_beta": -1.7987542152404785,
"epoch": 0.2834199338686821,
"grad_norm": 22.667783186241508,
"learning_rate": 4.716981132075472e-06,
"logits": -2.1097896099090576,
"logps": -93.05426025390625,
"loss": 0.5779,
"objective": 0.5571741461753845,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.5433333516120911,
"ranking_simple": 0.6449999809265137,
"regularize": 0.5571741461753845,
"step": 200
},
{
"epoch": 0.2834199338686821,
"eval_dpo_loss": 0.6816009283065796,
"eval_dpo_wo_beta": -2.1417369842529297,
"eval_logits": -2.071585178375244,
"eval_logps": -106.49442291259766,
"eval_loss": 0.6793810129165649,
"eval_objective": 0.6816009283065796,
"eval_ranking_idealized": 0.6045548915863037,
"eval_ranking_idealized_expo": 0.5279502868652344,
"eval_ranking_simple": 0.5507246255874634,
"eval_regularize": 0.6816009283065796,
"eval_runtime": 316.4091,
"eval_samples_per_second": 18.299,
"eval_steps_per_second": 1.527,
"step": 200
},
{
"dpo_loss": 0.5780055522918701,
"dpo_wo_beta": -2.330664873123169,
"epoch": 0.35427491733585265,
"grad_norm": 19.831646672236253,
"learning_rate": 4.995082357614404e-06,
"logits": -2.255446434020996,
"logps": -94.9359130859375,
"loss": 0.5709,
"objective": 0.5780055522918701,
"ranking_idealized": 0.5699999928474426,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.6083333492279053,
"regularize": 0.5780055522918701,
"step": 250
},
{
"epoch": 0.35427491733585265,
"eval_dpo_loss": 0.6816768646240234,
"eval_dpo_wo_beta": -2.267601728439331,
"eval_logits": -2.246999740600586,
"eval_logps": -87.73257446289062,
"eval_loss": 0.6882591843605042,
"eval_objective": 0.6816768646240234,
"eval_ranking_idealized": 0.6045548915863037,
"eval_ranking_idealized_expo": 0.5279502868652344,
"eval_ranking_simple": 0.542443037033081,
"eval_regularize": 0.6816768646240234,
"eval_runtime": 316.123,
"eval_samples_per_second": 18.316,
"eval_steps_per_second": 1.528,
"step": 250
},
{
"dpo_loss": 0.5427210330963135,
"dpo_wo_beta": -2.1861753463745117,
"epoch": 0.42512990080302315,
"grad_norm": 27.500670384351913,
"learning_rate": 4.973664984850435e-06,
"logits": -2.136077404022217,
"logps": -81.2485122680664,
"loss": 0.5563,
"objective": 0.5427210330963135,
"ranking_idealized": 0.6299999952316284,
"ranking_idealized_expo": 0.5649999976158142,
"ranking_simple": 0.6583333611488342,
"regularize": 0.5427210330963135,
"step": 300
},
{
"epoch": 0.42512990080302315,
"eval_dpo_loss": 0.6618562340736389,
"eval_dpo_wo_beta": -2.3796472549438477,
"eval_logits": -2.269749641418457,
"eval_logps": -89.50890350341797,
"eval_loss": 0.6810693740844727,
"eval_objective": 0.6618562340736389,
"eval_ranking_idealized": 0.6045548915863037,
"eval_ranking_idealized_expo": 0.5279502868652344,
"eval_ranking_simple": 0.5734989643096924,
"eval_regularize": 0.6618562340736389,
"eval_runtime": 319.8742,
"eval_samples_per_second": 18.101,
"eval_steps_per_second": 1.51,
"step": 300
},
{
"dpo_loss": 0.5661785006523132,
"dpo_wo_beta": -2.481991767883301,
"epoch": 0.49598488427019366,
"grad_norm": 19.802376166923445,
"learning_rate": 4.9354031766005005e-06,
"logits": -2.372645616531372,
"logps": -87.0498046875,
"loss": 0.5321,
"objective": 0.5661785006523132,
"ranking_idealized": 0.6016666889190674,
"ranking_idealized_expo": 0.5233333110809326,
"ranking_simple": 0.6499999761581421,
"regularize": 0.5661785006523132,
"step": 350
},
{
"epoch": 0.49598488427019366,
"eval_dpo_loss": 0.6773233413696289,
"eval_dpo_wo_beta": -2.6295416355133057,
"eval_logits": -2.3682754039764404,
"eval_logps": -99.09272766113281,
"eval_loss": 0.6925813555717468,
"eval_objective": 0.6773233413696289,
"eval_ranking_idealized": 0.6045548915863037,
"eval_ranking_idealized_expo": 0.5279502868652344,
"eval_ranking_simple": 0.5734989643096924,
"eval_regularize": 0.6773233413696289,
"eval_runtime": 316.8112,
"eval_samples_per_second": 18.276,
"eval_steps_per_second": 1.525,
"step": 350
},
{
"dpo_loss": 0.5010849237442017,
"dpo_wo_beta": -2.259007453918457,
"epoch": 0.5668398677373642,
"grad_norm": 15.355707906669174,
"learning_rate": 4.880557476860893e-06,
"logits": -2.343498468399048,
"logps": -100.16142272949219,
"loss": 0.4963,
"objective": 0.5010849237442017,
"ranking_idealized": 0.6200000047683716,
"ranking_idealized_expo": 0.5199999809265137,
"ranking_simple": 0.6883333325386047,
"regularize": 0.5010849237442017,
"step": 400
},
{
"epoch": 0.5668398677373642,
"eval_dpo_loss": 0.6835893392562866,
"eval_dpo_wo_beta": -2.691314697265625,
"eval_logits": -2.250782012939453,
"eval_logps": -106.707275390625,
"eval_loss": 0.6914249062538147,
"eval_objective": 0.6835893392562866,
"eval_ranking_idealized": 0.6045548915863037,
"eval_ranking_idealized_expo": 0.5279502868652344,
"eval_ranking_simple": 0.567287802696228,
"eval_regularize": 0.6835893392562866,
"eval_runtime": 320.5789,
"eval_samples_per_second": 18.061,
"eval_steps_per_second": 1.507,
"step": 400
},
{
"dpo_loss": 0.47148290276527405,
"dpo_wo_beta": -2.062403678894043,
"epoch": 0.6376948512045347,
"grad_norm": 19.132752913050712,
"learning_rate": 4.80950135772933e-06,
"logits": -2.2268483638763428,
"logps": -102.33979034423828,
"loss": 0.4745,
"objective": 0.47148290276527405,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.4933333396911621,
"ranking_simple": 0.675000011920929,
"regularize": 0.47148290276527405,
"step": 450
},
{
"epoch": 0.6376948512045347,
"eval_dpo_loss": 0.681454062461853,
"eval_dpo_wo_beta": -2.673811912536621,
"eval_logits": -2.2347095012664795,
"eval_logps": -105.86691284179688,
"eval_loss": 0.6937749981880188,
"eval_objective": 0.681454062461853,
"eval_ranking_idealized": 0.6045548915863037,
"eval_ranking_idealized_expo": 0.5279502868652344,
"eval_ranking_simple": 0.5631470084190369,
"eval_regularize": 0.681454062461853,
"eval_runtime": 319.8507,
"eval_samples_per_second": 18.102,
"eval_steps_per_second": 1.51,
"step": 450
},
{
"dpo_loss": 0.4877893626689911,
"dpo_wo_beta": -2.4107022285461426,
"epoch": 0.7085498346717053,
"grad_norm": 18.950528232263615,
"learning_rate": 4.72271867624463e-06,
"logits": -2.197690963745117,
"logps": -101.995361328125,
"loss": 0.4867,
"objective": 0.4877893626689911,
"ranking_idealized": 0.5933333039283752,
"ranking_idealized_expo": 0.5216666460037231,
"ranking_simple": 0.699999988079071,
"regularize": 0.4877893626689911,
"step": 500
},
{
"epoch": 0.7085498346717053,
"eval_dpo_loss": 0.6994954347610474,
"eval_dpo_wo_beta": -2.7257001399993896,
"eval_logits": -2.2181758880615234,
"eval_logps": -105.1847915649414,
"eval_loss": 0.7040360569953918,
"eval_objective": 0.6994954347610474,
"eval_ranking_idealized": 0.6045548915863037,
"eval_ranking_idealized_expo": 0.5279502868652344,
"eval_ranking_simple": 0.5507246255874634,
"eval_regularize": 0.6994954347610474,
"eval_runtime": 315.9927,
"eval_samples_per_second": 18.323,
"eval_steps_per_second": 1.529,
"step": 500
},
{
"dpo_loss": 0.4837046265602112,
"dpo_wo_beta": -2.136967897415161,
"epoch": 0.7794048181388757,
"grad_norm": 17.007084301353213,
"learning_rate": 4.620800379559508e-06,
"logits": -2.328810214996338,
"logps": -102.86935424804688,
"loss": 0.4582,
"objective": 0.4837046265602112,
"ranking_idealized": 0.5716666579246521,
"ranking_idealized_expo": 0.47833332419395447,
"ranking_simple": 0.6866666674613953,
"regularize": 0.4837046265602112,
"step": 550
},
{
"epoch": 0.7794048181388757,
"eval_dpo_loss": 0.7027432918548584,
"eval_dpo_wo_beta": -3.1023459434509277,
"eval_logits": -2.3854899406433105,
"eval_logps": -102.664306640625,
"eval_loss": 0.6995241045951843,
"eval_objective": 0.7027432918548584,
"eval_ranking_idealized": 0.6045548915863037,
"eval_ranking_idealized_expo": 0.5279502868652344,
"eval_ranking_simple": 0.5683229565620422,
"eval_regularize": 0.7027432918548584,
"eval_runtime": 317.401,
"eval_samples_per_second": 18.242,
"eval_steps_per_second": 1.522,
"step": 550
},
{
"dpo_loss": 0.42498964071273804,
"dpo_wo_beta": -1.8326289653778076,
"epoch": 0.8502598016060463,
"grad_norm": 15.283086339392508,
"learning_rate": 4.504440480882651e-06,
"logits": -2.4975786209106445,
"logps": -99.132568359375,
"loss": 0.4339,
"objective": 0.42498964071273804,
"ranking_idealized": 0.5350000262260437,
"ranking_idealized_expo": 0.4749999940395355,
"ranking_simple": 0.7083333134651184,
"regularize": 0.42498964071273804,
"step": 600
},
{
"epoch": 0.8502598016060463,
"eval_dpo_loss": 0.7050178647041321,
"eval_dpo_wo_beta": -3.216639995574951,
"eval_logits": -2.445580005645752,
"eval_logps": -103.54557037353516,
"eval_loss": 0.6964531540870667,
"eval_objective": 0.7050178647041321,
"eval_ranking_idealized": 0.6045548915863037,
"eval_ranking_idealized_expo": 0.5279502868652344,
"eval_ranking_simple": 0.5734989643096924,
"eval_regularize": 0.7050178647041321,
"eval_runtime": 317.5725,
"eval_samples_per_second": 18.232,
"eval_steps_per_second": 1.521,
"step": 600
},
{
"epoch": 0.8502598016060463,
"step": 600,
"total_flos": 0.0,
"train_loss": 0.15444423039754232,
"train_runtime": 3628.8607,
"train_samples_per_second": 41.998,
"train_steps_per_second": 0.583
}
],
"logging_steps": 50,
"max_steps": 2115,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 50,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 4,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}