|
{ |
|
"best_metric": 0.5734989643096924, |
|
"best_model_checkpoint": "./qwen2.5-0.5b/qwen2.5-0.5b-expo-DPO-ES-TRY/checkpoint-300", |
|
"epoch": 0.8502598016060463, |
|
"eval_steps": 50, |
|
"global_step": 600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"dpo_loss": 0.6931471824645996, |
|
"dpo_wo_beta": -0.6931471824645996, |
|
"epoch": 0.0014170996693434106, |
|
"grad_norm": 25.66138500619404, |
|
"learning_rate": 2.358490566037736e-08, |
|
"logits": -1.7146095037460327, |
|
"logps": -79.01810455322266, |
|
"loss": 0.6931, |
|
"objective": 0.6931471824645996, |
|
"ranking_idealized": 0.3333333432674408, |
|
"ranking_idealized_expo": 0.3333333432674408, |
|
"ranking_simple": 0.3333333432674408, |
|
"regularize": 0.6931471824645996, |
|
"step": 1 |
|
}, |
|
{ |
|
"dpo_loss": 0.6873584985733032, |
|
"dpo_wo_beta": -0.8693115711212158, |
|
"epoch": 0.07085498346717052, |
|
"grad_norm": 25.93169363980804, |
|
"learning_rate": 1.179245283018868e-06, |
|
"logits": -1.890508770942688, |
|
"logps": -82.390869140625, |
|
"loss": 0.6857, |
|
"objective": 0.6873584985733032, |
|
"ranking_idealized": 0.6258503198623657, |
|
"ranking_idealized_expo": 0.5408163070678711, |
|
"ranking_simple": 0.5408163070678711, |
|
"regularize": 0.6873584985733032, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.07085498346717052, |
|
"eval_dpo_loss": 0.6927401423454285, |
|
"eval_dpo_wo_beta": -1.280670404434204, |
|
"eval_logits": -1.960647463798523, |
|
"eval_logps": -88.98406219482422, |
|
"eval_loss": 0.6913570761680603, |
|
"eval_objective": 0.6927401423454285, |
|
"eval_ranking_idealized": 0.6045548915863037, |
|
"eval_ranking_idealized_expo": 0.5279502868652344, |
|
"eval_ranking_simple": 0.5362318754196167, |
|
"eval_regularize": 0.6927401423454285, |
|
"eval_runtime": 318.1168, |
|
"eval_samples_per_second": 18.201, |
|
"eval_steps_per_second": 1.518, |
|
"step": 50 |
|
}, |
|
{ |
|
"dpo_loss": 0.6656978130340576, |
|
"dpo_wo_beta": -1.517911434173584, |
|
"epoch": 0.14170996693434104, |
|
"grad_norm": 29.18536320021953, |
|
"learning_rate": 2.358490566037736e-06, |
|
"logits": -2.088792085647583, |
|
"logps": -82.19146728515625, |
|
"loss": 0.6524, |
|
"objective": 0.6656978130340576, |
|
"ranking_idealized": 0.5950000286102295, |
|
"ranking_idealized_expo": 0.5149999856948853, |
|
"ranking_simple": 0.5233333110809326, |
|
"regularize": 0.6656978130340576, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.14170996693434104, |
|
"eval_dpo_loss": 0.7010491490364075, |
|
"eval_dpo_wo_beta": -1.8911339044570923, |
|
"eval_logits": -2.0578620433807373, |
|
"eval_logps": -98.63578033447266, |
|
"eval_loss": 0.6921781897544861, |
|
"eval_objective": 0.7010491490364075, |
|
"eval_ranking_idealized": 0.6045548915863037, |
|
"eval_ranking_idealized_expo": 0.5279502868652344, |
|
"eval_ranking_simple": 0.5269151329994202, |
|
"eval_regularize": 0.7010491490364075, |
|
"eval_runtime": 317.6312, |
|
"eval_samples_per_second": 18.229, |
|
"eval_steps_per_second": 1.521, |
|
"step": 100 |
|
}, |
|
{ |
|
"dpo_loss": 0.6302288770675659, |
|
"dpo_wo_beta": -1.9832934141159058, |
|
"epoch": 0.21256495040151158, |
|
"grad_norm": 31.92092339525771, |
|
"learning_rate": 3.5377358490566038e-06, |
|
"logits": -2.047361135482788, |
|
"logps": -90.97752380371094, |
|
"loss": 0.6123, |
|
"objective": 0.6302288770675659, |
|
"ranking_idealized": 0.6050000190734863, |
|
"ranking_idealized_expo": 0.528333306312561, |
|
"ranking_simple": 0.5799999833106995, |
|
"regularize": 0.6302288770675659, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.21256495040151158, |
|
"eval_dpo_loss": 0.7015214562416077, |
|
"eval_dpo_wo_beta": -2.1165764331817627, |
|
"eval_logits": -1.9032589197158813, |
|
"eval_logps": -102.8927001953125, |
|
"eval_loss": 0.6967350840568542, |
|
"eval_objective": 0.7015214562416077, |
|
"eval_ranking_idealized": 0.6045548915863037, |
|
"eval_ranking_idealized_expo": 0.5279502868652344, |
|
"eval_ranking_simple": 0.5279502868652344, |
|
"eval_regularize": 0.7015214562416077, |
|
"eval_runtime": 317.4305, |
|
"eval_samples_per_second": 18.24, |
|
"eval_steps_per_second": 1.522, |
|
"step": 150 |
|
}, |
|
{ |
|
"dpo_loss": 0.5571741461753845, |
|
"dpo_wo_beta": -1.7987542152404785, |
|
"epoch": 0.2834199338686821, |
|
"grad_norm": 22.667783186241508, |
|
"learning_rate": 4.716981132075472e-06, |
|
"logits": -2.1097896099090576, |
|
"logps": -93.05426025390625, |
|
"loss": 0.5779, |
|
"objective": 0.5571741461753845, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5433333516120911, |
|
"ranking_simple": 0.6449999809265137, |
|
"regularize": 0.5571741461753845, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.2834199338686821, |
|
"eval_dpo_loss": 0.6816009283065796, |
|
"eval_dpo_wo_beta": -2.1417369842529297, |
|
"eval_logits": -2.071585178375244, |
|
"eval_logps": -106.49442291259766, |
|
"eval_loss": 0.6793810129165649, |
|
"eval_objective": 0.6816009283065796, |
|
"eval_ranking_idealized": 0.6045548915863037, |
|
"eval_ranking_idealized_expo": 0.5279502868652344, |
|
"eval_ranking_simple": 0.5507246255874634, |
|
"eval_regularize": 0.6816009283065796, |
|
"eval_runtime": 316.4091, |
|
"eval_samples_per_second": 18.299, |
|
"eval_steps_per_second": 1.527, |
|
"step": 200 |
|
}, |
|
{ |
|
"dpo_loss": 0.5780055522918701, |
|
"dpo_wo_beta": -2.330664873123169, |
|
"epoch": 0.35427491733585265, |
|
"grad_norm": 19.831646672236253, |
|
"learning_rate": 4.995082357614404e-06, |
|
"logits": -2.255446434020996, |
|
"logps": -94.9359130859375, |
|
"loss": 0.5709, |
|
"objective": 0.5780055522918701, |
|
"ranking_idealized": 0.5699999928474426, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.6083333492279053, |
|
"regularize": 0.5780055522918701, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.35427491733585265, |
|
"eval_dpo_loss": 0.6816768646240234, |
|
"eval_dpo_wo_beta": -2.267601728439331, |
|
"eval_logits": -2.246999740600586, |
|
"eval_logps": -87.73257446289062, |
|
"eval_loss": 0.6882591843605042, |
|
"eval_objective": 0.6816768646240234, |
|
"eval_ranking_idealized": 0.6045548915863037, |
|
"eval_ranking_idealized_expo": 0.5279502868652344, |
|
"eval_ranking_simple": 0.542443037033081, |
|
"eval_regularize": 0.6816768646240234, |
|
"eval_runtime": 316.123, |
|
"eval_samples_per_second": 18.316, |
|
"eval_steps_per_second": 1.528, |
|
"step": 250 |
|
}, |
|
{ |
|
"dpo_loss": 0.5427210330963135, |
|
"dpo_wo_beta": -2.1861753463745117, |
|
"epoch": 0.42512990080302315, |
|
"grad_norm": 27.500670384351913, |
|
"learning_rate": 4.973664984850435e-06, |
|
"logits": -2.136077404022217, |
|
"logps": -81.2485122680664, |
|
"loss": 0.5563, |
|
"objective": 0.5427210330963135, |
|
"ranking_idealized": 0.6299999952316284, |
|
"ranking_idealized_expo": 0.5649999976158142, |
|
"ranking_simple": 0.6583333611488342, |
|
"regularize": 0.5427210330963135, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.42512990080302315, |
|
"eval_dpo_loss": 0.6618562340736389, |
|
"eval_dpo_wo_beta": -2.3796472549438477, |
|
"eval_logits": -2.269749641418457, |
|
"eval_logps": -89.50890350341797, |
|
"eval_loss": 0.6810693740844727, |
|
"eval_objective": 0.6618562340736389, |
|
"eval_ranking_idealized": 0.6045548915863037, |
|
"eval_ranking_idealized_expo": 0.5279502868652344, |
|
"eval_ranking_simple": 0.5734989643096924, |
|
"eval_regularize": 0.6618562340736389, |
|
"eval_runtime": 319.8742, |
|
"eval_samples_per_second": 18.101, |
|
"eval_steps_per_second": 1.51, |
|
"step": 300 |
|
}, |
|
{ |
|
"dpo_loss": 0.5661785006523132, |
|
"dpo_wo_beta": -2.481991767883301, |
|
"epoch": 0.49598488427019366, |
|
"grad_norm": 19.802376166923445, |
|
"learning_rate": 4.9354031766005005e-06, |
|
"logits": -2.372645616531372, |
|
"logps": -87.0498046875, |
|
"loss": 0.5321, |
|
"objective": 0.5661785006523132, |
|
"ranking_idealized": 0.6016666889190674, |
|
"ranking_idealized_expo": 0.5233333110809326, |
|
"ranking_simple": 0.6499999761581421, |
|
"regularize": 0.5661785006523132, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.49598488427019366, |
|
"eval_dpo_loss": 0.6773233413696289, |
|
"eval_dpo_wo_beta": -2.6295416355133057, |
|
"eval_logits": -2.3682754039764404, |
|
"eval_logps": -99.09272766113281, |
|
"eval_loss": 0.6925813555717468, |
|
"eval_objective": 0.6773233413696289, |
|
"eval_ranking_idealized": 0.6045548915863037, |
|
"eval_ranking_idealized_expo": 0.5279502868652344, |
|
"eval_ranking_simple": 0.5734989643096924, |
|
"eval_regularize": 0.6773233413696289, |
|
"eval_runtime": 316.8112, |
|
"eval_samples_per_second": 18.276, |
|
"eval_steps_per_second": 1.525, |
|
"step": 350 |
|
}, |
|
{ |
|
"dpo_loss": 0.5010849237442017, |
|
"dpo_wo_beta": -2.259007453918457, |
|
"epoch": 0.5668398677373642, |
|
"grad_norm": 15.355707906669174, |
|
"learning_rate": 4.880557476860893e-06, |
|
"logits": -2.343498468399048, |
|
"logps": -100.16142272949219, |
|
"loss": 0.4963, |
|
"objective": 0.5010849237442017, |
|
"ranking_idealized": 0.6200000047683716, |
|
"ranking_idealized_expo": 0.5199999809265137, |
|
"ranking_simple": 0.6883333325386047, |
|
"regularize": 0.5010849237442017, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.5668398677373642, |
|
"eval_dpo_loss": 0.6835893392562866, |
|
"eval_dpo_wo_beta": -2.691314697265625, |
|
"eval_logits": -2.250782012939453, |
|
"eval_logps": -106.707275390625, |
|
"eval_loss": 0.6914249062538147, |
|
"eval_objective": 0.6835893392562866, |
|
"eval_ranking_idealized": 0.6045548915863037, |
|
"eval_ranking_idealized_expo": 0.5279502868652344, |
|
"eval_ranking_simple": 0.567287802696228, |
|
"eval_regularize": 0.6835893392562866, |
|
"eval_runtime": 320.5789, |
|
"eval_samples_per_second": 18.061, |
|
"eval_steps_per_second": 1.507, |
|
"step": 400 |
|
}, |
|
{ |
|
"dpo_loss": 0.47148290276527405, |
|
"dpo_wo_beta": -2.062403678894043, |
|
"epoch": 0.6376948512045347, |
|
"grad_norm": 19.132752913050712, |
|
"learning_rate": 4.80950135772933e-06, |
|
"logits": -2.2268483638763428, |
|
"logps": -102.33979034423828, |
|
"loss": 0.4745, |
|
"objective": 0.47148290276527405, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.4933333396911621, |
|
"ranking_simple": 0.675000011920929, |
|
"regularize": 0.47148290276527405, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.6376948512045347, |
|
"eval_dpo_loss": 0.681454062461853, |
|
"eval_dpo_wo_beta": -2.673811912536621, |
|
"eval_logits": -2.2347095012664795, |
|
"eval_logps": -105.86691284179688, |
|
"eval_loss": 0.6937749981880188, |
|
"eval_objective": 0.681454062461853, |
|
"eval_ranking_idealized": 0.6045548915863037, |
|
"eval_ranking_idealized_expo": 0.5279502868652344, |
|
"eval_ranking_simple": 0.5631470084190369, |
|
"eval_regularize": 0.681454062461853, |
|
"eval_runtime": 319.8507, |
|
"eval_samples_per_second": 18.102, |
|
"eval_steps_per_second": 1.51, |
|
"step": 450 |
|
}, |
|
{ |
|
"dpo_loss": 0.4877893626689911, |
|
"dpo_wo_beta": -2.4107022285461426, |
|
"epoch": 0.7085498346717053, |
|
"grad_norm": 18.950528232263615, |
|
"learning_rate": 4.72271867624463e-06, |
|
"logits": -2.197690963745117, |
|
"logps": -101.995361328125, |
|
"loss": 0.4867, |
|
"objective": 0.4877893626689911, |
|
"ranking_idealized": 0.5933333039283752, |
|
"ranking_idealized_expo": 0.5216666460037231, |
|
"ranking_simple": 0.699999988079071, |
|
"regularize": 0.4877893626689911, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.7085498346717053, |
|
"eval_dpo_loss": 0.6994954347610474, |
|
"eval_dpo_wo_beta": -2.7257001399993896, |
|
"eval_logits": -2.2181758880615234, |
|
"eval_logps": -105.1847915649414, |
|
"eval_loss": 0.7040360569953918, |
|
"eval_objective": 0.6994954347610474, |
|
"eval_ranking_idealized": 0.6045548915863037, |
|
"eval_ranking_idealized_expo": 0.5279502868652344, |
|
"eval_ranking_simple": 0.5507246255874634, |
|
"eval_regularize": 0.6994954347610474, |
|
"eval_runtime": 315.9927, |
|
"eval_samples_per_second": 18.323, |
|
"eval_steps_per_second": 1.529, |
|
"step": 500 |
|
}, |
|
{ |
|
"dpo_loss": 0.4837046265602112, |
|
"dpo_wo_beta": -2.136967897415161, |
|
"epoch": 0.7794048181388757, |
|
"grad_norm": 17.007084301353213, |
|
"learning_rate": 4.620800379559508e-06, |
|
"logits": -2.328810214996338, |
|
"logps": -102.86935424804688, |
|
"loss": 0.4582, |
|
"objective": 0.4837046265602112, |
|
"ranking_idealized": 0.5716666579246521, |
|
"ranking_idealized_expo": 0.47833332419395447, |
|
"ranking_simple": 0.6866666674613953, |
|
"regularize": 0.4837046265602112, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.7794048181388757, |
|
"eval_dpo_loss": 0.7027432918548584, |
|
"eval_dpo_wo_beta": -3.1023459434509277, |
|
"eval_logits": -2.3854899406433105, |
|
"eval_logps": -102.664306640625, |
|
"eval_loss": 0.6995241045951843, |
|
"eval_objective": 0.7027432918548584, |
|
"eval_ranking_idealized": 0.6045548915863037, |
|
"eval_ranking_idealized_expo": 0.5279502868652344, |
|
"eval_ranking_simple": 0.5683229565620422, |
|
"eval_regularize": 0.7027432918548584, |
|
"eval_runtime": 317.401, |
|
"eval_samples_per_second": 18.242, |
|
"eval_steps_per_second": 1.522, |
|
"step": 550 |
|
}, |
|
{ |
|
"dpo_loss": 0.42498964071273804, |
|
"dpo_wo_beta": -1.8326289653778076, |
|
"epoch": 0.8502598016060463, |
|
"grad_norm": 15.283086339392508, |
|
"learning_rate": 4.504440480882651e-06, |
|
"logits": -2.4975786209106445, |
|
"logps": -99.132568359375, |
|
"loss": 0.4339, |
|
"objective": 0.42498964071273804, |
|
"ranking_idealized": 0.5350000262260437, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.7083333134651184, |
|
"regularize": 0.42498964071273804, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.8502598016060463, |
|
"eval_dpo_loss": 0.7050178647041321, |
|
"eval_dpo_wo_beta": -3.216639995574951, |
|
"eval_logits": -2.445580005645752, |
|
"eval_logps": -103.54557037353516, |
|
"eval_loss": 0.6964531540870667, |
|
"eval_objective": 0.7050178647041321, |
|
"eval_ranking_idealized": 0.6045548915863037, |
|
"eval_ranking_idealized_expo": 0.5279502868652344, |
|
"eval_ranking_simple": 0.5734989643096924, |
|
"eval_regularize": 0.7050178647041321, |
|
"eval_runtime": 317.5725, |
|
"eval_samples_per_second": 18.232, |
|
"eval_steps_per_second": 1.521, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.8502598016060463, |
|
"step": 600, |
|
"total_flos": 0.0, |
|
"train_loss": 0.15444423039754232, |
|
"train_runtime": 3628.8607, |
|
"train_samples_per_second": 41.998, |
|
"train_steps_per_second": 0.583 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 2115, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 4, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|