{ "best_metric": 0.5734989643096924, "best_model_checkpoint": "./qwen2.5-0.5b/qwen2.5-0.5b-expo-DPO-ES-TRY/checkpoint-300", "epoch": 0.8502598016060463, "eval_steps": 50, "global_step": 600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "dpo_loss": 0.6931471824645996, "dpo_wo_beta": -0.6931471824645996, "epoch": 0.0014170996693434106, "grad_norm": 25.66138500619404, "learning_rate": 2.358490566037736e-08, "logits": -1.7146095037460327, "logps": -79.01810455322266, "loss": 0.6931, "objective": 0.6931471824645996, "ranking_idealized": 0.3333333432674408, "ranking_idealized_expo": 0.3333333432674408, "ranking_simple": 0.3333333432674408, "regularize": 0.6931471824645996, "step": 1 }, { "dpo_loss": 0.6873584985733032, "dpo_wo_beta": -0.8693115711212158, "epoch": 0.07085498346717052, "grad_norm": 25.93169363980804, "learning_rate": 1.179245283018868e-06, "logits": -1.890508770942688, "logps": -82.390869140625, "loss": 0.6857, "objective": 0.6873584985733032, "ranking_idealized": 0.6258503198623657, "ranking_idealized_expo": 0.5408163070678711, "ranking_simple": 0.5408163070678711, "regularize": 0.6873584985733032, "step": 50 }, { "epoch": 0.07085498346717052, "eval_dpo_loss": 0.6927401423454285, "eval_dpo_wo_beta": -1.280670404434204, "eval_logits": -1.960647463798523, "eval_logps": -88.98406219482422, "eval_loss": 0.6913570761680603, "eval_objective": 0.6927401423454285, "eval_ranking_idealized": 0.6045548915863037, "eval_ranking_idealized_expo": 0.5279502868652344, "eval_ranking_simple": 0.5362318754196167, "eval_regularize": 0.6927401423454285, "eval_runtime": 318.1168, "eval_samples_per_second": 18.201, "eval_steps_per_second": 1.518, "step": 50 }, { "dpo_loss": 0.6656978130340576, "dpo_wo_beta": -1.517911434173584, "epoch": 0.14170996693434104, "grad_norm": 29.18536320021953, "learning_rate": 2.358490566037736e-06, "logits": -2.088792085647583, "logps": -82.19146728515625, "loss": 0.6524, "objective": 0.6656978130340576, "ranking_idealized": 0.5950000286102295, "ranking_idealized_expo": 0.5149999856948853, "ranking_simple": 0.5233333110809326, "regularize": 0.6656978130340576, "step": 100 }, { "epoch": 0.14170996693434104, "eval_dpo_loss": 0.7010491490364075, "eval_dpo_wo_beta": -1.8911339044570923, "eval_logits": -2.0578620433807373, "eval_logps": -98.63578033447266, "eval_loss": 0.6921781897544861, "eval_objective": 0.7010491490364075, "eval_ranking_idealized": 0.6045548915863037, "eval_ranking_idealized_expo": 0.5279502868652344, "eval_ranking_simple": 0.5269151329994202, "eval_regularize": 0.7010491490364075, "eval_runtime": 317.6312, "eval_samples_per_second": 18.229, "eval_steps_per_second": 1.521, "step": 100 }, { "dpo_loss": 0.6302288770675659, "dpo_wo_beta": -1.9832934141159058, "epoch": 0.21256495040151158, "grad_norm": 31.92092339525771, "learning_rate": 3.5377358490566038e-06, "logits": -2.047361135482788, "logps": -90.97752380371094, "loss": 0.6123, "objective": 0.6302288770675659, "ranking_idealized": 0.6050000190734863, "ranking_idealized_expo": 0.528333306312561, "ranking_simple": 0.5799999833106995, "regularize": 0.6302288770675659, "step": 150 }, { "epoch": 0.21256495040151158, "eval_dpo_loss": 0.7015214562416077, "eval_dpo_wo_beta": -2.1165764331817627, "eval_logits": -1.9032589197158813, "eval_logps": -102.8927001953125, "eval_loss": 0.6967350840568542, "eval_objective": 0.7015214562416077, "eval_ranking_idealized": 0.6045548915863037, "eval_ranking_idealized_expo": 0.5279502868652344, "eval_ranking_simple": 0.5279502868652344, "eval_regularize": 0.7015214562416077, "eval_runtime": 317.4305, "eval_samples_per_second": 18.24, "eval_steps_per_second": 1.522, "step": 150 }, { "dpo_loss": 0.5571741461753845, "dpo_wo_beta": -1.7987542152404785, "epoch": 0.2834199338686821, "grad_norm": 22.667783186241508, "learning_rate": 4.716981132075472e-06, "logits": -2.1097896099090576, "logps": -93.05426025390625, "loss": 0.5779, "objective": 0.5571741461753845, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.5433333516120911, "ranking_simple": 0.6449999809265137, "regularize": 0.5571741461753845, "step": 200 }, { "epoch": 0.2834199338686821, "eval_dpo_loss": 0.6816009283065796, "eval_dpo_wo_beta": -2.1417369842529297, "eval_logits": -2.071585178375244, "eval_logps": -106.49442291259766, "eval_loss": 0.6793810129165649, "eval_objective": 0.6816009283065796, "eval_ranking_idealized": 0.6045548915863037, "eval_ranking_idealized_expo": 0.5279502868652344, "eval_ranking_simple": 0.5507246255874634, "eval_regularize": 0.6816009283065796, "eval_runtime": 316.4091, "eval_samples_per_second": 18.299, "eval_steps_per_second": 1.527, "step": 200 }, { "dpo_loss": 0.5780055522918701, "dpo_wo_beta": -2.330664873123169, "epoch": 0.35427491733585265, "grad_norm": 19.831646672236253, "learning_rate": 4.995082357614404e-06, "logits": -2.255446434020996, "logps": -94.9359130859375, "loss": 0.5709, "objective": 0.5780055522918701, "ranking_idealized": 0.5699999928474426, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.6083333492279053, "regularize": 0.5780055522918701, "step": 250 }, { "epoch": 0.35427491733585265, "eval_dpo_loss": 0.6816768646240234, "eval_dpo_wo_beta": -2.267601728439331, "eval_logits": -2.246999740600586, "eval_logps": -87.73257446289062, "eval_loss": 0.6882591843605042, "eval_objective": 0.6816768646240234, "eval_ranking_idealized": 0.6045548915863037, "eval_ranking_idealized_expo": 0.5279502868652344, "eval_ranking_simple": 0.542443037033081, "eval_regularize": 0.6816768646240234, "eval_runtime": 316.123, "eval_samples_per_second": 18.316, "eval_steps_per_second": 1.528, "step": 250 }, { "dpo_loss": 0.5427210330963135, "dpo_wo_beta": -2.1861753463745117, "epoch": 0.42512990080302315, "grad_norm": 27.500670384351913, "learning_rate": 4.973664984850435e-06, "logits": -2.136077404022217, "logps": -81.2485122680664, "loss": 0.5563, "objective": 0.5427210330963135, "ranking_idealized": 0.6299999952316284, "ranking_idealized_expo": 0.5649999976158142, "ranking_simple": 0.6583333611488342, "regularize": 0.5427210330963135, "step": 300 }, { "epoch": 0.42512990080302315, "eval_dpo_loss": 0.6618562340736389, "eval_dpo_wo_beta": -2.3796472549438477, "eval_logits": -2.269749641418457, "eval_logps": -89.50890350341797, "eval_loss": 0.6810693740844727, "eval_objective": 0.6618562340736389, "eval_ranking_idealized": 0.6045548915863037, "eval_ranking_idealized_expo": 0.5279502868652344, "eval_ranking_simple": 0.5734989643096924, "eval_regularize": 0.6618562340736389, "eval_runtime": 319.8742, "eval_samples_per_second": 18.101, "eval_steps_per_second": 1.51, "step": 300 }, { "dpo_loss": 0.5661785006523132, "dpo_wo_beta": -2.481991767883301, "epoch": 0.49598488427019366, "grad_norm": 19.802376166923445, "learning_rate": 4.9354031766005005e-06, "logits": -2.372645616531372, "logps": -87.0498046875, "loss": 0.5321, "objective": 0.5661785006523132, "ranking_idealized": 0.6016666889190674, "ranking_idealized_expo": 0.5233333110809326, "ranking_simple": 0.6499999761581421, "regularize": 0.5661785006523132, "step": 350 }, { "epoch": 0.49598488427019366, "eval_dpo_loss": 0.6773233413696289, "eval_dpo_wo_beta": -2.6295416355133057, "eval_logits": -2.3682754039764404, "eval_logps": -99.09272766113281, "eval_loss": 0.6925813555717468, "eval_objective": 0.6773233413696289, "eval_ranking_idealized": 0.6045548915863037, "eval_ranking_idealized_expo": 0.5279502868652344, "eval_ranking_simple": 0.5734989643096924, "eval_regularize": 0.6773233413696289, "eval_runtime": 316.8112, "eval_samples_per_second": 18.276, "eval_steps_per_second": 1.525, "step": 350 }, { "dpo_loss": 0.5010849237442017, "dpo_wo_beta": -2.259007453918457, "epoch": 0.5668398677373642, "grad_norm": 15.355707906669174, "learning_rate": 4.880557476860893e-06, "logits": -2.343498468399048, "logps": -100.16142272949219, "loss": 0.4963, "objective": 0.5010849237442017, "ranking_idealized": 0.6200000047683716, "ranking_idealized_expo": 0.5199999809265137, "ranking_simple": 0.6883333325386047, "regularize": 0.5010849237442017, "step": 400 }, { "epoch": 0.5668398677373642, "eval_dpo_loss": 0.6835893392562866, "eval_dpo_wo_beta": -2.691314697265625, "eval_logits": -2.250782012939453, "eval_logps": -106.707275390625, "eval_loss": 0.6914249062538147, "eval_objective": 0.6835893392562866, "eval_ranking_idealized": 0.6045548915863037, "eval_ranking_idealized_expo": 0.5279502868652344, "eval_ranking_simple": 0.567287802696228, "eval_regularize": 0.6835893392562866, "eval_runtime": 320.5789, "eval_samples_per_second": 18.061, "eval_steps_per_second": 1.507, "step": 400 }, { "dpo_loss": 0.47148290276527405, "dpo_wo_beta": -2.062403678894043, "epoch": 0.6376948512045347, "grad_norm": 19.132752913050712, "learning_rate": 4.80950135772933e-06, "logits": -2.2268483638763428, "logps": -102.33979034423828, "loss": 0.4745, "objective": 0.47148290276527405, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.4933333396911621, "ranking_simple": 0.675000011920929, "regularize": 0.47148290276527405, "step": 450 }, { "epoch": 0.6376948512045347, "eval_dpo_loss": 0.681454062461853, "eval_dpo_wo_beta": -2.673811912536621, "eval_logits": -2.2347095012664795, "eval_logps": -105.86691284179688, "eval_loss": 0.6937749981880188, "eval_objective": 0.681454062461853, "eval_ranking_idealized": 0.6045548915863037, "eval_ranking_idealized_expo": 0.5279502868652344, "eval_ranking_simple": 0.5631470084190369, "eval_regularize": 0.681454062461853, "eval_runtime": 319.8507, "eval_samples_per_second": 18.102, "eval_steps_per_second": 1.51, "step": 450 }, { "dpo_loss": 0.4877893626689911, "dpo_wo_beta": -2.4107022285461426, "epoch": 0.7085498346717053, "grad_norm": 18.950528232263615, "learning_rate": 4.72271867624463e-06, "logits": -2.197690963745117, "logps": -101.995361328125, "loss": 0.4867, "objective": 0.4877893626689911, "ranking_idealized": 0.5933333039283752, "ranking_idealized_expo": 0.5216666460037231, "ranking_simple": 0.699999988079071, "regularize": 0.4877893626689911, "step": 500 }, { "epoch": 0.7085498346717053, "eval_dpo_loss": 0.6994954347610474, "eval_dpo_wo_beta": -2.7257001399993896, "eval_logits": -2.2181758880615234, "eval_logps": -105.1847915649414, "eval_loss": 0.7040360569953918, "eval_objective": 0.6994954347610474, "eval_ranking_idealized": 0.6045548915863037, "eval_ranking_idealized_expo": 0.5279502868652344, "eval_ranking_simple": 0.5507246255874634, "eval_regularize": 0.6994954347610474, "eval_runtime": 315.9927, "eval_samples_per_second": 18.323, "eval_steps_per_second": 1.529, "step": 500 }, { "dpo_loss": 0.4837046265602112, "dpo_wo_beta": -2.136967897415161, "epoch": 0.7794048181388757, "grad_norm": 17.007084301353213, "learning_rate": 4.620800379559508e-06, "logits": -2.328810214996338, "logps": -102.86935424804688, "loss": 0.4582, "objective": 0.4837046265602112, "ranking_idealized": 0.5716666579246521, "ranking_idealized_expo": 0.47833332419395447, "ranking_simple": 0.6866666674613953, "regularize": 0.4837046265602112, "step": 550 }, { "epoch": 0.7794048181388757, "eval_dpo_loss": 0.7027432918548584, "eval_dpo_wo_beta": -3.1023459434509277, "eval_logits": -2.3854899406433105, "eval_logps": -102.664306640625, "eval_loss": 0.6995241045951843, "eval_objective": 0.7027432918548584, "eval_ranking_idealized": 0.6045548915863037, "eval_ranking_idealized_expo": 0.5279502868652344, "eval_ranking_simple": 0.5683229565620422, "eval_regularize": 0.7027432918548584, "eval_runtime": 317.401, "eval_samples_per_second": 18.242, "eval_steps_per_second": 1.522, "step": 550 }, { "dpo_loss": 0.42498964071273804, "dpo_wo_beta": -1.8326289653778076, "epoch": 0.8502598016060463, "grad_norm": 15.283086339392508, "learning_rate": 4.504440480882651e-06, "logits": -2.4975786209106445, "logps": -99.132568359375, "loss": 0.4339, "objective": 0.42498964071273804, "ranking_idealized": 0.5350000262260437, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.7083333134651184, "regularize": 0.42498964071273804, "step": 600 }, { "epoch": 0.8502598016060463, "eval_dpo_loss": 0.7050178647041321, "eval_dpo_wo_beta": -3.216639995574951, "eval_logits": -2.445580005645752, "eval_logps": -103.54557037353516, "eval_loss": 0.6964531540870667, "eval_objective": 0.7050178647041321, "eval_ranking_idealized": 0.6045548915863037, "eval_ranking_idealized_expo": 0.5279502868652344, "eval_ranking_simple": 0.5734989643096924, "eval_regularize": 0.7050178647041321, "eval_runtime": 317.5725, "eval_samples_per_second": 18.232, "eval_steps_per_second": 1.521, "step": 600 }, { "epoch": 0.8502598016060463, "step": 600, "total_flos": 0.0, "train_loss": 0.15444423039754232, "train_runtime": 3628.8607, "train_samples_per_second": 41.998, "train_steps_per_second": 0.583 } ], "logging_steps": 50, "max_steps": 2115, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 4, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }