{ "best_metric": 6.651296138763428, "best_model_checkpoint": "./qwen2.5-0.5b/qwen2.5-0.5b-expo-DPO-ES-10/checkpoint-550", "epoch": 2.2673594709494567, "eval_steps": 50, "global_step": 800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "dpo_loss": 0.6931471824645996, "epoch": 0.002834199338686821, "grad_norm": 1844.2532039401294, "learning_rate": 2.840909090909091e-08, "logits": -1.359458565711975, "logps": -84.69721221923828, "loss": 0.6931, "objective": 0.6931471824645996, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.5833333134651184, "regularize": 0.6931471824645996, "step": 1, "wo_beta": 5.271125316619873 }, { "dpo_loss": 1.9794068336486816, "epoch": 0.14170996693434104, "grad_norm": 1879.680280823908, "learning_rate": 1.4204545454545458e-06, "logits": -1.4480701684951782, "logps": -84.59326934814453, "loss": 2.0094, "objective": 1.9794068336486816, "ranking_idealized": 0.5225340127944946, "ranking_idealized_expo": 0.5216836929321289, "ranking_simple": 0.5250850319862366, "regularize": 1.9794068336486816, "step": 50, "wo_beta": 7.08821439743042 }, { "epoch": 0.14170996693434104, "eval_dpo_loss": 3.0980334281921387, "eval_logits": -1.4591896533966064, "eval_logps": -90.62417602539062, "eval_loss": 3.106841564178467, "eval_objective": 3.0980334281921387, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5258799195289612, "eval_regularize": 3.0980334281921387, "eval_runtime": 308.1993, "eval_samples_per_second": 18.787, "eval_steps_per_second": 1.567, "eval_wo_beta": 7.7179274559021, "step": 50 }, { "dpo_loss": 6.002392768859863, "epoch": 0.2834199338686821, "grad_norm": 1776.9003571892035, "learning_rate": 2.8409090909090916e-06, "logits": -1.4501550197601318, "logps": -81.94203186035156, "loss": 5.9165, "objective": 6.002392768859863, "ranking_idealized": 0.5141666531562805, "ranking_idealized_expo": 0.5137500166893005, "ranking_simple": 0.5425000190734863, "regularize": 6.002392768859863, "step": 100, "wo_beta": 6.498049736022949 }, { "epoch": 0.2834199338686821, "eval_dpo_loss": 7.139862060546875, "eval_logits": -1.4642183780670166, "eval_logps": -82.83346557617188, "eval_loss": 7.14874792098999, "eval_objective": 7.139862060546875, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5300207138061523, "eval_regularize": 7.139862060546875, "eval_runtime": 309.8222, "eval_samples_per_second": 18.688, "eval_steps_per_second": 1.559, "eval_wo_beta": 7.4498443603515625, "step": 100 }, { "dpo_loss": 10.150534629821777, "epoch": 0.42512990080302315, "grad_norm": 1372.4824531102197, "learning_rate": 4.2613636363636365e-06, "logits": -1.4184441566467285, "logps": -73.7444076538086, "loss": 9.9617, "objective": 10.150534629821777, "ranking_idealized": 0.5287500023841858, "ranking_idealized_expo": 0.527916669845581, "ranking_simple": 0.5641666650772095, "regularize": 10.150534629821777, "step": 150, "wo_beta": 6.36607027053833 }, { "epoch": 0.42512990080302315, "eval_dpo_loss": 11.353630065917969, "eval_logits": -1.3437175750732422, "eval_logps": -83.07452392578125, "eval_loss": 11.899770736694336, "eval_objective": 11.353630065917969, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5305383205413818, "eval_regularize": 11.353630065917969, "eval_runtime": 307.9181, "eval_samples_per_second": 18.804, "eval_steps_per_second": 1.569, "eval_wo_beta": 7.260918140411377, "step": 150 }, { "dpo_loss": 12.522791862487793, "epoch": 0.5668398677373642, "grad_norm": 1378.4122138720427, "learning_rate": 4.997168347957521e-06, "logits": -1.3766180276870728, "logps": -77.5620346069336, "loss": 12.4724, "objective": 12.522791862487793, "ranking_idealized": 0.51583331823349, "ranking_idealized_expo": 0.51541668176651, "ranking_simple": 0.5550000071525574, "regularize": 12.522791862487793, "step": 200, "wo_beta": 6.352013111114502 }, { "epoch": 0.5668398677373642, "eval_dpo_loss": 16.061721801757812, "eval_logits": -1.387966513633728, "eval_logps": -79.93595886230469, "eval_loss": 17.09868049621582, "eval_objective": 16.061721801757812, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5300207138061523, "eval_regularize": 16.061721801757812, "eval_runtime": 307.8039, "eval_samples_per_second": 18.811, "eval_steps_per_second": 1.569, "eval_wo_beta": 7.228997230529785, "step": 200 }, { "dpo_loss": 13.093570709228516, "epoch": 0.7085498346717053, "grad_norm": 1274.583157442186, "learning_rate": 4.973122855144066e-06, "logits": -1.294631004333496, "logps": -76.13822174072266, "loss": 13.2936, "objective": 13.093570709228516, "ranking_idealized": 0.5166666507720947, "ranking_idealized_expo": 0.5162500143051147, "ranking_simple": 0.5824999809265137, "regularize": 13.093570709228516, "step": 250, "wo_beta": 6.279551982879639 }, { "epoch": 0.7085498346717053, "eval_dpo_loss": 17.797138214111328, "eval_logits": -1.3640648126602173, "eval_logps": -77.31498718261719, "eval_loss": 18.5308780670166, "eval_objective": 17.797138214111328, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5341615080833435, "eval_regularize": 17.797138214111328, "eval_runtime": 313.1016, "eval_samples_per_second": 18.492, "eval_steps_per_second": 1.543, "eval_wo_beta": 7.207766532897949, "step": 250 }, { "dpo_loss": 10.712362289428711, "epoch": 0.8502598016060463, "grad_norm": 982.3462926804266, "learning_rate": 4.924776641419513e-06, "logits": -1.090299129486084, "logps": -70.98873138427734, "loss": 11.5204, "objective": 10.712362289428711, "ranking_idealized": 0.4962500035762787, "ranking_idealized_expo": 0.4950000047683716, "ranking_simple": 0.5679166913032532, "regularize": 10.712362289428711, "step": 300, "wo_beta": 6.134185314178467 }, { "epoch": 0.8502598016060463, "eval_dpo_loss": 18.701662063598633, "eval_logits": -0.9941285848617554, "eval_logps": -76.97978210449219, "eval_loss": 19.434432983398438, "eval_objective": 18.701662063598633, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5357142686843872, "eval_regularize": 18.701662063598633, "eval_runtime": 307.6602, "eval_samples_per_second": 18.819, "eval_steps_per_second": 1.57, "eval_wo_beta": 7.013552188873291, "step": 300 }, { "dpo_loss": 10.92597484588623, "epoch": 0.9919697685403873, "grad_norm": 938.8397527375307, "learning_rate": 4.8526047530778175e-06, "logits": -0.9006206393241882, "logps": -72.82616424560547, "loss": 11.3717, "objective": 10.92597484588623, "ranking_idealized": 0.5262500047683716, "ranking_idealized_expo": 0.5254166722297668, "ranking_simple": 0.6033333539962769, "regularize": 10.92597484588623, "step": 350, "wo_beta": 5.362515449523926 }, { "epoch": 0.9919697685403873, "eval_dpo_loss": 19.039833068847656, "eval_logits": -1.0426429510116577, "eval_logps": -76.1622543334961, "eval_loss": 20.39594841003418, "eval_objective": 19.039833068847656, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5408902764320374, "eval_regularize": 19.039833068847656, "eval_runtime": 307.6502, "eval_samples_per_second": 18.82, "eval_steps_per_second": 1.57, "eval_wo_beta": 7.0260910987854, "step": 350 }, { "dpo_loss": 6.708657264709473, "epoch": 1.1336797354747283, "grad_norm": 902.5480798954853, "learning_rate": 4.757316345716554e-06, "logits": -0.7562137246131897, "logps": -70.6362075805664, "loss": 7.0971, "objective": 6.708657264709473, "ranking_idealized": 0.5333333611488342, "ranking_idealized_expo": 0.5320833325386047, "ranking_simple": 0.6329166889190674, "regularize": 6.708657264709473, "step": 400, "wo_beta": 4.74643087387085 }, { "epoch": 1.1336797354747283, "eval_dpo_loss": 21.69021987915039, "eval_logits": -0.6236207485198975, "eval_logps": -76.14582824707031, "eval_loss": 21.927854537963867, "eval_objective": 21.69021987915039, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5388198494911194, "eval_regularize": 21.69021987915039, "eval_runtime": 308.4451, "eval_samples_per_second": 18.772, "eval_steps_per_second": 1.566, "eval_wo_beta": 7.122740745544434, "step": 400 }, { "dpo_loss": 8.22778606414795, "epoch": 1.2753897024090695, "grad_norm": 701.1530681925066, "learning_rate": 4.639847716126855e-06, "logits": -0.6124467849731445, "logps": -71.35508728027344, "loss": 7.5725, "objective": 8.22778606414795, "ranking_idealized": 0.5195833444595337, "ranking_idealized_expo": 0.5191666483879089, "ranking_simple": 0.6312500238418579, "regularize": 8.22778606414795, "step": 450, "wo_beta": 5.267808437347412 }, { "epoch": 1.2753897024090695, "eval_dpo_loss": 20.385303497314453, "eval_logits": -0.8352137207984924, "eval_logps": -76.3924331665039, "eval_loss": 20.948013305664062, "eval_objective": 20.385303497314453, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5372670888900757, "eval_regularize": 20.385303497314453, "eval_runtime": 307.7791, "eval_samples_per_second": 18.812, "eval_steps_per_second": 1.569, "eval_wo_beta": 6.8499908447265625, "step": 450 }, { "dpo_loss": 7.319465160369873, "epoch": 1.4170996693434104, "grad_norm": 1027.4391137177338, "learning_rate": 4.501353102310901e-06, "logits": -0.7022644877433777, "logps": -74.45861053466797, "loss": 7.6466, "objective": 7.319465160369873, "ranking_idealized": 0.49791666865348816, "ranking_idealized_expo": 0.4970833361148834, "ranking_simple": 0.6225000023841858, "regularize": 7.319465160369873, "step": 500, "wo_beta": 5.078485488891602 }, { "epoch": 1.4170996693434104, "eval_dpo_loss": 20.265100479125977, "eval_logits": -0.7483307123184204, "eval_logps": -80.78058624267578, "eval_loss": 20.982105255126953, "eval_objective": 20.265100479125977, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.532608687877655, "eval_regularize": 20.265100479125977, "eval_runtime": 307.6224, "eval_samples_per_second": 18.822, "eval_steps_per_second": 1.57, "eval_wo_beta": 6.882425785064697, "step": 500 }, { "dpo_loss": 6.880460739135742, "epoch": 1.5588096362777515, "grad_norm": 856.3100755197052, "learning_rate": 4.34319334202531e-06, "logits": -0.6065574884414673, "logps": -75.99507141113281, "loss": 6.9565, "objective": 6.880460739135742, "ranking_idealized": 0.5112500190734863, "ranking_idealized_expo": 0.5104166865348816, "ranking_simple": 0.6445833444595337, "regularize": 6.880460739135742, "step": 550, "wo_beta": 4.776731967926025 }, { "epoch": 1.5588096362777515, "eval_dpo_loss": 20.566144943237305, "eval_logits": -0.6148493885993958, "eval_logps": -80.20514678955078, "eval_loss": 21.350601196289062, "eval_objective": 20.566144943237305, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5383023023605347, "eval_regularize": 20.566144943237305, "eval_runtime": 311.6281, "eval_samples_per_second": 18.58, "eval_steps_per_second": 1.55, "eval_wo_beta": 6.651296138763428, "step": 550 }, { "dpo_loss": 6.715544700622559, "epoch": 1.7005196032120926, "grad_norm": 710.120603889053, "learning_rate": 4.16692250129073e-06, "logits": -0.4755525290966034, "logps": -75.72002410888672, "loss": 6.7183, "objective": 6.715544700622559, "ranking_idealized": 0.51541668176651, "ranking_idealized_expo": 0.5149999856948853, "ranking_simple": 0.637499988079071, "regularize": 6.715544700622559, "step": 600, "wo_beta": 4.843540668487549 }, { "epoch": 1.7005196032120926, "eval_dpo_loss": 20.002656936645508, "eval_logits": -0.606741189956665, "eval_logps": -78.53438568115234, "eval_loss": 21.126510620117188, "eval_objective": 20.002656936645508, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5367494821548462, "eval_regularize": 20.002656936645508, "eval_runtime": 307.6088, "eval_samples_per_second": 18.823, "eval_steps_per_second": 1.57, "eval_wo_beta": 6.676760673522949, "step": 600 }, { "dpo_loss": 7.323308944702148, "epoch": 1.8422295701464337, "grad_norm": 790.4876193704064, "learning_rate": 3.974272604254906e-06, "logits": -0.5003318190574646, "logps": -74.34846496582031, "loss": 6.9931, "objective": 7.323308944702148, "ranking_idealized": 0.5291666388511658, "ranking_idealized_expo": 0.527916669845581, "ranking_simple": 0.64083331823349, "regularize": 7.323308944702148, "step": 650, "wo_beta": 5.1112799644470215 }, { "epoch": 1.8422295701464337, "eval_dpo_loss": 21.445514678955078, "eval_logits": -0.5872498154640198, "eval_logps": -77.65087127685547, "eval_loss": 22.20830535888672, "eval_objective": 21.445514678955078, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5383023023605347, "eval_regularize": 21.445514678955078, "eval_runtime": 307.7733, "eval_samples_per_second": 18.813, "eval_steps_per_second": 1.569, "eval_wo_beta": 6.819047451019287, "step": 650 }, { "dpo_loss": 6.620248317718506, "epoch": 1.9839395370807746, "grad_norm": 688.8652687295252, "learning_rate": 3.767136614452458e-06, "logits": -0.40135031938552856, "logps": -73.09497833251953, "loss": 6.1685, "objective": 6.620248317718506, "ranking_idealized": 0.5129166841506958, "ranking_idealized_expo": 0.5108333230018616, "ranking_simple": 0.6358333230018616, "regularize": 6.620248317718506, "step": 700, "wo_beta": 5.023129463195801 }, { "epoch": 1.9839395370807746, "eval_dpo_loss": 21.55119514465332, "eval_logits": -0.5436362028121948, "eval_logps": -77.14934539794922, "eval_loss": 22.36069679260254, "eval_objective": 21.55119514465332, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5403726696968079, "eval_regularize": 21.55119514465332, "eval_runtime": 310.6834, "eval_samples_per_second": 18.636, "eval_steps_per_second": 1.555, "eval_wo_beta": 6.729911804199219, "step": 700 }, { "dpo_loss": 3.5603878498077393, "epoch": 2.1256495040151155, "grad_norm": 579.769858214478, "learning_rate": 3.547549834686222e-06, "logits": -0.5370141863822937, "logps": -73.9045639038086, "loss": 3.4811, "objective": 3.5603878498077393, "ranking_idealized": 0.5129166841506958, "ranking_idealized_expo": 0.5112500190734863, "ranking_simple": 0.6691666841506958, "regularize": 3.5603878498077393, "step": 750, "wo_beta": 4.534417152404785 }, { "epoch": 2.1256495040151155, "eval_dpo_loss": 21.137874603271484, "eval_logits": -0.7312601804733276, "eval_logps": -78.93118286132812, "eval_loss": 21.834890365600586, "eval_objective": 21.137874603271484, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.542443037033081, "eval_regularize": 21.137874603271484, "eval_runtime": 307.6064, "eval_samples_per_second": 18.823, "eval_steps_per_second": 1.57, "eval_wo_beta": 6.821295261383057, "step": 750 }, { "dpo_loss": 3.4036636352539062, "epoch": 2.2673594709494567, "grad_norm": 573.1633009551587, "learning_rate": 3.3176699082935546e-06, "logits": -0.5852146148681641, "logps": -75.8536376953125, "loss": 3.3995, "objective": 3.4036636352539062, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.6625000238418579, "regularize": 3.4036636352539062, "step": 800, "wo_beta": 4.245257377624512 }, { "epoch": 2.2673594709494567, "eval_dpo_loss": 20.453168869018555, "eval_logits": -0.5475257635116577, "eval_logps": -79.71145629882812, "eval_loss": 21.353944778442383, "eval_objective": 20.453168869018555, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5362318754196167, "eval_regularize": 20.453168869018555, "eval_runtime": 307.5814, "eval_samples_per_second": 18.824, "eval_steps_per_second": 1.57, "eval_wo_beta": 6.686735153198242, "step": 800 }, { "epoch": 2.2673594709494567, "step": 800, "total_flos": 0.0, "train_loss": 7.659533626437187, "train_runtime": 21973.9049, "train_samples_per_second": 11.56, "train_steps_per_second": 0.08 } ], "logging_steps": 50, "max_steps": 1760, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }