{ "best_metric": 6.621464252471924, "best_model_checkpoint": "./qwen2.5-0.5b/qwen2.5-0.5b-expo-DPO-ES-100/checkpoint-700", "epoch": 2.69248937175248, "eval_steps": 50, "global_step": 950, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "dpo_loss": 0.6931471824645996, "epoch": 0.002834199338686821, "grad_norm": 18442.54709688657, "learning_rate": 2.840909090909091e-08, "logits": -1.359458565711975, "logps": -84.69721221923828, "loss": 0.6931, "objective": 0.6931471824645996, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.5833333134651184, "regularize": 0.6931471824645996, "step": 1, "wo_beta": 5.271125316619873 }, { "dpo_loss": 16.993518829345703, "epoch": 0.14170996693434104, "grad_norm": 20039.682390740076, "learning_rate": 1.4204545454545458e-06, "logits": -1.4496889114379883, "logps": -84.4629898071289, "loss": 17.3723, "objective": 16.993518829345703, "ranking_idealized": 0.5221088528633118, "ranking_idealized_expo": 0.5216836929321289, "ranking_simple": 0.5242347121238708, "regularize": 16.993518829345703, "step": 50, "wo_beta": 7.0965657234191895 }, { "epoch": 0.14170996693434104, "eval_dpo_loss": 31.485410690307617, "eval_logits": -1.4391247034072876, "eval_logps": -90.95195770263672, "eval_loss": 32.11253356933594, "eval_objective": 31.485410690307617, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5263975262641907, "eval_regularize": 31.485410690307617, "eval_runtime": 309.3922, "eval_samples_per_second": 18.714, "eval_steps_per_second": 1.561, "eval_wo_beta": 7.685111999511719, "step": 50 }, { "dpo_loss": 59.36994934082031, "epoch": 0.2834199338686821, "grad_norm": 16985.157671530273, "learning_rate": 2.8409090909090916e-06, "logits": -1.4624311923980713, "logps": -81.97602844238281, "loss": 60.4454, "objective": 59.36994934082031, "ranking_idealized": 0.5137500166893005, "ranking_idealized_expo": 0.5137500166893005, "ranking_simple": 0.5433333516120911, "regularize": 59.36994934082031, "step": 100, "wo_beta": 6.49465799331665 }, { "epoch": 0.2834199338686821, "eval_dpo_loss": 70.77193450927734, "eval_logits": -1.4385725259780884, "eval_logps": -86.5999984741211, "eval_loss": 70.99683380126953, "eval_objective": 70.77193450927734, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5305383205413818, "eval_regularize": 70.77193450927734, "eval_runtime": 308.6565, "eval_samples_per_second": 18.759, "eval_steps_per_second": 1.565, "eval_wo_beta": 7.528871059417725, "step": 100 }, { "dpo_loss": 101.89765930175781, "epoch": 0.42512990080302315, "grad_norm": 14194.684745919038, "learning_rate": 4.2613636363636365e-06, "logits": -1.386507511138916, "logps": -78.6588134765625, "loss": 100.2237, "objective": 101.89765930175781, "ranking_idealized": 0.527916669845581, "ranking_idealized_expo": 0.527916669845581, "ranking_simple": 0.5612499713897705, "regularize": 101.89765930175781, "step": 150, "wo_beta": 6.392862319946289 }, { "epoch": 0.42512990080302315, "eval_dpo_loss": 126.88445281982422, "eval_logits": -1.2892017364501953, "eval_logps": -85.73033905029297, "eval_loss": 129.89280700683594, "eval_objective": 126.88445281982422, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5320910811424255, "eval_regularize": 126.88445281982422, "eval_runtime": 308.7582, "eval_samples_per_second": 18.753, "eval_steps_per_second": 1.564, "eval_wo_beta": 7.464066505432129, "step": 150 }, { "dpo_loss": 119.16841125488281, "epoch": 0.5668398677373642, "grad_norm": 12833.152797223363, "learning_rate": 4.997168347957521e-06, "logits": -1.3282432556152344, "logps": -75.89098358154297, "loss": 120.8284, "objective": 119.16841125488281, "ranking_idealized": 0.51541668176651, "ranking_idealized_expo": 0.51541668176651, "ranking_simple": 0.5562499761581421, "regularize": 119.16841125488281, "step": 200, "wo_beta": 6.29520320892334 }, { "epoch": 0.5668398677373642, "eval_dpo_loss": 159.50131225585938, "eval_logits": -1.3194636106491089, "eval_logps": -75.5542221069336, "eval_loss": 164.0151824951172, "eval_objective": 159.50131225585938, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5357142686843872, "eval_regularize": 159.50131225585938, "eval_runtime": 308.6776, "eval_samples_per_second": 18.757, "eval_steps_per_second": 1.565, "eval_wo_beta": 7.183592319488525, "step": 200 }, { "dpo_loss": 134.0004119873047, "epoch": 0.7085498346717053, "grad_norm": 13188.097817084132, "learning_rate": 4.973122855144066e-06, "logits": -1.138743281364441, "logps": -72.49617004394531, "loss": 134.8217, "objective": 134.0004119873047, "ranking_idealized": 0.5162500143051147, "ranking_idealized_expo": 0.5162500143051147, "ranking_simple": 0.5795833468437195, "regularize": 134.0004119873047, "step": 250, "wo_beta": 6.357597827911377 }, { "epoch": 0.7085498346717053, "eval_dpo_loss": 190.8510284423828, "eval_logits": -1.2058484554290771, "eval_logps": -79.3890609741211, "eval_loss": 195.72117614746094, "eval_objective": 190.8510284423828, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5284678936004639, "eval_regularize": 190.8510284423828, "eval_runtime": 308.7239, "eval_samples_per_second": 18.755, "eval_steps_per_second": 1.565, "eval_wo_beta": 7.27105188369751, "step": 250 }, { "dpo_loss": 114.66310119628906, "epoch": 0.8502598016060463, "grad_norm": 8558.083204689276, "learning_rate": 4.924776641419513e-06, "logits": -1.090308666229248, "logps": -76.0093765258789, "loss": 119.0273, "objective": 114.66310119628906, "ranking_idealized": 0.4950000047683716, "ranking_idealized_expo": 0.4950000047683716, "ranking_simple": 0.5687500238418579, "regularize": 114.66310119628906, "step": 300, "wo_beta": 6.086948871612549 }, { "epoch": 0.8502598016060463, "eval_dpo_loss": 188.05804443359375, "eval_logits": -0.994479775428772, "eval_logps": -84.29710388183594, "eval_loss": 192.52310180664062, "eval_objective": 188.05804443359375, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5357142686843872, "eval_regularize": 188.05804443359375, "eval_runtime": 308.8243, "eval_samples_per_second": 18.749, "eval_steps_per_second": 1.564, "eval_wo_beta": 6.938241958618164, "step": 300 }, { "dpo_loss": 116.39104461669922, "epoch": 0.9919697685403873, "grad_norm": 14112.952284083942, "learning_rate": 4.8526047530778175e-06, "logits": -0.9387365579605103, "logps": -79.04769897460938, "loss": 114.0792, "objective": 116.39104461669922, "ranking_idealized": 0.5254166722297668, "ranking_idealized_expo": 0.5254166722297668, "ranking_simple": 0.5975000262260437, "regularize": 116.39104461669922, "step": 350, "wo_beta": 5.450421333312988 }, { "epoch": 0.9919697685403873, "eval_dpo_loss": 192.3919677734375, "eval_logits": -1.0045075416564941, "eval_logps": -82.11246490478516, "eval_loss": 205.77967834472656, "eval_objective": 192.3919677734375, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5408902764320374, "eval_regularize": 192.3919677734375, "eval_runtime": 308.4486, "eval_samples_per_second": 18.771, "eval_steps_per_second": 1.566, "eval_wo_beta": 6.923487663269043, "step": 350 }, { "dpo_loss": 73.98444366455078, "epoch": 1.1336797354747283, "grad_norm": 7172.9062533852075, "learning_rate": 4.757316345716554e-06, "logits": -0.8197985887527466, "logps": -76.91300201416016, "loss": 72.4145, "objective": 73.98444366455078, "ranking_idealized": 0.5320833325386047, "ranking_idealized_expo": 0.5320833325386047, "ranking_simple": 0.621666669845581, "regularize": 73.98444366455078, "step": 400, "wo_beta": 4.771157264709473 }, { "epoch": 1.1336797354747283, "eval_dpo_loss": 204.8121795654297, "eval_logits": -0.7119522094726562, "eval_logps": -82.81564331054688, "eval_loss": 212.66128540039062, "eval_objective": 204.8121795654297, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5408902764320374, "eval_regularize": 204.8121795654297, "eval_runtime": 308.465, "eval_samples_per_second": 18.77, "eval_steps_per_second": 1.566, "eval_wo_beta": 7.0484819412231445, "step": 400 }, { "dpo_loss": 81.11451721191406, "epoch": 1.2753897024090695, "grad_norm": 7257.334900105363, "learning_rate": 4.639847716126855e-06, "logits": -0.6066682934761047, "logps": -78.56853485107422, "loss": 76.9668, "objective": 81.11451721191406, "ranking_idealized": 0.5191666483879089, "ranking_idealized_expo": 0.5191666483879089, "ranking_simple": 0.6295833587646484, "regularize": 81.11451721191406, "step": 450, "wo_beta": 5.272921562194824 }, { "epoch": 1.2753897024090695, "eval_dpo_loss": 203.026123046875, "eval_logits": -0.7807133197784424, "eval_logps": -82.4189682006836, "eval_loss": 210.22909545898438, "eval_objective": 203.026123046875, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5383023023605347, "eval_regularize": 203.026123046875, "eval_runtime": 309.148, "eval_samples_per_second": 18.729, "eval_steps_per_second": 1.562, "eval_wo_beta": 6.924361705780029, "step": 450 }, { "dpo_loss": 76.8044204711914, "epoch": 1.4170996693434104, "grad_norm": 8969.209638335737, "learning_rate": 4.501353102310901e-06, "logits": -0.6817887425422668, "logps": -77.68151092529297, "loss": 77.9261, "objective": 76.8044204711914, "ranking_idealized": 0.4970833361148834, "ranking_idealized_expo": 0.4970833361148834, "ranking_simple": 0.6170833110809326, "regularize": 76.8044204711914, "step": 500, "wo_beta": 5.202561855316162 }, { "epoch": 1.4170996693434104, "eval_dpo_loss": 202.15687561035156, "eval_logits": -0.7438479661941528, "eval_logps": -81.37281036376953, "eval_loss": 211.31556701660156, "eval_objective": 202.15687561035156, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5362318754196167, "eval_regularize": 202.15687561035156, "eval_runtime": 308.8567, "eval_samples_per_second": 18.747, "eval_steps_per_second": 1.564, "eval_wo_beta": 6.886315822601318, "step": 500 }, { "dpo_loss": 70.9330062866211, "epoch": 1.5588096362777515, "grad_norm": 8040.484078292184, "learning_rate": 4.34319334202531e-06, "logits": -0.636380136013031, "logps": -77.03202056884766, "loss": 70.5755, "objective": 70.9330062866211, "ranking_idealized": 0.5104166865348816, "ranking_idealized_expo": 0.5104166865348816, "ranking_simple": 0.637499988079071, "regularize": 70.9330062866211, "step": 550, "wo_beta": 4.849801063537598 }, { "epoch": 1.5588096362777515, "eval_dpo_loss": 200.1410369873047, "eval_logits": -0.6838412880897522, "eval_logps": -82.32962799072266, "eval_loss": 212.6468048095703, "eval_objective": 200.1410369873047, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5429606437683105, "eval_regularize": 200.1410369873047, "eval_runtime": 311.5527, "eval_samples_per_second": 18.584, "eval_steps_per_second": 1.55, "eval_wo_beta": 6.724142551422119, "step": 550 }, { "dpo_loss": 65.89614868164062, "epoch": 1.7005196032120926, "grad_norm": 7186.548213944491, "learning_rate": 4.16692250129073e-06, "logits": -0.510294497013092, "logps": -78.14505004882812, "loss": 69.6026, "objective": 65.89614868164062, "ranking_idealized": 0.5149999856948853, "ranking_idealized_expo": 0.5149999856948853, "ranking_simple": 0.6416666507720947, "regularize": 65.89614868164062, "step": 600, "wo_beta": 4.862266540527344 }, { "epoch": 1.7005196032120926, "eval_dpo_loss": 196.96690368652344, "eval_logits": -0.5568598508834839, "eval_logps": -80.71286010742188, "eval_loss": 212.02540588378906, "eval_objective": 196.96690368652344, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5419254899024963, "eval_regularize": 196.96690368652344, "eval_runtime": 308.7949, "eval_samples_per_second": 18.75, "eval_steps_per_second": 1.564, "eval_wo_beta": 6.6975417137146, "step": 600 }, { "dpo_loss": 69.19684600830078, "epoch": 1.8422295701464337, "grad_norm": 7556.588020373631, "learning_rate": 3.974272604254906e-06, "logits": -0.5819191336631775, "logps": -75.77037048339844, "loss": 69.7829, "objective": 69.19684600830078, "ranking_idealized": 0.527916669845581, "ranking_idealized_expo": 0.527916669845581, "ranking_simple": 0.6395833492279053, "regularize": 69.19684600830078, "step": 650, "wo_beta": 5.048923969268799 }, { "epoch": 1.8422295701464337, "eval_dpo_loss": 209.67823791503906, "eval_logits": -0.7061935663223267, "eval_logps": -79.49684143066406, "eval_loss": 222.27664184570312, "eval_objective": 209.67823791503906, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5403726696968079, "eval_regularize": 209.67823791503906, "eval_runtime": 308.514, "eval_samples_per_second": 18.767, "eval_steps_per_second": 1.566, "eval_wo_beta": 6.654106140136719, "step": 650 }, { "dpo_loss": 60.93992233276367, "epoch": 1.9839395370807746, "grad_norm": 7674.886013172465, "learning_rate": 3.767136614452458e-06, "logits": -0.5295735597610474, "logps": -76.11310577392578, "loss": 62.7864, "objective": 60.93992233276367, "ranking_idealized": 0.5112500190734863, "ranking_idealized_expo": 0.5108333230018616, "ranking_simple": 0.6387500166893005, "regularize": 60.93992233276367, "step": 700, "wo_beta": 4.921832084655762 }, { "epoch": 1.9839395370807746, "eval_dpo_loss": 213.3031463623047, "eval_logits": -0.6268911957740784, "eval_logps": -80.2667236328125, "eval_loss": 226.3468017578125, "eval_objective": 213.3031463623047, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5398550629615784, "eval_regularize": 213.3031463623047, "eval_runtime": 312.7433, "eval_samples_per_second": 18.514, "eval_steps_per_second": 1.544, "eval_wo_beta": 6.621464252471924, "step": 700 }, { "dpo_loss": 37.5257682800293, "epoch": 2.1256495040151155, "grad_norm": 6013.443827898748, "learning_rate": 3.547549834686222e-06, "logits": -0.507068932056427, "logps": -76.31243133544922, "loss": 37.3326, "objective": 37.5257682800293, "ranking_idealized": 0.5112500190734863, "ranking_idealized_expo": 0.5112500190734863, "ranking_simple": 0.6583333611488342, "regularize": 37.5257682800293, "step": 750, "wo_beta": 4.600498676300049 }, { "epoch": 2.1256495040151155, "eval_dpo_loss": 208.872314453125, "eval_logits": -0.7007076144218445, "eval_logps": -80.56648254394531, "eval_loss": 219.77853393554688, "eval_objective": 208.872314453125, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5439958572387695, "eval_regularize": 208.872314453125, "eval_runtime": 308.6702, "eval_samples_per_second": 18.758, "eval_steps_per_second": 1.565, "eval_wo_beta": 6.726495265960693, "step": 750 }, { "dpo_loss": 32.680686950683594, "epoch": 2.2673594709494567, "grad_norm": 5530.199901679942, "learning_rate": 3.3176699082935546e-06, "logits": -0.5285750031471252, "logps": -77.53636932373047, "loss": 33.2099, "objective": 32.680686950683594, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.6654166579246521, "regularize": 32.680686950683594, "step": 800, "wo_beta": 4.228292465209961 }, { "epoch": 2.2673594709494567, "eval_dpo_loss": 207.68812561035156, "eval_logits": -0.5673097968101501, "eval_logps": -81.89009857177734, "eval_loss": 221.87864685058594, "eval_objective": 207.68812561035156, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5450310707092285, "eval_regularize": 207.68812561035156, "eval_runtime": 308.8922, "eval_samples_per_second": 18.744, "eval_steps_per_second": 1.564, "eval_wo_beta": 6.671654224395752, "step": 800 }, { "dpo_loss": 34.45829772949219, "epoch": 2.409069437883798, "grad_norm": 4586.247966907105, "learning_rate": 3.0797556183036582e-06, "logits": -0.49624624848365784, "logps": -79.02293395996094, "loss": 33.915, "objective": 34.45829772949219, "ranking_idealized": 0.5141666531562805, "ranking_idealized_expo": 0.5133333206176758, "ranking_simple": 0.6575000286102295, "regularize": 34.45829772949219, "step": 850, "wo_beta": 4.579156398773193 }, { "epoch": 2.409069437883798, "eval_dpo_loss": 205.05154418945312, "eval_logits": -0.5177903771400452, "eval_logps": -81.91341400146484, "eval_loss": 217.6955108642578, "eval_objective": 205.05154418945312, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.542443037033081, "eval_regularize": 205.05154418945312, "eval_runtime": 308.6493, "eval_samples_per_second": 18.759, "eval_steps_per_second": 1.565, "eval_wo_beta": 6.724928855895996, "step": 850 }, { "dpo_loss": 39.46757888793945, "epoch": 2.550779404818139, "grad_norm": 5711.491250646742, "learning_rate": 2.8361446928038298e-06, "logits": -0.44471094012260437, "logps": -76.97750091552734, "loss": 35.3572, "objective": 39.46757888793945, "ranking_idealized": 0.5183333158493042, "ranking_idealized_expo": 0.5179166793823242, "ranking_simple": 0.6600000262260437, "regularize": 39.46757888793945, "step": 900, "wo_beta": 4.633584022521973 }, { "epoch": 2.550779404818139, "eval_dpo_loss": 214.5051727294922, "eval_logits": -0.47291529178619385, "eval_logps": -81.58795166015625, "eval_loss": 224.5401611328125, "eval_objective": 214.5051727294922, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.54347825050354, "eval_regularize": 214.5051727294922, "eval_runtime": 308.5271, "eval_samples_per_second": 18.767, "eval_steps_per_second": 1.566, "eval_wo_beta": 6.827796936035156, "step": 900 }, { "dpo_loss": 30.270259857177734, "epoch": 2.69248937175248, "grad_norm": 4986.6651394423525, "learning_rate": 2.5892308345974517e-06, "logits": -0.46939340233802795, "logps": -76.20037841796875, "loss": 31.032, "objective": 30.270259857177734, "ranking_idealized": 0.5058333277702332, "ranking_idealized_expo": 0.5058333277702332, "ranking_simple": 0.6645833253860474, "regularize": 30.270259857177734, "step": 950, "wo_beta": 4.50759220123291 }, { "epoch": 2.69248937175248, "eval_dpo_loss": 216.58029174804688, "eval_logits": -0.55417799949646, "eval_logps": -80.34796905517578, "eval_loss": 225.29067993164062, "eval_objective": 216.58029174804688, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5419254899024963, "eval_regularize": 216.58029174804688, "eval_runtime": 309.3192, "eval_samples_per_second": 18.719, "eval_steps_per_second": 1.561, "eval_wo_beta": 6.8429484367370605, "step": 950 }, { "epoch": 2.69248937175248, "step": 950, "total_flos": 0.0, "train_loss": 70.38767440143384, "train_runtime": 26138.5983, "train_samples_per_second": 9.718, "train_steps_per_second": 0.067 } ], "logging_steps": 50, "max_steps": 1760, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }