{ "best_metric": 14.316285133361816, "best_model_checkpoint": "./qwen2.5-0.5b/qwen2.5-0.5b-expo-L2EXPO-ES-0.1/checkpoint-800", "epoch": 2.975909305621162, "eval_steps": 50, "global_step": 1050, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "dpo_loss": 0.6931471824645996, "epoch": 0.002834199338686821, "grad_norm": 36.88507599678088, "learning_rate": 2.840909090909091e-08, "logits": -1.359458565711975, "logps": -84.69721221923828, "loss": 0.3913, "objective": 0.3618059456348419, "ranking_idealized": 0.6458333134651184, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.5833333134651184, "regularize": 0.3618059456348419, "step": 1, "wo_beta": 14.830931663513184 }, { "dpo_loss": 0.6800611615180969, "epoch": 0.14170996693434104, "grad_norm": 34.65452784204521, "learning_rate": 1.4204545454545458e-06, "logits": -1.481619119644165, "logps": -83.80532836914062, "loss": 0.4017, "objective": 0.4062296152114868, "ranking_idealized": 0.608418345451355, "ranking_idealized_expo": 0.5229591727256775, "ranking_simple": 0.5250850319862366, "regularize": 0.4062296152114868, "step": 50, "wo_beta": 15.713354110717773 }, { "epoch": 0.14170996693434104, "eval_dpo_loss": 0.6868039965629578, "eval_logits": -1.5024017095565796, "eval_logps": -93.17259979248047, "eval_loss": 0.4164615273475647, "eval_objective": 0.41487643122673035, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5258799195289612, "eval_regularize": 0.41487643122673035, "eval_runtime": 308.9601, "eval_samples_per_second": 18.74, "eval_steps_per_second": 1.563, "eval_wo_beta": 16.426729202270508, "step": 50 }, { "dpo_loss": 0.6331456303596497, "epoch": 0.2834199338686821, "grad_norm": 29.48988078429707, "learning_rate": 2.8409090909090916e-06, "logits": -1.514231562614441, "logps": -84.27212524414062, "loss": 0.3777, "objective": 0.38673925399780273, "ranking_idealized": 0.6016666889190674, "ranking_idealized_expo": 0.5141666531562805, "ranking_simple": 0.5520833134651184, "regularize": 0.38673925399780273, "step": 100, "wo_beta": 15.562942504882812 }, { "epoch": 0.2834199338686821, "eval_dpo_loss": 0.6818161606788635, "eval_logits": -1.4774748086929321, "eval_logps": -92.86526489257812, "eval_loss": 0.4359625577926636, "eval_objective": 0.4269382953643799, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.531573474407196, "eval_regularize": 0.4269382953643799, "eval_runtime": 308.6402, "eval_samples_per_second": 18.76, "eval_steps_per_second": 1.565, "eval_wo_beta": 16.243934631347656, "step": 100 }, { "dpo_loss": 0.6157870292663574, "epoch": 0.42512990080302315, "grad_norm": 23.64962148842917, "learning_rate": 4.2613636363636365e-06, "logits": -1.363812804222107, "logps": -83.03893280029297, "loss": 0.4057, "objective": 0.4109911620616913, "ranking_idealized": 0.6066666841506958, "ranking_idealized_expo": 0.5287500023841858, "ranking_simple": 0.5679166913032532, "regularize": 0.4109911620616913, "step": 150, "wo_beta": 15.67545223236084 }, { "epoch": 0.42512990080302315, "eval_dpo_loss": 0.6897013783454895, "eval_logits": -1.2946008443832397, "eval_logps": -84.17744445800781, "eval_loss": 0.49110475182533264, "eval_objective": 0.48045814037323, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5383023023605347, "eval_regularize": 0.48045814037323, "eval_runtime": 312.8899, "eval_samples_per_second": 18.505, "eval_steps_per_second": 1.544, "eval_wo_beta": 15.630563735961914, "step": 150 }, { "dpo_loss": 0.6037490963935852, "epoch": 0.5668398677373642, "grad_norm": 20.792422526564724, "learning_rate": 4.997168347957521e-06, "logits": -1.1515488624572754, "logps": -78.54210662841797, "loss": 0.4475, "objective": 0.4344017505645752, "ranking_idealized": 0.5924999713897705, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5537499785423279, "regularize": 0.4344017505645752, "step": 200, "wo_beta": 15.705690383911133 }, { "epoch": 0.5668398677373642, "eval_dpo_loss": 0.7102847695350647, "eval_logits": -0.9896814823150635, "eval_logps": -89.7341537475586, "eval_loss": 0.5660186409950256, "eval_objective": 0.5515478253364563, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.531573474407196, "eval_regularize": 0.5515478253364563, "eval_runtime": 307.7447, "eval_samples_per_second": 18.814, "eval_steps_per_second": 1.569, "eval_wo_beta": 15.128002166748047, "step": 200 }, { "dpo_loss": 0.6019502878189087, "epoch": 0.7085498346717053, "grad_norm": 19.149849902460005, "learning_rate": 4.973122855144066e-06, "logits": -0.9062835574150085, "logps": -77.80750274658203, "loss": 0.455, "objective": 0.4496636390686035, "ranking_idealized": 0.5991666913032532, "ranking_idealized_expo": 0.5170833468437195, "ranking_simple": 0.5724999904632568, "regularize": 0.4496636390686035, "step": 250, "wo_beta": 16.00295639038086 }, { "epoch": 0.7085498346717053, "eval_dpo_loss": 0.7171492576599121, "eval_logits": -1.0032674074172974, "eval_logps": -78.19169616699219, "eval_loss": 0.5978298783302307, "eval_objective": 0.5822399258613586, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5310559272766113, "eval_regularize": 0.5822399258613586, "eval_runtime": 309.2921, "eval_samples_per_second": 18.72, "eval_steps_per_second": 1.562, "eval_wo_beta": 14.676263809204102, "step": 250 }, { "dpo_loss": 0.5993608236312866, "epoch": 0.8502598016060463, "grad_norm": 17.89888160897824, "learning_rate": 4.924776641419513e-06, "logits": -0.8504629731178284, "logps": -74.18943786621094, "loss": 0.4337, "objective": 0.42672449350357056, "ranking_idealized": 0.5799999833106995, "ranking_idealized_expo": 0.4970833361148834, "ranking_simple": 0.5529166460037231, "regularize": 0.42672449350357056, "step": 300, "wo_beta": 15.642317771911621 }, { "epoch": 0.8502598016060463, "eval_dpo_loss": 0.710507333278656, "eval_logits": -0.6760910153388977, "eval_logps": -78.8918228149414, "eval_loss": 0.5993344783782959, "eval_objective": 0.5779486894607544, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5300207138061523, "eval_regularize": 0.5779486894607544, "eval_runtime": 308.8688, "eval_samples_per_second": 18.746, "eval_steps_per_second": 1.564, "eval_wo_beta": 14.919622421264648, "step": 300 }, { "dpo_loss": 0.5866905450820923, "epoch": 0.9919697685403873, "grad_norm": 18.680005245459032, "learning_rate": 4.8526047530778175e-06, "logits": -0.6557392477989197, "logps": -72.1249008178711, "loss": 0.4039, "objective": 0.3963530361652374, "ranking_idealized": 0.60916668176651, "ranking_idealized_expo": 0.5270833373069763, "ranking_simple": 0.5874999761581421, "regularize": 0.3963530361652374, "step": 350, "wo_beta": 15.802534103393555 }, { "epoch": 0.9919697685403873, "eval_dpo_loss": 0.707767128944397, "eval_logits": -0.7968087792396545, "eval_logps": -75.1519775390625, "eval_loss": 0.5977659821510315, "eval_objective": 0.5765314102172852, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5289855003356934, "eval_regularize": 0.5765314102172852, "eval_runtime": 309.2048, "eval_samples_per_second": 18.725, "eval_steps_per_second": 1.562, "eval_wo_beta": 14.653112411499023, "step": 350 }, { "dpo_loss": 0.571822464466095, "epoch": 1.1336797354747283, "grad_norm": 17.758722724655755, "learning_rate": 4.757316345716554e-06, "logits": -0.6229808926582336, "logps": -71.7779541015625, "loss": 0.3729, "objective": 0.38071343302726746, "ranking_idealized": 0.6087499856948853, "ranking_idealized_expo": 0.5337499976158142, "ranking_simple": 0.596666693687439, "regularize": 0.38071343302726746, "step": 400, "wo_beta": 15.872475624084473 }, { "epoch": 1.1336797354747283, "eval_dpo_loss": 0.7153333425521851, "eval_logits": -0.5569362044334412, "eval_logps": -75.14326477050781, "eval_loss": 0.6180254817008972, "eval_objective": 0.6000439524650574, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.522774338722229, "eval_regularize": 0.6000439524650574, "eval_runtime": 308.681, "eval_samples_per_second": 18.757, "eval_steps_per_second": 1.565, "eval_wo_beta": 14.647075653076172, "step": 400 }, { "dpo_loss": 0.5686503052711487, "epoch": 1.2753897024090695, "grad_norm": 17.208572847816768, "learning_rate": 4.639847716126855e-06, "logits": -0.558698296546936, "logps": -71.48841094970703, "loss": 0.3454, "objective": 0.34283894300460815, "ranking_idealized": 0.5975000262260437, "ranking_idealized_expo": 0.5199999809265137, "ranking_simple": 0.59375, "regularize": 0.34283894300460815, "step": 450, "wo_beta": 16.339805603027344 }, { "epoch": 1.2753897024090695, "eval_dpo_loss": 0.716464638710022, "eval_logits": -0.6214241981506348, "eval_logps": -76.22888946533203, "eval_loss": 0.6315773725509644, "eval_objective": 0.6131163239479065, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.533643901348114, "eval_regularize": 0.6131163239479065, "eval_runtime": 309.544, "eval_samples_per_second": 18.705, "eval_steps_per_second": 1.56, "eval_wo_beta": 14.503443717956543, "step": 450 }, { "dpo_loss": 0.5579800605773926, "epoch": 1.4170996693434104, "grad_norm": 17.247428143304518, "learning_rate": 4.501353102310901e-06, "logits": -0.5506080985069275, "logps": -72.05484008789062, "loss": 0.3226, "objective": 0.321167916059494, "ranking_idealized": 0.57833331823349, "ranking_idealized_expo": 0.4983333349227905, "ranking_simple": 0.5724999904632568, "regularize": 0.321167916059494, "step": 500, "wo_beta": 15.88575553894043 }, { "epoch": 1.4170996693434104, "eval_dpo_loss": 0.7203696370124817, "eval_logits": -0.5608097910881042, "eval_logps": -77.60398864746094, "eval_loss": 0.6255373358726501, "eval_objective": 0.6084341406822205, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5284678936004639, "eval_regularize": 0.6084341406822205, "eval_runtime": 308.7073, "eval_samples_per_second": 18.756, "eval_steps_per_second": 1.565, "eval_wo_beta": 14.499795913696289, "step": 500 }, { "dpo_loss": 0.5586966872215271, "epoch": 1.5588096362777515, "grad_norm": 16.23813175362919, "learning_rate": 4.34319334202531e-06, "logits": -0.5342339873313904, "logps": -74.90955352783203, "loss": 0.3133, "objective": 0.31819403171539307, "ranking_idealized": 0.5945833325386047, "ranking_idealized_expo": 0.5116666555404663, "ranking_simple": 0.5854166746139526, "regularize": 0.31819403171539307, "step": 550, "wo_beta": 15.694311141967773 }, { "epoch": 1.5588096362777515, "eval_dpo_loss": 0.7138590216636658, "eval_logits": -0.6736307144165039, "eval_logps": -78.62907409667969, "eval_loss": 0.6281688809394836, "eval_objective": 0.613820493221283, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.533643901348114, "eval_regularize": 0.613820493221283, "eval_runtime": 309.2677, "eval_samples_per_second": 18.722, "eval_steps_per_second": 1.562, "eval_wo_beta": 14.406906127929688, "step": 550 }, { "dpo_loss": 0.5538429021835327, "epoch": 1.7005196032120926, "grad_norm": 16.284668340767176, "learning_rate": 4.16692250129073e-06, "logits": -0.5140345692634583, "logps": -74.2342758178711, "loss": 0.2944, "objective": 0.29470422863960266, "ranking_idealized": 0.6004166603088379, "ranking_idealized_expo": 0.51583331823349, "ranking_simple": 0.5895833373069763, "regularize": 0.29470422863960266, "step": 600, "wo_beta": 15.488865852355957 }, { "epoch": 1.7005196032120926, "eval_dpo_loss": 0.717461884021759, "eval_logits": -0.5620033740997314, "eval_logps": -78.91792297363281, "eval_loss": 0.6321352124214172, "eval_objective": 0.6138916015625, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5357142686843872, "eval_regularize": 0.6138916015625, "eval_runtime": 308.6792, "eval_samples_per_second": 18.757, "eval_steps_per_second": 1.565, "eval_wo_beta": 14.614200592041016, "step": 600 }, { "dpo_loss": 0.5554340481758118, "epoch": 1.8422295701464337, "grad_norm": 15.91791899711329, "learning_rate": 3.974272604254906e-06, "logits": -0.5311375260353088, "logps": -74.39502716064453, "loss": 0.2915, "objective": 0.28872814774513245, "ranking_idealized": 0.6058333516120911, "ranking_idealized_expo": 0.5295833349227905, "ranking_simple": 0.5991666913032532, "regularize": 0.28872814774513245, "step": 650, "wo_beta": 16.528623580932617 }, { "epoch": 1.8422295701464337, "eval_dpo_loss": 0.7137619256973267, "eval_logits": -0.702060341835022, "eval_logps": -77.44371795654297, "eval_loss": 0.6321162581443787, "eval_objective": 0.6157041788101196, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5367494821548462, "eval_regularize": 0.6157041788101196, "eval_runtime": 309.0389, "eval_samples_per_second": 18.736, "eval_steps_per_second": 1.563, "eval_wo_beta": 14.385796546936035, "step": 650 }, { "dpo_loss": 0.5520748496055603, "epoch": 1.9839395370807746, "grad_norm": 15.323602142583136, "learning_rate": 3.767136614452458e-06, "logits": -0.5264750719070435, "logps": -75.2638931274414, "loss": 0.2675, "objective": 0.2728944420814514, "ranking_idealized": 0.5954166650772095, "ranking_idealized_expo": 0.5129166841506958, "ranking_simple": 0.5883333086967468, "regularize": 0.2728944420814514, "step": 700, "wo_beta": 15.999488830566406 }, { "epoch": 1.9839395370807746, "eval_dpo_loss": 0.7185091972351074, "eval_logits": -0.561150312423706, "eval_logps": -79.35997009277344, "eval_loss": 0.6386255621910095, "eval_objective": 0.6233482956886292, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5289855003356934, "eval_regularize": 0.6233482956886292, "eval_runtime": 309.5574, "eval_samples_per_second": 18.704, "eval_steps_per_second": 1.56, "eval_wo_beta": 14.317137718200684, "step": 700 }, { "dpo_loss": 0.5413146615028381, "epoch": 2.1256495040151155, "grad_norm": 14.916489496350042, "learning_rate": 3.547549834686222e-06, "logits": -0.5209631323814392, "logps": -75.42438507080078, "loss": 0.2415, "objective": 0.23713654279708862, "ranking_idealized": 0.6066666841506958, "ranking_idealized_expo": 0.5133333206176758, "ranking_simple": 0.597083330154419, "regularize": 0.23713654279708862, "step": 750, "wo_beta": 15.934895515441895 }, { "epoch": 2.1256495040151155, "eval_dpo_loss": 0.7177355885505676, "eval_logits": -0.6173678636550903, "eval_logps": -80.0989761352539, "eval_loss": 0.6405187249183655, "eval_objective": 0.6263132095336914, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.534679114818573, "eval_regularize": 0.6263132095336914, "eval_runtime": 309.3835, "eval_samples_per_second": 18.715, "eval_steps_per_second": 1.561, "eval_wo_beta": 14.430180549621582, "step": 750 }, { "dpo_loss": 0.5419275760650635, "epoch": 2.2673594709494567, "grad_norm": 15.335797694469315, "learning_rate": 3.3176699082935546e-06, "logits": -0.5584273338317871, "logps": -76.04695129394531, "loss": 0.2263, "objective": 0.223616361618042, "ranking_idealized": 0.5941666960716248, "ranking_idealized_expo": 0.5129166841506958, "ranking_simple": 0.5866666436195374, "regularize": 0.223616361618042, "step": 800, "wo_beta": 15.526464462280273 }, { "epoch": 2.2673594709494567, "eval_dpo_loss": 0.7205542922019958, "eval_logits": -0.5665243268013, "eval_logps": -79.37840270996094, "eval_loss": 0.6457626223564148, "eval_objective": 0.6297247409820557, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.534679114818573, "eval_regularize": 0.6297247409820557, "eval_runtime": 308.5045, "eval_samples_per_second": 18.768, "eval_steps_per_second": 1.566, "eval_wo_beta": 14.316285133361816, "step": 800 }, { "dpo_loss": 0.539085865020752, "epoch": 2.409069437883798, "grad_norm": 15.300324456571563, "learning_rate": 3.0797556183036582e-06, "logits": -0.5494623780250549, "logps": -75.91812133789062, "loss": 0.2148, "objective": 0.2133045643568039, "ranking_idealized": 0.5979166626930237, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5879166722297668, "regularize": 0.2133045643568039, "step": 850, "wo_beta": 15.76329231262207 }, { "epoch": 2.409069437883798, "eval_dpo_loss": 0.7192490100860596, "eval_logits": -0.5793017148971558, "eval_logps": -79.08055114746094, "eval_loss": 0.6435712575912476, "eval_objective": 0.6275891661643982, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5362318754196167, "eval_regularize": 0.6275891661643982, "eval_runtime": 312.2924, "eval_samples_per_second": 18.54, "eval_steps_per_second": 1.547, "eval_wo_beta": 14.426342010498047, "step": 850 }, { "dpo_loss": 0.5431251525878906, "epoch": 2.550779404818139, "grad_norm": 15.744993696603856, "learning_rate": 2.8361446928038298e-06, "logits": -0.5151351690292358, "logps": -75.99987030029297, "loss": 0.1993, "objective": 0.1999633014202118, "ranking_idealized": 0.5887500047683716, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.5899999737739563, "regularize": 0.1999633014202118, "step": 900, "wo_beta": 16.303361892700195 }, { "epoch": 2.550779404818139, "eval_dpo_loss": 0.7217252850532532, "eval_logits": -0.5620540976524353, "eval_logps": -80.38152313232422, "eval_loss": 0.6453951001167297, "eval_objective": 0.6301912665367126, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5341615080833435, "eval_regularize": 0.6301912665367126, "eval_runtime": 308.7307, "eval_samples_per_second": 18.754, "eval_steps_per_second": 1.564, "eval_wo_beta": 14.44913387298584, "step": 900 }, { "dpo_loss": 0.5340785980224609, "epoch": 2.69248937175248, "grad_norm": 15.888338408049977, "learning_rate": 2.5892308345974517e-06, "logits": -0.5341619253158569, "logps": -76.2621841430664, "loss": 0.1887, "objective": 0.1899857223033905, "ranking_idealized": 0.5975000262260437, "ranking_idealized_expo": 0.5087500214576721, "ranking_simple": 0.5933333039283752, "regularize": 0.1899857223033905, "step": 950, "wo_beta": 16.227678298950195 }, { "epoch": 2.69248937175248, "eval_dpo_loss": 0.7204239964485168, "eval_logits": -0.6216442584991455, "eval_logps": -79.14459228515625, "eval_loss": 0.6443176865577698, "eval_objective": 0.6274449825286865, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.533643901348114, "eval_regularize": 0.6274449825286865, "eval_runtime": 309.1927, "eval_samples_per_second": 18.726, "eval_steps_per_second": 1.562, "eval_wo_beta": 14.318567276000977, "step": 950 }, { "dpo_loss": 0.5356190204620361, "epoch": 2.8341993386868207, "grad_norm": 14.415639648177313, "learning_rate": 2.341440200858589e-06, "logits": -0.5420577526092529, "logps": -74.890869140625, "loss": 0.1764, "objective": 0.17968998849391937, "ranking_idealized": 0.6020833253860474, "ranking_idealized_expo": 0.5104166865348816, "ranking_simple": 0.590416669845581, "regularize": 0.17968998849391937, "step": 1000, "wo_beta": 15.519268035888672 }, { "epoch": 2.8341993386868207, "eval_dpo_loss": 0.7200449705123901, "eval_logits": -0.6086606383323669, "eval_logps": -79.77206420898438, "eval_loss": 0.639886736869812, "eval_objective": 0.6246000528335571, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.533643901348114, "eval_regularize": 0.6246000528335571, "eval_runtime": 309.9286, "eval_samples_per_second": 18.682, "eval_steps_per_second": 1.558, "eval_wo_beta": 14.450177192687988, "step": 1000 }, { "dpo_loss": 0.5345789194107056, "epoch": 2.975909305621162, "grad_norm": 14.911688792125858, "learning_rate": 2.0952075638923656e-06, "logits": -0.5775164365768433, "logps": -75.94026184082031, "loss": 0.163, "objective": 0.162851020693779, "ranking_idealized": 0.6075000166893005, "ranking_idealized_expo": 0.5179166793823242, "ranking_simple": 0.6041666865348816, "regularize": 0.162851020693779, "step": 1050, "wo_beta": 16.27153205871582 }, { "epoch": 2.975909305621162, "eval_dpo_loss": 0.721105694770813, "eval_logits": -0.6067584156990051, "eval_logps": -79.58184814453125, "eval_loss": 0.6428102850914001, "eval_objective": 0.6266354322433472, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.531573474407196, "eval_regularize": 0.6266354322433472, "eval_runtime": 308.0819, "eval_samples_per_second": 18.794, "eval_steps_per_second": 1.568, "eval_wo_beta": 14.340644836425781, "step": 1050 }, { "epoch": 2.975909305621162, "step": 1050, "total_flos": 0.0, "train_loss": 0.3115594020343962, "train_runtime": 28931.7373, "train_samples_per_second": 8.78, "train_steps_per_second": 0.061 } ], "logging_steps": 50, "max_steps": 1760, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }