{ "best_metric": 14.046432495117188, "best_model_checkpoint": "./qwen2.5-0.5b/qwen2.5-0.5b-expo-L2EXPO-ES-100/checkpoint-550", "epoch": 2.9806329711856403, "eval_steps": 50, "global_step": 1050, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "dpo_loss": 0.6931471824645996, "epoch": 0.002834199338686821, "grad_norm": 36884.87916049903, "learning_rate": 2.840909090909091e-08, "logits": -1.359458565711975, "logps": -84.69721221923828, "loss": 0.3913, "objective": 0.3618059456348419, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.5833333134651184, "regularize": 0.3618059456348419, "step": 1, "wo_beta": 14.830931663513184 }, { "dpo_loss": 20.195844650268555, "epoch": 0.14170996693434104, "grad_norm": 35665.35173471636, "learning_rate": 1.4204545454545458e-06, "logits": -1.4575351476669312, "logps": -84.27513122558594, "loss": 43.2587, "objective": 41.916500091552734, "ranking_idealized": 0.5221088528633118, "ranking_idealized_expo": 0.5216836929321289, "ranking_simple": 0.5216836929321289, "regularize": 41.916500091552734, "step": 50, "wo_beta": 15.635692596435547 }, { "epoch": 0.14170996693434104, "eval_dpo_loss": 26.447525024414062, "eval_logits": -1.4447709321975708, "eval_logps": -90.52921295166016, "eval_loss": 52.66217041015625, "eval_objective": 53.697696685791016, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5263975262641907, "eval_regularize": 53.697696685791016, "eval_runtime": 309.407, "eval_samples_per_second": 18.713, "eval_steps_per_second": 1.561, "eval_wo_beta": 16.170011520385742, "step": 50 }, { "dpo_loss": 89.97029876708984, "epoch": 0.2834199338686821, "grad_norm": 28235.60144716246, "learning_rate": 2.8409090909090916e-06, "logits": -1.398974061012268, "logps": -82.89569091796875, "loss": 169.8852, "objective": 172.36553955078125, "ranking_idealized": 0.5137500166893005, "ranking_idealized_expo": 0.5137500166893005, "ranking_simple": 0.51583331823349, "regularize": 172.36553955078125, "step": 100, "wo_beta": 15.30754566192627 }, { "epoch": 0.2834199338686821, "eval_dpo_loss": 85.763916015625, "eval_logits": -1.3620884418487549, "eval_logps": -85.2786636352539, "eval_loss": 173.986083984375, "eval_objective": 172.1890869140625, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5243270993232727, "eval_regularize": 172.1890869140625, "eval_runtime": 308.6643, "eval_samples_per_second": 18.758, "eval_steps_per_second": 1.565, "eval_wo_beta": 15.439105033874512, "step": 100 }, { "dpo_loss": 149.43614196777344, "epoch": 0.42512990080302315, "grad_norm": 21173.096618846714, "learning_rate": 4.2613636363636365e-06, "logits": -1.213483214378357, "logps": -78.55652618408203, "loss": 285.0432, "objective": 280.7228088378906, "ranking_idealized": 0.527916669845581, "ranking_idealized_expo": 0.527916669845581, "ranking_simple": 0.5229166746139526, "regularize": 280.7228088378906, "step": 150, "wo_beta": 15.080223083496094 }, { "epoch": 0.42512990080302315, "eval_dpo_loss": 143.02996826171875, "eval_logits": -1.1693531274795532, "eval_logps": -83.2181167602539, "eval_loss": 291.4833679199219, "eval_objective": 293.4403991699219, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5279502868652344, "eval_regularize": 293.4403991699219, "eval_runtime": 311.5234, "eval_samples_per_second": 18.586, "eval_steps_per_second": 1.55, "eval_wo_beta": 15.222454071044922, "step": 150 }, { "dpo_loss": 181.66571044921875, "epoch": 0.5668398677373642, "grad_norm": 20619.52737873687, "learning_rate": 4.997168347957521e-06, "logits": -0.9392554759979248, "logps": -78.61503601074219, "loss": 355.4066, "objective": 356.2313232421875, "ranking_idealized": 0.51541668176651, "ranking_idealized_expo": 0.51541668176651, "ranking_simple": 0.5191666483879089, "regularize": 356.2313232421875, "step": 200, "wo_beta": 15.349950790405273 }, { "epoch": 0.5668398677373642, "eval_dpo_loss": 189.846923828125, "eval_logits": -0.9273601174354553, "eval_logps": -84.03199768066406, "eval_loss": 372.7905578613281, "eval_objective": 365.21240234375, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5232919454574585, "eval_regularize": 365.21240234375, "eval_runtime": 308.8911, "eval_samples_per_second": 18.744, "eval_steps_per_second": 1.564, "eval_wo_beta": 14.868427276611328, "step": 200 }, { "dpo_loss": 186.671142578125, "epoch": 0.7085498346717053, "grad_norm": 17849.874243754268, "learning_rate": 4.973122855144066e-06, "logits": -0.822012186050415, "logps": -76.92431640625, "loss": 368.9811, "objective": 366.2552490234375, "ranking_idealized": 0.5162500143051147, "ranking_idealized_expo": 0.5162500143051147, "ranking_simple": 0.5074999928474426, "regularize": 366.2552490234375, "step": 250, "wo_beta": 15.5939359664917 }, { "epoch": 0.7085498346717053, "eval_dpo_loss": 216.4584197998047, "eval_logits": -0.7745867967605591, "eval_logps": -81.50496673583984, "eval_loss": 446.6966247558594, "eval_objective": 442.3320617675781, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5258799195289612, "eval_regularize": 442.3320617675781, "eval_runtime": 308.996, "eval_samples_per_second": 18.738, "eval_steps_per_second": 1.563, "eval_wo_beta": 14.479047775268555, "step": 250 }, { "dpo_loss": 182.8459930419922, "epoch": 0.8502598016060463, "grad_norm": 16846.286343055544, "learning_rate": 4.924776641419513e-06, "logits": -0.6342157125473022, "logps": -78.78164672851562, "loss": 360.5868, "objective": 363.29473876953125, "ranking_idealized": 0.4950000047683716, "ranking_idealized_expo": 0.4950000047683716, "ranking_simple": 0.4970833361148834, "regularize": 363.29473876953125, "step": 300, "wo_beta": 15.358329772949219 }, { "epoch": 0.8502598016060463, "eval_dpo_loss": 222.88400268554688, "eval_logits": -0.5983948707580566, "eval_logps": -82.20111083984375, "eval_loss": 448.9505920410156, "eval_objective": 443.9051208496094, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5248447060585022, "eval_regularize": 443.9051208496094, "eval_runtime": 309.9292, "eval_samples_per_second": 18.682, "eval_steps_per_second": 1.558, "eval_wo_beta": 14.392961502075195, "step": 300 }, { "dpo_loss": 171.55615234375, "epoch": 0.9919697685403873, "grad_norm": 16864.304890654712, "learning_rate": 4.8526047530778175e-06, "logits": -0.6237902641296387, "logps": -79.1826400756836, "loss": 338.3987, "objective": 335.6865234375, "ranking_idealized": 0.5254166722297668, "ranking_idealized_expo": 0.5254166722297668, "ranking_simple": 0.5216666460037231, "regularize": 335.6865234375, "step": 350, "wo_beta": 15.20045280456543 }, { "epoch": 0.9919697685403873, "eval_dpo_loss": 232.93649291992188, "eval_logits": -0.78554368019104, "eval_logps": -84.16381072998047, "eval_loss": 462.19232177734375, "eval_objective": 461.2073059082031, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5269151329994202, "eval_regularize": 461.2073059082031, "eval_runtime": 308.3435, "eval_samples_per_second": 18.778, "eval_steps_per_second": 1.566, "eval_wo_beta": 14.297853469848633, "step": 350 }, { "dpo_loss": 158.00930786132812, "epoch": 1.1336797354747283, "grad_norm": 16419.225936790586, "learning_rate": 4.757316345716554e-06, "logits": -0.689696192741394, "logps": -80.24182891845703, "loss": 309.1712, "objective": 306.0483703613281, "ranking_idealized": 0.5320833325386047, "ranking_idealized_expo": 0.5320833325386047, "ranking_simple": 0.5229166746139526, "regularize": 306.0483703613281, "step": 400, "wo_beta": 15.245408058166504 }, { "epoch": 1.1336797354747283, "eval_dpo_loss": 248.07177734375, "eval_logits": -0.6413922905921936, "eval_logps": -82.49343872070312, "eval_loss": 480.5964660644531, "eval_objective": 478.7404479980469, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5253623127937317, "eval_regularize": 478.7404479980469, "eval_runtime": 308.667, "eval_samples_per_second": 18.758, "eval_steps_per_second": 1.565, "eval_wo_beta": 14.387246131896973, "step": 400 }, { "dpo_loss": 142.9551239013672, "epoch": 1.2753897024090695, "grad_norm": 16215.238701636586, "learning_rate": 4.639847716126855e-06, "logits": -0.5957368612289429, "logps": -78.66122436523438, "loss": 298.1424, "objective": 298.3353271484375, "ranking_idealized": 0.5191666483879089, "ranking_idealized_expo": 0.5191666483879089, "ranking_simple": 0.5204166769981384, "regularize": 298.3353271484375, "step": 450, "wo_beta": 15.736668586730957 }, { "epoch": 1.2753897024090695, "eval_dpo_loss": 247.8721923828125, "eval_logits": -0.7014132142066956, "eval_logps": -82.14649200439453, "eval_loss": 480.3255615234375, "eval_objective": 482.1766052246094, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.523809552192688, "eval_regularize": 482.1766052246094, "eval_runtime": 309.6985, "eval_samples_per_second": 18.696, "eval_steps_per_second": 1.56, "eval_wo_beta": 14.369455337524414, "step": 450 }, { "dpo_loss": 135.57347106933594, "epoch": 1.4170996693434104, "grad_norm": 15944.03549179383, "learning_rate": 4.501353102310901e-06, "logits": -0.5253962278366089, "logps": -78.11959075927734, "loss": 282.4504, "objective": 278.3994445800781, "ranking_idealized": 0.4970833361148834, "ranking_idealized_expo": 0.4970833361148834, "ranking_simple": 0.49958333373069763, "regularize": 278.3994445800781, "step": 500, "wo_beta": 15.295467376708984 }, { "epoch": 1.4170996693434104, "eval_dpo_loss": 252.20928955078125, "eval_logits": -0.45775941014289856, "eval_logps": -83.41010284423828, "eval_loss": 493.7484436035156, "eval_objective": 495.763916015625, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5248447060585022, "eval_regularize": 495.763916015625, "eval_runtime": 308.7418, "eval_samples_per_second": 18.754, "eval_steps_per_second": 1.564, "eval_wo_beta": 14.174327850341797, "step": 500 }, { "dpo_loss": 138.2200164794922, "epoch": 1.5588096362777515, "grad_norm": 14777.775900953873, "learning_rate": 4.34319334202531e-06, "logits": -0.43370625376701355, "logps": -79.58238220214844, "loss": 261.1027, "objective": 261.7706298828125, "ranking_idealized": 0.5104166865348816, "ranking_idealized_expo": 0.5104166865348816, "ranking_simple": 0.5104166865348816, "regularize": 261.7706298828125, "step": 550, "wo_beta": 15.099705696105957 }, { "epoch": 1.5588096362777515, "eval_dpo_loss": 245.87562561035156, "eval_logits": -0.5435077548027039, "eval_logps": -82.8267593383789, "eval_loss": 486.1625671386719, "eval_objective": 489.7927551269531, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5253623127937317, "eval_regularize": 489.7927551269531, "eval_runtime": 313.2811, "eval_samples_per_second": 18.482, "eval_steps_per_second": 1.542, "eval_wo_beta": 14.046432495117188, "step": 550 }, { "dpo_loss": 129.54061889648438, "epoch": 1.7005196032120926, "grad_norm": 15449.253251441141, "learning_rate": 4.16692250129073e-06, "logits": -0.4195112884044647, "logps": -79.62303924560547, "loss": 255.9288, "objective": 256.1207275390625, "ranking_idealized": 0.5149999856948853, "ranking_idealized_expo": 0.5149999856948853, "ranking_simple": 0.5095833539962769, "regularize": 256.1207275390625, "step": 600, "wo_beta": 15.011371612548828 }, { "epoch": 1.7005196032120926, "eval_dpo_loss": 251.29342651367188, "eval_logits": -0.5346657633781433, "eval_logps": -82.17684173583984, "eval_loss": 500.38006591796875, "eval_objective": 502.1727294921875, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5269151329994202, "eval_regularize": 502.1727294921875, "eval_runtime": 309.4233, "eval_samples_per_second": 18.712, "eval_steps_per_second": 1.561, "eval_wo_beta": 14.243566513061523, "step": 600 }, { "dpo_loss": 130.9433135986328, "epoch": 1.8422295701464337, "grad_norm": 15825.93366010816, "learning_rate": 3.974272604254906e-06, "logits": -0.4912276566028595, "logps": -78.56413269042969, "loss": 248.6787, "objective": 253.7882843017578, "ranking_idealized": 0.527916669845581, "ranking_idealized_expo": 0.527916669845581, "ranking_simple": 0.5224999785423279, "regularize": 253.7882843017578, "step": 650, "wo_beta": 15.684560775756836 }, { "epoch": 1.8422295701464337, "eval_dpo_loss": 254.5959014892578, "eval_logits": -0.5140498876571655, "eval_logps": -81.49234771728516, "eval_loss": 502.3152770996094, "eval_objective": 504.1581726074219, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5248447060585022, "eval_regularize": 504.1581726074219, "eval_runtime": 308.5565, "eval_samples_per_second": 18.765, "eval_steps_per_second": 1.565, "eval_wo_beta": 14.331953048706055, "step": 650 }, { "dpo_loss": 112.15733337402344, "epoch": 1.9839395370807746, "grad_norm": 16229.838704301123, "learning_rate": 3.767136614452458e-06, "logits": -0.4758701026439667, "logps": -78.98329162597656, "loss": 226.4676, "objective": 224.5537567138672, "ranking_idealized": 0.5112500190734863, "ranking_idealized_expo": 0.5108333230018616, "ranking_simple": 0.5087500214576721, "regularize": 224.5537567138672, "step": 700, "wo_beta": 15.5169095993042 }, { "epoch": 1.9839395370807746, "eval_dpo_loss": 264.166015625, "eval_logits": -0.4816124141216278, "eval_logps": -83.42156219482422, "eval_loss": 512.698974609375, "eval_objective": 516.7103271484375, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5253623127937317, "eval_regularize": 516.7103271484375, "eval_runtime": 308.9876, "eval_samples_per_second": 18.739, "eval_steps_per_second": 1.563, "eval_wo_beta": 14.08342456817627, "step": 700 }, { "dpo_loss": 105.4539566040039, "epoch": 2.1256495040151155, "grad_norm": 15281.11762954245, "learning_rate": 3.547549834686222e-06, "logits": -0.47171100974082947, "logps": -79.71944427490234, "loss": 207.1551, "objective": 207.78355407714844, "ranking_idealized": 0.5112500190734863, "ranking_idealized_expo": 0.5112500190734863, "ranking_simple": 0.51583331823349, "regularize": 207.78355407714844, "step": 750, "wo_beta": 15.408516883850098 }, { "epoch": 2.1256495040151155, "eval_dpo_loss": 259.2528381347656, "eval_logits": -0.5409926772117615, "eval_logps": -83.45890045166016, "eval_loss": 506.4237365722656, "eval_objective": 510.6129150390625, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.523809552192688, "eval_regularize": 510.6129150390625, "eval_runtime": 308.6604, "eval_samples_per_second": 18.758, "eval_steps_per_second": 1.565, "eval_wo_beta": 14.129502296447754, "step": 750 }, { "dpo_loss": 97.64974975585938, "epoch": 2.2673594709494567, "grad_norm": 15212.863475959, "learning_rate": 3.3176699082935546e-06, "logits": -0.48046550154685974, "logps": -81.52581787109375, "loss": 197.3545, "objective": 198.5575714111328, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5083333253860474, "regularize": 198.5575714111328, "step": 800, "wo_beta": 15.000423431396484 }, { "epoch": 2.2673594709494567, "eval_dpo_loss": 262.3102111816406, "eval_logits": -0.5658813118934631, "eval_logps": -84.87467956542969, "eval_loss": 513.39794921875, "eval_objective": 514.31201171875, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.522774338722229, "eval_regularize": 514.31201171875, "eval_runtime": 310.3348, "eval_samples_per_second": 18.657, "eval_steps_per_second": 1.556, "eval_wo_beta": 14.070388793945312, "step": 800 }, { "dpo_loss": 94.56294250488281, "epoch": 2.413793103448276, "grad_norm": 14099.488226379159, "learning_rate": 3.0797556183036582e-06, "logits": -0.5114213824272156, "logps": -81.04893493652344, "loss": 182.3796, "objective": 182.07008361816406, "ranking_idealized": 0.5145833492279053, "ranking_idealized_expo": 0.5137500166893005, "ranking_simple": 0.5129166841506958, "regularize": 182.07008361816406, "step": 850, "wo_beta": 15.241059303283691 }, { "epoch": 2.413793103448276, "eval_dpo_loss": 254.12506103515625, "eval_logits": -0.5509631037712097, "eval_logps": -82.86239624023438, "eval_loss": 501.88311767578125, "eval_objective": 504.852294921875, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5274327397346497, "eval_regularize": 504.852294921875, "eval_runtime": 309.6743, "eval_samples_per_second": 18.697, "eval_steps_per_second": 1.56, "eval_wo_beta": 14.17065143585205, "step": 850 }, { "dpo_loss": 92.21830749511719, "epoch": 2.555503070382617, "grad_norm": 14214.15214964791, "learning_rate": 2.8361446928038298e-06, "logits": -0.49887704849243164, "logps": -80.69136810302734, "loss": 176.042, "objective": 176.71592712402344, "ranking_idealized": 0.5183333158493042, "ranking_idealized_expo": 0.5179166793823242, "ranking_simple": 0.5129166841506958, "regularize": 176.71592712402344, "step": 900, "wo_beta": 15.599862098693848 }, { "epoch": 2.555503070382617, "eval_dpo_loss": 263.27996826171875, "eval_logits": -0.5038771033287048, "eval_logps": -85.07097625732422, "eval_loss": 518.1983032226562, "eval_objective": 519.5007934570312, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.523809552192688, "eval_regularize": 519.5007934570312, "eval_runtime": 308.5117, "eval_samples_per_second": 18.768, "eval_steps_per_second": 1.566, "eval_wo_beta": 14.112290382385254, "step": 900 }, { "dpo_loss": 81.76506805419922, "epoch": 2.697213037316958, "grad_norm": 14116.155586290091, "learning_rate": 2.5892308345974517e-06, "logits": -0.45734792947769165, "logps": -81.56855773925781, "loss": 164.8281, "objective": 161.75088500976562, "ranking_idealized": 0.5045833587646484, "ranking_idealized_expo": 0.5045833587646484, "ranking_simple": 0.5099999904632568, "regularize": 161.75088500976562, "step": 950, "wo_beta": 15.513051986694336 }, { "epoch": 2.697213037316958, "eval_dpo_loss": 262.8074035644531, "eval_logits": -0.5199795961380005, "eval_logps": -84.58429718017578, "eval_loss": 512.1844482421875, "eval_objective": 512.7650756835938, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.523809552192688, "eval_regularize": 512.7650756835938, "eval_runtime": 309.6572, "eval_samples_per_second": 18.698, "eval_steps_per_second": 1.56, "eval_wo_beta": 14.164327621459961, "step": 950 }, { "dpo_loss": 76.9761962890625, "epoch": 2.838923004251299, "grad_norm": 13709.192801586882, "learning_rate": 2.341440200858589e-06, "logits": -0.46604040265083313, "logps": -79.8453598022461, "loss": 150.0401, "objective": 150.50038146972656, "ranking_idealized": 0.5112500190734863, "ranking_idealized_expo": 0.5112500190734863, "ranking_simple": 0.5099999904632568, "regularize": 150.50038146972656, "step": 1000, "wo_beta": 15.057799339294434 }, { "epoch": 2.838923004251299, "eval_dpo_loss": 263.6168518066406, "eval_logits": -0.5218656659126282, "eval_logps": -83.73433685302734, "eval_loss": 514.70361328125, "eval_objective": 516.595947265625, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5258799195289612, "eval_regularize": 516.595947265625, "eval_runtime": 308.5762, "eval_samples_per_second": 18.764, "eval_steps_per_second": 1.565, "eval_wo_beta": 14.180042266845703, "step": 1000 }, { "dpo_loss": 69.53437805175781, "epoch": 2.9806329711856403, "grad_norm": 14565.063000853466, "learning_rate": 2.0952075638923656e-06, "logits": -0.4686031639575958, "logps": -80.63922882080078, "loss": 141.0317, "objective": 136.92088317871094, "ranking_idealized": 0.5174999833106995, "ranking_idealized_expo": 0.5174999833106995, "ranking_simple": 0.5262500047683716, "regularize": 136.92088317871094, "step": 1050, "wo_beta": 15.512243270874023 }, { "epoch": 2.9806329711856403, "eval_dpo_loss": 266.94525146484375, "eval_logits": -0.49528759717941284, "eval_logps": -84.26761627197266, "eval_loss": 519.2467041015625, "eval_objective": 521.8153076171875, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5263975262641907, "eval_regularize": 521.8153076171875, "eval_runtime": 308.2205, "eval_samples_per_second": 18.785, "eval_steps_per_second": 1.567, "eval_wo_beta": 14.257741928100586, "step": 1050 }, { "epoch": 2.9806329711856403, "step": 1050, "total_flos": 0.0, "train_loss": 38.77721726190476, "train_runtime": 6898.2426, "train_samples_per_second": 36.822, "train_steps_per_second": 0.255 } ], "logging_steps": 50, "max_steps": 1760, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }