qwen2.5-0.5b-expo-L2EXPO-ES-0.1 / trainer_state.json
hZzy's picture
Model save
14ba8a9 verified
{
"best_metric": 14.316285133361816,
"best_model_checkpoint": "./qwen2.5-0.5b/qwen2.5-0.5b-expo-L2EXPO-ES-0.1/checkpoint-800",
"epoch": 2.975909305621162,
"eval_steps": 50,
"global_step": 1050,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"dpo_loss": 0.6931471824645996,
"epoch": 0.002834199338686821,
"grad_norm": 36.88507599678088,
"learning_rate": 2.840909090909091e-08,
"logits": -1.359458565711975,
"logps": -84.69721221923828,
"loss": 0.3913,
"objective": 0.3618059456348419,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.5833333134651184,
"regularize": 0.3618059456348419,
"step": 1,
"wo_beta": 14.830931663513184
},
{
"dpo_loss": 0.6800611615180969,
"epoch": 0.14170996693434104,
"grad_norm": 34.65452784204521,
"learning_rate": 1.4204545454545458e-06,
"logits": -1.481619119644165,
"logps": -83.80532836914062,
"loss": 0.4017,
"objective": 0.4062296152114868,
"ranking_idealized": 0.608418345451355,
"ranking_idealized_expo": 0.5229591727256775,
"ranking_simple": 0.5250850319862366,
"regularize": 0.4062296152114868,
"step": 50,
"wo_beta": 15.713354110717773
},
{
"epoch": 0.14170996693434104,
"eval_dpo_loss": 0.6868039965629578,
"eval_logits": -1.5024017095565796,
"eval_logps": -93.17259979248047,
"eval_loss": 0.4164615273475647,
"eval_objective": 0.41487643122673035,
"eval_ranking_idealized": 0.6030020713806152,
"eval_ranking_idealized_expo": 0.5222567319869995,
"eval_ranking_simple": 0.5258799195289612,
"eval_regularize": 0.41487643122673035,
"eval_runtime": 308.9601,
"eval_samples_per_second": 18.74,
"eval_steps_per_second": 1.563,
"eval_wo_beta": 16.426729202270508,
"step": 50
},
{
"dpo_loss": 0.6331456303596497,
"epoch": 0.2834199338686821,
"grad_norm": 29.48988078429707,
"learning_rate": 2.8409090909090916e-06,
"logits": -1.514231562614441,
"logps": -84.27212524414062,
"loss": 0.3777,
"objective": 0.38673925399780273,
"ranking_idealized": 0.6016666889190674,
"ranking_idealized_expo": 0.5141666531562805,
"ranking_simple": 0.5520833134651184,
"regularize": 0.38673925399780273,
"step": 100,
"wo_beta": 15.562942504882812
},
{
"epoch": 0.2834199338686821,
"eval_dpo_loss": 0.6818161606788635,
"eval_logits": -1.4774748086929321,
"eval_logps": -92.86526489257812,
"eval_loss": 0.4359625577926636,
"eval_objective": 0.4269382953643799,
"eval_ranking_idealized": 0.6030020713806152,
"eval_ranking_idealized_expo": 0.5222567319869995,
"eval_ranking_simple": 0.531573474407196,
"eval_regularize": 0.4269382953643799,
"eval_runtime": 308.6402,
"eval_samples_per_second": 18.76,
"eval_steps_per_second": 1.565,
"eval_wo_beta": 16.243934631347656,
"step": 100
},
{
"dpo_loss": 0.6157870292663574,
"epoch": 0.42512990080302315,
"grad_norm": 23.64962148842917,
"learning_rate": 4.2613636363636365e-06,
"logits": -1.363812804222107,
"logps": -83.03893280029297,
"loss": 0.4057,
"objective": 0.4109911620616913,
"ranking_idealized": 0.6066666841506958,
"ranking_idealized_expo": 0.5287500023841858,
"ranking_simple": 0.5679166913032532,
"regularize": 0.4109911620616913,
"step": 150,
"wo_beta": 15.67545223236084
},
{
"epoch": 0.42512990080302315,
"eval_dpo_loss": 0.6897013783454895,
"eval_logits": -1.2946008443832397,
"eval_logps": -84.17744445800781,
"eval_loss": 0.49110475182533264,
"eval_objective": 0.48045814037323,
"eval_ranking_idealized": 0.6030020713806152,
"eval_ranking_idealized_expo": 0.5222567319869995,
"eval_ranking_simple": 0.5383023023605347,
"eval_regularize": 0.48045814037323,
"eval_runtime": 312.8899,
"eval_samples_per_second": 18.505,
"eval_steps_per_second": 1.544,
"eval_wo_beta": 15.630563735961914,
"step": 150
},
{
"dpo_loss": 0.6037490963935852,
"epoch": 0.5668398677373642,
"grad_norm": 20.792422526564724,
"learning_rate": 4.997168347957521e-06,
"logits": -1.1515488624572754,
"logps": -78.54210662841797,
"loss": 0.4475,
"objective": 0.4344017505645752,
"ranking_idealized": 0.5924999713897705,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5537499785423279,
"regularize": 0.4344017505645752,
"step": 200,
"wo_beta": 15.705690383911133
},
{
"epoch": 0.5668398677373642,
"eval_dpo_loss": 0.7102847695350647,
"eval_logits": -0.9896814823150635,
"eval_logps": -89.7341537475586,
"eval_loss": 0.5660186409950256,
"eval_objective": 0.5515478253364563,
"eval_ranking_idealized": 0.6030020713806152,
"eval_ranking_idealized_expo": 0.5222567319869995,
"eval_ranking_simple": 0.531573474407196,
"eval_regularize": 0.5515478253364563,
"eval_runtime": 307.7447,
"eval_samples_per_second": 18.814,
"eval_steps_per_second": 1.569,
"eval_wo_beta": 15.128002166748047,
"step": 200
},
{
"dpo_loss": 0.6019502878189087,
"epoch": 0.7085498346717053,
"grad_norm": 19.149849902460005,
"learning_rate": 4.973122855144066e-06,
"logits": -0.9062835574150085,
"logps": -77.80750274658203,
"loss": 0.455,
"objective": 0.4496636390686035,
"ranking_idealized": 0.5991666913032532,
"ranking_idealized_expo": 0.5170833468437195,
"ranking_simple": 0.5724999904632568,
"regularize": 0.4496636390686035,
"step": 250,
"wo_beta": 16.00295639038086
},
{
"epoch": 0.7085498346717053,
"eval_dpo_loss": 0.7171492576599121,
"eval_logits": -1.0032674074172974,
"eval_logps": -78.19169616699219,
"eval_loss": 0.5978298783302307,
"eval_objective": 0.5822399258613586,
"eval_ranking_idealized": 0.6030020713806152,
"eval_ranking_idealized_expo": 0.5222567319869995,
"eval_ranking_simple": 0.5310559272766113,
"eval_regularize": 0.5822399258613586,
"eval_runtime": 309.2921,
"eval_samples_per_second": 18.72,
"eval_steps_per_second": 1.562,
"eval_wo_beta": 14.676263809204102,
"step": 250
},
{
"dpo_loss": 0.5993608236312866,
"epoch": 0.8502598016060463,
"grad_norm": 17.89888160897824,
"learning_rate": 4.924776641419513e-06,
"logits": -0.8504629731178284,
"logps": -74.18943786621094,
"loss": 0.4337,
"objective": 0.42672449350357056,
"ranking_idealized": 0.5799999833106995,
"ranking_idealized_expo": 0.4970833361148834,
"ranking_simple": 0.5529166460037231,
"regularize": 0.42672449350357056,
"step": 300,
"wo_beta": 15.642317771911621
},
{
"epoch": 0.8502598016060463,
"eval_dpo_loss": 0.710507333278656,
"eval_logits": -0.6760910153388977,
"eval_logps": -78.8918228149414,
"eval_loss": 0.5993344783782959,
"eval_objective": 0.5779486894607544,
"eval_ranking_idealized": 0.6030020713806152,
"eval_ranking_idealized_expo": 0.5222567319869995,
"eval_ranking_simple": 0.5300207138061523,
"eval_regularize": 0.5779486894607544,
"eval_runtime": 308.8688,
"eval_samples_per_second": 18.746,
"eval_steps_per_second": 1.564,
"eval_wo_beta": 14.919622421264648,
"step": 300
},
{
"dpo_loss": 0.5866905450820923,
"epoch": 0.9919697685403873,
"grad_norm": 18.680005245459032,
"learning_rate": 4.8526047530778175e-06,
"logits": -0.6557392477989197,
"logps": -72.1249008178711,
"loss": 0.4039,
"objective": 0.3963530361652374,
"ranking_idealized": 0.60916668176651,
"ranking_idealized_expo": 0.5270833373069763,
"ranking_simple": 0.5874999761581421,
"regularize": 0.3963530361652374,
"step": 350,
"wo_beta": 15.802534103393555
},
{
"epoch": 0.9919697685403873,
"eval_dpo_loss": 0.707767128944397,
"eval_logits": -0.7968087792396545,
"eval_logps": -75.1519775390625,
"eval_loss": 0.5977659821510315,
"eval_objective": 0.5765314102172852,
"eval_ranking_idealized": 0.6030020713806152,
"eval_ranking_idealized_expo": 0.5222567319869995,
"eval_ranking_simple": 0.5289855003356934,
"eval_regularize": 0.5765314102172852,
"eval_runtime": 309.2048,
"eval_samples_per_second": 18.725,
"eval_steps_per_second": 1.562,
"eval_wo_beta": 14.653112411499023,
"step": 350
},
{
"dpo_loss": 0.571822464466095,
"epoch": 1.1336797354747283,
"grad_norm": 17.758722724655755,
"learning_rate": 4.757316345716554e-06,
"logits": -0.6229808926582336,
"logps": -71.7779541015625,
"loss": 0.3729,
"objective": 0.38071343302726746,
"ranking_idealized": 0.6087499856948853,
"ranking_idealized_expo": 0.5337499976158142,
"ranking_simple": 0.596666693687439,
"regularize": 0.38071343302726746,
"step": 400,
"wo_beta": 15.872475624084473
},
{
"epoch": 1.1336797354747283,
"eval_dpo_loss": 0.7153333425521851,
"eval_logits": -0.5569362044334412,
"eval_logps": -75.14326477050781,
"eval_loss": 0.6180254817008972,
"eval_objective": 0.6000439524650574,
"eval_ranking_idealized": 0.6030020713806152,
"eval_ranking_idealized_expo": 0.5222567319869995,
"eval_ranking_simple": 0.522774338722229,
"eval_regularize": 0.6000439524650574,
"eval_runtime": 308.681,
"eval_samples_per_second": 18.757,
"eval_steps_per_second": 1.565,
"eval_wo_beta": 14.647075653076172,
"step": 400
},
{
"dpo_loss": 0.5686503052711487,
"epoch": 1.2753897024090695,
"grad_norm": 17.208572847816768,
"learning_rate": 4.639847716126855e-06,
"logits": -0.558698296546936,
"logps": -71.48841094970703,
"loss": 0.3454,
"objective": 0.34283894300460815,
"ranking_idealized": 0.5975000262260437,
"ranking_idealized_expo": 0.5199999809265137,
"ranking_simple": 0.59375,
"regularize": 0.34283894300460815,
"step": 450,
"wo_beta": 16.339805603027344
},
{
"epoch": 1.2753897024090695,
"eval_dpo_loss": 0.716464638710022,
"eval_logits": -0.6214241981506348,
"eval_logps": -76.22888946533203,
"eval_loss": 0.6315773725509644,
"eval_objective": 0.6131163239479065,
"eval_ranking_idealized": 0.6030020713806152,
"eval_ranking_idealized_expo": 0.5222567319869995,
"eval_ranking_simple": 0.533643901348114,
"eval_regularize": 0.6131163239479065,
"eval_runtime": 309.544,
"eval_samples_per_second": 18.705,
"eval_steps_per_second": 1.56,
"eval_wo_beta": 14.503443717956543,
"step": 450
},
{
"dpo_loss": 0.5579800605773926,
"epoch": 1.4170996693434104,
"grad_norm": 17.247428143304518,
"learning_rate": 4.501353102310901e-06,
"logits": -0.5506080985069275,
"logps": -72.05484008789062,
"loss": 0.3226,
"objective": 0.321167916059494,
"ranking_idealized": 0.57833331823349,
"ranking_idealized_expo": 0.4983333349227905,
"ranking_simple": 0.5724999904632568,
"regularize": 0.321167916059494,
"step": 500,
"wo_beta": 15.88575553894043
},
{
"epoch": 1.4170996693434104,
"eval_dpo_loss": 0.7203696370124817,
"eval_logits": -0.5608097910881042,
"eval_logps": -77.60398864746094,
"eval_loss": 0.6255373358726501,
"eval_objective": 0.6084341406822205,
"eval_ranking_idealized": 0.6030020713806152,
"eval_ranking_idealized_expo": 0.5222567319869995,
"eval_ranking_simple": 0.5284678936004639,
"eval_regularize": 0.6084341406822205,
"eval_runtime": 308.7073,
"eval_samples_per_second": 18.756,
"eval_steps_per_second": 1.565,
"eval_wo_beta": 14.499795913696289,
"step": 500
},
{
"dpo_loss": 0.5586966872215271,
"epoch": 1.5588096362777515,
"grad_norm": 16.23813175362919,
"learning_rate": 4.34319334202531e-06,
"logits": -0.5342339873313904,
"logps": -74.90955352783203,
"loss": 0.3133,
"objective": 0.31819403171539307,
"ranking_idealized": 0.5945833325386047,
"ranking_idealized_expo": 0.5116666555404663,
"ranking_simple": 0.5854166746139526,
"regularize": 0.31819403171539307,
"step": 550,
"wo_beta": 15.694311141967773
},
{
"epoch": 1.5588096362777515,
"eval_dpo_loss": 0.7138590216636658,
"eval_logits": -0.6736307144165039,
"eval_logps": -78.62907409667969,
"eval_loss": 0.6281688809394836,
"eval_objective": 0.613820493221283,
"eval_ranking_idealized": 0.6030020713806152,
"eval_ranking_idealized_expo": 0.5222567319869995,
"eval_ranking_simple": 0.533643901348114,
"eval_regularize": 0.613820493221283,
"eval_runtime": 309.2677,
"eval_samples_per_second": 18.722,
"eval_steps_per_second": 1.562,
"eval_wo_beta": 14.406906127929688,
"step": 550
},
{
"dpo_loss": 0.5538429021835327,
"epoch": 1.7005196032120926,
"grad_norm": 16.284668340767176,
"learning_rate": 4.16692250129073e-06,
"logits": -0.5140345692634583,
"logps": -74.2342758178711,
"loss": 0.2944,
"objective": 0.29470422863960266,
"ranking_idealized": 0.6004166603088379,
"ranking_idealized_expo": 0.51583331823349,
"ranking_simple": 0.5895833373069763,
"regularize": 0.29470422863960266,
"step": 600,
"wo_beta": 15.488865852355957
},
{
"epoch": 1.7005196032120926,
"eval_dpo_loss": 0.717461884021759,
"eval_logits": -0.5620033740997314,
"eval_logps": -78.91792297363281,
"eval_loss": 0.6321352124214172,
"eval_objective": 0.6138916015625,
"eval_ranking_idealized": 0.6030020713806152,
"eval_ranking_idealized_expo": 0.5222567319869995,
"eval_ranking_simple": 0.5357142686843872,
"eval_regularize": 0.6138916015625,
"eval_runtime": 308.6792,
"eval_samples_per_second": 18.757,
"eval_steps_per_second": 1.565,
"eval_wo_beta": 14.614200592041016,
"step": 600
},
{
"dpo_loss": 0.5554340481758118,
"epoch": 1.8422295701464337,
"grad_norm": 15.91791899711329,
"learning_rate": 3.974272604254906e-06,
"logits": -0.5311375260353088,
"logps": -74.39502716064453,
"loss": 0.2915,
"objective": 0.28872814774513245,
"ranking_idealized": 0.6058333516120911,
"ranking_idealized_expo": 0.5295833349227905,
"ranking_simple": 0.5991666913032532,
"regularize": 0.28872814774513245,
"step": 650,
"wo_beta": 16.528623580932617
},
{
"epoch": 1.8422295701464337,
"eval_dpo_loss": 0.7137619256973267,
"eval_logits": -0.702060341835022,
"eval_logps": -77.44371795654297,
"eval_loss": 0.6321162581443787,
"eval_objective": 0.6157041788101196,
"eval_ranking_idealized": 0.6030020713806152,
"eval_ranking_idealized_expo": 0.5222567319869995,
"eval_ranking_simple": 0.5367494821548462,
"eval_regularize": 0.6157041788101196,
"eval_runtime": 309.0389,
"eval_samples_per_second": 18.736,
"eval_steps_per_second": 1.563,
"eval_wo_beta": 14.385796546936035,
"step": 650
},
{
"dpo_loss": 0.5520748496055603,
"epoch": 1.9839395370807746,
"grad_norm": 15.323602142583136,
"learning_rate": 3.767136614452458e-06,
"logits": -0.5264750719070435,
"logps": -75.2638931274414,
"loss": 0.2675,
"objective": 0.2728944420814514,
"ranking_idealized": 0.5954166650772095,
"ranking_idealized_expo": 0.5129166841506958,
"ranking_simple": 0.5883333086967468,
"regularize": 0.2728944420814514,
"step": 700,
"wo_beta": 15.999488830566406
},
{
"epoch": 1.9839395370807746,
"eval_dpo_loss": 0.7185091972351074,
"eval_logits": -0.561150312423706,
"eval_logps": -79.35997009277344,
"eval_loss": 0.6386255621910095,
"eval_objective": 0.6233482956886292,
"eval_ranking_idealized": 0.6030020713806152,
"eval_ranking_idealized_expo": 0.5222567319869995,
"eval_ranking_simple": 0.5289855003356934,
"eval_regularize": 0.6233482956886292,
"eval_runtime": 309.5574,
"eval_samples_per_second": 18.704,
"eval_steps_per_second": 1.56,
"eval_wo_beta": 14.317137718200684,
"step": 700
},
{
"dpo_loss": 0.5413146615028381,
"epoch": 2.1256495040151155,
"grad_norm": 14.916489496350042,
"learning_rate": 3.547549834686222e-06,
"logits": -0.5209631323814392,
"logps": -75.42438507080078,
"loss": 0.2415,
"objective": 0.23713654279708862,
"ranking_idealized": 0.6066666841506958,
"ranking_idealized_expo": 0.5133333206176758,
"ranking_simple": 0.597083330154419,
"regularize": 0.23713654279708862,
"step": 750,
"wo_beta": 15.934895515441895
},
{
"epoch": 2.1256495040151155,
"eval_dpo_loss": 0.7177355885505676,
"eval_logits": -0.6173678636550903,
"eval_logps": -80.0989761352539,
"eval_loss": 0.6405187249183655,
"eval_objective": 0.6263132095336914,
"eval_ranking_idealized": 0.6030020713806152,
"eval_ranking_idealized_expo": 0.5222567319869995,
"eval_ranking_simple": 0.534679114818573,
"eval_regularize": 0.6263132095336914,
"eval_runtime": 309.3835,
"eval_samples_per_second": 18.715,
"eval_steps_per_second": 1.561,
"eval_wo_beta": 14.430180549621582,
"step": 750
},
{
"dpo_loss": 0.5419275760650635,
"epoch": 2.2673594709494567,
"grad_norm": 15.335797694469315,
"learning_rate": 3.3176699082935546e-06,
"logits": -0.5584273338317871,
"logps": -76.04695129394531,
"loss": 0.2263,
"objective": 0.223616361618042,
"ranking_idealized": 0.5941666960716248,
"ranking_idealized_expo": 0.5129166841506958,
"ranking_simple": 0.5866666436195374,
"regularize": 0.223616361618042,
"step": 800,
"wo_beta": 15.526464462280273
},
{
"epoch": 2.2673594709494567,
"eval_dpo_loss": 0.7205542922019958,
"eval_logits": -0.5665243268013,
"eval_logps": -79.37840270996094,
"eval_loss": 0.6457626223564148,
"eval_objective": 0.6297247409820557,
"eval_ranking_idealized": 0.6030020713806152,
"eval_ranking_idealized_expo": 0.5222567319869995,
"eval_ranking_simple": 0.534679114818573,
"eval_regularize": 0.6297247409820557,
"eval_runtime": 308.5045,
"eval_samples_per_second": 18.768,
"eval_steps_per_second": 1.566,
"eval_wo_beta": 14.316285133361816,
"step": 800
},
{
"dpo_loss": 0.539085865020752,
"epoch": 2.409069437883798,
"grad_norm": 15.300324456571563,
"learning_rate": 3.0797556183036582e-06,
"logits": -0.5494623780250549,
"logps": -75.91812133789062,
"loss": 0.2148,
"objective": 0.2133045643568039,
"ranking_idealized": 0.5979166626930237,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5879166722297668,
"regularize": 0.2133045643568039,
"step": 850,
"wo_beta": 15.76329231262207
},
{
"epoch": 2.409069437883798,
"eval_dpo_loss": 0.7192490100860596,
"eval_logits": -0.5793017148971558,
"eval_logps": -79.08055114746094,
"eval_loss": 0.6435712575912476,
"eval_objective": 0.6275891661643982,
"eval_ranking_idealized": 0.6030020713806152,
"eval_ranking_idealized_expo": 0.5222567319869995,
"eval_ranking_simple": 0.5362318754196167,
"eval_regularize": 0.6275891661643982,
"eval_runtime": 312.2924,
"eval_samples_per_second": 18.54,
"eval_steps_per_second": 1.547,
"eval_wo_beta": 14.426342010498047,
"step": 850
},
{
"dpo_loss": 0.5431251525878906,
"epoch": 2.550779404818139,
"grad_norm": 15.744993696603856,
"learning_rate": 2.8361446928038298e-06,
"logits": -0.5151351690292358,
"logps": -75.99987030029297,
"loss": 0.1993,
"objective": 0.1999633014202118,
"ranking_idealized": 0.5887500047683716,
"ranking_idealized_expo": 0.518750011920929,
"ranking_simple": 0.5899999737739563,
"regularize": 0.1999633014202118,
"step": 900,
"wo_beta": 16.303361892700195
},
{
"epoch": 2.550779404818139,
"eval_dpo_loss": 0.7217252850532532,
"eval_logits": -0.5620540976524353,
"eval_logps": -80.38152313232422,
"eval_loss": 0.6453951001167297,
"eval_objective": 0.6301912665367126,
"eval_ranking_idealized": 0.6030020713806152,
"eval_ranking_idealized_expo": 0.5222567319869995,
"eval_ranking_simple": 0.5341615080833435,
"eval_regularize": 0.6301912665367126,
"eval_runtime": 308.7307,
"eval_samples_per_second": 18.754,
"eval_steps_per_second": 1.564,
"eval_wo_beta": 14.44913387298584,
"step": 900
},
{
"dpo_loss": 0.5340785980224609,
"epoch": 2.69248937175248,
"grad_norm": 15.888338408049977,
"learning_rate": 2.5892308345974517e-06,
"logits": -0.5341619253158569,
"logps": -76.2621841430664,
"loss": 0.1887,
"objective": 0.1899857223033905,
"ranking_idealized": 0.5975000262260437,
"ranking_idealized_expo": 0.5087500214576721,
"ranking_simple": 0.5933333039283752,
"regularize": 0.1899857223033905,
"step": 950,
"wo_beta": 16.227678298950195
},
{
"epoch": 2.69248937175248,
"eval_dpo_loss": 0.7204239964485168,
"eval_logits": -0.6216442584991455,
"eval_logps": -79.14459228515625,
"eval_loss": 0.6443176865577698,
"eval_objective": 0.6274449825286865,
"eval_ranking_idealized": 0.6030020713806152,
"eval_ranking_idealized_expo": 0.5222567319869995,
"eval_ranking_simple": 0.533643901348114,
"eval_regularize": 0.6274449825286865,
"eval_runtime": 309.1927,
"eval_samples_per_second": 18.726,
"eval_steps_per_second": 1.562,
"eval_wo_beta": 14.318567276000977,
"step": 950
},
{
"dpo_loss": 0.5356190204620361,
"epoch": 2.8341993386868207,
"grad_norm": 14.415639648177313,
"learning_rate": 2.341440200858589e-06,
"logits": -0.5420577526092529,
"logps": -74.890869140625,
"loss": 0.1764,
"objective": 0.17968998849391937,
"ranking_idealized": 0.6020833253860474,
"ranking_idealized_expo": 0.5104166865348816,
"ranking_simple": 0.590416669845581,
"regularize": 0.17968998849391937,
"step": 1000,
"wo_beta": 15.519268035888672
},
{
"epoch": 2.8341993386868207,
"eval_dpo_loss": 0.7200449705123901,
"eval_logits": -0.6086606383323669,
"eval_logps": -79.77206420898438,
"eval_loss": 0.639886736869812,
"eval_objective": 0.6246000528335571,
"eval_ranking_idealized": 0.6030020713806152,
"eval_ranking_idealized_expo": 0.5222567319869995,
"eval_ranking_simple": 0.533643901348114,
"eval_regularize": 0.6246000528335571,
"eval_runtime": 309.9286,
"eval_samples_per_second": 18.682,
"eval_steps_per_second": 1.558,
"eval_wo_beta": 14.450177192687988,
"step": 1000
},
{
"dpo_loss": 0.5345789194107056,
"epoch": 2.975909305621162,
"grad_norm": 14.911688792125858,
"learning_rate": 2.0952075638923656e-06,
"logits": -0.5775164365768433,
"logps": -75.94026184082031,
"loss": 0.163,
"objective": 0.162851020693779,
"ranking_idealized": 0.6075000166893005,
"ranking_idealized_expo": 0.5179166793823242,
"ranking_simple": 0.6041666865348816,
"regularize": 0.162851020693779,
"step": 1050,
"wo_beta": 16.27153205871582
},
{
"epoch": 2.975909305621162,
"eval_dpo_loss": 0.721105694770813,
"eval_logits": -0.6067584156990051,
"eval_logps": -79.58184814453125,
"eval_loss": 0.6428102850914001,
"eval_objective": 0.6266354322433472,
"eval_ranking_idealized": 0.6030020713806152,
"eval_ranking_idealized_expo": 0.5222567319869995,
"eval_ranking_simple": 0.531573474407196,
"eval_regularize": 0.6266354322433472,
"eval_runtime": 308.0819,
"eval_samples_per_second": 18.794,
"eval_steps_per_second": 1.568,
"eval_wo_beta": 14.340644836425781,
"step": 1050
},
{
"epoch": 2.975909305621162,
"step": 1050,
"total_flos": 0.0,
"train_loss": 0.3115594020343962,
"train_runtime": 28931.7373,
"train_samples_per_second": 8.78,
"train_steps_per_second": 0.061
}
],
"logging_steps": 50,
"max_steps": 1760,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 50,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}