|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 4.988190836088805, |
|
"eval_steps": 50, |
|
"global_step": 880, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"dpo_loss": 0.6931471824645996, |
|
"epoch": 0.005668398677373642, |
|
"grad_norm": 13.413600039235007, |
|
"learning_rate": 5.681818181818181e-09, |
|
"logits": -1.3147305250167847, |
|
"logps": -88.0877456665039, |
|
"loss": 0.4113, |
|
"objective": 0.41588976979255676, |
|
"ranking_idealized": 0.6875, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.41588976979255676, |
|
"step": 1 |
|
}, |
|
{ |
|
"dpo_loss": 0.6931512951850891, |
|
"epoch": 0.02834199338686821, |
|
"grad_norm": 13.318320815280419, |
|
"learning_rate": 2.8409090909090908e-08, |
|
"logits": -1.3678321838378906, |
|
"logps": -84.44427490234375, |
|
"loss": 0.4131, |
|
"objective": 0.3755509555339813, |
|
"ranking_idealized": 0.6510416865348816, |
|
"ranking_idealized_expo": 0.5572916865348816, |
|
"ranking_simple": 0.546875, |
|
"regularize": 0.3755509555339813, |
|
"step": 5 |
|
}, |
|
{ |
|
"dpo_loss": 0.6927531361579895, |
|
"epoch": 0.05668398677373642, |
|
"grad_norm": 13.050623089340824, |
|
"learning_rate": 5.6818181818181815e-08, |
|
"logits": -1.4463988542556763, |
|
"logps": -83.39988708496094, |
|
"loss": 0.4176, |
|
"objective": 0.4423220753669739, |
|
"ranking_idealized": 0.675000011920929, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 0.4423220753669739, |
|
"step": 10 |
|
}, |
|
{ |
|
"dpo_loss": 0.6927918195724487, |
|
"epoch": 0.08502598016060463, |
|
"grad_norm": 12.549385306441062, |
|
"learning_rate": 8.522727272727271e-08, |
|
"logits": -1.4107797145843506, |
|
"logps": -83.50421905517578, |
|
"loss": 0.4254, |
|
"objective": 0.41179904341697693, |
|
"ranking_idealized": 0.6541666388511658, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 0.41179904341697693, |
|
"step": 15 |
|
}, |
|
{ |
|
"dpo_loss": 0.6924694776535034, |
|
"epoch": 0.11336797354747284, |
|
"grad_norm": 13.269620119946596, |
|
"learning_rate": 1.1363636363636363e-07, |
|
"logits": -1.4003115892410278, |
|
"logps": -84.06736755371094, |
|
"loss": 0.4149, |
|
"objective": 0.40317121148109436, |
|
"ranking_idealized": 0.6791666746139526, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.40317121148109436, |
|
"step": 20 |
|
}, |
|
{ |
|
"dpo_loss": 0.6906281113624573, |
|
"epoch": 0.14170996693434104, |
|
"grad_norm": 12.65234373247132, |
|
"learning_rate": 1.4204545454545455e-07, |
|
"logits": -1.4490704536437988, |
|
"logps": -83.72380065917969, |
|
"loss": 0.412, |
|
"objective": 0.4304184317588806, |
|
"ranking_idealized": 0.675000011920929, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.4304184317588806, |
|
"step": 25 |
|
}, |
|
{ |
|
"dpo_loss": 0.6906370520591736, |
|
"epoch": 0.17005196032120926, |
|
"grad_norm": 13.419812147505471, |
|
"learning_rate": 1.7045454545454543e-07, |
|
"logits": -1.4248003959655762, |
|
"logps": -84.09757232666016, |
|
"loss": 0.4126, |
|
"objective": 0.41593801975250244, |
|
"ranking_idealized": 0.6875, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.41593801975250244, |
|
"step": 30 |
|
}, |
|
{ |
|
"dpo_loss": 0.6881809234619141, |
|
"epoch": 0.19839395370807747, |
|
"grad_norm": 13.431894879328123, |
|
"learning_rate": 1.9886363636363636e-07, |
|
"logits": -1.398374319076538, |
|
"logps": -82.60546112060547, |
|
"loss": 0.4095, |
|
"objective": 0.3929609954357147, |
|
"ranking_idealized": 0.6583333611488342, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 0.3929609954357147, |
|
"step": 35 |
|
}, |
|
{ |
|
"dpo_loss": 0.6868489384651184, |
|
"epoch": 0.22673594709494568, |
|
"grad_norm": 15.0250838416837, |
|
"learning_rate": 2.2727272727272726e-07, |
|
"logits": -1.3904410600662231, |
|
"logps": -82.84651947021484, |
|
"loss": 0.42, |
|
"objective": 0.43919187784194946, |
|
"ranking_idealized": 0.6625000238418579, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.43919187784194946, |
|
"step": 40 |
|
}, |
|
{ |
|
"dpo_loss": 0.6844364404678345, |
|
"epoch": 0.25507794048181387, |
|
"grad_norm": 13.128806663839857, |
|
"learning_rate": 2.5568181818181816e-07, |
|
"logits": -1.5230154991149902, |
|
"logps": -84.21646118164062, |
|
"loss": 0.4194, |
|
"objective": 0.4717731177806854, |
|
"ranking_idealized": 0.7208333611488342, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.4717731177806854, |
|
"step": 45 |
|
}, |
|
{ |
|
"dpo_loss": 0.6831071376800537, |
|
"epoch": 0.2834199338686821, |
|
"grad_norm": 12.39410793472882, |
|
"learning_rate": 2.840909090909091e-07, |
|
"logits": -1.431780219078064, |
|
"logps": -82.2941665649414, |
|
"loss": 0.4122, |
|
"objective": 0.3948862850666046, |
|
"ranking_idealized": 0.637499988079071, |
|
"ranking_idealized_expo": 0.42500001192092896, |
|
"ranking_simple": 0.44999998807907104, |
|
"regularize": 0.3948862850666046, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.2834199338686821, |
|
"eval_dpo_loss": 0.6914567947387695, |
|
"eval_logits": -1.4614633321762085, |
|
"eval_logps": -90.56139373779297, |
|
"eval_loss": 0.4102250635623932, |
|
"eval_objective": 0.40930914878845215, |
|
"eval_ranking_idealized": 0.6570248007774353, |
|
"eval_ranking_idealized_expo": 0.5113636255264282, |
|
"eval_ranking_simple": 0.5123966932296753, |
|
"eval_regularize": 0.40930914878845215, |
|
"eval_runtime": 260.1383, |
|
"eval_samples_per_second": 22.257, |
|
"eval_steps_per_second": 0.93, |
|
"step": 50 |
|
}, |
|
{ |
|
"dpo_loss": 0.6807647943496704, |
|
"epoch": 0.3117619272555503, |
|
"grad_norm": 13.979478083508853, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits": -1.4781759977340698, |
|
"logps": -84.0101089477539, |
|
"loss": 0.4052, |
|
"objective": 0.4063163101673126, |
|
"ranking_idealized": 0.7124999761581421, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.4063163101673126, |
|
"step": 55 |
|
}, |
|
{ |
|
"dpo_loss": 0.6824926137924194, |
|
"epoch": 0.3401039206424185, |
|
"grad_norm": 13.484676530515722, |
|
"learning_rate": 3.4090909090909085e-07, |
|
"logits": -1.4679373502731323, |
|
"logps": -83.09486389160156, |
|
"loss": 0.3992, |
|
"objective": 0.387731671333313, |
|
"ranking_idealized": 0.7041666507720947, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.387731671333313, |
|
"step": 60 |
|
}, |
|
{ |
|
"dpo_loss": 0.6788213849067688, |
|
"epoch": 0.3684459140292867, |
|
"grad_norm": 13.535493104004898, |
|
"learning_rate": 3.693181818181818e-07, |
|
"logits": -1.4250341653823853, |
|
"logps": -83.52283477783203, |
|
"loss": 0.3842, |
|
"objective": 0.3719988465309143, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 0.3719988465309143, |
|
"step": 65 |
|
}, |
|
{ |
|
"dpo_loss": 0.6763210296630859, |
|
"epoch": 0.39678790741615494, |
|
"grad_norm": 13.25897931133664, |
|
"learning_rate": 3.977272727272727e-07, |
|
"logits": -1.5077797174453735, |
|
"logps": -85.39080047607422, |
|
"loss": 0.3855, |
|
"objective": 0.42043933272361755, |
|
"ranking_idealized": 0.6916666626930237, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 0.42043933272361755, |
|
"step": 70 |
|
}, |
|
{ |
|
"dpo_loss": 0.678033709526062, |
|
"epoch": 0.42512990080302315, |
|
"grad_norm": 14.035157652400327, |
|
"learning_rate": 4.2613636363636364e-07, |
|
"logits": -1.5349814891815186, |
|
"logps": -86.0143051147461, |
|
"loss": 0.3945, |
|
"objective": 0.41438591480255127, |
|
"ranking_idealized": 0.6791666746139526, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.41438591480255127, |
|
"step": 75 |
|
}, |
|
{ |
|
"dpo_loss": 0.6748775243759155, |
|
"epoch": 0.45347189418989137, |
|
"grad_norm": 13.539091864104346, |
|
"learning_rate": 4.545454545454545e-07, |
|
"logits": -1.5832253694534302, |
|
"logps": -85.59701538085938, |
|
"loss": 0.3789, |
|
"objective": 0.37422579526901245, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.37422579526901245, |
|
"step": 80 |
|
}, |
|
{ |
|
"dpo_loss": 0.6750870943069458, |
|
"epoch": 0.4818138875767596, |
|
"grad_norm": 12.78905385712093, |
|
"learning_rate": 4.829545454545455e-07, |
|
"logits": -1.5551499128341675, |
|
"logps": -84.24475860595703, |
|
"loss": 0.374, |
|
"objective": 0.39821094274520874, |
|
"ranking_idealized": 0.6541666388511658, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.39821094274520874, |
|
"step": 85 |
|
}, |
|
{ |
|
"dpo_loss": 0.6703960299491882, |
|
"epoch": 0.5101558809636277, |
|
"grad_norm": 14.26040681218726, |
|
"learning_rate": 4.999921328558332e-07, |
|
"logits": -1.37662935256958, |
|
"logps": -86.21568298339844, |
|
"loss": 0.3761, |
|
"objective": 0.3837045729160309, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 0.3837045729160309, |
|
"step": 90 |
|
}, |
|
{ |
|
"dpo_loss": 0.658724844455719, |
|
"epoch": 0.538497874350496, |
|
"grad_norm": 14.003866252787525, |
|
"learning_rate": 4.999036331701828e-07, |
|
"logits": -1.4695987701416016, |
|
"logps": -85.49458312988281, |
|
"loss": 0.3642, |
|
"objective": 0.39033612608909607, |
|
"ranking_idealized": 0.6833333373069763, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.39033612608909607, |
|
"step": 95 |
|
}, |
|
{ |
|
"dpo_loss": 0.6546652317047119, |
|
"epoch": 0.5668398677373642, |
|
"grad_norm": 13.876424116810778, |
|
"learning_rate": 4.99716834795752e-07, |
|
"logits": -1.5616024732589722, |
|
"logps": -86.23612213134766, |
|
"loss": 0.374, |
|
"objective": 0.4149954915046692, |
|
"ranking_idealized": 0.6708333492279053, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.4149954915046692, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.5668398677373642, |
|
"eval_dpo_loss": 0.6882808208465576, |
|
"eval_logits": -1.5521211624145508, |
|
"eval_logps": -92.03614807128906, |
|
"eval_loss": 0.4072900712490082, |
|
"eval_objective": 0.40819329023361206, |
|
"eval_ranking_idealized": 0.6570248007774353, |
|
"eval_ranking_idealized_expo": 0.5113636255264282, |
|
"eval_ranking_simple": 0.5144628286361694, |
|
"eval_regularize": 0.40819329023361206, |
|
"eval_runtime": 258.9254, |
|
"eval_samples_per_second": 22.362, |
|
"eval_steps_per_second": 0.935, |
|
"step": 100 |
|
}, |
|
{ |
|
"dpo_loss": 0.6509627103805542, |
|
"epoch": 0.5951818611242324, |
|
"grad_norm": 15.031011715031442, |
|
"learning_rate": 4.994318112090048e-07, |
|
"logits": -1.4410721063613892, |
|
"logps": -85.85182189941406, |
|
"loss": 0.3711, |
|
"objective": 0.3764886260032654, |
|
"ranking_idealized": 0.7083333134651184, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.3764886260032654, |
|
"step": 105 |
|
}, |
|
{ |
|
"dpo_loss": 0.6576470136642456, |
|
"epoch": 0.6235238545111006, |
|
"grad_norm": 16.068373019347053, |
|
"learning_rate": 4.990486745229364e-07, |
|
"logits": -1.6439845561981201, |
|
"logps": -84.1036376953125, |
|
"loss": 0.3694, |
|
"objective": 0.39763620495796204, |
|
"ranking_idealized": 0.6541666388511658, |
|
"ranking_idealized_expo": 0.4583333432674408, |
|
"ranking_simple": 0.48750001192092896, |
|
"regularize": 0.39763620495796204, |
|
"step": 110 |
|
}, |
|
{ |
|
"dpo_loss": 0.6543448567390442, |
|
"epoch": 0.6518658478979689, |
|
"grad_norm": 15.729444594038945, |
|
"learning_rate": 4.985675754429743e-07, |
|
"logits": -1.6000815629959106, |
|
"logps": -83.94436645507812, |
|
"loss": 0.3477, |
|
"objective": 0.3455929458141327, |
|
"ranking_idealized": 0.6583333611488342, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.3455929458141327, |
|
"step": 115 |
|
}, |
|
{ |
|
"dpo_loss": 0.6514815092086792, |
|
"epoch": 0.680207841284837, |
|
"grad_norm": 14.396427732147952, |
|
"learning_rate": 4.979887032076988e-07, |
|
"logits": -1.5459378957748413, |
|
"logps": -85.23513793945312, |
|
"loss": 0.3534, |
|
"objective": 0.3168259561061859, |
|
"ranking_idealized": 0.6583333611488342, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.3168259561061859, |
|
"step": 120 |
|
}, |
|
{ |
|
"dpo_loss": 0.6492612957954407, |
|
"epoch": 0.7085498346717053, |
|
"grad_norm": 16.706445645247783, |
|
"learning_rate": 4.973122855144065e-07, |
|
"logits": -1.5174397230148315, |
|
"logps": -86.0051040649414, |
|
"loss": 0.3448, |
|
"objective": 0.3529473543167114, |
|
"ranking_idealized": 0.699999988079071, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.3529473543167114, |
|
"step": 125 |
|
}, |
|
{ |
|
"dpo_loss": 0.65309077501297, |
|
"epoch": 0.7368918280585735, |
|
"grad_norm": 15.417556754357976, |
|
"learning_rate": 4.965385884295466e-07, |
|
"logits": -1.664696455001831, |
|
"logps": -85.23889923095703, |
|
"loss": 0.3464, |
|
"objective": 0.33712950348854065, |
|
"ranking_idealized": 0.637499988079071, |
|
"ranking_idealized_expo": 0.4541666805744171, |
|
"ranking_simple": 0.4958333373069763, |
|
"regularize": 0.33712950348854065, |
|
"step": 130 |
|
}, |
|
{ |
|
"dpo_loss": 0.6549941301345825, |
|
"epoch": 0.7652338214454416, |
|
"grad_norm": 13.59480500578719, |
|
"learning_rate": 4.956679162840645e-07, |
|
"logits": -1.626897931098938, |
|
"logps": -86.90068817138672, |
|
"loss": 0.3309, |
|
"objective": 0.34302666783332825, |
|
"ranking_idealized": 0.6416666507720947, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.34302666783332825, |
|
"step": 135 |
|
}, |
|
{ |
|
"dpo_loss": 0.6500818729400635, |
|
"epoch": 0.7935758148323099, |
|
"grad_norm": 14.79485288903614, |
|
"learning_rate": 4.947006115536947e-07, |
|
"logits": -1.523794412612915, |
|
"logps": -86.5340576171875, |
|
"loss": 0.3244, |
|
"objective": 0.3356337249279022, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.3356337249279022, |
|
"step": 140 |
|
}, |
|
{ |
|
"dpo_loss": 0.6436840295791626, |
|
"epoch": 0.821917808219178, |
|
"grad_norm": 14.29078834943314, |
|
"learning_rate": 4.936370547242482e-07, |
|
"logits": -1.5991618633270264, |
|
"logps": -86.87805938720703, |
|
"loss": 0.3315, |
|
"objective": 0.35039833188056946, |
|
"ranking_idealized": 0.6458333134651184, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.35039833188056946, |
|
"step": 145 |
|
}, |
|
{ |
|
"dpo_loss": 0.6453251242637634, |
|
"epoch": 0.8502598016060463, |
|
"grad_norm": 14.662823673975787, |
|
"learning_rate": 4.924776641419512e-07, |
|
"logits": -1.5607432126998901, |
|
"logps": -84.04727935791016, |
|
"loss": 0.3231, |
|
"objective": 0.32859519124031067, |
|
"ranking_idealized": 0.6916666626930237, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.32859519124031067, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.8502598016060463, |
|
"eval_dpo_loss": 0.6880838871002197, |
|
"eval_logits": -1.6073634624481201, |
|
"eval_logps": -92.8072509765625, |
|
"eval_loss": 0.40292537212371826, |
|
"eval_objective": 0.4087039530277252, |
|
"eval_ranking_idealized": 0.6570248007774353, |
|
"eval_ranking_idealized_expo": 0.5113636255264282, |
|
"eval_ranking_simple": 0.5185950398445129, |
|
"eval_regularize": 0.4087039530277252, |
|
"eval_runtime": 258.55, |
|
"eval_samples_per_second": 22.394, |
|
"eval_steps_per_second": 0.936, |
|
"step": 150 |
|
}, |
|
{ |
|
"dpo_loss": 0.6386777758598328, |
|
"epoch": 0.8786017949929145, |
|
"grad_norm": 14.231064502358223, |
|
"learning_rate": 4.912228958488892e-07, |
|
"logits": -1.5854390859603882, |
|
"logps": -84.10832214355469, |
|
"loss": 0.3257, |
|
"objective": 0.3301841616630554, |
|
"ranking_idealized": 0.6791666746139526, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.3301841616630554, |
|
"step": 155 |
|
}, |
|
{ |
|
"dpo_loss": 0.635101854801178, |
|
"epoch": 0.9069437883797827, |
|
"grad_norm": 16.2268120086952, |
|
"learning_rate": 4.898732434036243e-07, |
|
"logits": -1.4904930591583252, |
|
"logps": -86.09799194335938, |
|
"loss": 0.3107, |
|
"objective": 0.32787373661994934, |
|
"ranking_idealized": 0.6625000238418579, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.32787373661994934, |
|
"step": 160 |
|
}, |
|
{ |
|
"dpo_loss": 0.632634162902832, |
|
"epoch": 0.9352857817666509, |
|
"grad_norm": 16.041101199008867, |
|
"learning_rate": 4.884292376870567e-07, |
|
"logits": -1.5242409706115723, |
|
"logps": -86.48987579345703, |
|
"loss": 0.3212, |
|
"objective": 0.3137226700782776, |
|
"ranking_idealized": 0.6541666388511658, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.3137226700782776, |
|
"step": 165 |
|
}, |
|
{ |
|
"dpo_loss": 0.6288425922393799, |
|
"epoch": 0.9636277751535192, |
|
"grad_norm": 16.89173365453321, |
|
"learning_rate": 4.868914466936037e-07, |
|
"logits": -1.5360677242279053, |
|
"logps": -86.72618103027344, |
|
"loss": 0.3151, |
|
"objective": 0.30297866463661194, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.30297866463661194, |
|
"step": 170 |
|
}, |
|
{ |
|
"dpo_loss": 0.6335378289222717, |
|
"epoch": 0.9919697685403873, |
|
"grad_norm": 15.347989877166441, |
|
"learning_rate": 4.852604753077817e-07, |
|
"logits": -1.4790997505187988, |
|
"logps": -87.8569107055664, |
|
"loss": 0.3103, |
|
"objective": 0.29884618520736694, |
|
"ranking_idealized": 0.6833333373069763, |
|
"ranking_idealized_expo": 0.574999988079071, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.29884618520736694, |
|
"step": 175 |
|
}, |
|
{ |
|
"dpo_loss": 0.6220327615737915, |
|
"epoch": 1.0203117619272555, |
|
"grad_norm": 15.022798279596007, |
|
"learning_rate": 4.835369650662767e-07, |
|
"logits": -1.6809762716293335, |
|
"logps": -87.00578308105469, |
|
"loss": 0.2902, |
|
"objective": 0.3023075461387634, |
|
"ranking_idealized": 0.6833333373069763, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.3023075461387634, |
|
"step": 180 |
|
}, |
|
{ |
|
"dpo_loss": 0.6156979203224182, |
|
"epoch": 1.0486537553141237, |
|
"grad_norm": 17.673807880039096, |
|
"learning_rate": 4.817215939055985e-07, |
|
"logits": -1.54806387424469, |
|
"logps": -86.16964721679688, |
|
"loss": 0.2856, |
|
"objective": 0.30200377106666565, |
|
"ranking_idealized": 0.6541666388511658, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.30200377106666565, |
|
"step": 185 |
|
}, |
|
{ |
|
"dpo_loss": 0.6105552315711975, |
|
"epoch": 1.076995748700992, |
|
"grad_norm": 16.47130075175902, |
|
"learning_rate": 4.798150758954164e-07, |
|
"logits": -1.6065795421600342, |
|
"logps": -88.57856750488281, |
|
"loss": 0.2661, |
|
"objective": 0.23887412250041962, |
|
"ranking_idealized": 0.6916666626930237, |
|
"ranking_idealized_expo": 0.5541666746139526, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.23887412250041962, |
|
"step": 190 |
|
}, |
|
{ |
|
"dpo_loss": 0.6094806790351868, |
|
"epoch": 1.10533774208786, |
|
"grad_norm": 15.979183042956787, |
|
"learning_rate": 4.778181609576831e-07, |
|
"logits": -1.58108651638031, |
|
"logps": -86.33049011230469, |
|
"loss": 0.2734, |
|
"objective": 0.23615716397762299, |
|
"ranking_idealized": 0.7041666507720947, |
|
"ranking_idealized_expo": 0.5708333253860474, |
|
"ranking_simple": 0.6333333253860474, |
|
"regularize": 0.23615716397762299, |
|
"step": 195 |
|
}, |
|
{ |
|
"dpo_loss": 0.6135362982749939, |
|
"epoch": 1.1336797354747283, |
|
"grad_norm": 15.241345178579065, |
|
"learning_rate": 4.757316345716553e-07, |
|
"logits": -1.6668376922607422, |
|
"logps": -85.64834594726562, |
|
"loss": 0.267, |
|
"objective": 0.2661064565181732, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.2661064565181732, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.1336797354747283, |
|
"eval_dpo_loss": 0.6866354942321777, |
|
"eval_logits": -1.6423935890197754, |
|
"eval_logps": -94.7991943359375, |
|
"eval_loss": 0.4068562686443329, |
|
"eval_objective": 0.41099515557289124, |
|
"eval_ranking_idealized": 0.6570248007774353, |
|
"eval_ranking_idealized_expo": 0.5113636255264282, |
|
"eval_ranking_simple": 0.5185950398445129, |
|
"eval_regularize": 0.41099515557289124, |
|
"eval_runtime": 258.6603, |
|
"eval_samples_per_second": 22.385, |
|
"eval_steps_per_second": 0.936, |
|
"step": 200 |
|
}, |
|
{ |
|
"dpo_loss": 0.6104326844215393, |
|
"epoch": 1.1620217288615966, |
|
"grad_norm": 16.09148449696529, |
|
"learning_rate": 4.735563174649278e-07, |
|
"logits": -1.6373622417449951, |
|
"logps": -88.22838592529297, |
|
"loss": 0.2748, |
|
"objective": 0.2591724991798401, |
|
"ranking_idealized": 0.6458333134651184, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.2591724991798401, |
|
"step": 205 |
|
}, |
|
{ |
|
"dpo_loss": 0.6027090549468994, |
|
"epoch": 1.1903637222484649, |
|
"grad_norm": 16.78316844909737, |
|
"learning_rate": 4.7129306529060407e-07, |
|
"logits": -1.604967474937439, |
|
"logps": -88.00846099853516, |
|
"loss": 0.2647, |
|
"objective": 0.28820380568504333, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.28820380568504333, |
|
"step": 210 |
|
}, |
|
{ |
|
"dpo_loss": 0.6120165586471558, |
|
"epoch": 1.2187057156353331, |
|
"grad_norm": 17.38824297135803, |
|
"learning_rate": 4.6894276829072786e-07, |
|
"logits": -1.577252745628357, |
|
"logps": -88.0232925415039, |
|
"loss": 0.2457, |
|
"objective": 0.2474772185087204, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.2474772185087204, |
|
"step": 215 |
|
}, |
|
{ |
|
"dpo_loss": 0.6058060526847839, |
|
"epoch": 1.2470477090222012, |
|
"grad_norm": 16.33918535416867, |
|
"learning_rate": 4.6650635094610966e-07, |
|
"logits": -1.5445390939712524, |
|
"logps": -87.7970199584961, |
|
"loss": 0.25, |
|
"objective": 0.23831520974636078, |
|
"ranking_idealized": 0.7208333611488342, |
|
"ranking_idealized_expo": 0.6041666865348816, |
|
"ranking_simple": 0.637499988079071, |
|
"regularize": 0.23831520974636078, |
|
"step": 220 |
|
}, |
|
{ |
|
"dpo_loss": 0.6021844744682312, |
|
"epoch": 1.2753897024090695, |
|
"grad_norm": 14.989005292751132, |
|
"learning_rate": 4.639847716126854e-07, |
|
"logits": -1.6192957162857056, |
|
"logps": -89.04407501220703, |
|
"loss": 0.2607, |
|
"objective": 0.26420968770980835, |
|
"ranking_idealized": 0.6791666746139526, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.26420965790748596, |
|
"step": 225 |
|
}, |
|
{ |
|
"dpo_loss": 0.6108235120773315, |
|
"epoch": 1.3037316957959377, |
|
"grad_norm": 16.245321246774985, |
|
"learning_rate": 4.6137902214455106e-07, |
|
"logits": -1.5698676109313965, |
|
"logps": -89.06554412841797, |
|
"loss": 0.2454, |
|
"objective": 0.24457047879695892, |
|
"ranking_idealized": 0.6541666388511658, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.24457047879695892, |
|
"step": 230 |
|
}, |
|
{ |
|
"dpo_loss": 0.59207683801651, |
|
"epoch": 1.3320736891828058, |
|
"grad_norm": 16.42864016636988, |
|
"learning_rate": 4.5869012750382004e-07, |
|
"logits": -1.6616859436035156, |
|
"logps": -87.82197570800781, |
|
"loss": 0.2583, |
|
"objective": 0.26300859451293945, |
|
"ranking_idealized": 0.6875, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.26300859451293945, |
|
"step": 235 |
|
}, |
|
{ |
|
"dpo_loss": 0.5991641283035278, |
|
"epoch": 1.360415682569674, |
|
"grad_norm": 17.146089761318706, |
|
"learning_rate": 4.5591914535745817e-07, |
|
"logits": -1.5948702096939087, |
|
"logps": -89.31143188476562, |
|
"loss": 0.2442, |
|
"objective": 0.25130581855773926, |
|
"ranking_idealized": 0.637499988079071, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.25130581855773926, |
|
"step": 240 |
|
}, |
|
{ |
|
"dpo_loss": 0.6016849279403687, |
|
"epoch": 1.3887576759565423, |
|
"grad_norm": 14.997203138603757, |
|
"learning_rate": 4.5306716566125433e-07, |
|
"logits": -1.6367671489715576, |
|
"logps": -88.17431640625, |
|
"loss": 0.2399, |
|
"objective": 0.23935823142528534, |
|
"ranking_idealized": 0.6833333373069763, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.23935823142528534, |
|
"step": 245 |
|
}, |
|
{ |
|
"dpo_loss": 0.5918813943862915, |
|
"epoch": 1.4170996693434104, |
|
"grad_norm": 15.668454928081044, |
|
"learning_rate": 4.501353102310901e-07, |
|
"logits": -1.5877238512039185, |
|
"logps": -87.66322326660156, |
|
"loss": 0.2432, |
|
"objective": 0.2531537711620331, |
|
"ranking_idealized": 0.6625000238418579, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.2531537711620331, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.4170996693434104, |
|
"eval_dpo_loss": 0.6876620650291443, |
|
"eval_logits": -1.6720653772354126, |
|
"eval_logps": -96.13894653320312, |
|
"eval_loss": 0.4107522666454315, |
|
"eval_objective": 0.4137335419654846, |
|
"eval_ranking_idealized": 0.6570248007774353, |
|
"eval_ranking_idealized_expo": 0.5113636255264282, |
|
"eval_ranking_simple": 0.51962810754776, |
|
"eval_regularize": 0.4137335419654846, |
|
"eval_runtime": 259.3309, |
|
"eval_samples_per_second": 22.327, |
|
"eval_steps_per_second": 0.933, |
|
"step": 250 |
|
}, |
|
{ |
|
"dpo_loss": 0.5952737927436829, |
|
"epoch": 1.4454416627302786, |
|
"grad_norm": 16.40280338029817, |
|
"learning_rate": 4.471247323016777e-07, |
|
"logits": -1.5863794088363647, |
|
"logps": -89.24433898925781, |
|
"loss": 0.2442, |
|
"objective": 0.2290959656238556, |
|
"ranking_idealized": 0.675000011920929, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.6291666626930237, |
|
"regularize": 0.2290959656238556, |
|
"step": 255 |
|
}, |
|
{ |
|
"dpo_loss": 0.5954132080078125, |
|
"epoch": 1.473783656117147, |
|
"grad_norm": 16.11674277744465, |
|
"learning_rate": 4.440366160729392e-07, |
|
"logits": -1.6588572263717651, |
|
"logps": -89.44280242919922, |
|
"loss": 0.2443, |
|
"objective": 0.2354036122560501, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.2354036122560501, |
|
"step": 260 |
|
}, |
|
{ |
|
"dpo_loss": 0.5868396759033203, |
|
"epoch": 1.5021256495040152, |
|
"grad_norm": 16.753008834337265, |
|
"learning_rate": 4.4087217624420585e-07, |
|
"logits": -1.6106855869293213, |
|
"logps": -88.14371490478516, |
|
"loss": 0.239, |
|
"objective": 0.24956756830215454, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.24956756830215454, |
|
"step": 265 |
|
}, |
|
{ |
|
"dpo_loss": 0.5915893316268921, |
|
"epoch": 1.5304676428908834, |
|
"grad_norm": 17.579129679111187, |
|
"learning_rate": 4.3763265753642055e-07, |
|
"logits": -1.6173158884048462, |
|
"logps": -90.8720703125, |
|
"loss": 0.2421, |
|
"objective": 0.24301743507385254, |
|
"ranking_idealized": 0.6791666746139526, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.24301742017269135, |
|
"step": 270 |
|
}, |
|
{ |
|
"dpo_loss": 0.5958731770515442, |
|
"epoch": 1.5588096362777515, |
|
"grad_norm": 16.101798479127662, |
|
"learning_rate": 4.34319334202531e-07, |
|
"logits": -1.6187034845352173, |
|
"logps": -90.12999725341797, |
|
"loss": 0.2448, |
|
"objective": 0.22899790108203888, |
|
"ranking_idealized": 0.6916666626930237, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.6208333373069763, |
|
"regularize": 0.22899790108203888, |
|
"step": 275 |
|
}, |
|
{ |
|
"dpo_loss": 0.5842003226280212, |
|
"epoch": 1.5871516296646198, |
|
"grad_norm": 15.709789047148108, |
|
"learning_rate": 4.309335095262675e-07, |
|
"logits": -1.5244942903518677, |
|
"logps": -88.604248046875, |
|
"loss": 0.2411, |
|
"objective": 0.23850402235984802, |
|
"ranking_idealized": 0.675000011920929, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.23850402235984802, |
|
"step": 280 |
|
}, |
|
{ |
|
"dpo_loss": 0.5853084921836853, |
|
"epoch": 1.615493623051488, |
|
"grad_norm": 16.6854633771705, |
|
"learning_rate": 4.274765153095007e-07, |
|
"logits": -1.6502856016159058, |
|
"logps": -89.77727508544922, |
|
"loss": 0.2219, |
|
"objective": 0.21514521539211273, |
|
"ranking_idealized": 0.675000011920929, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.21514521539211273, |
|
"step": 285 |
|
}, |
|
{ |
|
"dpo_loss": 0.5910848379135132, |
|
"epoch": 1.643835616438356, |
|
"grad_norm": 18.978761606300836, |
|
"learning_rate": 4.239497113483819e-07, |
|
"logits": -1.7089149951934814, |
|
"logps": -86.87386322021484, |
|
"loss": 0.2312, |
|
"objective": 0.23006680607795715, |
|
"ranking_idealized": 0.699999988079071, |
|
"ranking_idealized_expo": 0.5625, |
|
"ranking_simple": 0.6291666626930237, |
|
"regularize": 0.23006680607795715, |
|
"step": 290 |
|
}, |
|
{ |
|
"dpo_loss": 0.5802692174911499, |
|
"epoch": 1.6721776098252243, |
|
"grad_norm": 16.652074965539576, |
|
"learning_rate": 4.203544848984728e-07, |
|
"logits": -1.5955086946487427, |
|
"logps": -86.49956512451172, |
|
"loss": 0.2276, |
|
"objective": 0.23742005228996277, |
|
"ranking_idealized": 0.6583333611488342, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.23742005228996277, |
|
"step": 295 |
|
}, |
|
{ |
|
"dpo_loss": 0.5916833281517029, |
|
"epoch": 1.7005196032120926, |
|
"grad_norm": 16.884463449554712, |
|
"learning_rate": 4.166922501290729e-07, |
|
"logits": -1.6546835899353027, |
|
"logps": -88.2989730834961, |
|
"loss": 0.2252, |
|
"objective": 0.26394858956336975, |
|
"ranking_idealized": 0.7083333134651184, |
|
"ranking_idealized_expo": 0.574999988079071, |
|
"ranking_simple": 0.6208333373069763, |
|
"regularize": 0.26394858956336975, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.7005196032120926, |
|
"eval_dpo_loss": 0.6866207718849182, |
|
"eval_logits": -1.6648496389389038, |
|
"eval_logps": -95.62443542480469, |
|
"eval_loss": 0.410134494304657, |
|
"eval_objective": 0.4137687385082245, |
|
"eval_ranking_idealized": 0.6570248007774353, |
|
"eval_ranking_idealized_expo": 0.5113636255264282, |
|
"eval_ranking_simple": 0.5216942429542542, |
|
"eval_regularize": 0.4137687385082245, |
|
"eval_runtime": 259.4458, |
|
"eval_samples_per_second": 22.317, |
|
"eval_steps_per_second": 0.933, |
|
"step": 300 |
|
}, |
|
{ |
|
"dpo_loss": 0.5952399373054504, |
|
"epoch": 1.7288615965989607, |
|
"grad_norm": 16.24562342201146, |
|
"learning_rate": 4.129644475669616e-07, |
|
"logits": -1.6116312742233276, |
|
"logps": -88.82595825195312, |
|
"loss": 0.218, |
|
"objective": 0.2242499738931656, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.2242499738931656, |
|
"step": 305 |
|
}, |
|
{ |
|
"dpo_loss": 0.583368182182312, |
|
"epoch": 1.7572035899858292, |
|
"grad_norm": 18.099666352463437, |
|
"learning_rate": 4.0917254352977206e-07, |
|
"logits": -1.7004183530807495, |
|
"logps": -87.11441040039062, |
|
"loss": 0.2283, |
|
"objective": 0.2325660139322281, |
|
"ranking_idealized": 0.6583333611488342, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.612500011920929, |
|
"regularize": 0.2325660139322281, |
|
"step": 310 |
|
}, |
|
{ |
|
"dpo_loss": 0.5933206677436829, |
|
"epoch": 1.7855455833726972, |
|
"grad_norm": 16.545516113765466, |
|
"learning_rate": 4.053180295492202e-07, |
|
"logits": -1.602583408355713, |
|
"logps": -88.69900512695312, |
|
"loss": 0.2287, |
|
"objective": 0.21895338594913483, |
|
"ranking_idealized": 0.6541666388511658, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.21895337104797363, |
|
"step": 315 |
|
}, |
|
{ |
|
"dpo_loss": 0.5876157283782959, |
|
"epoch": 1.8138875767595655, |
|
"grad_norm": 18.487916312721516, |
|
"learning_rate": 4.0140242178441665e-07, |
|
"logits": -1.6777514219284058, |
|
"logps": -90.22407531738281, |
|
"loss": 0.2153, |
|
"objective": 0.20208925008773804, |
|
"ranking_idealized": 0.6625000238418579, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.20208925008773804, |
|
"step": 320 |
|
}, |
|
{ |
|
"dpo_loss": 0.5746586918830872, |
|
"epoch": 1.8422295701464337, |
|
"grad_norm": 18.670087833334332, |
|
"learning_rate": 3.9742726042549053e-07, |
|
"logits": -1.7464016675949097, |
|
"logps": -91.97502899169922, |
|
"loss": 0.219, |
|
"objective": 0.2114688903093338, |
|
"ranking_idealized": 0.7166666388511658, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.6416666507720947, |
|
"regularize": 0.2114688903093338, |
|
"step": 325 |
|
}, |
|
{ |
|
"dpo_loss": 0.5767069458961487, |
|
"epoch": 1.8705715635333018, |
|
"grad_norm": 18.91235181922618, |
|
"learning_rate": 3.933941090877615e-07, |
|
"logits": -1.466091275215149, |
|
"logps": -90.11954498291016, |
|
"loss": 0.219, |
|
"objective": 0.2171897292137146, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.2171897292137146, |
|
"step": 330 |
|
}, |
|
{ |
|
"dpo_loss": 0.5894278287887573, |
|
"epoch": 1.89891355692017, |
|
"grad_norm": 15.592599296406116, |
|
"learning_rate": 3.8930455419669744e-07, |
|
"logits": -1.6301844120025635, |
|
"logps": -89.44200134277344, |
|
"loss": 0.2112, |
|
"objective": 0.18907961249351501, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.18907961249351501, |
|
"step": 335 |
|
}, |
|
{ |
|
"dpo_loss": 0.5853725075721741, |
|
"epoch": 1.9272555503070383, |
|
"grad_norm": 15.83418724261755, |
|
"learning_rate": 3.851602043638994e-07, |
|
"logits": -1.660121202468872, |
|
"logps": -91.48560333251953, |
|
"loss": 0.2026, |
|
"objective": 0.18658672273159027, |
|
"ranking_idealized": 0.7291666865348816, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.6416666507720947, |
|
"regularize": 0.18658672273159027, |
|
"step": 340 |
|
}, |
|
{ |
|
"dpo_loss": 0.5825453400611877, |
|
"epoch": 1.9555975436939064, |
|
"grad_norm": 16.366640560133238, |
|
"learning_rate": 3.809626897543604e-07, |
|
"logits": -1.657557725906372, |
|
"logps": -90.72650909423828, |
|
"loss": 0.1961, |
|
"objective": 0.18415075540542603, |
|
"ranking_idealized": 0.6458333134651184, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.18415075540542603, |
|
"step": 345 |
|
}, |
|
{ |
|
"dpo_loss": 0.5838915109634399, |
|
"epoch": 1.9839395370807746, |
|
"grad_norm": 17.651439137685784, |
|
"learning_rate": 3.7671366144524576e-07, |
|
"logits": -1.551125407218933, |
|
"logps": -91.74525451660156, |
|
"loss": 0.2082, |
|
"objective": 0.20508398115634918, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.20508398115634918, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.9839395370807746, |
|
"eval_dpo_loss": 0.6863144040107727, |
|
"eval_logits": -1.6988588571548462, |
|
"eval_logps": -97.52546691894531, |
|
"eval_loss": 0.4102429747581482, |
|
"eval_objective": 0.4131539762020111, |
|
"eval_ranking_idealized": 0.6570248007774353, |
|
"eval_ranking_idealized_expo": 0.5113636255264282, |
|
"eval_ranking_simple": 0.51962810754776, |
|
"eval_regularize": 0.4131539762020111, |
|
"eval_runtime": 258.5165, |
|
"eval_samples_per_second": 22.397, |
|
"eval_steps_per_second": 0.936, |
|
"step": 350 |
|
}, |
|
{ |
|
"dpo_loss": 0.5745717287063599, |
|
"epoch": 2.012281530467643, |
|
"grad_norm": 15.784086525377202, |
|
"learning_rate": 3.724147907764478e-07, |
|
"logits": -1.5323989391326904, |
|
"logps": -90.18486785888672, |
|
"loss": 0.2055, |
|
"objective": 0.20713359117507935, |
|
"ranking_idealized": 0.699999988079071, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.6458333134651184, |
|
"regularize": 0.20713359117507935, |
|
"step": 355 |
|
}, |
|
{ |
|
"dpo_loss": 0.5675494074821472, |
|
"epoch": 2.040623523854511, |
|
"grad_norm": 20.55210866626824, |
|
"learning_rate": 3.6806776869317067e-07, |
|
"logits": -1.6239458322525024, |
|
"logps": -89.69377899169922, |
|
"loss": 0.1726, |
|
"objective": 0.17787505686283112, |
|
"ranking_idealized": 0.7250000238418579, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.675000011920929, |
|
"regularize": 0.17787505686283112, |
|
"step": 360 |
|
}, |
|
{ |
|
"dpo_loss": 0.5650666952133179, |
|
"epoch": 2.0689655172413794, |
|
"grad_norm": 16.544231581396616, |
|
"learning_rate": 3.636743050808028e-07, |
|
"logits": -1.6872822046279907, |
|
"logps": -91.26659393310547, |
|
"loss": 0.1866, |
|
"objective": 0.16895455121994019, |
|
"ranking_idealized": 0.6708333492279053, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.6333333253860474, |
|
"regularize": 0.16895455121994019, |
|
"step": 365 |
|
}, |
|
{ |
|
"dpo_loss": 0.575705349445343, |
|
"epoch": 2.0973075106282475, |
|
"grad_norm": 16.758890304778106, |
|
"learning_rate": 3.5923612809233984e-07, |
|
"logits": -1.662663221359253, |
|
"logps": -87.82825469970703, |
|
"loss": 0.1679, |
|
"objective": 0.1752353459596634, |
|
"ranking_idealized": 0.6541666388511658, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.1752353310585022, |
|
"step": 370 |
|
}, |
|
{ |
|
"dpo_loss": 0.5633853077888489, |
|
"epoch": 2.1256495040151155, |
|
"grad_norm": 16.99783941953761, |
|
"learning_rate": 3.5475498346862214e-07, |
|
"logits": -1.6271302700042725, |
|
"logps": -91.13916015625, |
|
"loss": 0.1726, |
|
"objective": 0.16911908984184265, |
|
"ranking_idealized": 0.7291666865348816, |
|
"ranking_idealized_expo": 0.5958333611488342, |
|
"ranking_simple": 0.6791666746139526, |
|
"regularize": 0.16911907494068146, |
|
"step": 375 |
|
}, |
|
{ |
|
"dpo_loss": 0.5752108097076416, |
|
"epoch": 2.153991497401984, |
|
"grad_norm": 17.23340187781712, |
|
"learning_rate": 3.502326338516534e-07, |
|
"logits": -1.5394021272659302, |
|
"logps": -89.99533081054688, |
|
"loss": 0.179, |
|
"objective": 0.1650255024433136, |
|
"ranking_idealized": 0.6833333373069763, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.1650255024433136, |
|
"step": 380 |
|
}, |
|
{ |
|
"dpo_loss": 0.571977972984314, |
|
"epoch": 2.182333490788852, |
|
"grad_norm": 15.78796183229778, |
|
"learning_rate": 3.4567085809127245e-07, |
|
"logits": -1.6716177463531494, |
|
"logps": -91.3305892944336, |
|
"loss": 0.1653, |
|
"objective": 0.13291777670383453, |
|
"ranking_idealized": 0.6458333134651184, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.612500011920929, |
|
"regularize": 0.13291777670383453, |
|
"step": 385 |
|
}, |
|
{ |
|
"dpo_loss": 0.5752423405647278, |
|
"epoch": 2.21067548417572, |
|
"grad_norm": 18.129151048308177, |
|
"learning_rate": 3.4107145054544855e-07, |
|
"logits": -1.5358682870864868, |
|
"logps": -91.15263366699219, |
|
"loss": 0.1744, |
|
"objective": 0.16379062831401825, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.16379062831401825, |
|
"step": 390 |
|
}, |
|
{ |
|
"dpo_loss": 0.5485681891441345, |
|
"epoch": 2.2390174775625886, |
|
"grad_norm": 16.313781937896024, |
|
"learning_rate": 3.3643622037447767e-07, |
|
"logits": -1.5593619346618652, |
|
"logps": -92.42921447753906, |
|
"loss": 0.1776, |
|
"objective": 0.1637614667415619, |
|
"ranking_idealized": 0.6916666626930237, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.6416666507720947, |
|
"regularize": 0.1637614667415619, |
|
"step": 395 |
|
}, |
|
{ |
|
"dpo_loss": 0.5597947239875793, |
|
"epoch": 2.2673594709494567, |
|
"grad_norm": 16.659127876259, |
|
"learning_rate": 3.317669908293554e-07, |
|
"logits": -1.631813645362854, |
|
"logps": -92.92410278320312, |
|
"loss": 0.1825, |
|
"objective": 0.196553573012352, |
|
"ranking_idealized": 0.7583333253860474, |
|
"ranking_idealized_expo": 0.5791666507720947, |
|
"ranking_simple": 0.6958333253860474, |
|
"regularize": 0.1965535581111908, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.2673594709494567, |
|
"eval_dpo_loss": 0.6862595677375793, |
|
"eval_logits": -1.6931663751602173, |
|
"eval_logps": -97.79962158203125, |
|
"eval_loss": 0.4124037027359009, |
|
"eval_objective": 0.4144473969936371, |
|
"eval_ranking_idealized": 0.6570248007774353, |
|
"eval_ranking_idealized_expo": 0.5113636255264282, |
|
"eval_ranking_simple": 0.5206611752510071, |
|
"eval_regularize": 0.4144473969936371, |
|
"eval_runtime": 258.4529, |
|
"eval_samples_per_second": 22.403, |
|
"eval_steps_per_second": 0.936, |
|
"step": 400 |
|
}, |
|
{ |
|
"dpo_loss": 0.5691500902175903, |
|
"epoch": 2.295701464336325, |
|
"grad_norm": 17.30117286858182, |
|
"learning_rate": 3.270655985346081e-07, |
|
"logits": -1.7139372825622559, |
|
"logps": -89.78938293457031, |
|
"loss": 0.1717, |
|
"objective": 0.18224166333675385, |
|
"ranking_idealized": 0.737500011920929, |
|
"ranking_idealized_expo": 0.5583333373069763, |
|
"ranking_simple": 0.6458333134651184, |
|
"regularize": 0.18224166333675385, |
|
"step": 405 |
|
}, |
|
{ |
|
"dpo_loss": 0.5579439997673035, |
|
"epoch": 2.324043457723193, |
|
"grad_norm": 17.123218301010457, |
|
"learning_rate": 3.223338927658632e-07, |
|
"logits": -1.5741162300109863, |
|
"logps": -91.07009887695312, |
|
"loss": 0.1618, |
|
"objective": 0.15759395062923431, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.15759395062923431, |
|
"step": 410 |
|
}, |
|
{ |
|
"dpo_loss": 0.5704253315925598, |
|
"epoch": 2.3523854511100613, |
|
"grad_norm": 17.68931154440285, |
|
"learning_rate": 3.175737347224432e-07, |
|
"logits": -1.6476367712020874, |
|
"logps": -91.30075073242188, |
|
"loss": 0.1732, |
|
"objective": 0.19281157851219177, |
|
"ranking_idealized": 0.6458333134651184, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.612500011920929, |
|
"regularize": 0.19281157851219177, |
|
"step": 415 |
|
}, |
|
{ |
|
"dpo_loss": 0.5771389603614807, |
|
"epoch": 2.3807274444969297, |
|
"grad_norm": 17.274582557860825, |
|
"learning_rate": 3.1278699679526975e-07, |
|
"logits": -1.5415838956832886, |
|
"logps": -92.63572692871094, |
|
"loss": 0.1579, |
|
"objective": 0.15308959782123566, |
|
"ranking_idealized": 0.6458333134651184, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.15308959782123566, |
|
"step": 420 |
|
}, |
|
{ |
|
"dpo_loss": 0.566936731338501, |
|
"epoch": 2.409069437883798, |
|
"grad_norm": 16.445557447346342, |
|
"learning_rate": 3.0797556183036575e-07, |
|
"logits": -1.5967096090316772, |
|
"logps": -91.4622802734375, |
|
"loss": 0.1607, |
|
"objective": 0.16068215668201447, |
|
"ranking_idealized": 0.6625000238418579, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.16068214178085327, |
|
"step": 425 |
|
}, |
|
{ |
|
"dpo_loss": 0.5632474422454834, |
|
"epoch": 2.4374114312706663, |
|
"grad_norm": 15.962055488306607, |
|
"learning_rate": 3.0314132238824415e-07, |
|
"logits": -1.6247813701629639, |
|
"logps": -92.1604995727539, |
|
"loss": 0.1547, |
|
"objective": 0.1360505074262619, |
|
"ranking_idealized": 0.6708333492279053, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.625, |
|
"regularize": 0.1360505074262619, |
|
"step": 430 |
|
}, |
|
{ |
|
"dpo_loss": 0.566851019859314, |
|
"epoch": 2.4657534246575343, |
|
"grad_norm": 16.006081940650837, |
|
"learning_rate": 2.982861799994764e-07, |
|
"logits": -1.6544443368911743, |
|
"logps": -92.63692474365234, |
|
"loss": 0.1637, |
|
"objective": 0.17756709456443787, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.6208333373069763, |
|
"regularize": 0.17756709456443787, |
|
"step": 435 |
|
}, |
|
{ |
|
"dpo_loss": 0.5565729141235352, |
|
"epoch": 2.4940954180444024, |
|
"grad_norm": 17.37344722468487, |
|
"learning_rate": 2.934120444167326e-07, |
|
"logits": -1.5883994102478027, |
|
"logps": -91.88066101074219, |
|
"loss": 0.159, |
|
"objective": 0.15150482952594757, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.15150482952594757, |
|
"step": 440 |
|
}, |
|
{ |
|
"dpo_loss": 0.565682590007782, |
|
"epoch": 2.5224374114312704, |
|
"grad_norm": 18.453788667979182, |
|
"learning_rate": 2.885208328635864e-07, |
|
"logits": -1.6123565435409546, |
|
"logps": -89.5006332397461, |
|
"loss": 0.1576, |
|
"objective": 0.1587233543395996, |
|
"ranking_idealized": 0.6958333253860474, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.6458333134651184, |
|
"regularize": 0.1587233543395996, |
|
"step": 445 |
|
}, |
|
{ |
|
"dpo_loss": 0.5824019312858582, |
|
"epoch": 2.550779404818139, |
|
"grad_norm": 19.424550718198045, |
|
"learning_rate": 2.83614469280383e-07, |
|
"logits": -1.6537593603134155, |
|
"logps": -91.4095230102539, |
|
"loss": 0.1504, |
|
"objective": 0.15120406448841095, |
|
"ranking_idealized": 0.6583333611488342, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.15120406448841095, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.550779404818139, |
|
"eval_dpo_loss": 0.6864377856254578, |
|
"eval_logits": -1.711348056793213, |
|
"eval_logps": -99.202880859375, |
|
"eval_loss": 0.41492125391960144, |
|
"eval_objective": 0.4176488518714905, |
|
"eval_ranking_idealized": 0.6570248007774353, |
|
"eval_ranking_idealized_expo": 0.5113636255264282, |
|
"eval_ranking_simple": 0.5216942429542542, |
|
"eval_regularize": 0.4176488518714905, |
|
"eval_runtime": 258.9375, |
|
"eval_samples_per_second": 22.361, |
|
"eval_steps_per_second": 0.935, |
|
"step": 450 |
|
}, |
|
{ |
|
"dpo_loss": 0.5587320923805237, |
|
"epoch": 2.579121398205007, |
|
"grad_norm": 18.174711126742732, |
|
"learning_rate": 2.786948835674634e-07, |
|
"logits": -1.6923545598983765, |
|
"logps": -92.0631103515625, |
|
"loss": 0.1514, |
|
"objective": 0.15467478334903717, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.6416666507720947, |
|
"regularize": 0.15467478334903717, |
|
"step": 455 |
|
}, |
|
{ |
|
"dpo_loss": 0.5529297590255737, |
|
"epoch": 2.6074633915918755, |
|
"grad_norm": 18.378396938924546, |
|
"learning_rate": 2.737640108260456e-07, |
|
"logits": -1.765284776687622, |
|
"logps": -92.5921401977539, |
|
"loss": 0.1544, |
|
"objective": 0.13981758058071136, |
|
"ranking_idealized": 0.6833333373069763, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.6291666626930237, |
|
"regularize": 0.13981756567955017, |
|
"step": 460 |
|
}, |
|
{ |
|
"dpo_loss": 0.5604754090309143, |
|
"epoch": 2.6358053849787435, |
|
"grad_norm": 17.16312208138119, |
|
"learning_rate": 2.6882379059705953e-07, |
|
"logits": -1.6412590742111206, |
|
"logps": -91.83204650878906, |
|
"loss": 0.1571, |
|
"objective": 0.15992027521133423, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.15992026031017303, |
|
"step": 465 |
|
}, |
|
{ |
|
"dpo_loss": 0.5741956830024719, |
|
"epoch": 2.6641473783656116, |
|
"grad_norm": 17.444271577746782, |
|
"learning_rate": 2.6387616609823504e-07, |
|
"logits": -1.6750518083572388, |
|
"logps": -91.33477020263672, |
|
"loss": 0.151, |
|
"objective": 0.17329135537147522, |
|
"ranking_idealized": 0.6625000238418579, |
|
"ranking_idealized_expo": 0.5583333373069763, |
|
"ranking_simple": 0.625, |
|
"regularize": 0.17329135537147522, |
|
"step": 470 |
|
}, |
|
{ |
|
"dpo_loss": 0.5746079087257385, |
|
"epoch": 2.69248937175248, |
|
"grad_norm": 17.607595627923466, |
|
"learning_rate": 2.5892308345974514e-07, |
|
"logits": -1.6217347383499146, |
|
"logps": -90.19564819335938, |
|
"loss": 0.1521, |
|
"objective": 0.1534017026424408, |
|
"ranking_idealized": 0.6791666746139526, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.1534017026424408, |
|
"step": 475 |
|
}, |
|
{ |
|
"dpo_loss": 0.56805020570755, |
|
"epoch": 2.720831365139348, |
|
"grad_norm": 18.441983400540806, |
|
"learning_rate": 2.53966490958702e-07, |
|
"logits": -1.7197903394699097, |
|
"logps": -90.20177459716797, |
|
"loss": 0.148, |
|
"objective": 0.14620445668697357, |
|
"ranking_idealized": 0.6833333373069763, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.6291666626930237, |
|
"regularize": 0.14620442688465118, |
|
"step": 480 |
|
}, |
|
{ |
|
"dpo_loss": 0.5559974312782288, |
|
"epoch": 2.7491733585262166, |
|
"grad_norm": 17.00220355810742, |
|
"learning_rate": 2.4900833825280967e-07, |
|
"logits": -1.628369927406311, |
|
"logps": -93.048828125, |
|
"loss": 0.1488, |
|
"objective": 0.1451708972454071, |
|
"ranking_idealized": 0.6708333492279053, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.6583333611488342, |
|
"regularize": 0.1451708972454071, |
|
"step": 485 |
|
}, |
|
{ |
|
"dpo_loss": 0.555105984210968, |
|
"epoch": 2.7775153519130846, |
|
"grad_norm": 17.798810379621077, |
|
"learning_rate": 2.4405057561347313e-07, |
|
"logits": -1.647185206413269, |
|
"logps": -90.4990463256836, |
|
"loss": 0.1613, |
|
"objective": 0.17200501263141632, |
|
"ranking_idealized": 0.6833333373069763, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.6208333373069763, |
|
"regularize": 0.17200501263141632, |
|
"step": 490 |
|
}, |
|
{ |
|
"dpo_loss": 0.5594576001167297, |
|
"epoch": 2.8058573452999527, |
|
"grad_norm": 18.076540126591944, |
|
"learning_rate": 2.39095153158666e-07, |
|
"logits": -1.6548616886138916, |
|
"logps": -90.19225311279297, |
|
"loss": 0.1504, |
|
"objective": 0.1365150660276413, |
|
"ranking_idealized": 0.6416666507720947, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.6208333373069763, |
|
"regularize": 0.1365150511264801, |
|
"step": 495 |
|
}, |
|
{ |
|
"dpo_loss": 0.5552747845649719, |
|
"epoch": 2.8341993386868207, |
|
"grad_norm": 17.278782223651127, |
|
"learning_rate": 2.3414402008585886e-07, |
|
"logits": -1.6857832670211792, |
|
"logps": -89.0853500366211, |
|
"loss": 0.1494, |
|
"objective": 0.15246258676052094, |
|
"ranking_idealized": 0.6916666626930237, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.6416666507720947, |
|
"regularize": 0.15246258676052094, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.8341993386868207, |
|
"eval_dpo_loss": 0.6861580014228821, |
|
"eval_logits": -1.7174702882766724, |
|
"eval_logps": -99.17545318603516, |
|
"eval_loss": 0.41525644063949585, |
|
"eval_objective": 0.4182237386703491, |
|
"eval_ranking_idealized": 0.6570248007774353, |
|
"eval_ranking_idealized_expo": 0.5113636255264282, |
|
"eval_ranking_simple": 0.5227272510528564, |
|
"eval_regularize": 0.4182237386703491, |
|
"eval_runtime": 259.2438, |
|
"eval_samples_per_second": 22.334, |
|
"eval_steps_per_second": 0.933, |
|
"step": 500 |
|
}, |
|
{ |
|
"dpo_loss": 0.5660989284515381, |
|
"epoch": 2.862541332073689, |
|
"grad_norm": 18.182680782212074, |
|
"learning_rate": 2.2919912390530943e-07, |
|
"logits": -1.6143929958343506, |
|
"logps": -91.0888900756836, |
|
"loss": 0.1437, |
|
"objective": 0.16082407534122467, |
|
"ranking_idealized": 0.6583333611488342, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.6333333253860474, |
|
"regularize": 0.16082406044006348, |
|
"step": 505 |
|
}, |
|
{ |
|
"dpo_loss": 0.5675150752067566, |
|
"epoch": 2.8908833254605573, |
|
"grad_norm": 16.373132303441977, |
|
"learning_rate": 2.2426240967401638e-07, |
|
"logits": -1.5807684659957886, |
|
"logps": -91.39689636230469, |
|
"loss": 0.1433, |
|
"objective": 0.1494457870721817, |
|
"ranking_idealized": 0.6958333253860474, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.6291666626930237, |
|
"regularize": 0.1494457870721817, |
|
"step": 510 |
|
}, |
|
{ |
|
"dpo_loss": 0.5627566576004028, |
|
"epoch": 2.9192253188474258, |
|
"grad_norm": 18.008132213394468, |
|
"learning_rate": 2.1933581923063837e-07, |
|
"logits": -1.7557440996170044, |
|
"logps": -91.32353210449219, |
|
"loss": 0.1448, |
|
"objective": 0.13260915875434875, |
|
"ranking_idealized": 0.6583333611488342, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.6291666626930237, |
|
"regularize": 0.13260914385318756, |
|
"step": 515 |
|
}, |
|
{ |
|
"dpo_loss": 0.5646940469741821, |
|
"epoch": 2.947567312234294, |
|
"grad_norm": 17.30767973762921, |
|
"learning_rate": 2.1442129043167873e-07, |
|
"logits": -1.610668420791626, |
|
"logps": -92.7865219116211, |
|
"loss": 0.1368, |
|
"objective": 0.11772733181715012, |
|
"ranking_idealized": 0.6583333611488342, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.6291666626930237, |
|
"regularize": 0.11772733181715012, |
|
"step": 520 |
|
}, |
|
{ |
|
"dpo_loss": 0.5658089518547058, |
|
"epoch": 2.975909305621162, |
|
"grad_norm": 18.116492800551395, |
|
"learning_rate": 2.0952075638923652e-07, |
|
"logits": -1.6272333860397339, |
|
"logps": -92.43870544433594, |
|
"loss": 0.1424, |
|
"objective": 0.15236981213092804, |
|
"ranking_idealized": 0.6875, |
|
"ranking_idealized_expo": 0.5708333253860474, |
|
"ranking_simple": 0.6416666507720947, |
|
"regularize": 0.15236981213092804, |
|
"step": 525 |
|
}, |
|
{ |
|
"dpo_loss": 0.5534684658050537, |
|
"epoch": 3.0042512990080303, |
|
"grad_norm": 18.337044286762765, |
|
"learning_rate": 2.0463614471062435e-07, |
|
"logits": -1.6210473775863647, |
|
"logps": -91.47294616699219, |
|
"loss": 0.1502, |
|
"objective": 0.17477649450302124, |
|
"ranking_idealized": 0.675000011920929, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.6291666626930237, |
|
"regularize": 0.17477649450302124, |
|
"step": 530 |
|
}, |
|
{ |
|
"dpo_loss": 0.5659457445144653, |
|
"epoch": 3.0325932923948984, |
|
"grad_norm": 16.444884726429134, |
|
"learning_rate": 1.9976937674015026e-07, |
|
"logits": -1.6844907999038696, |
|
"logps": -93.2222671508789, |
|
"loss": 0.1284, |
|
"objective": 0.14268328249454498, |
|
"ranking_idealized": 0.6708333492279053, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.6583333611488342, |
|
"regularize": 0.14268328249454498, |
|
"step": 535 |
|
}, |
|
{ |
|
"dpo_loss": 0.5521051287651062, |
|
"epoch": 3.0609352857817664, |
|
"grad_norm": 19.963683437356444, |
|
"learning_rate": 1.9492236680336483e-07, |
|
"logits": -1.7760847806930542, |
|
"logps": -90.89082336425781, |
|
"loss": 0.1216, |
|
"objective": 0.10329335182905197, |
|
"ranking_idealized": 0.6625000238418579, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.6541666388511658, |
|
"regularize": 0.10329335182905197, |
|
"step": 540 |
|
}, |
|
{ |
|
"dpo_loss": 0.5619763731956482, |
|
"epoch": 3.089277279168635, |
|
"grad_norm": 17.450130382767895, |
|
"learning_rate": 1.9009702145406724e-07, |
|
"logits": -1.6995065212249756, |
|
"logps": -92.40625, |
|
"loss": 0.1232, |
|
"objective": 0.1230437308549881, |
|
"ranking_idealized": 0.637499988079071, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.1230437308549881, |
|
"step": 545 |
|
}, |
|
{ |
|
"dpo_loss": 0.5528106689453125, |
|
"epoch": 3.117619272555503, |
|
"grad_norm": 18.245098236126562, |
|
"learning_rate": 1.8529523872436977e-07, |
|
"logits": -1.5086556673049927, |
|
"logps": -92.30103302001953, |
|
"loss": 0.1407, |
|
"objective": 0.12957319617271423, |
|
"ranking_idealized": 0.699999988079071, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.6791666746139526, |
|
"regularize": 0.12957318127155304, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 3.117619272555503, |
|
"eval_dpo_loss": 0.6856257915496826, |
|
"eval_logits": -1.7183054685592651, |
|
"eval_logps": -99.2997055053711, |
|
"eval_loss": 0.4161340296268463, |
|
"eval_objective": 0.41743505001068115, |
|
"eval_ranking_idealized": 0.6570248007774353, |
|
"eval_ranking_idealized_expo": 0.5113636255264282, |
|
"eval_ranking_simple": 0.5216942429542542, |
|
"eval_regularize": 0.41743505001068115, |
|
"eval_runtime": 258.7783, |
|
"eval_samples_per_second": 22.374, |
|
"eval_steps_per_second": 0.935, |
|
"step": 550 |
|
}, |
|
{ |
|
"dpo_loss": 0.5473430752754211, |
|
"epoch": 3.1459612659423715, |
|
"grad_norm": 18.87722095427309, |
|
"learning_rate": 1.8051890737811393e-07, |
|
"logits": -1.6218358278274536, |
|
"logps": -93.05738067626953, |
|
"loss": 0.1336, |
|
"objective": 0.1305130124092102, |
|
"ranking_idealized": 0.6833333373069763, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.6625000238418579, |
|
"regularize": 0.1305130124092102, |
|
"step": 555 |
|
}, |
|
{ |
|
"dpo_loss": 0.5478367209434509, |
|
"epoch": 3.1743032593292395, |
|
"grad_norm": 22.714698597290123, |
|
"learning_rate": 1.7576990616793137e-07, |
|
"logits": -1.601859211921692, |
|
"logps": -90.21554565429688, |
|
"loss": 0.1212, |
|
"objective": 0.10795855522155762, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.637499988079071, |
|
"regularize": 0.10795855522155762, |
|
"step": 560 |
|
}, |
|
{ |
|
"dpo_loss": 0.5566601157188416, |
|
"epoch": 3.2026452527161076, |
|
"grad_norm": 24.322678833478967, |
|
"learning_rate": 1.710501030962438e-07, |
|
"logits": -1.663177728652954, |
|
"logps": -91.7726058959961, |
|
"loss": 0.1298, |
|
"objective": 0.13216590881347656, |
|
"ranking_idealized": 0.6916666626930237, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.6458333134651184, |
|
"regularize": 0.13216587901115417, |
|
"step": 565 |
|
}, |
|
{ |
|
"dpo_loss": 0.5519458055496216, |
|
"epoch": 3.230987246102976, |
|
"grad_norm": 19.102063233264193, |
|
"learning_rate": 1.663613546804912e-07, |
|
"logits": -1.5763607025146484, |
|
"logps": -91.98208618164062, |
|
"loss": 0.1293, |
|
"objective": 0.13738204538822174, |
|
"ranking_idealized": 0.6416666507720947, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.13738203048706055, |
|
"step": 570 |
|
}, |
|
{ |
|
"dpo_loss": 0.5559364557266235, |
|
"epoch": 3.259329239489844, |
|
"grad_norm": 16.15481429380041, |
|
"learning_rate": 1.617055052228768e-07, |
|
"logits": -1.6705526113510132, |
|
"logps": -92.17435455322266, |
|
"loss": 0.1266, |
|
"objective": 0.12801046669483185, |
|
"ranking_idealized": 0.7083333134651184, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.6499999761581421, |
|
"regularize": 0.12801046669483185, |
|
"step": 575 |
|
}, |
|
{ |
|
"dpo_loss": 0.5649384260177612, |
|
"epoch": 3.287671232876712, |
|
"grad_norm": 17.44743081337015, |
|
"learning_rate": 1.5708438608491815e-07, |
|
"logits": -1.6591442823410034, |
|
"logps": -93.50952911376953, |
|
"loss": 0.1277, |
|
"objective": 0.11801984906196594, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.5583333373069763, |
|
"ranking_simple": 0.6291666626930237, |
|
"regularize": 0.11801984906196594, |
|
"step": 580 |
|
}, |
|
{ |
|
"dpo_loss": 0.5595548152923584, |
|
"epoch": 3.3160132262635806, |
|
"grad_norm": 16.72082331684023, |
|
"learning_rate": 1.524998149670871e-07, |
|
"logits": -1.69523286819458, |
|
"logps": -93.74117279052734, |
|
"loss": 0.12, |
|
"objective": 0.10769928246736526, |
|
"ranking_idealized": 0.6958333253860474, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.6583333611488342, |
|
"regularize": 0.10769927501678467, |
|
"step": 585 |
|
}, |
|
{ |
|
"dpo_loss": 0.561581015586853, |
|
"epoch": 3.3443552196504487, |
|
"grad_norm": 19.465809423510365, |
|
"learning_rate": 1.479535951938243e-07, |
|
"logits": -1.7049933671951294, |
|
"logps": -93.83617401123047, |
|
"loss": 0.1205, |
|
"objective": 0.09809862077236176, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.637499988079071, |
|
"regularize": 0.09809862077236176, |
|
"step": 590 |
|
}, |
|
{ |
|
"dpo_loss": 0.5538628101348877, |
|
"epoch": 3.372697213037317, |
|
"grad_norm": 17.81052400873953, |
|
"learning_rate": 1.43447515004208e-07, |
|
"logits": -1.613613247871399, |
|
"logps": -92.85578155517578, |
|
"loss": 0.1191, |
|
"objective": 0.12334737926721573, |
|
"ranking_idealized": 0.6458333134651184, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.637499988079071, |
|
"regularize": 0.12334737926721573, |
|
"step": 595 |
|
}, |
|
{ |
|
"dpo_loss": 0.5357978940010071, |
|
"epoch": 3.4010392064241852, |
|
"grad_norm": 18.626853535104544, |
|
"learning_rate": 1.3898334684855645e-07, |
|
"logits": -1.624743938446045, |
|
"logps": -92.40316009521484, |
|
"loss": 0.1149, |
|
"objective": 0.13463754951953888, |
|
"ranking_idealized": 0.6916666626930237, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.6458333134651184, |
|
"regularize": 0.13463754951953888, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.4010392064241852, |
|
"eval_dpo_loss": 0.6852067112922668, |
|
"eval_logits": -1.71807062625885, |
|
"eval_logps": -99.92455291748047, |
|
"eval_loss": 0.41705650091171265, |
|
"eval_objective": 0.41811424493789673, |
|
"eval_ranking_idealized": 0.6570248007774353, |
|
"eval_ranking_idealized_expo": 0.5113636255264282, |
|
"eval_ranking_simple": 0.5247933864593506, |
|
"eval_regularize": 0.41811424493789673, |
|
"eval_runtime": 259.0859, |
|
"eval_samples_per_second": 22.348, |
|
"eval_steps_per_second": 0.934, |
|
"step": 600 |
|
}, |
|
{ |
|
"dpo_loss": 0.5652448534965515, |
|
"epoch": 3.4293811998110533, |
|
"grad_norm": 17.958926430591173, |
|
"learning_rate": 1.3456284669124157e-07, |
|
"logits": -1.6740020513534546, |
|
"logps": -94.55862426757812, |
|
"loss": 0.1179, |
|
"objective": 0.11572790890932083, |
|
"ranking_idealized": 0.6583333611488342, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.625, |
|
"regularize": 0.11572788655757904, |
|
"step": 605 |
|
}, |
|
{ |
|
"dpo_loss": 0.5543821454048157, |
|
"epoch": 3.4577231931979218, |
|
"grad_norm": 17.326848783729876, |
|
"learning_rate": 1.301877533199859e-07, |
|
"logits": -1.6315828561782837, |
|
"logps": -92.49845886230469, |
|
"loss": 0.1149, |
|
"objective": 0.1067105308175087, |
|
"ranking_idealized": 0.6791666746139526, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.6499999761581421, |
|
"regularize": 0.1067105159163475, |
|
"step": 610 |
|
}, |
|
{ |
|
"dpo_loss": 0.5443283915519714, |
|
"epoch": 3.48606518658479, |
|
"grad_norm": 16.84586393500809, |
|
"learning_rate": 1.2585978766191724e-07, |
|
"logits": -1.664933681488037, |
|
"logps": -93.27455139160156, |
|
"loss": 0.1142, |
|
"objective": 0.10945113748311996, |
|
"ranking_idealized": 0.6458333134651184, |
|
"ranking_idealized_expo": 0.47083333134651184, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.10945113748311996, |
|
"step": 615 |
|
}, |
|
{ |
|
"dpo_loss": 0.5419160723686218, |
|
"epoch": 3.514407179971658, |
|
"grad_norm": 17.32874521556865, |
|
"learning_rate": 1.2158065210664848e-07, |
|
"logits": -1.5332224369049072, |
|
"logps": -92.34308624267578, |
|
"loss": 0.1203, |
|
"objective": 0.12084861099720001, |
|
"ranking_idealized": 0.637499988079071, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.12084860354661942, |
|
"step": 620 |
|
}, |
|
{ |
|
"dpo_loss": 0.5541211366653442, |
|
"epoch": 3.5427491733585263, |
|
"grad_norm": 17.48530471086995, |
|
"learning_rate": 1.1735202983664802e-07, |
|
"logits": -1.6171096563339233, |
|
"logps": -91.3125991821289, |
|
"loss": 0.1178, |
|
"objective": 0.11125477403402328, |
|
"ranking_idealized": 0.6708333492279053, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.6416666507720947, |
|
"regularize": 0.11125477403402328, |
|
"step": 625 |
|
}, |
|
{ |
|
"dpo_loss": 0.5698776245117188, |
|
"epoch": 3.5710911667453944, |
|
"grad_norm": 17.864701578880954, |
|
"learning_rate": 1.1317558416516696e-07, |
|
"logits": -1.697689175605774, |
|
"logps": -91.67240905761719, |
|
"loss": 0.1261, |
|
"objective": 0.13253255188465118, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.6083333492279053, |
|
"regularize": 0.13253255188465118, |
|
"step": 630 |
|
}, |
|
{ |
|
"dpo_loss": 0.5599467158317566, |
|
"epoch": 3.5994331601322624, |
|
"grad_norm": 17.33519253157568, |
|
"learning_rate": 1.090529578819799e-07, |
|
"logits": -1.6461411714553833, |
|
"logps": -91.57376098632812, |
|
"loss": 0.1157, |
|
"objective": 0.10732007026672363, |
|
"ranking_idealized": 0.637499988079071, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.6208333373069763, |
|
"regularize": 0.10732006281614304, |
|
"step": 635 |
|
}, |
|
{ |
|
"dpo_loss": 0.540539562702179, |
|
"epoch": 3.627775153519131, |
|
"grad_norm": 17.025667462047203, |
|
"learning_rate": 1.0498577260720048e-07, |
|
"logits": -1.5717778205871582, |
|
"logps": -93.14022827148438, |
|
"loss": 0.1146, |
|
"objective": 0.13500064611434937, |
|
"ranking_idealized": 0.6416666507720947, |
|
"ranking_idealized_expo": 0.4625000059604645, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.13500064611434937, |
|
"step": 640 |
|
}, |
|
{ |
|
"dpo_loss": 0.5469278693199158, |
|
"epoch": 3.656117146905999, |
|
"grad_norm": 17.536092815770388, |
|
"learning_rate": 1.0097562815342214e-07, |
|
"logits": -1.6058826446533203, |
|
"logps": -90.76680755615234, |
|
"loss": 0.1144, |
|
"objective": 0.1191474050283432, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.6291666626930237, |
|
"regularize": 0.1191474050283432, |
|
"step": 645 |
|
}, |
|
{ |
|
"dpo_loss": 0.5611483454704285, |
|
"epoch": 3.6844591402928675, |
|
"grad_norm": 17.646206320924833, |
|
"learning_rate": 9.702410189643836e-08, |
|
"logits": -1.6121342182159424, |
|
"logps": -92.83375549316406, |
|
"loss": 0.1108, |
|
"objective": 0.09943919628858566, |
|
"ranking_idealized": 0.6458333134651184, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.6083333492279053, |
|
"regularize": 0.09943918883800507, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 3.6844591402928675, |
|
"eval_dpo_loss": 0.6852837800979614, |
|
"eval_logits": -1.7315040826797485, |
|
"eval_logps": -99.91177368164062, |
|
"eval_loss": 0.41784536838531494, |
|
"eval_objective": 0.41884875297546387, |
|
"eval_ranking_idealized": 0.6570248007774353, |
|
"eval_ranking_idealized_expo": 0.5113636255264282, |
|
"eval_ranking_simple": 0.5247933864593506, |
|
"eval_regularize": 0.41884875297546387, |
|
"eval_runtime": 259.4431, |
|
"eval_samples_per_second": 22.317, |
|
"eval_steps_per_second": 0.933, |
|
"step": 650 |
|
}, |
|
{ |
|
"dpo_loss": 0.5510907769203186, |
|
"epoch": 3.7128011336797355, |
|
"grad_norm": 17.84028807284025, |
|
"learning_rate": 9.313274815478698e-08, |
|
"logits": -1.6280105113983154, |
|
"logps": -92.27388763427734, |
|
"loss": 0.117, |
|
"objective": 0.10249165445566177, |
|
"ranking_idealized": 0.6583333611488342, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.6291666626930237, |
|
"regularize": 0.10249165445566177, |
|
"step": 655 |
|
}, |
|
{ |
|
"dpo_loss": 0.5551621913909912, |
|
"epoch": 3.7411431270666036, |
|
"grad_norm": 17.863904309670215, |
|
"learning_rate": 8.930309757836516e-08, |
|
"logits": -1.7605994939804077, |
|
"logps": -92.74076080322266, |
|
"loss": 0.1162, |
|
"objective": 0.11682406812906265, |
|
"ranking_idealized": 0.6916666626930237, |
|
"ranking_idealized_expo": 0.5541666746139526, |
|
"ranking_simple": 0.6625000238418579, |
|
"regularize": 0.11682406812906265, |
|
"step": 660 |
|
}, |
|
{ |
|
"dpo_loss": 0.5553780198097229, |
|
"epoch": 3.769485120453472, |
|
"grad_norm": 17.287090327509993, |
|
"learning_rate": 8.553665654635342e-08, |
|
"logits": -1.6500779390335083, |
|
"logps": -92.00687408447266, |
|
"loss": 0.116, |
|
"objective": 0.11367592960596085, |
|
"ranking_idealized": 0.6958333253860474, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.6541666388511658, |
|
"regularize": 0.11367591470479965, |
|
"step": 665 |
|
}, |
|
{ |
|
"dpo_loss": 0.5561904311180115, |
|
"epoch": 3.79782711384034, |
|
"grad_norm": 18.08647866984471, |
|
"learning_rate": 8.183490657468686e-08, |
|
"logits": -1.7430044412612915, |
|
"logps": -92.36637878417969, |
|
"loss": 0.1153, |
|
"objective": 0.13086958229541779, |
|
"ranking_idealized": 0.7333333492279053, |
|
"ranking_idealized_expo": 0.5958333611488342, |
|
"ranking_simple": 0.7124999761581421, |
|
"regularize": 0.13086958229541779, |
|
"step": 670 |
|
}, |
|
{ |
|
"dpo_loss": 0.5525475740432739, |
|
"epoch": 3.826169107227208, |
|
"grad_norm": 17.678219555630616, |
|
"learning_rate": 7.819930373330669e-08, |
|
"logits": -1.6892848014831543, |
|
"logps": -91.58055114746094, |
|
"loss": 0.1165, |
|
"objective": 0.10088498890399933, |
|
"ranking_idealized": 0.6708333492279053, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.6416666507720947, |
|
"regularize": 0.10088498145341873, |
|
"step": 675 |
|
}, |
|
{ |
|
"dpo_loss": 0.5708147883415222, |
|
"epoch": 3.8545111006140766, |
|
"grad_norm": 17.208804705028182, |
|
"learning_rate": 7.463127807341966e-08, |
|
"logits": -1.6462949514389038, |
|
"logps": -92.41487884521484, |
|
"loss": 0.1181, |
|
"objective": 0.11868777871131897, |
|
"ranking_idealized": 0.6541666388511658, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.6208333373069763, |
|
"regularize": 0.11868777871131897, |
|
"step": 680 |
|
}, |
|
{ |
|
"dpo_loss": 0.5442604422569275, |
|
"epoch": 3.8828530940009447, |
|
"grad_norm": 18.600166194890996, |
|
"learning_rate": 7.113223306499336e-08, |
|
"logits": -1.7259678840637207, |
|
"logps": -91.63528442382812, |
|
"loss": 0.1127, |
|
"objective": 0.10472600162029266, |
|
"ranking_idealized": 0.6875, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.6499999761581421, |
|
"regularize": 0.10472600162029266, |
|
"step": 685 |
|
}, |
|
{ |
|
"dpo_loss": 0.5496628284454346, |
|
"epoch": 3.9111950873878127, |
|
"grad_norm": 17.80496655704031, |
|
"learning_rate": 6.770354504470574e-08, |
|
"logits": -1.6540542840957642, |
|
"logps": -90.78262329101562, |
|
"loss": 0.1164, |
|
"objective": 0.10735266655683517, |
|
"ranking_idealized": 0.7041666507720947, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.6875, |
|
"regularize": 0.10735265165567398, |
|
"step": 690 |
|
}, |
|
{ |
|
"dpo_loss": 0.5639461874961853, |
|
"epoch": 3.9395370807746812, |
|
"grad_norm": 16.492816826616206, |
|
"learning_rate": 6.434656267456842e-08, |
|
"logits": -1.6047898530960083, |
|
"logps": -92.38011932373047, |
|
"loss": 0.1193, |
|
"objective": 0.12910698354244232, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.12910698354244232, |
|
"step": 695 |
|
}, |
|
{ |
|
"dpo_loss": 0.5509793162345886, |
|
"epoch": 3.9678790741615493, |
|
"grad_norm": 19.17114576937693, |
|
"learning_rate": 6.106260641143546e-08, |
|
"logits": -1.6564711332321167, |
|
"logps": -92.65071868896484, |
|
"loss": 0.1146, |
|
"objective": 0.1030283123254776, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.103028304874897, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 3.9678790741615493, |
|
"eval_dpo_loss": 0.6854080557823181, |
|
"eval_logits": -1.7319272756576538, |
|
"eval_logps": -99.89824676513672, |
|
"eval_loss": 0.4175797998905182, |
|
"eval_objective": 0.4186650514602661, |
|
"eval_ranking_idealized": 0.6570248007774353, |
|
"eval_ranking_idealized_expo": 0.5113636255264282, |
|
"eval_ranking_simple": 0.5237603187561035, |
|
"eval_regularize": 0.4186650514602661, |
|
"eval_runtime": 258.7065, |
|
"eval_samples_per_second": 22.381, |
|
"eval_steps_per_second": 0.935, |
|
"step": 700 |
|
}, |
|
{ |
|
"dpo_loss": 0.5571620464324951, |
|
"epoch": 3.9962210675484178, |
|
"grad_norm": 18.74884098276965, |
|
"learning_rate": 5.7852967987606e-08, |
|
"logits": -1.554320216178894, |
|
"logps": -90.9109878540039, |
|
"loss": 0.1135, |
|
"objective": 0.12702669203281403, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.6583333611488342, |
|
"regularize": 0.12702666223049164, |
|
"step": 705 |
|
}, |
|
{ |
|
"dpo_loss": 0.5528541803359985, |
|
"epoch": 4.024563060935286, |
|
"grad_norm": 17.40621172754528, |
|
"learning_rate": 5.471890990272665e-08, |
|
"logits": -1.6468113660812378, |
|
"logps": -92.61128997802734, |
|
"loss": 0.1135, |
|
"objective": 0.1373264044523239, |
|
"ranking_idealized": 0.6916666626930237, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.612500011920929, |
|
"regularize": 0.13732638955116272, |
|
"step": 710 |
|
}, |
|
{ |
|
"dpo_loss": 0.5556226968765259, |
|
"epoch": 4.052905054322154, |
|
"grad_norm": 18.812501686123863, |
|
"learning_rate": 5.166166492719124e-08, |
|
"logits": -1.6049120426177979, |
|
"logps": -92.74799346923828, |
|
"loss": 0.1035, |
|
"objective": 0.10493499785661697, |
|
"ranking_idealized": 0.6833333373069763, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.6458333134651184, |
|
"regularize": 0.10493497550487518, |
|
"step": 715 |
|
}, |
|
{ |
|
"dpo_loss": 0.5424375534057617, |
|
"epoch": 4.081247047709022, |
|
"grad_norm": 17.435339866299028, |
|
"learning_rate": 4.868243561723534e-08, |
|
"logits": -1.511703372001648, |
|
"logps": -94.71248626708984, |
|
"loss": 0.0978, |
|
"objective": 0.09467672556638718, |
|
"ranking_idealized": 0.6583333611488342, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.09467671811580658, |
|
"step": 720 |
|
}, |
|
{ |
|
"dpo_loss": 0.5652304291725159, |
|
"epoch": 4.109589041095891, |
|
"grad_norm": 17.507850551258127, |
|
"learning_rate": 4.578239384191529e-08, |
|
"logits": -1.6384118795394897, |
|
"logps": -92.1180191040039, |
|
"loss": 0.1076, |
|
"objective": 0.09941933304071426, |
|
"ranking_idealized": 0.6541666388511658, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.637499988079071, |
|
"regularize": 0.09941932559013367, |
|
"step": 725 |
|
}, |
|
{ |
|
"dpo_loss": 0.5497789978981018, |
|
"epoch": 4.137931034482759, |
|
"grad_norm": 18.803004868454263, |
|
"learning_rate": 4.296268032215733e-08, |
|
"logits": -1.7138111591339111, |
|
"logps": -91.83662414550781, |
|
"loss": 0.1086, |
|
"objective": 0.10822432488203049, |
|
"ranking_idealized": 0.675000011920929, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.6208333373069763, |
|
"regularize": 0.10822432488203049, |
|
"step": 730 |
|
}, |
|
{ |
|
"dpo_loss": 0.55525803565979, |
|
"epoch": 4.166273027869627, |
|
"grad_norm": 18.177348382836357, |
|
"learning_rate": 4.022440418205944e-08, |
|
"logits": -1.6232236623764038, |
|
"logps": -93.14463806152344, |
|
"loss": 0.1028, |
|
"objective": 0.10451411455869675, |
|
"ranking_idealized": 0.6791666746139526, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.6541666388511658, |
|
"regularize": 0.10451411455869675, |
|
"step": 735 |
|
}, |
|
{ |
|
"dpo_loss": 0.535234808921814, |
|
"epoch": 4.194615021256495, |
|
"grad_norm": 17.133543410858152, |
|
"learning_rate": 3.756864251262143e-08, |
|
"logits": -1.610323190689087, |
|
"logps": -93.36137390136719, |
|
"loss": 0.1062, |
|
"objective": 0.10550294071435928, |
|
"ranking_idealized": 0.6791666746139526, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.6541666388511658, |
|
"regularize": 0.10550292581319809, |
|
"step": 740 |
|
}, |
|
{ |
|
"dpo_loss": 0.546442449092865, |
|
"epoch": 4.222957014643363, |
|
"grad_norm": 18.78547392108143, |
|
"learning_rate": 3.4996439948074855e-08, |
|
"logits": -1.6879092454910278, |
|
"logps": -90.12301635742188, |
|
"loss": 0.1001, |
|
"objective": 0.10297367721796036, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.6208333373069763, |
|
"regularize": 0.10297366231679916, |
|
"step": 745 |
|
}, |
|
{ |
|
"dpo_loss": 0.5416663289070129, |
|
"epoch": 4.251299008030231, |
|
"grad_norm": 19.462231175744662, |
|
"learning_rate": 3.250880825498026e-08, |
|
"logits": -1.8104737997055054, |
|
"logps": -92.32807922363281, |
|
"loss": 0.0986, |
|
"objective": 0.10099396854639053, |
|
"ranking_idealized": 0.6458333134651184, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.6291666626930237, |
|
"regularize": 0.10099395364522934, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 4.251299008030231, |
|
"eval_dpo_loss": 0.6853212714195251, |
|
"eval_logits": -1.7322306632995605, |
|
"eval_logps": -99.86943054199219, |
|
"eval_loss": 0.41747406125068665, |
|
"eval_objective": 0.41828420758247375, |
|
"eval_ranking_idealized": 0.6570248007774353, |
|
"eval_ranking_idealized_expo": 0.5113636255264282, |
|
"eval_ranking_simple": 0.5237603187561035, |
|
"eval_regularize": 0.41828420758247375, |
|
"eval_runtime": 258.964, |
|
"eval_samples_per_second": 22.358, |
|
"eval_steps_per_second": 0.934, |
|
"step": 750 |
|
}, |
|
{ |
|
"dpo_loss": 0.5468146800994873, |
|
"epoch": 4.2796410014171, |
|
"grad_norm": 18.270752967286892, |
|
"learning_rate": 3.010672593425209e-08, |
|
"logits": -1.7138711214065552, |
|
"logps": -92.11996459960938, |
|
"loss": 0.1147, |
|
"objective": 0.10049024224281311, |
|
"ranking_idealized": 0.6791666746139526, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.6458333134651184, |
|
"regularize": 0.10049023479223251, |
|
"step": 755 |
|
}, |
|
{ |
|
"dpo_loss": 0.5455428957939148, |
|
"epoch": 4.307982994803968, |
|
"grad_norm": 17.285027330879423, |
|
"learning_rate": 2.7791137836269158e-08, |
|
"logits": -1.6757961511611938, |
|
"logps": -93.05391693115234, |
|
"loss": 0.0961, |
|
"objective": 0.10086000710725784, |
|
"ranking_idealized": 0.7083333134651184, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.6833333373069763, |
|
"regularize": 0.10086000710725784, |
|
"step": 760 |
|
}, |
|
{ |
|
"dpo_loss": 0.5395826101303101, |
|
"epoch": 4.336324988190836, |
|
"grad_norm": 17.680760382133624, |
|
"learning_rate": 2.556295478922116e-08, |
|
"logits": -1.7200431823730469, |
|
"logps": -93.1734619140625, |
|
"loss": 0.1053, |
|
"objective": 0.12091321498155594, |
|
"ranking_idealized": 0.7291666865348816, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.6833333373069763, |
|
"regularize": 0.12091320008039474, |
|
"step": 765 |
|
}, |
|
{ |
|
"dpo_loss": 0.5380468964576721, |
|
"epoch": 4.364666981577704, |
|
"grad_norm": 17.361908211383366, |
|
"learning_rate": 2.3423053240837514e-08, |
|
"logits": -1.577264428138733, |
|
"logps": -91.18030548095703, |
|
"loss": 0.1064, |
|
"objective": 0.11028440296649933, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.637499988079071, |
|
"regularize": 0.11028438061475754, |
|
"step": 770 |
|
}, |
|
{ |
|
"dpo_loss": 0.5494747161865234, |
|
"epoch": 4.393008974964572, |
|
"grad_norm": 16.640977636984772, |
|
"learning_rate": 2.137227491364016e-08, |
|
"logits": -1.627792239189148, |
|
"logps": -91.97000885009766, |
|
"loss": 0.1067, |
|
"objective": 0.10595239698886871, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.6333333253860474, |
|
"regularize": 0.10595235228538513, |
|
"step": 775 |
|
}, |
|
{ |
|
"dpo_loss": 0.5575358867645264, |
|
"epoch": 4.42135096835144, |
|
"grad_norm": 17.112345268128863, |
|
"learning_rate": 1.9411426473854687e-08, |
|
"logits": -1.693690538406372, |
|
"logps": -90.5418472290039, |
|
"loss": 0.1004, |
|
"objective": 0.10788667947053909, |
|
"ranking_idealized": 0.6541666388511658, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.6208333373069763, |
|
"regularize": 0.10788667947053909, |
|
"step": 780 |
|
}, |
|
{ |
|
"dpo_loss": 0.5676646828651428, |
|
"epoch": 4.449692961738309, |
|
"grad_norm": 16.657419373072543, |
|
"learning_rate": 1.7541279214111275e-08, |
|
"logits": -1.7215303182601929, |
|
"logps": -90.63499450683594, |
|
"loss": 0.1128, |
|
"objective": 0.1177934780716896, |
|
"ranking_idealized": 0.6583333611488342, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.612500011920929, |
|
"regularize": 0.1177934780716896, |
|
"step": 785 |
|
}, |
|
{ |
|
"dpo_loss": 0.539345920085907, |
|
"epoch": 4.478034955125177, |
|
"grad_norm": 17.089680176209615, |
|
"learning_rate": 1.57625687500596e-08, |
|
"logits": -1.6345340013504028, |
|
"logps": -93.4063720703125, |
|
"loss": 0.0998, |
|
"objective": 0.10817180573940277, |
|
"ranking_idealized": 0.7291666865348816, |
|
"ranking_idealized_expo": 0.5583333373069763, |
|
"ranking_simple": 0.6916666626930237, |
|
"regularize": 0.10817176848649979, |
|
"step": 790 |
|
}, |
|
{ |
|
"dpo_loss": 0.547528088092804, |
|
"epoch": 4.506376948512045, |
|
"grad_norm": 17.320194779122446, |
|
"learning_rate": 1.4075994731016894e-08, |
|
"logits": -1.5627334117889404, |
|
"logps": -93.30286407470703, |
|
"loss": 0.1058, |
|
"objective": 0.102629154920578, |
|
"ranking_idealized": 0.6541666388511658, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.6291666626930237, |
|
"regularize": 0.10262913256883621, |
|
"step": 795 |
|
}, |
|
{ |
|
"dpo_loss": 0.5486911535263062, |
|
"epoch": 4.534718941898913, |
|
"grad_norm": 17.9229448629881, |
|
"learning_rate": 1.2482220564763667e-08, |
|
"logits": -1.5870776176452637, |
|
"logps": -92.80538940429688, |
|
"loss": 0.1042, |
|
"objective": 0.11460768431425095, |
|
"ranking_idealized": 0.6875, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.6499999761581421, |
|
"regularize": 0.11460768431425095, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 4.534718941898913, |
|
"eval_dpo_loss": 0.6852768659591675, |
|
"eval_logits": -1.7317209243774414, |
|
"eval_logps": -99.85995483398438, |
|
"eval_loss": 0.4175398349761963, |
|
"eval_objective": 0.4183157980442047, |
|
"eval_ranking_idealized": 0.6570248007774353, |
|
"eval_ranking_idealized_expo": 0.5113636255264282, |
|
"eval_ranking_simple": 0.5237603187561035, |
|
"eval_regularize": 0.4183157980442047, |
|
"eval_runtime": 259.0856, |
|
"eval_samples_per_second": 22.348, |
|
"eval_steps_per_second": 0.934, |
|
"step": 800 |
|
}, |
|
{ |
|
"dpo_loss": 0.5488670468330383, |
|
"epoch": 4.563060935285781, |
|
"grad_norm": 18.799592478883184, |
|
"learning_rate": 1.0981873156594379e-08, |
|
"logits": -1.627816081047058, |
|
"logps": -91.32179260253906, |
|
"loss": 0.1001, |
|
"objective": 0.10988225042819977, |
|
"ranking_idealized": 0.6458333134651184, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.10988224297761917, |
|
"step": 805 |
|
}, |
|
{ |
|
"dpo_loss": 0.5488799810409546, |
|
"epoch": 4.59140292867265, |
|
"grad_norm": 16.566296936758206, |
|
"learning_rate": 9.575542662726754e-09, |
|
"logits": -1.7243562936782837, |
|
"logps": -91.10765075683594, |
|
"loss": 0.0996, |
|
"objective": 0.08869278430938721, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.4625000059604645, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.08869277685880661, |
|
"step": 810 |
|
}, |
|
{ |
|
"dpo_loss": 0.5502530336380005, |
|
"epoch": 4.619744922059518, |
|
"grad_norm": 16.948787644578637, |
|
"learning_rate": 8.263782258165819e-09, |
|
"logits": -1.5700196027755737, |
|
"logps": -92.37843322753906, |
|
"loss": 0.0991, |
|
"objective": 0.07977009564638138, |
|
"ranking_idealized": 0.7250000238418579, |
|
"ranking_idealized_expo": 0.5916666388511658, |
|
"ranking_simple": 0.6958333253860474, |
|
"regularize": 0.07977008074522018, |
|
"step": 815 |
|
}, |
|
{ |
|
"dpo_loss": 0.5550402402877808, |
|
"epoch": 4.648086915446386, |
|
"grad_norm": 21.501267763535818, |
|
"learning_rate": 7.047107919114586e-09, |
|
"logits": -1.6636712551116943, |
|
"logps": -92.61454010009766, |
|
"loss": 0.0982, |
|
"objective": 0.08367303013801575, |
|
"ranking_idealized": 0.6875, |
|
"ranking_idealized_expo": 0.5625, |
|
"ranking_simple": 0.6708333492279053, |
|
"regularize": 0.08367302268743515, |
|
"step": 820 |
|
}, |
|
{ |
|
"dpo_loss": 0.5588962435722351, |
|
"epoch": 4.6764289088332545, |
|
"grad_norm": 16.315710057694485, |
|
"learning_rate": 5.925998220016659e-09, |
|
"logits": -1.5499807596206665, |
|
"logps": -90.22130584716797, |
|
"loss": 0.1018, |
|
"objective": 0.09301813691854477, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.09301812201738358, |
|
"step": 825 |
|
}, |
|
{ |
|
"dpo_loss": 0.5498708486557007, |
|
"epoch": 4.7047709022201225, |
|
"grad_norm": 17.748669049129045, |
|
"learning_rate": 4.9008941453107525e-09, |
|
"logits": -1.7388263940811157, |
|
"logps": -92.17695617675781, |
|
"loss": 0.1092, |
|
"objective": 0.11468993872404099, |
|
"ranking_idealized": 0.675000011920929, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.6416666507720947, |
|
"regularize": 0.1146899089217186, |
|
"step": 830 |
|
}, |
|
{ |
|
"dpo_loss": 0.5327169299125671, |
|
"epoch": 4.733112895606991, |
|
"grad_norm": 17.389468155390862, |
|
"learning_rate": 3.9721989159709754e-09, |
|
"logits": -1.6580873727798462, |
|
"logps": -92.22929382324219, |
|
"loss": 0.1, |
|
"objective": 0.09779965132474899, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.0977996364235878, |
|
"step": 835 |
|
}, |
|
{ |
|
"dpo_loss": 0.5324665904045105, |
|
"epoch": 4.7614548889938595, |
|
"grad_norm": 18.51697686947663, |
|
"learning_rate": 3.140277830901428e-09, |
|
"logits": -1.6570351123809814, |
|
"logps": -92.75865173339844, |
|
"loss": 0.1079, |
|
"objective": 0.11856434494256973, |
|
"ranking_idealized": 0.6958333253860474, |
|
"ranking_idealized_expo": 0.5541666746139526, |
|
"ranking_simple": 0.6499999761581421, |
|
"regularize": 0.11856433004140854, |
|
"step": 840 |
|
}, |
|
{ |
|
"dpo_loss": 0.560818612575531, |
|
"epoch": 4.7897968823807275, |
|
"grad_norm": 17.858581136510683, |
|
"learning_rate": 2.4054581232470785e-09, |
|
"logits": -1.6901015043258667, |
|
"logps": -92.13179016113281, |
|
"loss": 0.1067, |
|
"objective": 0.10368030518293381, |
|
"ranking_idealized": 0.6416666507720947, |
|
"ranking_idealized_expo": 0.4583333432674408, |
|
"ranking_simple": 0.6208333373069763, |
|
"regularize": 0.10368029028177261, |
|
"step": 845 |
|
}, |
|
{ |
|
"dpo_loss": 0.5496495366096497, |
|
"epoch": 4.818138875767596, |
|
"grad_norm": 17.640915580271592, |
|
"learning_rate": 1.7680288316779256e-09, |
|
"logits": -1.6190950870513916, |
|
"logps": -90.9464340209961, |
|
"loss": 0.103, |
|
"objective": 0.10453298687934875, |
|
"ranking_idealized": 0.6833333373069763, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.6333333253860474, |
|
"regularize": 0.10453297942876816, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 4.818138875767596, |
|
"eval_dpo_loss": 0.6852567791938782, |
|
"eval_logits": -1.732380986213684, |
|
"eval_logps": -99.89720153808594, |
|
"eval_loss": 0.4175875782966614, |
|
"eval_objective": 0.4183763563632965, |
|
"eval_ranking_idealized": 0.6570248007774353, |
|
"eval_ranking_idealized_expo": 0.5113636255264282, |
|
"eval_ranking_simple": 0.5237603187561035, |
|
"eval_regularize": 0.4183763563632965, |
|
"eval_runtime": 259.1903, |
|
"eval_samples_per_second": 22.339, |
|
"eval_steps_per_second": 0.934, |
|
"step": 850 |
|
}, |
|
{ |
|
"dpo_loss": 0.5584205389022827, |
|
"epoch": 4.846480869154464, |
|
"grad_norm": 17.56984774908714, |
|
"learning_rate": 1.2282406866966078e-09, |
|
"logits": -1.6185228824615479, |
|
"logps": -91.83565521240234, |
|
"loss": 0.0974, |
|
"objective": 0.09289266169071198, |
|
"ranking_idealized": 0.7083333134651184, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.675000011920929, |
|
"regularize": 0.09289265424013138, |
|
"step": 855 |
|
}, |
|
{ |
|
"dpo_loss": 0.5458131432533264, |
|
"epoch": 4.874822862541333, |
|
"grad_norm": 17.337457908328606, |
|
"learning_rate": 7.863060120144316e-10, |
|
"logits": -1.5824497938156128, |
|
"logps": -91.32083892822266, |
|
"loss": 0.0959, |
|
"objective": 0.11178465932607651, |
|
"ranking_idealized": 0.7166666388511658, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.6791666746139526, |
|
"regularize": 0.11178465187549591, |
|
"step": 860 |
|
}, |
|
{ |
|
"dpo_loss": 0.555813729763031, |
|
"epoch": 4.903164855928201, |
|
"grad_norm": 16.568497702615847, |
|
"learning_rate": 4.4239864103465254e-10, |
|
"logits": -1.6553268432617188, |
|
"logps": -90.40623474121094, |
|
"loss": 0.1003, |
|
"objective": 0.1271737664937973, |
|
"ranking_idealized": 0.6625000238418579, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.1271737515926361, |
|
"step": 865 |
|
}, |
|
{ |
|
"dpo_loss": 0.5469813942909241, |
|
"epoch": 4.931506849315069, |
|
"grad_norm": 17.67813635168232, |
|
"learning_rate": 1.966538484758362e-10, |
|
"logits": -1.7142003774642944, |
|
"logps": -92.42487335205078, |
|
"loss": 0.1053, |
|
"objective": 0.10629518330097198, |
|
"ranking_idealized": 0.6916666626930237, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.6625000238418579, |
|
"regularize": 0.1062951609492302, |
|
"step": 870 |
|
}, |
|
{ |
|
"dpo_loss": 0.5456808805465698, |
|
"epoch": 4.959848842701937, |
|
"grad_norm": 17.372715214830695, |
|
"learning_rate": 4.9168297161839014e-11, |
|
"logits": -1.6318602561950684, |
|
"logps": -92.3662109375, |
|
"loss": 0.1017, |
|
"objective": 0.08666170388460159, |
|
"ranking_idealized": 0.6833333373069763, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.6541666388511658, |
|
"regularize": 0.086661696434021, |
|
"step": 875 |
|
}, |
|
{ |
|
"dpo_loss": 0.5451498627662659, |
|
"epoch": 4.988190836088805, |
|
"grad_norm": 17.608890670600516, |
|
"learning_rate": 0.0, |
|
"logits": -1.7033004760742188, |
|
"logps": -92.65689849853516, |
|
"loss": 0.0938, |
|
"objective": 0.09012699872255325, |
|
"ranking_idealized": 0.6708333492279053, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.6541666388511658, |
|
"regularize": 0.09012699127197266, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 4.988190836088805, |
|
"step": 880, |
|
"total_flos": 0.0, |
|
"train_loss": 0.19899855256080629, |
|
"train_runtime": 35117.4941, |
|
"train_samples_per_second": 7.233, |
|
"train_steps_per_second": 0.025 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 880, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|