|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 4.988190836088805, |
|
"eval_steps": 50, |
|
"global_step": 880, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"dpo_loss": 0.6931471824645996, |
|
"epoch": 0.005668398677373642, |
|
"grad_norm": 134.13611854511257, |
|
"learning_rate": 1.1363636363636363e-08, |
|
"logits": -1.3147305250167847, |
|
"logps": -88.0877456665039, |
|
"loss": 0.4113, |
|
"objective": 0.41588976979255676, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.41588976979255676, |
|
"step": 1 |
|
}, |
|
{ |
|
"dpo_loss": 0.6930932998657227, |
|
"epoch": 0.02834199338686821, |
|
"grad_norm": 133.21235814006465, |
|
"learning_rate": 5.6818181818181815e-08, |
|
"logits": -1.3678579330444336, |
|
"logps": -84.42462921142578, |
|
"loss": 0.4128, |
|
"objective": 0.3755575716495514, |
|
"ranking_idealized": 0.5677083134651184, |
|
"ranking_idealized_expo": 0.546875, |
|
"ranking_simple": 0.546875, |
|
"regularize": 0.3755575716495514, |
|
"step": 5 |
|
}, |
|
{ |
|
"dpo_loss": 0.6867414116859436, |
|
"epoch": 0.05668398677373642, |
|
"grad_norm": 142.75690820723997, |
|
"learning_rate": 1.1363636363636363e-07, |
|
"logits": -1.4464198350906372, |
|
"logps": -83.337890625, |
|
"loss": 0.4131, |
|
"objective": 0.43237432837486267, |
|
"ranking_idealized": 0.5375000238418579, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 0.43237432837486267, |
|
"step": 10 |
|
}, |
|
{ |
|
"dpo_loss": 0.6844848990440369, |
|
"epoch": 0.08502598016060463, |
|
"grad_norm": 134.06873444315383, |
|
"learning_rate": 1.7045454545454543e-07, |
|
"logits": -1.4124974012374878, |
|
"logps": -83.50977325439453, |
|
"loss": 0.4134, |
|
"objective": 0.3983347713947296, |
|
"ranking_idealized": 0.5249999761581421, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 0.3983347713947296, |
|
"step": 15 |
|
}, |
|
{ |
|
"dpo_loss": 0.6885910034179688, |
|
"epoch": 0.11336797354747284, |
|
"grad_norm": 128.26617155521478, |
|
"learning_rate": 2.2727272727272726e-07, |
|
"logits": -1.4028390645980835, |
|
"logps": -84.32319641113281, |
|
"loss": 0.396, |
|
"objective": 0.40446150302886963, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.40446150302886963, |
|
"step": 20 |
|
}, |
|
{ |
|
"dpo_loss": 0.664176881313324, |
|
"epoch": 0.14170996693434104, |
|
"grad_norm": 129.31399976570776, |
|
"learning_rate": 2.840909090909091e-07, |
|
"logits": -1.4484522342681885, |
|
"logps": -84.15487670898438, |
|
"loss": 0.3876, |
|
"objective": 0.3946884274482727, |
|
"ranking_idealized": 0.5041666626930237, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.3946884274482727, |
|
"step": 25 |
|
}, |
|
{ |
|
"dpo_loss": 0.6644570827484131, |
|
"epoch": 0.17005196032120926, |
|
"grad_norm": 129.8647460663128, |
|
"learning_rate": 3.4090909090909085e-07, |
|
"logits": -1.4183248281478882, |
|
"logps": -84.92523956298828, |
|
"loss": 0.3768, |
|
"objective": 0.38247305154800415, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.38247305154800415, |
|
"step": 30 |
|
}, |
|
{ |
|
"dpo_loss": 0.6450185775756836, |
|
"epoch": 0.19839395370807747, |
|
"grad_norm": 130.99553488282152, |
|
"learning_rate": 3.977272727272727e-07, |
|
"logits": -1.3883568048477173, |
|
"logps": -83.79637145996094, |
|
"loss": 0.3692, |
|
"objective": 0.3310621976852417, |
|
"ranking_idealized": 0.5249999761581421, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5041666626930237, |
|
"regularize": 0.3310621976852417, |
|
"step": 35 |
|
}, |
|
{ |
|
"dpo_loss": 0.6378442049026489, |
|
"epoch": 0.22673594709494568, |
|
"grad_norm": 124.69353315131622, |
|
"learning_rate": 4.545454545454545e-07, |
|
"logits": -1.3773659467697144, |
|
"logps": -83.89529418945312, |
|
"loss": 0.3685, |
|
"objective": 0.3828122019767761, |
|
"ranking_idealized": 0.5375000238418579, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 0.3828122019767761, |
|
"step": 40 |
|
}, |
|
{ |
|
"dpo_loss": 0.6174018383026123, |
|
"epoch": 0.25507794048181387, |
|
"grad_norm": 131.46158666021728, |
|
"learning_rate": 5.113636363636363e-07, |
|
"logits": -1.4973019361495972, |
|
"logps": -85.6716537475586, |
|
"loss": 0.3654, |
|
"objective": 0.4057728052139282, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.40577277541160583, |
|
"step": 45 |
|
}, |
|
{ |
|
"dpo_loss": 0.6175411343574524, |
|
"epoch": 0.2834199338686821, |
|
"grad_norm": 120.92405538498733, |
|
"learning_rate": 5.681818181818182e-07, |
|
"logits": -1.3952524662017822, |
|
"logps": -83.97753143310547, |
|
"loss": 0.3563, |
|
"objective": 0.3284095227718353, |
|
"ranking_idealized": 0.4416666626930237, |
|
"ranking_idealized_expo": 0.42500001192092896, |
|
"ranking_simple": 0.4333333373069763, |
|
"regularize": 0.3284095227718353, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.2834199338686821, |
|
"eval_dpo_loss": 0.687321662902832, |
|
"eval_logits": -1.420853853225708, |
|
"eval_logps": -92.33389282226562, |
|
"eval_loss": 0.42110419273376465, |
|
"eval_objective": 0.42802950739860535, |
|
"eval_ranking_idealized": 0.5247933864593506, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5113636255264282, |
|
"eval_regularize": 0.42802950739860535, |
|
"eval_runtime": 269.5653, |
|
"eval_samples_per_second": 21.479, |
|
"eval_steps_per_second": 0.898, |
|
"step": 50 |
|
}, |
|
{ |
|
"dpo_loss": 0.6299411058425903, |
|
"epoch": 0.3117619272555503, |
|
"grad_norm": 143.5714789398323, |
|
"learning_rate": 6.249999999999999e-07, |
|
"logits": -1.4425883293151855, |
|
"logps": -86.13849639892578, |
|
"loss": 0.3849, |
|
"objective": 0.3963293433189392, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.3963293433189392, |
|
"step": 55 |
|
}, |
|
{ |
|
"dpo_loss": 0.6125720143318176, |
|
"epoch": 0.3401039206424185, |
|
"grad_norm": 136.6167401869809, |
|
"learning_rate": 6.818181818181817e-07, |
|
"logits": -1.4236023426055908, |
|
"logps": -84.95130157470703, |
|
"loss": 0.3788, |
|
"objective": 0.3742086589336395, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.3742086589336395, |
|
"step": 60 |
|
}, |
|
{ |
|
"dpo_loss": 0.6023539900779724, |
|
"epoch": 0.3684459140292867, |
|
"grad_norm": 145.32268284333037, |
|
"learning_rate": 7.386363636363636e-07, |
|
"logits": -1.3705960512161255, |
|
"logps": -85.3038330078125, |
|
"loss": 0.3824, |
|
"objective": 0.3741196095943451, |
|
"ranking_idealized": 0.5083333253860474, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 0.3741196095943451, |
|
"step": 65 |
|
}, |
|
{ |
|
"dpo_loss": 0.6065691709518433, |
|
"epoch": 0.39678790741615494, |
|
"grad_norm": 139.19992394086944, |
|
"learning_rate": 7.954545454545454e-07, |
|
"logits": -1.448442816734314, |
|
"logps": -86.62315368652344, |
|
"loss": 0.3952, |
|
"objective": 0.4089803397655487, |
|
"ranking_idealized": 0.5208333134651184, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.4089803397655487, |
|
"step": 70 |
|
}, |
|
{ |
|
"dpo_loss": 0.5920066833496094, |
|
"epoch": 0.42512990080302315, |
|
"grad_norm": 124.21133200378506, |
|
"learning_rate": 8.522727272727273e-07, |
|
"logits": -1.4492307901382446, |
|
"logps": -86.64204406738281, |
|
"loss": 0.4203, |
|
"objective": 0.4280329644680023, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.4280329644680023, |
|
"step": 75 |
|
}, |
|
{ |
|
"dpo_loss": 0.5960291028022766, |
|
"epoch": 0.45347189418989137, |
|
"grad_norm": 189.2274993381048, |
|
"learning_rate": 9.09090909090909e-07, |
|
"logits": -1.4888454675674438, |
|
"logps": -86.4117431640625, |
|
"loss": 0.4188, |
|
"objective": 0.41209739446640015, |
|
"ranking_idealized": 0.5291666388511658, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 0.41209739446640015, |
|
"step": 80 |
|
}, |
|
{ |
|
"dpo_loss": 0.5794143676757812, |
|
"epoch": 0.4818138875767596, |
|
"grad_norm": 135.1857661271161, |
|
"learning_rate": 9.65909090909091e-07, |
|
"logits": -1.449118971824646, |
|
"logps": -85.0038833618164, |
|
"loss": 0.4453, |
|
"objective": 0.4282515347003937, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.4282515347003937, |
|
"step": 85 |
|
}, |
|
{ |
|
"dpo_loss": 0.5998800992965698, |
|
"epoch": 0.5101558809636277, |
|
"grad_norm": 176.339935388579, |
|
"learning_rate": 9.999842657116664e-07, |
|
"logits": -1.262819766998291, |
|
"logps": -84.80656433105469, |
|
"loss": 0.4721, |
|
"objective": 0.4337444007396698, |
|
"ranking_idealized": 0.4958333373069763, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.49166667461395264, |
|
"regularize": 0.4337444007396698, |
|
"step": 90 |
|
}, |
|
{ |
|
"dpo_loss": 0.5696186423301697, |
|
"epoch": 0.538497874350496, |
|
"grad_norm": 126.57058201712371, |
|
"learning_rate": 9.998072663403656e-07, |
|
"logits": -1.356713056564331, |
|
"logps": -84.7969741821289, |
|
"loss": 0.4657, |
|
"objective": 0.49121516942977905, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.49121516942977905, |
|
"step": 95 |
|
}, |
|
{ |
|
"dpo_loss": 0.5877792835235596, |
|
"epoch": 0.5668398677373642, |
|
"grad_norm": 132.17672172143168, |
|
"learning_rate": 9.99433669591504e-07, |
|
"logits": -1.4267613887786865, |
|
"logps": -85.57508850097656, |
|
"loss": 0.4749, |
|
"objective": 0.49508702754974365, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.49508702754974365, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.5668398677373642, |
|
"eval_dpo_loss": 0.6957917213439941, |
|
"eval_logits": -1.3981915712356567, |
|
"eval_logps": -91.31111907958984, |
|
"eval_loss": 0.46728843450546265, |
|
"eval_objective": 0.4711840748786926, |
|
"eval_ranking_idealized": 0.5247933864593506, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5144628286361694, |
|
"eval_regularize": 0.4711840748786926, |
|
"eval_runtime": 258.6905, |
|
"eval_samples_per_second": 22.382, |
|
"eval_steps_per_second": 0.935, |
|
"step": 100 |
|
}, |
|
{ |
|
"dpo_loss": 0.5984219908714294, |
|
"epoch": 0.5951818611242324, |
|
"grad_norm": 141.88949705555274, |
|
"learning_rate": 9.988636224180095e-07, |
|
"logits": -1.3065966367721558, |
|
"logps": -85.25997924804688, |
|
"loss": 0.4971, |
|
"objective": 0.5261873602867126, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.5261873602867126, |
|
"step": 105 |
|
}, |
|
{ |
|
"dpo_loss": 0.5468531847000122, |
|
"epoch": 0.6235238545111006, |
|
"grad_norm": 118.1204631787259, |
|
"learning_rate": 9.980973490458728e-07, |
|
"logits": -1.4602758884429932, |
|
"logps": -83.59262084960938, |
|
"loss": 0.4868, |
|
"objective": 0.4539487659931183, |
|
"ranking_idealized": 0.47083333134651184, |
|
"ranking_idealized_expo": 0.44999998807907104, |
|
"ranking_simple": 0.47083333134651184, |
|
"regularize": 0.4539487659931183, |
|
"step": 110 |
|
}, |
|
{ |
|
"dpo_loss": 0.595252275466919, |
|
"epoch": 0.6518658478979689, |
|
"grad_norm": 125.27205037996261, |
|
"learning_rate": 9.971351508859486e-07, |
|
"logits": -1.4180139303207397, |
|
"logps": -83.7674560546875, |
|
"loss": 0.5199, |
|
"objective": 0.49929893016815186, |
|
"ranking_idealized": 0.4958333373069763, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.48750001192092896, |
|
"regularize": 0.49929893016815186, |
|
"step": 115 |
|
}, |
|
{ |
|
"dpo_loss": 0.6112563014030457, |
|
"epoch": 0.680207841284837, |
|
"grad_norm": 115.02330529438134, |
|
"learning_rate": 9.959774064153975e-07, |
|
"logits": -1.3522943258285522, |
|
"logps": -84.06304168701172, |
|
"loss": 0.5115, |
|
"objective": 0.5348313450813293, |
|
"ranking_idealized": 0.5375000238418579, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.5348313450813293, |
|
"step": 120 |
|
}, |
|
{ |
|
"dpo_loss": 0.5981053709983826, |
|
"epoch": 0.7085498346717053, |
|
"grad_norm": 125.9955613878692, |
|
"learning_rate": 9.94624571028813e-07, |
|
"logits": -1.3196688890457153, |
|
"logps": -82.79936981201172, |
|
"loss": 0.5262, |
|
"objective": 0.5262423157691956, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.5262423157691956, |
|
"step": 125 |
|
}, |
|
{ |
|
"dpo_loss": 0.5788747668266296, |
|
"epoch": 0.7368918280585735, |
|
"grad_norm": 127.79997373259253, |
|
"learning_rate": 9.930771768590933e-07, |
|
"logits": -1.4446656703948975, |
|
"logps": -82.71467590332031, |
|
"loss": 0.5057, |
|
"objective": 0.5254269242286682, |
|
"ranking_idealized": 0.4833333194255829, |
|
"ranking_idealized_expo": 0.44999998807907104, |
|
"ranking_simple": 0.47083333134651184, |
|
"regularize": 0.5254269242286682, |
|
"step": 130 |
|
}, |
|
{ |
|
"dpo_loss": 0.5741093754768372, |
|
"epoch": 0.7652338214454416, |
|
"grad_norm": 119.65644724778777, |
|
"learning_rate": 9.91335832568129e-07, |
|
"logits": -1.412257432937622, |
|
"logps": -85.09854125976562, |
|
"loss": 0.5219, |
|
"objective": 0.5582370758056641, |
|
"ranking_idealized": 0.5458333492279053, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.5582370758056641, |
|
"step": 135 |
|
}, |
|
{ |
|
"dpo_loss": 0.5746143460273743, |
|
"epoch": 0.7935758148323099, |
|
"grad_norm": 117.38563727058376, |
|
"learning_rate": 9.894012231073895e-07, |
|
"logits": -1.307041049003601, |
|
"logps": -83.801025390625, |
|
"loss": 0.5804, |
|
"objective": 0.5208708643913269, |
|
"ranking_idealized": 0.5249999761581421, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 0.5208708643913269, |
|
"step": 140 |
|
}, |
|
{ |
|
"dpo_loss": 0.5846189856529236, |
|
"epoch": 0.821917808219178, |
|
"grad_norm": 130.2534703806674, |
|
"learning_rate": 9.872741094484964e-07, |
|
"logits": -1.3578908443450928, |
|
"logps": -85.40940856933594, |
|
"loss": 0.5595, |
|
"objective": 0.5616883635520935, |
|
"ranking_idealized": 0.5041666626930237, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.48750001192092896, |
|
"regularize": 0.5616883635520935, |
|
"step": 145 |
|
}, |
|
{ |
|
"dpo_loss": 0.5902164578437805, |
|
"epoch": 0.8502598016060463, |
|
"grad_norm": 124.65999306889285, |
|
"learning_rate": 9.849553282839024e-07, |
|
"logits": -1.3182071447372437, |
|
"logps": -83.66584014892578, |
|
"loss": 0.5468, |
|
"objective": 0.5547689199447632, |
|
"ranking_idealized": 0.5541666746139526, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.5547689199447632, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.8502598016060463, |
|
"eval_dpo_loss": 0.7061101198196411, |
|
"eval_logits": -1.3204107284545898, |
|
"eval_logps": -91.07585144042969, |
|
"eval_loss": 0.5596091151237488, |
|
"eval_objective": 0.5683301687240601, |
|
"eval_ranking_idealized": 0.5247933864593506, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5144628286361694, |
|
"eval_regularize": 0.5683301687240601, |
|
"eval_runtime": 262.2295, |
|
"eval_samples_per_second": 22.08, |
|
"eval_steps_per_second": 0.923, |
|
"step": 150 |
|
}, |
|
{ |
|
"dpo_loss": 0.6094241738319397, |
|
"epoch": 0.8786017949929145, |
|
"grad_norm": 120.42788649324545, |
|
"learning_rate": 9.824457916977784e-07, |
|
"logits": -1.3233551979064941, |
|
"logps": -83.14967346191406, |
|
"loss": 0.5545, |
|
"objective": 0.5938875675201416, |
|
"ranking_idealized": 0.5541666746139526, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.5938875675201416, |
|
"step": 155 |
|
}, |
|
{ |
|
"dpo_loss": 0.5945659875869751, |
|
"epoch": 0.9069437883797827, |
|
"grad_norm": 130.17538489613264, |
|
"learning_rate": 9.797464868072486e-07, |
|
"logits": -1.2508231401443481, |
|
"logps": -84.22561645507812, |
|
"loss": 0.5582, |
|
"objective": 0.5726324319839478, |
|
"ranking_idealized": 0.5041666626930237, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.5726324319839478, |
|
"step": 160 |
|
}, |
|
{ |
|
"dpo_loss": 0.5802300572395325, |
|
"epoch": 0.9352857817666509, |
|
"grad_norm": 108.46916479196223, |
|
"learning_rate": 9.768584753741134e-07, |
|
"logits": -1.2593294382095337, |
|
"logps": -84.2457046508789, |
|
"loss": 0.5428, |
|
"objective": 0.5054454207420349, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.5054454207420349, |
|
"step": 165 |
|
}, |
|
{ |
|
"dpo_loss": 0.5963813066482544, |
|
"epoch": 0.9636277751535192, |
|
"grad_norm": 112.45672430142012, |
|
"learning_rate": 9.737828933872073e-07, |
|
"logits": -1.3053935766220093, |
|
"logps": -84.02145385742188, |
|
"loss": 0.5633, |
|
"objective": 0.5686451196670532, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.5686451196670532, |
|
"step": 170 |
|
}, |
|
{ |
|
"dpo_loss": 0.6094807982444763, |
|
"epoch": 0.9919697685403873, |
|
"grad_norm": 112.35460354829323, |
|
"learning_rate": 9.705209506155634e-07, |
|
"logits": -1.2585358619689941, |
|
"logps": -84.55461883544922, |
|
"loss": 0.5613, |
|
"objective": 0.567017674446106, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.574999988079071, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.567017674446106, |
|
"step": 175 |
|
}, |
|
{ |
|
"dpo_loss": 0.5619717836380005, |
|
"epoch": 1.0203117619272555, |
|
"grad_norm": 111.48678467404953, |
|
"learning_rate": 9.670739301325534e-07, |
|
"logits": -1.4066673517227173, |
|
"logps": -83.15278625488281, |
|
"loss": 0.567, |
|
"objective": 0.633197009563446, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.633197009563446, |
|
"step": 180 |
|
}, |
|
{ |
|
"dpo_loss": 0.5666179656982422, |
|
"epoch": 1.0486537553141237, |
|
"grad_norm": 131.38985186167173, |
|
"learning_rate": 9.63443187811197e-07, |
|
"logits": -1.2431377172470093, |
|
"logps": -83.49596405029297, |
|
"loss": 0.5655, |
|
"objective": 0.5990104079246521, |
|
"ranking_idealized": 0.5208333134651184, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 0.5990104079246521, |
|
"step": 185 |
|
}, |
|
{ |
|
"dpo_loss": 0.5910226106643677, |
|
"epoch": 1.076995748700992, |
|
"grad_norm": 121.16034753219803, |
|
"learning_rate": 9.596301517908328e-07, |
|
"logits": -1.3014639616012573, |
|
"logps": -85.08562469482422, |
|
"loss": 0.5836, |
|
"objective": 0.6070137023925781, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.6070137023925781, |
|
"step": 190 |
|
}, |
|
{ |
|
"dpo_loss": 0.6265178918838501, |
|
"epoch": 1.10533774208786, |
|
"grad_norm": 113.24543776185278, |
|
"learning_rate": 9.556363219153662e-07, |
|
"logits": -1.2846567630767822, |
|
"logps": -83.17445373535156, |
|
"loss": 0.5509, |
|
"objective": 0.5626471638679504, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.5626471638679504, |
|
"step": 195 |
|
}, |
|
{ |
|
"dpo_loss": 0.5952022671699524, |
|
"epoch": 1.1336797354747283, |
|
"grad_norm": 109.3282186534433, |
|
"learning_rate": 9.514632691433106e-07, |
|
"logits": -1.3481907844543457, |
|
"logps": -82.64450073242188, |
|
"loss": 0.5501, |
|
"objective": 0.5247953534126282, |
|
"ranking_idealized": 0.5208333134651184, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 0.5247953534126282, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.1336797354747283, |
|
"eval_dpo_loss": 0.7155318856239319, |
|
"eval_logits": -1.3003575801849365, |
|
"eval_logps": -89.7054672241211, |
|
"eval_loss": 0.6048148274421692, |
|
"eval_objective": 0.5995473861694336, |
|
"eval_ranking_idealized": 0.5247933864593506, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5061983466148376, |
|
"eval_regularize": 0.5995473861694336, |
|
"eval_runtime": 259.4012, |
|
"eval_samples_per_second": 22.321, |
|
"eval_steps_per_second": 0.933, |
|
"step": 200 |
|
}, |
|
{ |
|
"dpo_loss": 0.5928524136543274, |
|
"epoch": 1.1620217288615966, |
|
"grad_norm": 117.4507736272679, |
|
"learning_rate": 9.471126349298556e-07, |
|
"logits": -1.304435133934021, |
|
"logps": -83.2208480834961, |
|
"loss": 0.5617, |
|
"objective": 0.569757878780365, |
|
"ranking_idealized": 0.5, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.5041666626930237, |
|
"regularize": 0.569757878780365, |
|
"step": 205 |
|
}, |
|
{ |
|
"dpo_loss": 0.5430352687835693, |
|
"epoch": 1.1903637222484649, |
|
"grad_norm": 108.28365774149364, |
|
"learning_rate": 9.425861305812081e-07, |
|
"logits": -1.28606116771698, |
|
"logps": -83.92695617675781, |
|
"loss": 0.5256, |
|
"objective": 0.4972154200077057, |
|
"ranking_idealized": 0.5083333253860474, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 0.4972154200077057, |
|
"step": 210 |
|
}, |
|
{ |
|
"dpo_loss": 0.6411459445953369, |
|
"epoch": 1.2187057156353331, |
|
"grad_norm": 117.64538520858981, |
|
"learning_rate": 9.378855365814557e-07, |
|
"logits": -1.2710984945297241, |
|
"logps": -83.3528823852539, |
|
"loss": 0.5386, |
|
"objective": 0.6103346943855286, |
|
"ranking_idealized": 0.49166667461395264, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.5041666626930237, |
|
"regularize": 0.6103346943855286, |
|
"step": 215 |
|
}, |
|
{ |
|
"dpo_loss": 0.6046126484870911, |
|
"epoch": 1.2470477090222012, |
|
"grad_norm": 112.2724627593423, |
|
"learning_rate": 9.330127018922193e-07, |
|
"logits": -1.22818124294281, |
|
"logps": -82.01517486572266, |
|
"loss": 0.5413, |
|
"objective": 0.5305136442184448, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.6000000238418579, |
|
"ranking_simple": 0.6083333492279053, |
|
"regularize": 0.5305136442184448, |
|
"step": 220 |
|
}, |
|
{ |
|
"dpo_loss": 0.5910329222679138, |
|
"epoch": 1.2753897024090695, |
|
"grad_norm": 114.94988107695234, |
|
"learning_rate": 9.279695432253708e-07, |
|
"logits": -1.3015425205230713, |
|
"logps": -83.29090881347656, |
|
"loss": 0.5262, |
|
"objective": 0.528804361820221, |
|
"ranking_idealized": 0.5375000238418579, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.528804361820221, |
|
"step": 225 |
|
}, |
|
{ |
|
"dpo_loss": 0.5783368945121765, |
|
"epoch": 1.3037316957959377, |
|
"grad_norm": 112.00444337751402, |
|
"learning_rate": 9.227580442891021e-07, |
|
"logits": -1.2694045305252075, |
|
"logps": -82.34114837646484, |
|
"loss": 0.5259, |
|
"objective": 0.5298904180526733, |
|
"ranking_idealized": 0.5291666388511658, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.5298903584480286, |
|
"step": 230 |
|
}, |
|
{ |
|
"dpo_loss": 0.559749186038971, |
|
"epoch": 1.3320736891828058, |
|
"grad_norm": 117.1742133311728, |
|
"learning_rate": 9.173802550076401e-07, |
|
"logits": -1.3496894836425781, |
|
"logps": -80.74533081054688, |
|
"loss": 0.5275, |
|
"objective": 0.5420801639556885, |
|
"ranking_idealized": 0.512499988079071, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 0.5420801639556885, |
|
"step": 235 |
|
}, |
|
{ |
|
"dpo_loss": 0.5604056715965271, |
|
"epoch": 1.360415682569674, |
|
"grad_norm": 120.20610234512412, |
|
"learning_rate": 9.118382907149163e-07, |
|
"logits": -1.2856090068817139, |
|
"logps": -81.93352508544922, |
|
"loss": 0.5307, |
|
"objective": 0.4905354678630829, |
|
"ranking_idealized": 0.4958333373069763, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.49166667461395264, |
|
"regularize": 0.4905354678630829, |
|
"step": 240 |
|
}, |
|
{ |
|
"dpo_loss": 0.5622699856758118, |
|
"epoch": 1.3887576759565423, |
|
"grad_norm": 112.51743847797803, |
|
"learning_rate": 9.061343313225087e-07, |
|
"logits": -1.3067680597305298, |
|
"logps": -81.71417999267578, |
|
"loss": 0.4769, |
|
"objective": 0.4980693757534027, |
|
"ranking_idealized": 0.5249999761581421, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.4980693757534027, |
|
"step": 245 |
|
}, |
|
{ |
|
"dpo_loss": 0.5571755766868591, |
|
"epoch": 1.4170996693434104, |
|
"grad_norm": 105.16402218584498, |
|
"learning_rate": 9.002706204621802e-07, |
|
"logits": -1.2600089311599731, |
|
"logps": -81.47142028808594, |
|
"loss": 0.5045, |
|
"objective": 0.48103129863739014, |
|
"ranking_idealized": 0.48750001192092896, |
|
"ranking_idealized_expo": 0.47083333134651184, |
|
"ranking_simple": 0.48750001192092896, |
|
"regularize": 0.48103129863739014, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.4170996693434104, |
|
"eval_dpo_loss": 0.7336850166320801, |
|
"eval_logits": -1.319351077079773, |
|
"eval_logps": -88.7256088256836, |
|
"eval_loss": 0.6191251873970032, |
|
"eval_objective": 0.6396905779838562, |
|
"eval_ranking_idealized": 0.5247933864593506, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5175619721412659, |
|
"eval_regularize": 0.6396905779838562, |
|
"eval_runtime": 261.8978, |
|
"eval_samples_per_second": 22.108, |
|
"eval_steps_per_second": 0.924, |
|
"step": 250 |
|
}, |
|
{ |
|
"dpo_loss": 0.5806334614753723, |
|
"epoch": 1.4454416627302786, |
|
"grad_norm": 108.9060179401822, |
|
"learning_rate": 8.942494646033554e-07, |
|
"logits": -1.268131971359253, |
|
"logps": -82.72068786621094, |
|
"loss": 0.5226, |
|
"objective": 0.5074787735939026, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.5074787735939026, |
|
"step": 255 |
|
}, |
|
{ |
|
"dpo_loss": 0.6120874285697937, |
|
"epoch": 1.473783656117147, |
|
"grad_norm": 113.8812403973121, |
|
"learning_rate": 8.880732321458784e-07, |
|
"logits": -1.3066486120224, |
|
"logps": -84.5510482788086, |
|
"loss": 0.5203, |
|
"objective": 0.5250194072723389, |
|
"ranking_idealized": 0.5041666626930237, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.5041666626930237, |
|
"regularize": 0.5250194072723389, |
|
"step": 260 |
|
}, |
|
{ |
|
"dpo_loss": 0.5722587704658508, |
|
"epoch": 1.5021256495040152, |
|
"grad_norm": 107.47217195176817, |
|
"learning_rate": 8.817443524884117e-07, |
|
"logits": -1.302085280418396, |
|
"logps": -82.64102935791016, |
|
"loss": 0.5125, |
|
"objective": 0.5349072813987732, |
|
"ranking_idealized": 0.5249999761581421, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.5349072813987732, |
|
"step": 265 |
|
}, |
|
{ |
|
"dpo_loss": 0.5912469625473022, |
|
"epoch": 1.5304676428908834, |
|
"grad_norm": 102.61360328866256, |
|
"learning_rate": 8.752653150728411e-07, |
|
"logits": -1.33375883102417, |
|
"logps": -83.93022918701172, |
|
"loss": 0.4989, |
|
"objective": 0.5215144753456116, |
|
"ranking_idealized": 0.5208333134651184, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.5215144753456116, |
|
"step": 270 |
|
}, |
|
{ |
|
"dpo_loss": 0.5620048642158508, |
|
"epoch": 1.5588096362777515, |
|
"grad_norm": 120.75621294263999, |
|
"learning_rate": 8.68638668405062e-07, |
|
"logits": -1.346874713897705, |
|
"logps": -83.51717376708984, |
|
"loss": 0.508, |
|
"objective": 0.49223792552948, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.49223792552948, |
|
"step": 275 |
|
}, |
|
{ |
|
"dpo_loss": 0.5491313934326172, |
|
"epoch": 1.5871516296646198, |
|
"grad_norm": 111.38123469249379, |
|
"learning_rate": 8.61867019052535e-07, |
|
"logits": -1.2457977533340454, |
|
"logps": -82.07854461669922, |
|
"loss": 0.4912, |
|
"objective": 0.4965251088142395, |
|
"ranking_idealized": 0.5291666388511658, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 0.4965251088142395, |
|
"step": 280 |
|
}, |
|
{ |
|
"dpo_loss": 0.597549319267273, |
|
"epoch": 1.615493623051488, |
|
"grad_norm": 108.78031876782775, |
|
"learning_rate": 8.549530306190014e-07, |
|
"logits": -1.3423717021942139, |
|
"logps": -83.64887237548828, |
|
"loss": 0.4878, |
|
"objective": 0.5166993141174316, |
|
"ranking_idealized": 0.5375000238418579, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.5166993141174316, |
|
"step": 285 |
|
}, |
|
{ |
|
"dpo_loss": 0.5523730516433716, |
|
"epoch": 1.643835616438356, |
|
"grad_norm": 101.86340958175018, |
|
"learning_rate": 8.478994226967638e-07, |
|
"logits": -1.3935832977294922, |
|
"logps": -81.38288116455078, |
|
"loss": 0.4753, |
|
"objective": 0.45616135001182556, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.5625, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.45616135001182556, |
|
"step": 290 |
|
}, |
|
{ |
|
"dpo_loss": 0.5671105980873108, |
|
"epoch": 1.6721776098252243, |
|
"grad_norm": 97.05334097299942, |
|
"learning_rate": 8.407089697969456e-07, |
|
"logits": -1.3089579343795776, |
|
"logps": -79.79814910888672, |
|
"loss": 0.4806, |
|
"objective": 0.48036739230155945, |
|
"ranking_idealized": 0.4958333373069763, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.5041666626930237, |
|
"regularize": 0.48036739230155945, |
|
"step": 295 |
|
}, |
|
{ |
|
"dpo_loss": 0.5690053701400757, |
|
"epoch": 1.7005196032120926, |
|
"grad_norm": 103.94959921577468, |
|
"learning_rate": 8.333845002581458e-07, |
|
"logits": -1.3550407886505127, |
|
"logps": -81.06673431396484, |
|
"loss": 0.5191, |
|
"objective": 0.5282995700836182, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.574999988079071, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.5282995700836182, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.7005196032120926, |
|
"eval_dpo_loss": 0.7341631650924683, |
|
"eval_logits": -1.3167152404785156, |
|
"eval_logps": -87.61885833740234, |
|
"eval_loss": 0.6501524448394775, |
|
"eval_objective": 0.6544150114059448, |
|
"eval_ranking_idealized": 0.5247933864593506, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5165289044380188, |
|
"eval_regularize": 0.6544150114059448, |
|
"eval_runtime": 258.5096, |
|
"eval_samples_per_second": 22.398, |
|
"eval_steps_per_second": 0.936, |
|
"step": 300 |
|
}, |
|
{ |
|
"dpo_loss": 0.5860777497291565, |
|
"epoch": 1.7288615965989607, |
|
"grad_norm": 101.19563914464877, |
|
"learning_rate": 8.259288951339232e-07, |
|
"logits": -1.290361762046814, |
|
"logps": -81.49486541748047, |
|
"loss": 0.4715, |
|
"objective": 0.5236480236053467, |
|
"ranking_idealized": 0.5208333134651184, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.5236480236053467, |
|
"step": 305 |
|
}, |
|
{ |
|
"dpo_loss": 0.5874544382095337, |
|
"epoch": 1.7572035899858292, |
|
"grad_norm": 101.32381454333631, |
|
"learning_rate": 8.183450870595441e-07, |
|
"logits": -1.323080062866211, |
|
"logps": -79.28206634521484, |
|
"loss": 0.4743, |
|
"objective": 0.4995422959327698, |
|
"ranking_idealized": 0.5375000238418579, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.4995422959327698, |
|
"step": 310 |
|
}, |
|
{ |
|
"dpo_loss": 0.5733292698860168, |
|
"epoch": 1.7855455833726972, |
|
"grad_norm": 101.22251777355339, |
|
"learning_rate": 8.106360590984404e-07, |
|
"logits": -1.2535948753356934, |
|
"logps": -80.41255950927734, |
|
"loss": 0.4496, |
|
"objective": 0.44809386134147644, |
|
"ranking_idealized": 0.5208333134651184, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.44809386134147644, |
|
"step": 315 |
|
}, |
|
{ |
|
"dpo_loss": 0.5748218297958374, |
|
"epoch": 1.8138875767595655, |
|
"grad_norm": 102.63800844635435, |
|
"learning_rate": 8.028048435688333e-07, |
|
"logits": -1.3195849657058716, |
|
"logps": -82.20491027832031, |
|
"loss": 0.464, |
|
"objective": 0.4412696659564972, |
|
"ranking_idealized": 0.5291666388511658, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 0.4412696659564972, |
|
"step": 320 |
|
}, |
|
{ |
|
"dpo_loss": 0.5583847761154175, |
|
"epoch": 1.8422295701464337, |
|
"grad_norm": 106.29080888058539, |
|
"learning_rate": 7.948545208509811e-07, |
|
"logits": -1.3646790981292725, |
|
"logps": -83.66368865966797, |
|
"loss": 0.4757, |
|
"objective": 0.43752506375312805, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.43752506375312805, |
|
"step": 325 |
|
}, |
|
{ |
|
"dpo_loss": 0.5669862627983093, |
|
"epoch": 1.8705715635333018, |
|
"grad_norm": 114.61344867316569, |
|
"learning_rate": 7.86788218175523e-07, |
|
"logits": -1.1525540351867676, |
|
"logps": -81.54851531982422, |
|
"loss": 0.4342, |
|
"objective": 0.4504217505455017, |
|
"ranking_idealized": 0.48750001192092896, |
|
"ranking_idealized_expo": 0.47083333134651184, |
|
"ranking_simple": 0.48750001192092896, |
|
"regularize": 0.4504217505455017, |
|
"step": 330 |
|
}, |
|
{ |
|
"dpo_loss": 0.5577248334884644, |
|
"epoch": 1.89891355692017, |
|
"grad_norm": 107.68338896171758, |
|
"learning_rate": 7.786091083933949e-07, |
|
"logits": -1.3191189765930176, |
|
"logps": -82.01612091064453, |
|
"loss": 0.4554, |
|
"objective": 0.4512566030025482, |
|
"ranking_idealized": 0.5249999761581421, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.4512566030025482, |
|
"step": 335 |
|
}, |
|
{ |
|
"dpo_loss": 0.5417340993881226, |
|
"epoch": 1.9272555503070383, |
|
"grad_norm": 113.63225902754026, |
|
"learning_rate": 7.703204087277988e-07, |
|
"logits": -1.311868667602539, |
|
"logps": -84.563720703125, |
|
"loss": 0.4472, |
|
"objective": 0.4667154550552368, |
|
"ranking_idealized": 0.5541666746139526, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.46671542525291443, |
|
"step": 340 |
|
}, |
|
{ |
|
"dpo_loss": 0.577439546585083, |
|
"epoch": 1.9555975436939064, |
|
"grad_norm": 105.47346696011448, |
|
"learning_rate": 7.619253795087208e-07, |
|
"logits": -1.309786081314087, |
|
"logps": -82.33320617675781, |
|
"loss": 0.4317, |
|
"objective": 0.45227622985839844, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.45227622985839844, |
|
"step": 345 |
|
}, |
|
{ |
|
"dpo_loss": 0.5885747671127319, |
|
"epoch": 1.9839395370807746, |
|
"grad_norm": 109.05004528153341, |
|
"learning_rate": 7.534273228904915e-07, |
|
"logits": -1.22944974899292, |
|
"logps": -83.1298599243164, |
|
"loss": 0.4473, |
|
"objective": 0.4539943039417267, |
|
"ranking_idealized": 0.5041666626930237, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.4539943039417267, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.9839395370807746, |
|
"eval_dpo_loss": 0.7476473450660706, |
|
"eval_logits": -1.3002201318740845, |
|
"eval_logps": -88.6204833984375, |
|
"eval_loss": 0.6869083642959595, |
|
"eval_objective": 0.6877632737159729, |
|
"eval_ranking_idealized": 0.5247933864593506, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5092975497245789, |
|
"eval_regularize": 0.6877632737159729, |
|
"eval_runtime": 258.9539, |
|
"eval_samples_per_second": 22.359, |
|
"eval_steps_per_second": 0.935, |
|
"step": 350 |
|
}, |
|
{ |
|
"dpo_loss": 0.5712209343910217, |
|
"epoch": 2.012281530467643, |
|
"grad_norm": 101.66242864172723, |
|
"learning_rate": 7.448295815528956e-07, |
|
"logits": -1.2166054248809814, |
|
"logps": -82.02034759521484, |
|
"loss": 0.4247, |
|
"objective": 0.4026139974594116, |
|
"ranking_idealized": 0.5208333134651184, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.4026139974594116, |
|
"step": 355 |
|
}, |
|
{ |
|
"dpo_loss": 0.5501471161842346, |
|
"epoch": 2.040623523854511, |
|
"grad_norm": 111.09255218939096, |
|
"learning_rate": 7.361355373863413e-07, |
|
"logits": -1.2762349843978882, |
|
"logps": -80.2708969116211, |
|
"loss": 0.4188, |
|
"objective": 0.3990139663219452, |
|
"ranking_idealized": 0.5541666746139526, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.3990139663219452, |
|
"step": 360 |
|
}, |
|
{ |
|
"dpo_loss": 0.5589497089385986, |
|
"epoch": 2.0689655172413794, |
|
"grad_norm": 102.40571901595573, |
|
"learning_rate": 7.273486101616056e-07, |
|
"logits": -1.3285800218582153, |
|
"logps": -82.2364730834961, |
|
"loss": 0.4468, |
|
"objective": 0.4508870840072632, |
|
"ranking_idealized": 0.5249999761581421, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.4508870840072632, |
|
"step": 365 |
|
}, |
|
{ |
|
"dpo_loss": 0.5617626309394836, |
|
"epoch": 2.0973075106282475, |
|
"grad_norm": 97.76405475943756, |
|
"learning_rate": 7.184722561846797e-07, |
|
"logits": -1.2896106243133545, |
|
"logps": -79.0242691040039, |
|
"loss": 0.4108, |
|
"objective": 0.4170443117618561, |
|
"ranking_idealized": 0.5041666626930237, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.4170443117618561, |
|
"step": 370 |
|
}, |
|
{ |
|
"dpo_loss": 0.5481951236724854, |
|
"epoch": 2.1256495040151155, |
|
"grad_norm": 102.54464966495118, |
|
"learning_rate": 7.095099669372443e-07, |
|
"logits": -1.2680145502090454, |
|
"logps": -80.49474334716797, |
|
"loss": 0.4088, |
|
"objective": 0.3847118020057678, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5874999761581421, |
|
"ranking_simple": 0.612500011920929, |
|
"regularize": 0.3847118020057678, |
|
"step": 375 |
|
}, |
|
{ |
|
"dpo_loss": 0.5886502861976624, |
|
"epoch": 2.153991497401984, |
|
"grad_norm": 99.36933926359579, |
|
"learning_rate": 7.004652677033068e-07, |
|
"logits": -1.2002967596054077, |
|
"logps": -79.81450653076172, |
|
"loss": 0.4196, |
|
"objective": 0.43431776762008667, |
|
"ranking_idealized": 0.5458333492279053, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.43431776762008667, |
|
"step": 380 |
|
}, |
|
{ |
|
"dpo_loss": 0.566298246383667, |
|
"epoch": 2.182333490788852, |
|
"grad_norm": 100.41990113674308, |
|
"learning_rate": 6.913417161825449e-07, |
|
"logits": -1.307159423828125, |
|
"logps": -81.99685668945312, |
|
"loss": 0.4125, |
|
"objective": 0.4449020326137543, |
|
"ranking_idealized": 0.5249999761581421, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.4449020326137543, |
|
"step": 385 |
|
}, |
|
{ |
|
"dpo_loss": 0.5851306319236755, |
|
"epoch": 2.21067548417572, |
|
"grad_norm": 101.58148277220718, |
|
"learning_rate": 6.821429010908971e-07, |
|
"logits": -1.2254120111465454, |
|
"logps": -80.0687484741211, |
|
"loss": 0.4184, |
|
"objective": 0.41351255774497986, |
|
"ranking_idealized": 0.48750001192092896, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.4791666567325592, |
|
"regularize": 0.41351255774497986, |
|
"step": 390 |
|
}, |
|
{ |
|
"dpo_loss": 0.5543534159660339, |
|
"epoch": 2.2390174775625886, |
|
"grad_norm": 102.754307701718, |
|
"learning_rate": 6.728724407489553e-07, |
|
"logits": -1.2246791124343872, |
|
"logps": -80.76174926757812, |
|
"loss": 0.394, |
|
"objective": 0.3953525424003601, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 0.3953525424003601, |
|
"step": 395 |
|
}, |
|
{ |
|
"dpo_loss": 0.5204497575759888, |
|
"epoch": 2.2673594709494567, |
|
"grad_norm": 105.48313372380032, |
|
"learning_rate": 6.635339816587108e-07, |
|
"logits": -1.2704825401306152, |
|
"logps": -82.68233489990234, |
|
"loss": 0.3926, |
|
"objective": 0.4119986891746521, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.574999988079071, |
|
"ranking_simple": 0.6208333373069763, |
|
"regularize": 0.4119986891746521, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.2673594709494567, |
|
"eval_dpo_loss": 0.7518758773803711, |
|
"eval_logits": -1.273954153060913, |
|
"eval_logps": -87.79329681396484, |
|
"eval_loss": 0.708670973777771, |
|
"eval_objective": 0.7146722078323364, |
|
"eval_ranking_idealized": 0.5247933864593506, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5113636255264282, |
|
"eval_regularize": 0.7146722078323364, |
|
"eval_runtime": 259.1203, |
|
"eval_samples_per_second": 22.345, |
|
"eval_steps_per_second": 0.934, |
|
"step": 400 |
|
}, |
|
{ |
|
"dpo_loss": 0.5729255676269531, |
|
"epoch": 2.295701464336325, |
|
"grad_norm": 97.83556162175466, |
|
"learning_rate": 6.541311970692162e-07, |
|
"logits": -1.3065953254699707, |
|
"logps": -81.27420043945312, |
|
"loss": 0.388, |
|
"objective": 0.3636757433414459, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5583333373069763, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.3636757433414459, |
|
"step": 405 |
|
}, |
|
{ |
|
"dpo_loss": 0.5597470998764038, |
|
"epoch": 2.324043457723193, |
|
"grad_norm": 103.83650251762533, |
|
"learning_rate": 6.446677855317264e-07, |
|
"logits": -1.1975177526474, |
|
"logps": -80.72605895996094, |
|
"loss": 0.3899, |
|
"objective": 0.39117977023124695, |
|
"ranking_idealized": 0.48750001192092896, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.4958333373069763, |
|
"regularize": 0.39117977023124695, |
|
"step": 410 |
|
}, |
|
{ |
|
"dpo_loss": 0.5665189027786255, |
|
"epoch": 2.3523854511100613, |
|
"grad_norm": 98.79350448658165, |
|
"learning_rate": 6.351474694448864e-07, |
|
"logits": -1.227319598197937, |
|
"logps": -81.35487365722656, |
|
"loss": 0.3741, |
|
"objective": 0.38428404927253723, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.38428401947021484, |
|
"step": 415 |
|
}, |
|
{ |
|
"dpo_loss": 0.5536468029022217, |
|
"epoch": 2.3807274444969297, |
|
"grad_norm": 107.78716307916815, |
|
"learning_rate": 6.255739935905395e-07, |
|
"logits": -1.1353336572647095, |
|
"logps": -83.33781433105469, |
|
"loss": 0.3686, |
|
"objective": 0.3524840176105499, |
|
"ranking_idealized": 0.4791666567325592, |
|
"ranking_idealized_expo": 0.47083333134651184, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 0.3524840176105499, |
|
"step": 420 |
|
}, |
|
{ |
|
"dpo_loss": 0.5507148504257202, |
|
"epoch": 2.409069437883798, |
|
"grad_norm": 95.31420015325158, |
|
"learning_rate": 6.159511236607315e-07, |
|
"logits": -1.186567783355713, |
|
"logps": -81.95353698730469, |
|
"loss": 0.3829, |
|
"objective": 0.3914056718349457, |
|
"ranking_idealized": 0.5208333134651184, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.3914056718349457, |
|
"step": 425 |
|
}, |
|
{ |
|
"dpo_loss": 0.5727216601371765, |
|
"epoch": 2.4374114312706663, |
|
"grad_norm": 113.51215777470752, |
|
"learning_rate": 6.062826447764883e-07, |
|
"logits": -1.2177358865737915, |
|
"logps": -82.20878601074219, |
|
"loss": 0.3667, |
|
"objective": 0.38485896587371826, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.38485896587371826, |
|
"step": 430 |
|
}, |
|
{ |
|
"dpo_loss": 0.5795385837554932, |
|
"epoch": 2.4657534246575343, |
|
"grad_norm": 109.86296645766184, |
|
"learning_rate": 5.965723599989528e-07, |
|
"logits": -1.2774423360824585, |
|
"logps": -81.68889617919922, |
|
"loss": 0.3742, |
|
"objective": 0.373045951128006, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.373045951128006, |
|
"step": 435 |
|
}, |
|
{ |
|
"dpo_loss": 0.5365008115768433, |
|
"epoch": 2.4940954180444024, |
|
"grad_norm": 111.81192989308617, |
|
"learning_rate": 5.868240888334652e-07, |
|
"logits": -1.2080743312835693, |
|
"logps": -81.38243865966797, |
|
"loss": 0.3754, |
|
"objective": 0.3666243851184845, |
|
"ranking_idealized": 0.5458333492279053, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.3666243851184845, |
|
"step": 440 |
|
}, |
|
{ |
|
"dpo_loss": 0.5457199811935425, |
|
"epoch": 2.5224374114312704, |
|
"grad_norm": 111.8881514776807, |
|
"learning_rate": 5.770416657271728e-07, |
|
"logits": -1.220347285270691, |
|
"logps": -79.05127716064453, |
|
"loss": 0.3504, |
|
"objective": 0.3269334137439728, |
|
"ranking_idealized": 0.5208333134651184, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.3269334137439728, |
|
"step": 445 |
|
}, |
|
{ |
|
"dpo_loss": 0.5769312977790833, |
|
"epoch": 2.550779404818139, |
|
"grad_norm": 99.97534603124807, |
|
"learning_rate": 5.67228938560766e-07, |
|
"logits": -1.2545174360275269, |
|
"logps": -81.12376403808594, |
|
"loss": 0.3583, |
|
"objective": 0.3686024844646454, |
|
"ranking_idealized": 0.512499988079071, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 0.3686024844646454, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.550779404818139, |
|
"eval_dpo_loss": 0.7467784881591797, |
|
"eval_logits": -1.2637890577316284, |
|
"eval_logps": -87.71803283691406, |
|
"eval_loss": 0.6997210383415222, |
|
"eval_objective": 0.7073113918304443, |
|
"eval_ranking_idealized": 0.5247933864593506, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5092975497245789, |
|
"eval_regularize": 0.7073113918304443, |
|
"eval_runtime": 259.5146, |
|
"eval_samples_per_second": 22.311, |
|
"eval_steps_per_second": 0.933, |
|
"step": 450 |
|
}, |
|
{ |
|
"dpo_loss": 0.5546112656593323, |
|
"epoch": 2.579121398205007, |
|
"grad_norm": 103.50827450574826, |
|
"learning_rate": 5.573897671349268e-07, |
|
"logits": -1.282201886177063, |
|
"logps": -82.07099151611328, |
|
"loss": 0.3709, |
|
"objective": 0.3527103662490845, |
|
"ranking_idealized": 0.5166666507720947, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 0.3527103662490845, |
|
"step": 455 |
|
}, |
|
{ |
|
"dpo_loss": 0.5413371920585632, |
|
"epoch": 2.6074633915918755, |
|
"grad_norm": 103.11622894521913, |
|
"learning_rate": 5.475280216520912e-07, |
|
"logits": -1.3678227663040161, |
|
"logps": -82.53510284423828, |
|
"loss": 0.3493, |
|
"objective": 0.3583069443702698, |
|
"ranking_idealized": 0.5291666388511658, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 0.3583069443702698, |
|
"step": 460 |
|
}, |
|
{ |
|
"dpo_loss": 0.5440939664840698, |
|
"epoch": 2.6358053849787435, |
|
"grad_norm": 101.71475758586865, |
|
"learning_rate": 5.376475811941191e-07, |
|
"logits": -1.2582870721817017, |
|
"logps": -81.93384552001953, |
|
"loss": 0.3418, |
|
"objective": 0.3399708867073059, |
|
"ranking_idealized": 0.5083333253860474, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 0.3399708867073059, |
|
"step": 465 |
|
}, |
|
{ |
|
"dpo_loss": 0.5473323464393616, |
|
"epoch": 2.6641473783656116, |
|
"grad_norm": 106.63182084368584, |
|
"learning_rate": 5.277523321964701e-07, |
|
"logits": -1.266352891921997, |
|
"logps": -80.5593490600586, |
|
"loss": 0.3361, |
|
"objective": 0.3204086124897003, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.3204086124897003, |
|
"step": 470 |
|
}, |
|
{ |
|
"dpo_loss": 0.5676419734954834, |
|
"epoch": 2.69248937175248, |
|
"grad_norm": 100.07291475508246, |
|
"learning_rate": 5.178461669194903e-07, |
|
"logits": -1.2326582670211792, |
|
"logps": -81.09722137451172, |
|
"loss": 0.3221, |
|
"objective": 0.3266924321651459, |
|
"ranking_idealized": 0.512499988079071, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 0.3266924321651459, |
|
"step": 475 |
|
}, |
|
{ |
|
"dpo_loss": 0.5700205564498901, |
|
"epoch": 2.720831365139348, |
|
"grad_norm": 114.43771495362513, |
|
"learning_rate": 5.07932981917404e-07, |
|
"logits": -1.3194983005523682, |
|
"logps": -80.66010284423828, |
|
"loss": 0.3398, |
|
"objective": 0.3393530547618866, |
|
"ranking_idealized": 0.5249999761581421, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.3393530547618866, |
|
"step": 480 |
|
}, |
|
{ |
|
"dpo_loss": 0.540759801864624, |
|
"epoch": 2.7491733585262166, |
|
"grad_norm": 103.16997454398687, |
|
"learning_rate": 4.980166765056193e-07, |
|
"logits": -1.254343032836914, |
|
"logps": -82.84033203125, |
|
"loss": 0.3085, |
|
"objective": 0.3383033871650696, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.3383033871650696, |
|
"step": 485 |
|
}, |
|
{ |
|
"dpo_loss": 0.5252385139465332, |
|
"epoch": 2.7775153519130846, |
|
"grad_norm": 106.99635988259067, |
|
"learning_rate": 4.881011512269463e-07, |
|
"logits": -1.227257251739502, |
|
"logps": -80.78081512451172, |
|
"loss": 0.3134, |
|
"objective": 0.3047190010547638, |
|
"ranking_idealized": 0.5041666626930237, |
|
"ranking_idealized_expo": 0.47083333134651184, |
|
"ranking_simple": 0.4958333373069763, |
|
"regularize": 0.3047190010547638, |
|
"step": 490 |
|
}, |
|
{ |
|
"dpo_loss": 0.5381739139556885, |
|
"epoch": 2.8058573452999527, |
|
"grad_norm": 96.94469590251771, |
|
"learning_rate": 4.78190306317332e-07, |
|
"logits": -1.2355995178222656, |
|
"logps": -80.06063842773438, |
|
"loss": 0.3118, |
|
"objective": 0.31756675243377686, |
|
"ranking_idealized": 0.48750001192092896, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.4958333373069763, |
|
"regularize": 0.31756675243377686, |
|
"step": 495 |
|
}, |
|
{ |
|
"dpo_loss": 0.537841796875, |
|
"epoch": 2.8341993386868207, |
|
"grad_norm": 101.86509864093333, |
|
"learning_rate": 4.682880401717177e-07, |
|
"logits": -1.2800052165985107, |
|
"logps": -79.0132064819336, |
|
"loss": 0.2969, |
|
"objective": 0.2936791479587555, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.2936791479587555, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.8341993386868207, |
|
"eval_dpo_loss": 0.7569719552993774, |
|
"eval_logits": -1.2819868326187134, |
|
"eval_logps": -87.59934997558594, |
|
"eval_loss": 0.7206214070320129, |
|
"eval_objective": 0.7300238609313965, |
|
"eval_ranking_idealized": 0.5247933864593506, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5134297609329224, |
|
"eval_regularize": 0.7300238609313965, |
|
"eval_runtime": 259.8973, |
|
"eval_samples_per_second": 22.278, |
|
"eval_steps_per_second": 0.931, |
|
"step": 500 |
|
}, |
|
{ |
|
"dpo_loss": 0.5372523069381714, |
|
"epoch": 2.862541332073689, |
|
"grad_norm": 99.30099425250114, |
|
"learning_rate": 4.5839824781061886e-07, |
|
"logits": -1.239542841911316, |
|
"logps": -80.23788452148438, |
|
"loss": 0.2969, |
|
"objective": 0.2953982949256897, |
|
"ranking_idealized": 0.5458333492279053, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.2953982949256897, |
|
"step": 505 |
|
}, |
|
{ |
|
"dpo_loss": 0.551425576210022, |
|
"epoch": 2.8908833254605573, |
|
"grad_norm": 97.19792911916865, |
|
"learning_rate": 4.4852481934803277e-07, |
|
"logits": -1.1939424276351929, |
|
"logps": -80.47624206542969, |
|
"loss": 0.2884, |
|
"objective": 0.2630988359451294, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.2630988359451294, |
|
"step": 510 |
|
}, |
|
{ |
|
"dpo_loss": 0.553286612033844, |
|
"epoch": 2.9192253188474258, |
|
"grad_norm": 105.92002880687674, |
|
"learning_rate": 4.3867163846127674e-07, |
|
"logits": -1.3426212072372437, |
|
"logps": -80.19091796875, |
|
"loss": 0.2858, |
|
"objective": 0.2995811402797699, |
|
"ranking_idealized": 0.49166667461395264, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.2995811104774475, |
|
"step": 515 |
|
}, |
|
{ |
|
"dpo_loss": 0.5569177269935608, |
|
"epoch": 2.947567312234294, |
|
"grad_norm": 101.90420898944761, |
|
"learning_rate": 4.2884258086335745e-07, |
|
"logits": -1.2548803091049194, |
|
"logps": -82.30675506591797, |
|
"loss": 0.2885, |
|
"objective": 0.2949267625808716, |
|
"ranking_idealized": 0.5375000238418579, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 0.2949267625808716, |
|
"step": 520 |
|
}, |
|
{ |
|
"dpo_loss": 0.5335093140602112, |
|
"epoch": 2.975909305621162, |
|
"grad_norm": 118.61355678596335, |
|
"learning_rate": 4.1904151277847305e-07, |
|
"logits": -1.2422376871109009, |
|
"logps": -81.52430725097656, |
|
"loss": 0.2939, |
|
"objective": 0.277982234954834, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.277982234954834, |
|
"step": 525 |
|
}, |
|
{ |
|
"dpo_loss": 0.5339943766593933, |
|
"epoch": 3.0042512990080303, |
|
"grad_norm": 95.3080580242307, |
|
"learning_rate": 4.092722894212487e-07, |
|
"logits": -1.245071530342102, |
|
"logps": -80.53169250488281, |
|
"loss": 0.2807, |
|
"objective": 0.28686758875846863, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 0.28686758875846863, |
|
"step": 530 |
|
}, |
|
{ |
|
"dpo_loss": 0.5502054691314697, |
|
"epoch": 3.0325932923948984, |
|
"grad_norm": 102.03710421357799, |
|
"learning_rate": 3.995387534803005e-07, |
|
"logits": -1.2770276069641113, |
|
"logps": -82.10601043701172, |
|
"loss": 0.2686, |
|
"objective": 0.2612551748752594, |
|
"ranking_idealized": 0.5541666746139526, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.2612551748752594, |
|
"step": 535 |
|
}, |
|
{ |
|
"dpo_loss": 0.5417371392250061, |
|
"epoch": 3.0609352857817664, |
|
"grad_norm": 93.88545782890976, |
|
"learning_rate": 3.8984473360672967e-07, |
|
"logits": -1.3437141180038452, |
|
"logps": -80.88935089111328, |
|
"loss": 0.2546, |
|
"objective": 0.2543182373046875, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.2543182373046875, |
|
"step": 540 |
|
}, |
|
{ |
|
"dpo_loss": 0.5468733906745911, |
|
"epoch": 3.089277279168635, |
|
"grad_norm": 104.98103692622652, |
|
"learning_rate": 3.801940429081345e-07, |
|
"logits": -1.29421067237854, |
|
"logps": -81.87580108642578, |
|
"loss": 0.2483, |
|
"objective": 0.23621630668640137, |
|
"ranking_idealized": 0.5, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.49166667461395264, |
|
"regularize": 0.23621630668640137, |
|
"step": 545 |
|
}, |
|
{ |
|
"dpo_loss": 0.5142976641654968, |
|
"epoch": 3.117619272555503, |
|
"grad_norm": 98.44480374148442, |
|
"learning_rate": 3.7059047744873955e-07, |
|
"logits": -1.1407238245010376, |
|
"logps": -82.03406524658203, |
|
"loss": 0.2456, |
|
"objective": 0.23313947021961212, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.23313947021961212, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 3.117619272555503, |
|
"eval_dpo_loss": 0.7502080202102661, |
|
"eval_logits": -1.2747371196746826, |
|
"eval_logps": -87.48565673828125, |
|
"eval_loss": 0.7081578969955444, |
|
"eval_objective": 0.7094937562942505, |
|
"eval_ranking_idealized": 0.5247933864593506, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5134297609329224, |
|
"eval_regularize": 0.7094937562942505, |
|
"eval_runtime": 259.3716, |
|
"eval_samples_per_second": 22.323, |
|
"eval_steps_per_second": 0.933, |
|
"step": 550 |
|
}, |
|
{ |
|
"dpo_loss": 0.519844651222229, |
|
"epoch": 3.1459612659423715, |
|
"grad_norm": 110.59601884737206, |
|
"learning_rate": 3.6103781475622786e-07, |
|
"logits": -1.2129274606704712, |
|
"logps": -82.34542083740234, |
|
"loss": 0.2514, |
|
"objective": 0.24437649548053741, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.24437648057937622, |
|
"step": 555 |
|
}, |
|
{ |
|
"dpo_loss": 0.5489280223846436, |
|
"epoch": 3.1743032593292395, |
|
"grad_norm": 96.14728671233563, |
|
"learning_rate": 3.5153981233586274e-07, |
|
"logits": -1.213472843170166, |
|
"logps": -79.25808715820312, |
|
"loss": 0.2372, |
|
"objective": 0.2192198634147644, |
|
"ranking_idealized": 0.5083333253860474, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.2192198634147644, |
|
"step": 560 |
|
}, |
|
{ |
|
"dpo_loss": 0.537466287612915, |
|
"epoch": 3.2026452527161076, |
|
"grad_norm": 107.72259319859731, |
|
"learning_rate": 3.421002061924876e-07, |
|
"logits": -1.2699893712997437, |
|
"logps": -80.88977813720703, |
|
"loss": 0.2311, |
|
"objective": 0.22347129881381989, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.22347129881381989, |
|
"step": 565 |
|
}, |
|
{ |
|
"dpo_loss": 0.5202825665473938, |
|
"epoch": 3.230987246102976, |
|
"grad_norm": 96.26968527459513, |
|
"learning_rate": 3.327227093609824e-07, |
|
"logits": -1.1865532398223877, |
|
"logps": -80.67752075195312, |
|
"loss": 0.2252, |
|
"objective": 0.21609430015087128, |
|
"ranking_idealized": 0.4958333373069763, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.21609430015087128, |
|
"step": 570 |
|
}, |
|
{ |
|
"dpo_loss": 0.5354868769645691, |
|
"epoch": 3.259329239489844, |
|
"grad_norm": 107.37637317824311, |
|
"learning_rate": 3.234110104457536e-07, |
|
"logits": -1.2506965398788452, |
|
"logps": -81.32064056396484, |
|
"loss": 0.2293, |
|
"objective": 0.2312368005514145, |
|
"ranking_idealized": 0.5208333134651184, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.2312368005514145, |
|
"step": 575 |
|
}, |
|
{ |
|
"dpo_loss": 0.5328246355056763, |
|
"epoch": 3.287671232876712, |
|
"grad_norm": 100.4320056036311, |
|
"learning_rate": 3.141687721698363e-07, |
|
"logits": -1.2623833417892456, |
|
"logps": -82.67001342773438, |
|
"loss": 0.2288, |
|
"objective": 0.22114989161491394, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.5541666746139526, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.22114989161491394, |
|
"step": 580 |
|
}, |
|
{ |
|
"dpo_loss": 0.5769094228744507, |
|
"epoch": 3.3160132262635806, |
|
"grad_norm": 95.72812686493451, |
|
"learning_rate": 3.049996299341742e-07, |
|
"logits": -1.279525876045227, |
|
"logps": -83.05570220947266, |
|
"loss": 0.2225, |
|
"objective": 0.2485423982143402, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.2485423982143402, |
|
"step": 585 |
|
}, |
|
{ |
|
"dpo_loss": 0.5602288842201233, |
|
"epoch": 3.3443552196504487, |
|
"grad_norm": 114.56713956727607, |
|
"learning_rate": 2.959071903876486e-07, |
|
"logits": -1.294264554977417, |
|
"logps": -83.3205795288086, |
|
"loss": 0.2291, |
|
"objective": 0.22891105711460114, |
|
"ranking_idealized": 0.5208333134651184, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.22891105711460114, |
|
"step": 590 |
|
}, |
|
{ |
|
"dpo_loss": 0.5364168286323547, |
|
"epoch": 3.372697213037317, |
|
"grad_norm": 110.16666826508619, |
|
"learning_rate": 2.86895030008416e-07, |
|
"logits": -1.2113687992095947, |
|
"logps": -82.52010345458984, |
|
"loss": 0.2129, |
|
"objective": 0.22527165710926056, |
|
"ranking_idealized": 0.5375000238418579, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.22527165710926056, |
|
"step": 595 |
|
}, |
|
{ |
|
"dpo_loss": 0.5123292803764343, |
|
"epoch": 3.4010392064241852, |
|
"grad_norm": 102.02441515454663, |
|
"learning_rate": 2.779666936971129e-07, |
|
"logits": -1.2210050821304321, |
|
"logps": -81.7782974243164, |
|
"loss": 0.2121, |
|
"objective": 0.22540892660617828, |
|
"ranking_idealized": 0.512499988079071, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 0.22540892660617828, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.4010392064241852, |
|
"eval_dpo_loss": 0.7512553930282593, |
|
"eval_logits": -1.2610689401626587, |
|
"eval_logps": -87.82508087158203, |
|
"eval_loss": 0.7149726152420044, |
|
"eval_objective": 0.7195205688476562, |
|
"eval_ranking_idealized": 0.5247933864593506, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5123966932296753, |
|
"eval_regularize": 0.7195205688476562, |
|
"eval_runtime": 260.141, |
|
"eval_samples_per_second": 22.257, |
|
"eval_steps_per_second": 0.93, |
|
"step": 600 |
|
}, |
|
{ |
|
"dpo_loss": 0.5458318591117859, |
|
"epoch": 3.4293811998110533, |
|
"grad_norm": 109.5352185195456, |
|
"learning_rate": 2.6912569338248315e-07, |
|
"logits": -1.2689701318740845, |
|
"logps": -83.29906463623047, |
|
"loss": 0.2091, |
|
"objective": 0.18265347182750702, |
|
"ranking_idealized": 0.5458333492279053, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.18265347182750702, |
|
"step": 605 |
|
}, |
|
{ |
|
"dpo_loss": 0.5613773465156555, |
|
"epoch": 3.4577231931979218, |
|
"grad_norm": 98.96988548944374, |
|
"learning_rate": 2.603755066399718e-07, |
|
"logits": -1.240617036819458, |
|
"logps": -81.78998565673828, |
|
"loss": 0.208, |
|
"objective": 0.20637358725070953, |
|
"ranking_idealized": 0.5291666388511658, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 0.20637358725070953, |
|
"step": 610 |
|
}, |
|
{ |
|
"dpo_loss": 0.5179942846298218, |
|
"epoch": 3.48606518658479, |
|
"grad_norm": 101.26514646147335, |
|
"learning_rate": 2.517195753238345e-07, |
|
"logits": -1.2508882284164429, |
|
"logps": -81.7972183227539, |
|
"loss": 0.1973, |
|
"objective": 0.19295792281627655, |
|
"ranking_idealized": 0.4749999940395355, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.47083333134651184, |
|
"regularize": 0.19295792281627655, |
|
"step": 615 |
|
}, |
|
{ |
|
"dpo_loss": 0.49570992588996887, |
|
"epoch": 3.514407179971658, |
|
"grad_norm": 94.4850227020022, |
|
"learning_rate": 2.4316130421329696e-07, |
|
"logits": -1.1392742395401, |
|
"logps": -81.18209838867188, |
|
"loss": 0.1959, |
|
"objective": 0.19464272260665894, |
|
"ranking_idealized": 0.512499988079071, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 0.19464272260665894, |
|
"step": 620 |
|
}, |
|
{ |
|
"dpo_loss": 0.5331357717514038, |
|
"epoch": 3.5427491733585263, |
|
"grad_norm": 102.01306480785293, |
|
"learning_rate": 2.3470405967329604e-07, |
|
"logits": -1.2014575004577637, |
|
"logps": -80.8895492553711, |
|
"loss": 0.189, |
|
"objective": 0.20246191322803497, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 0.20246191322803497, |
|
"step": 625 |
|
}, |
|
{ |
|
"dpo_loss": 0.5293785929679871, |
|
"epoch": 3.5710911667453944, |
|
"grad_norm": 104.25371252839783, |
|
"learning_rate": 2.2635116833033392e-07, |
|
"logits": -1.28461754322052, |
|
"logps": -80.65286254882812, |
|
"loss": 0.1914, |
|
"objective": 0.1838250458240509, |
|
"ranking_idealized": 0.5291666388511658, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.1838250458240509, |
|
"step": 630 |
|
}, |
|
{ |
|
"dpo_loss": 0.5447375178337097, |
|
"epoch": 3.5994331601322624, |
|
"grad_norm": 98.62302663391017, |
|
"learning_rate": 2.181059157639598e-07, |
|
"logits": -1.2204653024673462, |
|
"logps": -81.04381561279297, |
|
"loss": 0.1798, |
|
"objective": 0.1847100853919983, |
|
"ranking_idealized": 0.4958333373069763, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.49166667461395264, |
|
"regularize": 0.1847100704908371, |
|
"step": 635 |
|
}, |
|
{ |
|
"dpo_loss": 0.5025536417961121, |
|
"epoch": 3.627775153519131, |
|
"grad_norm": 98.21161327414355, |
|
"learning_rate": 2.0997154521440097e-07, |
|
"logits": -1.178464412689209, |
|
"logps": -81.83779907226562, |
|
"loss": 0.1761, |
|
"objective": 0.18075956404209137, |
|
"ranking_idealized": 0.4958333373069763, |
|
"ranking_idealized_expo": 0.4625000059604645, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.18075956404209137, |
|
"step": 640 |
|
}, |
|
{ |
|
"dpo_loss": 0.5238084197044373, |
|
"epoch": 3.656117146905999, |
|
"grad_norm": 96.02562335204865, |
|
"learning_rate": 2.0195125630684428e-07, |
|
"logits": -1.2101988792419434, |
|
"logps": -80.41759490966797, |
|
"loss": 0.1754, |
|
"objective": 0.17556659877300262, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.17556659877300262, |
|
"step": 645 |
|
}, |
|
{ |
|
"dpo_loss": 0.5436745882034302, |
|
"epoch": 3.6844591402928675, |
|
"grad_norm": 99.15450590850473, |
|
"learning_rate": 1.9404820379287672e-07, |
|
"logits": -1.2024494409561157, |
|
"logps": -81.63809967041016, |
|
"loss": 0.1721, |
|
"objective": 0.15629148483276367, |
|
"ranking_idealized": 0.5166666507720947, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 0.15629148483276367, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 3.6844591402928675, |
|
"eval_dpo_loss": 0.7529332041740417, |
|
"eval_logits": -1.2667409181594849, |
|
"eval_logps": -87.55421447753906, |
|
"eval_loss": 0.7180734276771545, |
|
"eval_objective": 0.7210157513618469, |
|
"eval_ranking_idealized": 0.5247933864593506, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5123966932296753, |
|
"eval_regularize": 0.7210157513618469, |
|
"eval_runtime": 258.8884, |
|
"eval_samples_per_second": 22.365, |
|
"eval_steps_per_second": 0.935, |
|
"step": 650 |
|
}, |
|
{ |
|
"dpo_loss": 0.5238640904426575, |
|
"epoch": 3.7128011336797355, |
|
"grad_norm": 97.25223896173355, |
|
"learning_rate": 1.8626549630957395e-07, |
|
"logits": -1.240193486213684, |
|
"logps": -80.53789520263672, |
|
"loss": 0.1708, |
|
"objective": 0.1773752123117447, |
|
"ranking_idealized": 0.5375000238418579, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.1773752123117447, |
|
"step": 655 |
|
}, |
|
{ |
|
"dpo_loss": 0.5414367914199829, |
|
"epoch": 3.7411431270666036, |
|
"grad_norm": 98.36333988268215, |
|
"learning_rate": 1.7860619515673032e-07, |
|
"logits": -1.318145513534546, |
|
"logps": -81.55570983886719, |
|
"loss": 0.1704, |
|
"objective": 0.1702161431312561, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.5541666746139526, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.1702161431312561, |
|
"step": 660 |
|
}, |
|
{ |
|
"dpo_loss": 0.5241764187812805, |
|
"epoch": 3.769485120453472, |
|
"grad_norm": 104.11429458509453, |
|
"learning_rate": 1.7107331309270684e-07, |
|
"logits": -1.2156672477722168, |
|
"logps": -81.38536071777344, |
|
"loss": 0.1559, |
|
"objective": 0.16640320420265198, |
|
"ranking_idealized": 0.5208333134651184, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.16640320420265198, |
|
"step": 665 |
|
}, |
|
{ |
|
"dpo_loss": 0.5278924703598022, |
|
"epoch": 3.79782711384034, |
|
"grad_norm": 101.56797379943947, |
|
"learning_rate": 1.6366981314937372e-07, |
|
"logits": -1.321338415145874, |
|
"logps": -81.34001922607422, |
|
"loss": 0.1551, |
|
"objective": 0.14367233216762543, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5916666388511658, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.14367233216762543, |
|
"step": 670 |
|
}, |
|
{ |
|
"dpo_loss": 0.529498279094696, |
|
"epoch": 3.826169107227208, |
|
"grad_norm": 93.64293062728967, |
|
"learning_rate": 1.5639860746661338e-07, |
|
"logits": -1.2852925062179565, |
|
"logps": -80.37777709960938, |
|
"loss": 0.1531, |
|
"objective": 0.1431640088558197, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.1431640088558197, |
|
"step": 675 |
|
}, |
|
{ |
|
"dpo_loss": 0.5504888892173767, |
|
"epoch": 3.8545111006140766, |
|
"grad_norm": 101.91053202750965, |
|
"learning_rate": 1.492625561468393e-07, |
|
"logits": -1.2134761810302734, |
|
"logps": -80.77507781982422, |
|
"loss": 0.1495, |
|
"objective": 0.15138006210327148, |
|
"ranking_idealized": 0.5083333253860474, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.15138006210327148, |
|
"step": 680 |
|
}, |
|
{ |
|
"dpo_loss": 0.5266403555870056, |
|
"epoch": 3.8828530940009447, |
|
"grad_norm": 105.20220630755058, |
|
"learning_rate": 1.4226446612998671e-07, |
|
"logits": -1.2806271314620972, |
|
"logps": -80.39690399169922, |
|
"loss": 0.1478, |
|
"objective": 0.1402387022972107, |
|
"ranking_idealized": 0.5458333492279053, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.1402387022972107, |
|
"step": 685 |
|
}, |
|
{ |
|
"dpo_loss": 0.5246676802635193, |
|
"epoch": 3.9111950873878127, |
|
"grad_norm": 93.05384336107711, |
|
"learning_rate": 1.3540709008941147e-07, |
|
"logits": -1.2790921926498413, |
|
"logps": -80.16386413574219, |
|
"loss": 0.1448, |
|
"objective": 0.1356712281703949, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.1356712281703949, |
|
"step": 690 |
|
}, |
|
{ |
|
"dpo_loss": 0.5320358276367188, |
|
"epoch": 3.9395370807746812, |
|
"grad_norm": 92.75785969700081, |
|
"learning_rate": 1.2869312534913685e-07, |
|
"logits": -1.1912401914596558, |
|
"logps": -81.54454803466797, |
|
"loss": 0.1365, |
|
"objective": 0.15173907577991486, |
|
"ranking_idealized": 0.4791666567325592, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.4791666567325592, |
|
"regularize": 0.15173907577991486, |
|
"step": 695 |
|
}, |
|
{ |
|
"dpo_loss": 0.5335038304328918, |
|
"epoch": 3.9678790741615493, |
|
"grad_norm": 98.04635697056264, |
|
"learning_rate": 1.2212521282287093e-07, |
|
"logits": -1.2281270027160645, |
|
"logps": -81.2812728881836, |
|
"loss": 0.1386, |
|
"objective": 0.15212775766849518, |
|
"ranking_idealized": 0.49166667461395264, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.49166667461395264, |
|
"regularize": 0.15212775766849518, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 3.9678790741615493, |
|
"eval_dpo_loss": 0.7514122128486633, |
|
"eval_logits": -1.26543128490448, |
|
"eval_logps": -87.5438003540039, |
|
"eval_loss": 0.7064864635467529, |
|
"eval_objective": 0.7093826532363892, |
|
"eval_ranking_idealized": 0.5247933864593506, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5113636255264282, |
|
"eval_regularize": 0.7093826532363892, |
|
"eval_runtime": 259.4271, |
|
"eval_samples_per_second": 22.318, |
|
"eval_steps_per_second": 0.933, |
|
"step": 700 |
|
}, |
|
{ |
|
"dpo_loss": 0.5151931047439575, |
|
"epoch": 3.9962210675484178, |
|
"grad_norm": 98.98779841672567, |
|
"learning_rate": 1.15705935975212e-07, |
|
"logits": -1.1545416116714478, |
|
"logps": -79.99464416503906, |
|
"loss": 0.133, |
|
"objective": 0.12396544963121414, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.12396544963121414, |
|
"step": 705 |
|
}, |
|
{ |
|
"dpo_loss": 0.5161293745040894, |
|
"epoch": 4.024563060935286, |
|
"grad_norm": 94.60148232757011, |
|
"learning_rate": 1.094378198054533e-07, |
|
"logits": -1.2483078241348267, |
|
"logps": -81.60172271728516, |
|
"loss": 0.1163, |
|
"objective": 0.09800054132938385, |
|
"ranking_idealized": 0.48750001192092896, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.49166667461395264, |
|
"regularize": 0.09800054132938385, |
|
"step": 710 |
|
}, |
|
{ |
|
"dpo_loss": 0.5236179232597351, |
|
"epoch": 4.052905054322154, |
|
"grad_norm": 95.22257327163594, |
|
"learning_rate": 1.0332332985438247e-07, |
|
"logits": -1.2045139074325562, |
|
"logps": -81.0995864868164, |
|
"loss": 0.1109, |
|
"objective": 0.10611793398857117, |
|
"ranking_idealized": 0.5458333492279053, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.10611793398857117, |
|
"step": 715 |
|
}, |
|
{ |
|
"dpo_loss": 0.5168012976646423, |
|
"epoch": 4.081247047709022, |
|
"grad_norm": 101.20095806117061, |
|
"learning_rate": 9.736487123447068e-08, |
|
"logits": -1.1531497240066528, |
|
"logps": -83.11766052246094, |
|
"loss": 0.1094, |
|
"objective": 0.11501701176166534, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 0.11501700431108475, |
|
"step": 720 |
|
}, |
|
{ |
|
"dpo_loss": 0.5381873250007629, |
|
"epoch": 4.109589041095891, |
|
"grad_norm": 95.89781184589388, |
|
"learning_rate": 9.156478768383058e-08, |
|
"logits": -1.226915955543518, |
|
"logps": -81.01741790771484, |
|
"loss": 0.1131, |
|
"objective": 0.10321195423603058, |
|
"ranking_idealized": 0.5375000238418579, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.10321194678544998, |
|
"step": 725 |
|
}, |
|
{ |
|
"dpo_loss": 0.5230408310890198, |
|
"epoch": 4.137931034482759, |
|
"grad_norm": 100.59516155263596, |
|
"learning_rate": 8.592536064431466e-08, |
|
"logits": -1.2960838079452515, |
|
"logps": -81.26818084716797, |
|
"loss": 0.1063, |
|
"objective": 0.09859620779752731, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.09859620034694672, |
|
"step": 730 |
|
}, |
|
{ |
|
"dpo_loss": 0.5300396084785461, |
|
"epoch": 4.166273027869627, |
|
"grad_norm": 99.36395143789497, |
|
"learning_rate": 8.044880836411888e-08, |
|
"logits": -1.1851133108139038, |
|
"logps": -82.21375274658203, |
|
"loss": 0.1031, |
|
"objective": 0.10836609452962875, |
|
"ranking_idealized": 0.5541666746139526, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.10836609452962875, |
|
"step": 735 |
|
}, |
|
{ |
|
"dpo_loss": 0.5081815719604492, |
|
"epoch": 4.194615021256495, |
|
"grad_norm": 97.85835254690582, |
|
"learning_rate": 7.513728502524286e-08, |
|
"logits": -1.214992880821228, |
|
"logps": -81.85246276855469, |
|
"loss": 0.1108, |
|
"objective": 0.09990967810153961, |
|
"ranking_idealized": 0.5375000238418579, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.09990967065095901, |
|
"step": 740 |
|
}, |
|
{ |
|
"dpo_loss": 0.5218914747238159, |
|
"epoch": 4.222957014643363, |
|
"grad_norm": 102.73912212411751, |
|
"learning_rate": 6.999287989614971e-08, |
|
"logits": -1.2808277606964111, |
|
"logps": -79.33137512207031, |
|
"loss": 0.1003, |
|
"objective": 0.0994986817240715, |
|
"ranking_idealized": 0.5083333253860474, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.5041666626930237, |
|
"regularize": 0.0994986817240715, |
|
"step": 745 |
|
}, |
|
{ |
|
"dpo_loss": 0.5142425298690796, |
|
"epoch": 4.251299008030231, |
|
"grad_norm": 98.36808351404612, |
|
"learning_rate": 6.501761650996052e-08, |
|
"logits": -1.3703263998031616, |
|
"logps": -81.33523559570312, |
|
"loss": 0.0985, |
|
"objective": 0.09714233130216599, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.09714233130216599, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 4.251299008030231, |
|
"eval_dpo_loss": 0.7508513331413269, |
|
"eval_logits": -1.2698993682861328, |
|
"eval_logps": -87.64305877685547, |
|
"eval_loss": 0.7095766663551331, |
|
"eval_objective": 0.7117514610290527, |
|
"eval_ranking_idealized": 0.5247933864593506, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5144628286361694, |
|
"eval_regularize": 0.7117514610290527, |
|
"eval_runtime": 259.3031, |
|
"eval_samples_per_second": 22.329, |
|
"eval_steps_per_second": 0.933, |
|
"step": 750 |
|
}, |
|
{ |
|
"dpo_loss": 0.5111901760101318, |
|
"epoch": 4.2796410014171, |
|
"grad_norm": 95.47883788378027, |
|
"learning_rate": 6.021345186850418e-08, |
|
"logits": -1.2960675954818726, |
|
"logps": -81.29663848876953, |
|
"loss": 0.0959, |
|
"objective": 0.0885348990559578, |
|
"ranking_idealized": 0.5458333492279053, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.0885348990559578, |
|
"step": 755 |
|
}, |
|
{ |
|
"dpo_loss": 0.5198584794998169, |
|
"epoch": 4.307982994803968, |
|
"grad_norm": 107.35986940095462, |
|
"learning_rate": 5.5582275672538316e-08, |
|
"logits": -1.2736433744430542, |
|
"logps": -81.2705078125, |
|
"loss": 0.0936, |
|
"objective": 0.08593542128801346, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.5625, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.08593542128801346, |
|
"step": 760 |
|
}, |
|
{ |
|
"dpo_loss": 0.508173406124115, |
|
"epoch": 4.336324988190836, |
|
"grad_norm": 99.70173708140004, |
|
"learning_rate": 5.112590957844232e-08, |
|
"logits": -1.2959843873977661, |
|
"logps": -82.02350616455078, |
|
"loss": 0.0954, |
|
"objective": 0.09023122489452362, |
|
"ranking_idealized": 0.5458333492279053, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.09023122489452362, |
|
"step": 765 |
|
}, |
|
{ |
|
"dpo_loss": 0.5115682482719421, |
|
"epoch": 4.364666981577704, |
|
"grad_norm": 94.66084425031231, |
|
"learning_rate": 4.684610648167503e-08, |
|
"logits": -1.1798807382583618, |
|
"logps": -80.28400421142578, |
|
"loss": 0.1006, |
|
"objective": 0.10149048268795013, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.10149047523736954, |
|
"step": 770 |
|
}, |
|
{ |
|
"dpo_loss": 0.5190815329551697, |
|
"epoch": 4.393008974964572, |
|
"grad_norm": 97.786866794993, |
|
"learning_rate": 4.274454982728032e-08, |
|
"logits": -1.2468054294586182, |
|
"logps": -81.20288848876953, |
|
"loss": 0.0877, |
|
"objective": 0.08333703130483627, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 0.08333703130483627, |
|
"step": 775 |
|
}, |
|
{ |
|
"dpo_loss": 0.5364963412284851, |
|
"epoch": 4.42135096835144, |
|
"grad_norm": 93.67850493598061, |
|
"learning_rate": 3.882285294770937e-08, |
|
"logits": -1.2755876779556274, |
|
"logps": -80.01787567138672, |
|
"loss": 0.0938, |
|
"objective": 0.09289808571338654, |
|
"ranking_idealized": 0.5083333253860474, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.09289808571338654, |
|
"step": 780 |
|
}, |
|
{ |
|
"dpo_loss": 0.5412746667861938, |
|
"epoch": 4.449692961738309, |
|
"grad_norm": 94.9731716353141, |
|
"learning_rate": 3.508255842822255e-08, |
|
"logits": -1.292734980583191, |
|
"logps": -80.0306625366211, |
|
"loss": 0.0913, |
|
"objective": 0.09275906533002853, |
|
"ranking_idealized": 0.5458333492279053, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.09275905787944794, |
|
"step": 785 |
|
}, |
|
{ |
|
"dpo_loss": 0.5126563906669617, |
|
"epoch": 4.478034955125177, |
|
"grad_norm": 96.74045611069916, |
|
"learning_rate": 3.15251375001192e-08, |
|
"logits": -1.253103494644165, |
|
"logps": -82.71080017089844, |
|
"loss": 0.0897, |
|
"objective": 0.08310778439044952, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5583333373069763, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.08310777693986893, |
|
"step": 790 |
|
}, |
|
{ |
|
"dpo_loss": 0.5277770161628723, |
|
"epoch": 4.506376948512045, |
|
"grad_norm": 102.60968536429716, |
|
"learning_rate": 2.8151989462033787e-08, |
|
"logits": -1.1557574272155762, |
|
"logps": -82.18769073486328, |
|
"loss": 0.0898, |
|
"objective": 0.08717220276594162, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.08717220276594162, |
|
"step": 795 |
|
}, |
|
{ |
|
"dpo_loss": 0.5270230770111084, |
|
"epoch": 4.534718941898913, |
|
"grad_norm": 96.30549999556874, |
|
"learning_rate": 2.4964441129527335e-08, |
|
"logits": -1.1533504724502563, |
|
"logps": -81.5091323852539, |
|
"loss": 0.0882, |
|
"objective": 0.09708014130592346, |
|
"ranking_idealized": 0.5375000238418579, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.09708014130592346, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 4.534718941898913, |
|
"eval_dpo_loss": 0.7520135641098022, |
|
"eval_logits": -1.2693291902542114, |
|
"eval_logps": -87.7427749633789, |
|
"eval_loss": 0.7119117975234985, |
|
"eval_objective": 0.7144764065742493, |
|
"eval_ranking_idealized": 0.5247933864593506, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5113636255264282, |
|
"eval_regularize": 0.7144764065742493, |
|
"eval_runtime": 258.9953, |
|
"eval_samples_per_second": 22.356, |
|
"eval_steps_per_second": 0.934, |
|
"step": 800 |
|
}, |
|
{ |
|
"dpo_loss": 0.5197975039482117, |
|
"epoch": 4.563060935285781, |
|
"grad_norm": 94.79722521361904, |
|
"learning_rate": 2.1963746313188757e-08, |
|
"logits": -1.226369023323059, |
|
"logps": -80.86296844482422, |
|
"loss": 0.0813, |
|
"objective": 0.07861018925905228, |
|
"ranking_idealized": 0.4833333194255829, |
|
"ranking_idealized_expo": 0.4625000059604645, |
|
"ranking_simple": 0.48750001192092896, |
|
"regularize": 0.07861018925905228, |
|
"step": 805 |
|
}, |
|
{ |
|
"dpo_loss": 0.5268690586090088, |
|
"epoch": 4.59140292867265, |
|
"grad_norm": 95.24668751727883, |
|
"learning_rate": 1.915108532545351e-08, |
|
"logits": -1.290175199508667, |
|
"logps": -80.24264526367188, |
|
"loss": 0.0875, |
|
"objective": 0.08437123894691467, |
|
"ranking_idealized": 0.46666666865348816, |
|
"ranking_idealized_expo": 0.4583333432674408, |
|
"ranking_simple": 0.46666666865348816, |
|
"regularize": 0.08437123149633408, |
|
"step": 810 |
|
}, |
|
{ |
|
"dpo_loss": 0.5314128398895264, |
|
"epoch": 4.619744922059518, |
|
"grad_norm": 94.93833650674884, |
|
"learning_rate": 1.6527564516331638e-08, |
|
"logits": -1.1623790264129639, |
|
"logps": -81.81945037841797, |
|
"loss": 0.0826, |
|
"objective": 0.09123408049345016, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5874999761581421, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.09123408049345016, |
|
"step": 815 |
|
}, |
|
{ |
|
"dpo_loss": 0.5448810458183289, |
|
"epoch": 4.648086915446386, |
|
"grad_norm": 99.02648077337858, |
|
"learning_rate": 1.4094215838229172e-08, |
|
"logits": -1.2590090036392212, |
|
"logps": -81.42196655273438, |
|
"loss": 0.0864, |
|
"objective": 0.09086828678846359, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.5625, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.09086828678846359, |
|
"step": 820 |
|
}, |
|
{ |
|
"dpo_loss": 0.5343818664550781, |
|
"epoch": 4.6764289088332545, |
|
"grad_norm": 93.21086746148787, |
|
"learning_rate": 1.1851996440033318e-08, |
|
"logits": -1.1644405126571655, |
|
"logps": -79.48759460449219, |
|
"loss": 0.0796, |
|
"objective": 0.07582148164510727, |
|
"ranking_idealized": 0.5083333253860474, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 0.07582148164510727, |
|
"step": 825 |
|
}, |
|
{ |
|
"dpo_loss": 0.5317553281784058, |
|
"epoch": 4.7047709022201225, |
|
"grad_norm": 97.38269826526711, |
|
"learning_rate": 9.801788290621505e-09, |
|
"logits": -1.3180792331695557, |
|
"logps": -81.53900909423828, |
|
"loss": 0.0901, |
|
"objective": 0.10412049293518066, |
|
"ranking_idealized": 0.5375000238418579, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.10412048548460007, |
|
"step": 830 |
|
}, |
|
{ |
|
"dpo_loss": 0.5185703635215759, |
|
"epoch": 4.733112895606991, |
|
"grad_norm": 92.96268594737676, |
|
"learning_rate": 7.944397831941951e-09, |
|
"logits": -1.2221894264221191, |
|
"logps": -81.21542358398438, |
|
"loss": 0.0893, |
|
"objective": 0.0936354324221611, |
|
"ranking_idealized": 0.512499988079071, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 0.09363541752099991, |
|
"step": 835 |
|
}, |
|
{ |
|
"dpo_loss": 0.5039588212966919, |
|
"epoch": 4.7614548889938595, |
|
"grad_norm": 93.6593830751853, |
|
"learning_rate": 6.280555661802856e-09, |
|
"logits": -1.2342692613601685, |
|
"logps": -81.57914733886719, |
|
"loss": 0.0827, |
|
"objective": 0.09424074739217758, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.5541666746139526, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.09424074739217758, |
|
"step": 840 |
|
}, |
|
{ |
|
"dpo_loss": 0.5341091752052307, |
|
"epoch": 4.7897968823807275, |
|
"grad_norm": 94.7722019315539, |
|
"learning_rate": 4.810916246494157e-09, |
|
"logits": -1.2643479108810425, |
|
"logps": -81.62488555908203, |
|
"loss": 0.0815, |
|
"objective": 0.07745879143476486, |
|
"ranking_idealized": 0.4791666567325592, |
|
"ranking_idealized_expo": 0.4541666805744171, |
|
"ranking_simple": 0.47083333134651184, |
|
"regularize": 0.07745879143476486, |
|
"step": 845 |
|
}, |
|
{ |
|
"dpo_loss": 0.5221010446548462, |
|
"epoch": 4.818138875767596, |
|
"grad_norm": 94.27228247906854, |
|
"learning_rate": 3.5360576633558513e-09, |
|
"logits": -1.2364792823791504, |
|
"logps": -80.28359985351562, |
|
"loss": 0.0796, |
|
"objective": 0.0774383395910263, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 0.07743828743696213, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 4.818138875767596, |
|
"eval_dpo_loss": 0.7514771223068237, |
|
"eval_logits": -1.2688941955566406, |
|
"eval_logps": -87.71552276611328, |
|
"eval_loss": 0.7094753384590149, |
|
"eval_objective": 0.711796224117279, |
|
"eval_ranking_idealized": 0.5247933864593506, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5123966932296753, |
|
"eval_regularize": 0.711796224117279, |
|
"eval_runtime": 259.5779, |
|
"eval_samples_per_second": 22.305, |
|
"eval_steps_per_second": 0.932, |
|
"step": 850 |
|
}, |
|
{ |
|
"dpo_loss": 0.5401983857154846, |
|
"epoch": 4.846480869154464, |
|
"grad_norm": 92.34602743429262, |
|
"learning_rate": 2.4564813733932155e-09, |
|
"logits": -1.205161690711975, |
|
"logps": -80.62550354003906, |
|
"loss": 0.08, |
|
"objective": 0.08250019699335098, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.08250019699335098, |
|
"step": 855 |
|
}, |
|
{ |
|
"dpo_loss": 0.5175269246101379, |
|
"epoch": 4.874822862541333, |
|
"grad_norm": 99.70852124493375, |
|
"learning_rate": 1.5726120240288631e-09, |
|
"logits": -1.1921393871307373, |
|
"logps": -80.51878356933594, |
|
"loss": 0.0814, |
|
"objective": 0.07799576222896576, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.07799576222896576, |
|
"step": 860 |
|
}, |
|
{ |
|
"dpo_loss": 0.5134026408195496, |
|
"epoch": 4.903164855928201, |
|
"grad_norm": 93.71216642807723, |
|
"learning_rate": 8.847972820693051e-10, |
|
"logits": -1.2603986263275146, |
|
"logps": -79.67554473876953, |
|
"loss": 0.0745, |
|
"objective": 0.07266159355640411, |
|
"ranking_idealized": 0.5083333253860474, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.07266159355640411, |
|
"step": 865 |
|
}, |
|
{ |
|
"dpo_loss": 0.5160818099975586, |
|
"epoch": 4.931506849315069, |
|
"grad_norm": 95.34876322242407, |
|
"learning_rate": 3.933076969516724e-10, |
|
"logits": -1.29552161693573, |
|
"logps": -81.27151489257812, |
|
"loss": 0.0794, |
|
"objective": 0.08014672249555588, |
|
"ranking_idealized": 0.5375000238418579, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.08014672249555588, |
|
"step": 870 |
|
}, |
|
{ |
|
"dpo_loss": 0.522140622138977, |
|
"epoch": 4.959848842701937, |
|
"grad_norm": 95.71079860788076, |
|
"learning_rate": 9.833659432367803e-11, |
|
"logits": -1.2183899879455566, |
|
"logps": -81.16183471679688, |
|
"loss": 0.081, |
|
"objective": 0.08778008073568344, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.08778008073568344, |
|
"step": 875 |
|
}, |
|
{ |
|
"dpo_loss": 0.5225833058357239, |
|
"epoch": 4.988190836088805, |
|
"grad_norm": 104.51115430406888, |
|
"learning_rate": 0.0, |
|
"logits": -1.2887017726898193, |
|
"logps": -81.99433898925781, |
|
"loss": 0.0809, |
|
"objective": 0.0927945151925087, |
|
"ranking_idealized": 0.5375000238418579, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.0927945151925087, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 4.988190836088805, |
|
"step": 880, |
|
"total_flos": 0.0, |
|
"train_loss": 0.3223599465055899, |
|
"train_runtime": 35128.3246, |
|
"train_samples_per_second": 7.231, |
|
"train_steps_per_second": 0.025 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 880, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|