hZzy's picture
Model save
f487054 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.988190836088805,
"eval_steps": 50,
"global_step": 880,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"dpo_loss": 0.6931471824645996,
"epoch": 0.005668398677373642,
"grad_norm": 1341.3653828621927,
"learning_rate": 1.1363636363636363e-08,
"logits": -1.3147305250167847,
"logps": -88.0877456665039,
"loss": 0.4113,
"objective": 0.41588976979255676,
"ranking_idealized": 0.5208333134651184,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5208333134651184,
"regularize": 0.41588976979255676,
"step": 1
},
{
"dpo_loss": 0.6927290558815002,
"epoch": 0.02834199338686821,
"grad_norm": 1318.7970843939129,
"learning_rate": 5.6818181818181815e-08,
"logits": -1.367867350578308,
"logps": -84.43714141845703,
"loss": 0.4128,
"objective": 0.3797340393066406,
"ranking_idealized": 0.546875,
"ranking_idealized_expo": 0.546875,
"ranking_simple": 0.546875,
"regularize": 0.3797340393066406,
"step": 5
},
{
"dpo_loss": 0.66615891456604,
"epoch": 0.05668398677373642,
"grad_norm": 1529.7102214452402,
"learning_rate": 1.1363636363636363e-07,
"logits": -1.446859359741211,
"logps": -83.48344421386719,
"loss": 0.4289,
"objective": 0.4494988024234772,
"ranking_idealized": 0.512499988079071,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5083333253860474,
"regularize": 0.4494988024234772,
"step": 10
},
{
"dpo_loss": 0.6892092823982239,
"epoch": 0.08502598016060463,
"grad_norm": 1398.9545573108187,
"learning_rate": 1.7045454545454543e-07,
"logits": -1.410345196723938,
"logps": -83.83523559570312,
"loss": 0.5083,
"objective": 0.5087102055549622,
"ranking_idealized": 0.5041666626930237,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5083333253860474,
"regularize": 0.5087102055549622,
"step": 15
},
{
"dpo_loss": 0.737316906452179,
"epoch": 0.11336797354747284,
"grad_norm": 1480.9989705702396,
"learning_rate": 2.2727272727272726e-07,
"logits": -1.397745966911316,
"logps": -84.64732360839844,
"loss": 0.5834,
"objective": 0.6373786330223083,
"ranking_idealized": 0.5375000238418579,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5375000238418579,
"regularize": 0.6373786330223083,
"step": 20
},
{
"dpo_loss": 0.6649187803268433,
"epoch": 0.14170996693434104,
"grad_norm": 1589.2205546755508,
"learning_rate": 2.840909090909091e-07,
"logits": -1.4405299425125122,
"logps": -84.5063705444336,
"loss": 0.7072,
"objective": 0.7172243595123291,
"ranking_idealized": 0.4958333373069763,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.4958333373069763,
"regularize": 0.7172243595123291,
"step": 25
},
{
"dpo_loss": 0.7319389581680298,
"epoch": 0.17005196032120926,
"grad_norm": 1608.2967040181402,
"learning_rate": 3.4090909090909085e-07,
"logits": -1.4063345193862915,
"logps": -85.10441589355469,
"loss": 0.9251,
"objective": 0.919275164604187,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5416666865348816,
"regularize": 0.919275164604187,
"step": 30
},
{
"dpo_loss": 0.7343574166297913,
"epoch": 0.19839395370807747,
"grad_norm": 1660.500800571558,
"learning_rate": 3.977272727272727e-07,
"logits": -1.3780211210250854,
"logps": -83.85320281982422,
"loss": 1.1082,
"objective": 1.1600453853607178,
"ranking_idealized": 0.5041666626930237,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5,
"regularize": 1.1600453853607178,
"step": 35
},
{
"dpo_loss": 0.7147431969642639,
"epoch": 0.22673594709494568,
"grad_norm": 1529.1766380039583,
"learning_rate": 4.545454545454545e-07,
"logits": -1.3637299537658691,
"logps": -84.27665710449219,
"loss": 1.1991,
"objective": 1.0680582523345947,
"ranking_idealized": 0.5208333134651184,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5208333134651184,
"regularize": 1.0680582523345947,
"step": 40
},
{
"dpo_loss": 0.8464261889457703,
"epoch": 0.25507794048181387,
"grad_norm": 1547.4803845815225,
"learning_rate": 5.113636363636363e-07,
"logits": -1.483019471168518,
"logps": -85.49544525146484,
"loss": 1.4787,
"objective": 1.7124279737472534,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5416666865348816,
"regularize": 1.7124279737472534,
"step": 45
},
{
"dpo_loss": 0.9592286348342896,
"epoch": 0.2834199338686821,
"grad_norm": 1190.1245065397072,
"learning_rate": 5.681818181818182e-07,
"logits": -1.3799251317977905,
"logps": -83.56061553955078,
"loss": 1.7171,
"objective": 1.6765538454055786,
"ranking_idealized": 0.42500001192092896,
"ranking_idealized_expo": 0.42500001192092896,
"ranking_simple": 0.42500001192092896,
"regularize": 1.6765538454055786,
"step": 50
},
{
"epoch": 0.2834199338686821,
"eval_dpo_loss": 0.8390654921531677,
"eval_logits": -1.3979839086532593,
"eval_logps": -91.42163848876953,
"eval_loss": 0.9451757073402405,
"eval_objective": 0.9804208874702454,
"eval_ranking_idealized": 0.5092975497245789,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5113636255264282,
"eval_regularize": 0.9804208874702454,
"eval_runtime": 260.041,
"eval_samples_per_second": 22.266,
"eval_steps_per_second": 0.931,
"step": 50
},
{
"dpo_loss": 1.1626336574554443,
"epoch": 0.3117619272555503,
"grad_norm": 1379.607675790991,
"learning_rate": 6.249999999999999e-07,
"logits": -1.4113659858703613,
"logps": -85.0433578491211,
"loss": 1.963,
"objective": 2.1214394569396973,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5416666865348816,
"regularize": 2.1214394569396973,
"step": 55
},
{
"dpo_loss": 1.1583107709884644,
"epoch": 0.3401039206424185,
"grad_norm": 1583.6531529945255,
"learning_rate": 6.818181818181817e-07,
"logits": -1.387775182723999,
"logps": -83.92105102539062,
"loss": 2.1311,
"objective": 2.187351703643799,
"ranking_idealized": 0.5166666507720947,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5041666626930237,
"regularize": 2.187351703643799,
"step": 60
},
{
"dpo_loss": 1.1024636030197144,
"epoch": 0.3684459140292867,
"grad_norm": 1400.9200642265332,
"learning_rate": 7.386363636363636e-07,
"logits": -1.3549267053604126,
"logps": -84.21534729003906,
"loss": 2.5118,
"objective": 2.5817580223083496,
"ranking_idealized": 0.5083333253860474,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5041666626930237,
"regularize": 2.5817580223083496,
"step": 65
},
{
"dpo_loss": 1.401374340057373,
"epoch": 0.39678790741615494,
"grad_norm": 1397.7577328959405,
"learning_rate": 7.954545454545454e-07,
"logits": -1.434369444847107,
"logps": -85.42965698242188,
"loss": 2.8155,
"objective": 2.7807960510253906,
"ranking_idealized": 0.5166666507720947,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5083333253860474,
"regularize": 2.7807960510253906,
"step": 70
},
{
"dpo_loss": 1.3847529888153076,
"epoch": 0.42512990080302315,
"grad_norm": 1574.992862602621,
"learning_rate": 8.522727272727273e-07,
"logits": -1.4534552097320557,
"logps": -85.975341796875,
"loss": 3.0239,
"objective": 2.7482104301452637,
"ranking_idealized": 0.5458333492279053,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.5458333492279053,
"regularize": 2.7482104301452637,
"step": 75
},
{
"dpo_loss": 1.6628150939941406,
"epoch": 0.45347189418989137,
"grad_norm": 1317.6328063663461,
"learning_rate": 9.09090909090909e-07,
"logits": -1.4975560903549194,
"logps": -85.59205627441406,
"loss": 3.0523,
"objective": 3.2058732509613037,
"ranking_idealized": 0.5166666507720947,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5166666507720947,
"regularize": 3.2058732509613037,
"step": 80
},
{
"dpo_loss": 1.5282264947891235,
"epoch": 0.4818138875767596,
"grad_norm": 1409.5000185848141,
"learning_rate": 9.65909090909091e-07,
"logits": -1.4272305965423584,
"logps": -83.47660827636719,
"loss": 3.6628,
"objective": 3.7103846073150635,
"ranking_idealized": 0.5249999761581421,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5208333134651184,
"regularize": 3.7103846073150635,
"step": 85
},
{
"dpo_loss": 2.017771005630493,
"epoch": 0.5101558809636277,
"grad_norm": 1413.0881700978623,
"learning_rate": 9.999842657116664e-07,
"logits": -1.2261414527893066,
"logps": -84.69164276123047,
"loss": 3.9801,
"objective": 3.7396154403686523,
"ranking_idealized": 0.4791666567325592,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.4749999940395355,
"regularize": 3.7396154403686523,
"step": 90
},
{
"dpo_loss": 2.145413875579834,
"epoch": 0.538497874350496,
"grad_norm": 1451.2644391659398,
"learning_rate": 9.998072663403656e-07,
"logits": -1.3078831434249878,
"logps": -83.98829650878906,
"loss": 4.1074,
"objective": 4.225299835205078,
"ranking_idealized": 0.5458333492279053,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.550000011920929,
"regularize": 4.225299835205078,
"step": 95
},
{
"dpo_loss": 2.2973792552948,
"epoch": 0.5668398677373642,
"grad_norm": 1458.4336577249642,
"learning_rate": 9.99433669591504e-07,
"logits": -1.3813899755477905,
"logps": -85.42733001708984,
"loss": 4.4116,
"objective": 4.421018600463867,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5416666865348816,
"regularize": 4.421018600463867,
"step": 100
},
{
"epoch": 0.5668398677373642,
"eval_dpo_loss": 1.3937046527862549,
"eval_logits": -1.3645607233047485,
"eval_logps": -91.35843658447266,
"eval_loss": 2.288909912109375,
"eval_objective": 2.2847275733947754,
"eval_ranking_idealized": 0.5092975497245789,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5144628286361694,
"eval_regularize": 2.2847275733947754,
"eval_runtime": 259.4011,
"eval_samples_per_second": 22.321,
"eval_steps_per_second": 0.933,
"step": 100
},
{
"dpo_loss": 2.3384857177734375,
"epoch": 0.5951818611242324,
"grad_norm": 1377.8614811370987,
"learning_rate": 9.988636224180095e-07,
"logits": -1.2764217853546143,
"logps": -85.17194366455078,
"loss": 4.8397,
"objective": 4.943901062011719,
"ranking_idealized": 0.5375000238418579,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5541666746139526,
"regularize": 4.943901062011719,
"step": 105
},
{
"dpo_loss": 2.419879198074341,
"epoch": 0.6235238545111006,
"grad_norm": 1709.2314342435861,
"learning_rate": 9.980973490458728e-07,
"logits": -1.4455102682113647,
"logps": -84.0779037475586,
"loss": 4.9241,
"objective": 4.433398723602295,
"ranking_idealized": 0.4541666805744171,
"ranking_idealized_expo": 0.44999998807907104,
"ranking_simple": 0.44583332538604736,
"regularize": 4.433398723602295,
"step": 110
},
{
"dpo_loss": 2.3997786045074463,
"epoch": 0.6518658478979689,
"grad_norm": 1267.7408034843309,
"learning_rate": 9.971351508859486e-07,
"logits": -1.403380036354065,
"logps": -83.38719940185547,
"loss": 4.8313,
"objective": 4.724060535430908,
"ranking_idealized": 0.4833333194255829,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.48750001192092896,
"regularize": 4.724060535430908,
"step": 115
},
{
"dpo_loss": 2.01283597946167,
"epoch": 0.680207841284837,
"grad_norm": 1334.5055640243738,
"learning_rate": 9.959774064153975e-07,
"logits": -1.3471440076828003,
"logps": -84.59120178222656,
"loss": 4.9092,
"objective": 4.763902187347412,
"ranking_idealized": 0.5375000238418579,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5333333611488342,
"regularize": 4.763902187347412,
"step": 120
},
{
"dpo_loss": 2.8711302280426025,
"epoch": 0.7085498346717053,
"grad_norm": 1217.4260816961178,
"learning_rate": 9.94624571028813e-07,
"logits": -1.2994908094406128,
"logps": -83.49886322021484,
"loss": 5.2406,
"objective": 5.268767356872559,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.550000011920929,
"regularize": 5.268767356872559,
"step": 125
},
{
"dpo_loss": 2.1492836475372314,
"epoch": 0.7368918280585735,
"grad_norm": 1181.5936213316104,
"learning_rate": 9.930771768590933e-07,
"logits": -1.4135076999664307,
"logps": -82.80963897705078,
"loss": 4.7897,
"objective": 4.472428798675537,
"ranking_idealized": 0.44999998807907104,
"ranking_idealized_expo": 0.44999998807907104,
"ranking_simple": 0.44999998807907104,
"regularize": 4.472428798675537,
"step": 130
},
{
"dpo_loss": 2.715928554534912,
"epoch": 0.7652338214454416,
"grad_norm": 1143.7481041860115,
"learning_rate": 9.91335832568129e-07,
"logits": -1.387623906135559,
"logps": -84.99431610107422,
"loss": 5.1988,
"objective": 5.63712215423584,
"ranking_idealized": 0.5291666388511658,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5291666388511658,
"regularize": 5.63712215423584,
"step": 135
},
{
"dpo_loss": 2.748910665512085,
"epoch": 0.7935758148323099,
"grad_norm": 1147.9091652909822,
"learning_rate": 9.894012231073895e-07,
"logits": -1.2905735969543457,
"logps": -84.26557922363281,
"loss": 5.4168,
"objective": 5.596283912658691,
"ranking_idealized": 0.5041666626930237,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.512499988079071,
"regularize": 5.596283912658691,
"step": 140
},
{
"dpo_loss": 2.3077232837677,
"epoch": 0.821917808219178,
"grad_norm": 1202.3074237963756,
"learning_rate": 9.872741094484964e-07,
"logits": -1.3657087087631226,
"logps": -83.96611785888672,
"loss": 5.3248,
"objective": 4.7676682472229,
"ranking_idealized": 0.4833333194255829,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.48750001192092896,
"regularize": 4.7676682472229,
"step": 145
},
{
"dpo_loss": 2.2885777950286865,
"epoch": 0.8502598016060463,
"grad_norm": 1261.3028504954063,
"learning_rate": 9.849553282839024e-07,
"logits": -1.350846767425537,
"logps": -82.00806427001953,
"loss": 5.641,
"objective": 5.300591945648193,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5416666865348816,
"regularize": 5.300591945648193,
"step": 150
},
{
"epoch": 0.8502598016060463,
"eval_dpo_loss": 1.8989161252975464,
"eval_logits": -1.361150860786438,
"eval_logps": -89.60128784179688,
"eval_loss": 3.659212827682495,
"eval_objective": 3.6993324756622314,
"eval_ranking_idealized": 0.5092975497245789,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5123966932296753,
"eval_regularize": 3.6993324756622314,
"eval_runtime": 258.9031,
"eval_samples_per_second": 22.364,
"eval_steps_per_second": 0.935,
"step": 150
},
{
"dpo_loss": 2.7537026405334473,
"epoch": 0.8786017949929145,
"grad_norm": 1297.1182521614555,
"learning_rate": 9.824457916977784e-07,
"logits": -1.358762264251709,
"logps": -81.92320251464844,
"loss": 5.4151,
"objective": 5.554434776306152,
"ranking_idealized": 0.512499988079071,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5249999761581421,
"regularize": 5.554434776306152,
"step": 155
},
{
"dpo_loss": 2.6335387229919434,
"epoch": 0.9069437883797827,
"grad_norm": 1187.7670981291164,
"learning_rate": 9.797464868072486e-07,
"logits": -1.2611459493637085,
"logps": -83.41938018798828,
"loss": 5.5854,
"objective": 5.695128917694092,
"ranking_idealized": 0.4958333373069763,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.48750001192092896,
"regularize": 5.695128917694092,
"step": 160
},
{
"dpo_loss": 2.661656618118286,
"epoch": 0.9352857817666509,
"grad_norm": 1120.1403651445853,
"learning_rate": 9.768584753741134e-07,
"logits": -1.2767577171325684,
"logps": -84.16160583496094,
"loss": 5.4701,
"objective": 5.190924644470215,
"ranking_idealized": 0.5208333134651184,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5208333134651184,
"regularize": 5.190924644470215,
"step": 165
},
{
"dpo_loss": 3.1157445907592773,
"epoch": 0.9636277751535192,
"grad_norm": 1093.9866208390724,
"learning_rate": 9.737828933872073e-07,
"logits": -1.3006123304367065,
"logps": -84.45008087158203,
"loss": 5.3421,
"objective": 5.613492488861084,
"ranking_idealized": 0.512499988079071,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5,
"regularize": 5.613492488861084,
"step": 170
},
{
"dpo_loss": 2.3348023891448975,
"epoch": 0.9919697685403873,
"grad_norm": 1151.3293772515624,
"learning_rate": 9.705209506155634e-07,
"logits": -1.2408747673034668,
"logps": -84.64601135253906,
"loss": 5.2759,
"objective": 4.945895671844482,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.574999988079071,
"ranking_simple": 0.574999988079071,
"regularize": 4.945895671844482,
"step": 175
},
{
"dpo_loss": 2.864588737487793,
"epoch": 1.0203117619272555,
"grad_norm": 1140.0762636078218,
"learning_rate": 9.670739301325534e-07,
"logits": -1.3873549699783325,
"logps": -83.78386688232422,
"loss": 5.4744,
"objective": 5.529939651489258,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5375000238418579,
"regularize": 5.529939651489258,
"step": 180
},
{
"dpo_loss": 2.8053672313690186,
"epoch": 1.0486537553141237,
"grad_norm": 1077.7726094731238,
"learning_rate": 9.63443187811197e-07,
"logits": -1.2356277704238892,
"logps": -84.0350341796875,
"loss": 5.2551,
"objective": 5.298059463500977,
"ranking_idealized": 0.4833333194255829,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.48750001192092896,
"regularize": 5.298059463500977,
"step": 185
},
{
"dpo_loss": 2.420074462890625,
"epoch": 1.076995748700992,
"grad_norm": 1190.6128199162288,
"learning_rate": 9.596301517908328e-07,
"logits": -1.3124566078186035,
"logps": -84.89928436279297,
"loss": 5.3934,
"objective": 5.610664367675781,
"ranking_idealized": 0.5541666746139526,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.5458333492279053,
"regularize": 5.610664367675781,
"step": 190
},
{
"dpo_loss": 2.3999674320220947,
"epoch": 1.10533774208786,
"grad_norm": 1115.8540591706928,
"learning_rate": 9.556363219153662e-07,
"logits": -1.2911279201507568,
"logps": -84.42256927490234,
"loss": 5.3254,
"objective": 5.321680545806885,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.5666666626930237,
"regularize": 5.321680545806885,
"step": 195
},
{
"dpo_loss": 2.4562041759490967,
"epoch": 1.1336797354747283,
"grad_norm": 1209.8487619605175,
"learning_rate": 9.514632691433106e-07,
"logits": -1.357124924659729,
"logps": -84.73216247558594,
"loss": 5.6662,
"objective": 5.265989303588867,
"ranking_idealized": 0.5083333253860474,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5083333253860474,
"regularize": 5.265989303588867,
"step": 200
},
{
"epoch": 1.1336797354747283,
"eval_dpo_loss": 2.5621941089630127,
"eval_logits": -1.312853455543518,
"eval_logps": -91.82030487060547,
"eval_loss": 4.9017205238342285,
"eval_objective": 5.1433634757995605,
"eval_ranking_idealized": 0.5092975497245789,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5134297609329224,
"eval_regularize": 5.1433634757995605,
"eval_runtime": 259.021,
"eval_samples_per_second": 22.353,
"eval_steps_per_second": 0.934,
"step": 200
},
{
"dpo_loss": 2.70652174949646,
"epoch": 1.1620217288615966,
"grad_norm": 1176.9552446858397,
"learning_rate": 9.471126349298556e-07,
"logits": -1.3222142457962036,
"logps": -84.78860473632812,
"loss": 5.5162,
"objective": 5.902441501617432,
"ranking_idealized": 0.48750001192092896,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.5,
"regularize": 5.902441501617432,
"step": 205
},
{
"dpo_loss": 2.1185081005096436,
"epoch": 1.1903637222484649,
"grad_norm": 1095.8529561562762,
"learning_rate": 9.425861305812081e-07,
"logits": -1.302480936050415,
"logps": -84.719482421875,
"loss": 5.425,
"objective": 5.670342922210693,
"ranking_idealized": 0.5,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5,
"regularize": 5.670342922210693,
"step": 210
},
{
"dpo_loss": 2.6837000846862793,
"epoch": 1.2187057156353331,
"grad_norm": 1062.9514542837012,
"learning_rate": 9.378855365814557e-07,
"logits": -1.273558497428894,
"logps": -84.21690368652344,
"loss": 5.3289,
"objective": 5.31361722946167,
"ranking_idealized": 0.48750001192092896,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.48750001192092896,
"regularize": 5.31361722946167,
"step": 215
},
{
"dpo_loss": 2.5074095726013184,
"epoch": 1.2470477090222012,
"grad_norm": 1053.1396201008674,
"learning_rate": 9.330127018922193e-07,
"logits": -1.1912199258804321,
"logps": -83.59181213378906,
"loss": 5.2229,
"objective": 5.1353912353515625,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.6000000238418579,
"ranking_simple": 0.5958333611488342,
"regularize": 5.1353912353515625,
"step": 220
},
{
"dpo_loss": 2.4020681381225586,
"epoch": 1.2753897024090695,
"grad_norm": 1047.2312940257925,
"learning_rate": 9.279695432253708e-07,
"logits": -1.2806742191314697,
"logps": -84.57674407958984,
"loss": 5.1377,
"objective": 5.066871643066406,
"ranking_idealized": 0.5291666388511658,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5166666507720947,
"regularize": 5.066871643066406,
"step": 225
},
{
"dpo_loss": 2.466576337814331,
"epoch": 1.3037316957959377,
"grad_norm": 1030.5617485584146,
"learning_rate": 9.227580442891021e-07,
"logits": -1.2669168710708618,
"logps": -82.93086242675781,
"loss": 4.8601,
"objective": 4.961721420288086,
"ranking_idealized": 0.5083333253860474,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.512499988079071,
"regularize": 4.961721420288086,
"step": 230
},
{
"dpo_loss": 2.1171398162841797,
"epoch": 1.3320736891828058,
"grad_norm": 1061.077062757036,
"learning_rate": 9.173802550076401e-07,
"logits": -1.3311480283737183,
"logps": -81.57727813720703,
"loss": 4.9903,
"objective": 4.573681354522705,
"ranking_idealized": 0.4833333194255829,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.4958333373069763,
"regularize": 4.573681354522705,
"step": 235
},
{
"dpo_loss": 2.8134536743164062,
"epoch": 1.360415682569674,
"grad_norm": 970.3779104307249,
"learning_rate": 9.118382907149163e-07,
"logits": -1.267702579498291,
"logps": -83.0981216430664,
"loss": 5.0505,
"objective": 5.230247974395752,
"ranking_idealized": 0.4791666567325592,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.4791666567325592,
"regularize": 5.230247974395752,
"step": 240
},
{
"dpo_loss": 2.3336708545684814,
"epoch": 1.3887576759565423,
"grad_norm": 1037.7314557451798,
"learning_rate": 9.061343313225087e-07,
"logits": -1.2927136421203613,
"logps": -82.71648406982422,
"loss": 4.97,
"objective": 4.810959815979004,
"ranking_idealized": 0.5208333134651184,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5208333134651184,
"regularize": 4.810959815979004,
"step": 245
},
{
"dpo_loss": 1.7325116395950317,
"epoch": 1.4170996693434104,
"grad_norm": 1022.8469212904224,
"learning_rate": 9.002706204621802e-07,
"logits": -1.2380987405776978,
"logps": -82.20935821533203,
"loss": 5.0544,
"objective": 4.453593730926514,
"ranking_idealized": 0.47083333134651184,
"ranking_idealized_expo": 0.47083333134651184,
"ranking_simple": 0.47083333134651184,
"regularize": 4.453593730926514,
"step": 250
},
{
"epoch": 1.4170996693434104,
"eval_dpo_loss": 2.388360023498535,
"eval_logits": -1.2957789897918701,
"eval_logps": -89.6596450805664,
"eval_loss": 4.645730495452881,
"eval_objective": 4.698073387145996,
"eval_ranking_idealized": 0.5092975497245789,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5092975497245789,
"eval_regularize": 4.698073387145996,
"eval_runtime": 258.9175,
"eval_samples_per_second": 22.362,
"eval_steps_per_second": 0.935,
"step": 250
},
{
"dpo_loss": 2.6409242153167725,
"epoch": 1.4454416627302786,
"grad_norm": 1045.3372739075203,
"learning_rate": 8.942494646033554e-07,
"logits": -1.248718500137329,
"logps": -83.6023941040039,
"loss": 5.3714,
"objective": 5.307827949523926,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.5375000238418579,
"regularize": 5.307827949523926,
"step": 255
},
{
"dpo_loss": 2.64563250541687,
"epoch": 1.473783656117147,
"grad_norm": 1197.986844648104,
"learning_rate": 8.880732321458784e-07,
"logits": -1.2879093885421753,
"logps": -84.90482330322266,
"loss": 5.0662,
"objective": 5.523282051086426,
"ranking_idealized": 0.48750001192092896,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.4791666567325592,
"regularize": 5.523282051086426,
"step": 260
},
{
"dpo_loss": 2.4120934009552,
"epoch": 1.5021256495040152,
"grad_norm": 1129.3043637151231,
"learning_rate": 8.817443524884117e-07,
"logits": -1.2617005109786987,
"logps": -83.68741607666016,
"loss": 5.09,
"objective": 4.771634578704834,
"ranking_idealized": 0.5041666626930237,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5,
"regularize": 4.771634578704834,
"step": 265
},
{
"dpo_loss": 3.012559175491333,
"epoch": 1.5304676428908834,
"grad_norm": 1023.223627517731,
"learning_rate": 8.752653150728411e-07,
"logits": -1.284982442855835,
"logps": -84.35843658447266,
"loss": 5.3376,
"objective": 5.6197028160095215,
"ranking_idealized": 0.5083333253860474,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5041666626930237,
"regularize": 5.6197028160095215,
"step": 270
},
{
"dpo_loss": 2.465777635574341,
"epoch": 1.5588096362777515,
"grad_norm": 1088.952583153439,
"learning_rate": 8.68638668405062e-07,
"logits": -1.2928842306137085,
"logps": -84.76825714111328,
"loss": 5.0697,
"objective": 4.782662868499756,
"ranking_idealized": 0.5249999761581421,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.512499988079071,
"regularize": 4.782662868499756,
"step": 275
},
{
"dpo_loss": 2.743584156036377,
"epoch": 1.5871516296646198,
"grad_norm": 1199.9945832988806,
"learning_rate": 8.61867019052535e-07,
"logits": -1.212363362312317,
"logps": -82.49223327636719,
"loss": 4.971,
"objective": 5.158485412597656,
"ranking_idealized": 0.5041666626930237,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5041666626930237,
"regularize": 5.158485412597656,
"step": 280
},
{
"dpo_loss": 2.19926381111145,
"epoch": 1.615493623051488,
"grad_norm": 1072.4621792134556,
"learning_rate": 8.549530306190014e-07,
"logits": -1.3124572038650513,
"logps": -84.11770629882812,
"loss": 4.7979,
"objective": 4.820002555847168,
"ranking_idealized": 0.5208333134651184,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5249999761581421,
"regularize": 4.820002555847168,
"step": 285
},
{
"dpo_loss": 2.2039902210235596,
"epoch": 1.643835616438356,
"grad_norm": 1037.1720137680302,
"learning_rate": 8.478994226967638e-07,
"logits": -1.331601619720459,
"logps": -82.66283416748047,
"loss": 4.7089,
"objective": 4.694666385650635,
"ranking_idealized": 0.5625,
"ranking_idealized_expo": 0.5625,
"ranking_simple": 0.5583333373069763,
"regularize": 4.694666385650635,
"step": 290
},
{
"dpo_loss": 2.586439847946167,
"epoch": 1.6721776098252243,
"grad_norm": 1019.0647957946281,
"learning_rate": 8.407089697969456e-07,
"logits": -1.2327104806900024,
"logps": -81.66007995605469,
"loss": 4.8644,
"objective": 4.790833473205566,
"ranking_idealized": 0.4833333194255829,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.49166667461395264,
"regularize": 4.790833473205566,
"step": 295
},
{
"dpo_loss": 2.176333427429199,
"epoch": 1.7005196032120926,
"grad_norm": 1054.8932248051876,
"learning_rate": 8.333845002581458e-07,
"logits": -1.3139069080352783,
"logps": -83.0549087524414,
"loss": 4.799,
"objective": 4.67409610748291,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.574999988079071,
"ranking_simple": 0.5708333253860474,
"regularize": 4.67409610748291,
"step": 300
},
{
"epoch": 1.7005196032120926,
"eval_dpo_loss": 2.5370917320251465,
"eval_logits": -1.312793254852295,
"eval_logps": -89.6458969116211,
"eval_loss": 5.069702625274658,
"eval_objective": 5.148064136505127,
"eval_ranking_idealized": 0.5092975497245789,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5113636255264282,
"eval_regularize": 5.148064136505127,
"eval_runtime": 259.1048,
"eval_samples_per_second": 22.346,
"eval_steps_per_second": 0.934,
"step": 300
},
{
"dpo_loss": 2.535557985305786,
"epoch": 1.7288615965989607,
"grad_norm": 1059.8267179953498,
"learning_rate": 8.259288951339232e-07,
"logits": -1.2971898317337036,
"logps": -83.48696899414062,
"loss": 4.7904,
"objective": 5.117157459259033,
"ranking_idealized": 0.5083333253860474,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.512499988079071,
"regularize": 5.117157459259033,
"step": 305
},
{
"dpo_loss": 2.743206024169922,
"epoch": 1.7572035899858292,
"grad_norm": 1062.5530460229647,
"learning_rate": 8.183450870595441e-07,
"logits": -1.3307418823242188,
"logps": -81.59162902832031,
"loss": 4.7455,
"objective": 4.8372015953063965,
"ranking_idealized": 0.5249999761581421,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5166666507720947,
"regularize": 4.8372015953063965,
"step": 310
},
{
"dpo_loss": 2.1805214881896973,
"epoch": 1.7855455833726972,
"grad_norm": 967.1673859822481,
"learning_rate": 8.106360590984404e-07,
"logits": -1.2439404726028442,
"logps": -82.7205810546875,
"loss": 4.5159,
"objective": 4.586319923400879,
"ranking_idealized": 0.5083333253860474,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.512499988079071,
"regularize": 4.586319923400879,
"step": 315
},
{
"dpo_loss": 2.943455219268799,
"epoch": 1.8138875767595655,
"grad_norm": 1040.3088755706913,
"learning_rate": 8.028048435688333e-07,
"logits": -1.3003054857254028,
"logps": -83.96493530273438,
"loss": 4.8496,
"objective": 5.258904933929443,
"ranking_idealized": 0.5041666626930237,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5083333253860474,
"regularize": 5.258904933929443,
"step": 320
},
{
"dpo_loss": 1.8657586574554443,
"epoch": 1.8422295701464337,
"grad_norm": 1028.4810077841332,
"learning_rate": 7.948545208509811e-07,
"logits": -1.3532642126083374,
"logps": -85.16321563720703,
"loss": 4.6322,
"objective": 4.409180164337158,
"ranking_idealized": 0.5249999761581421,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5291666388511658,
"regularize": 4.409180164337158,
"step": 325
},
{
"dpo_loss": 2.035325050354004,
"epoch": 1.8705715635333018,
"grad_norm": 981.4379644106098,
"learning_rate": 7.86788218175523e-07,
"logits": -1.124271035194397,
"logps": -81.62163543701172,
"loss": 4.5108,
"objective": 4.548847675323486,
"ranking_idealized": 0.4749999940395355,
"ranking_idealized_expo": 0.47083333134651184,
"ranking_simple": 0.4791666567325592,
"regularize": 4.548847675323486,
"step": 330
},
{
"dpo_loss": 2.2992029190063477,
"epoch": 1.89891355692017,
"grad_norm": 1060.3744283391652,
"learning_rate": 7.786091083933949e-07,
"logits": -1.2721890211105347,
"logps": -81.50038146972656,
"loss": 4.574,
"objective": 4.452338218688965,
"ranking_idealized": 0.4958333373069763,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5041666626930237,
"regularize": 4.452338218688965,
"step": 335
},
{
"dpo_loss": 2.214193820953369,
"epoch": 1.9272555503070383,
"grad_norm": 1066.9038354372583,
"learning_rate": 7.703204087277988e-07,
"logits": -1.2717024087905884,
"logps": -82.89705657958984,
"loss": 4.1923,
"objective": 4.533308029174805,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5416666865348816,
"regularize": 4.533308029174805,
"step": 340
},
{
"dpo_loss": 1.864801287651062,
"epoch": 1.9555975436939064,
"grad_norm": 1068.1959237278584,
"learning_rate": 7.619253795087208e-07,
"logits": -1.2882568836212158,
"logps": -81.01626586914062,
"loss": 4.3847,
"objective": 4.10886812210083,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5333333611488342,
"regularize": 4.10886812210083,
"step": 345
},
{
"dpo_loss": 1.7610963582992554,
"epoch": 1.9839395370807746,
"grad_norm": 1065.3539167654192,
"learning_rate": 7.534273228904915e-07,
"logits": -1.2216317653656006,
"logps": -82.78074645996094,
"loss": 4.3968,
"objective": 4.256522178649902,
"ranking_idealized": 0.48750001192092896,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.5,
"regularize": 4.256522178649902,
"step": 350
},
{
"epoch": 1.9839395370807746,
"eval_dpo_loss": 2.7970640659332275,
"eval_logits": -1.2878926992416382,
"eval_logps": -88.54591369628906,
"eval_loss": 5.4044508934021,
"eval_objective": 5.363577365875244,
"eval_ranking_idealized": 0.5092975497245789,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5103305578231812,
"eval_regularize": 5.363577365875244,
"eval_runtime": 258.9945,
"eval_samples_per_second": 22.356,
"eval_steps_per_second": 0.934,
"step": 350
},
{
"dpo_loss": 2.0882179737091064,
"epoch": 2.012281530467643,
"grad_norm": 1012.7784752507871,
"learning_rate": 7.448295815528956e-07,
"logits": -1.180530309677124,
"logps": -82.18423461914062,
"loss": 4.222,
"objective": 4.049466609954834,
"ranking_idealized": 0.512499988079071,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.49166667461395264,
"regularize": 4.049466609954834,
"step": 355
},
{
"dpo_loss": 2.207961082458496,
"epoch": 2.040623523854511,
"grad_norm": 991.7225128541588,
"learning_rate": 7.361355373863413e-07,
"logits": -1.2234774827957153,
"logps": -80.75540161132812,
"loss": 4.1947,
"objective": 4.063844680786133,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5375000238418579,
"regularize": 4.063844680786133,
"step": 360
},
{
"dpo_loss": 1.986746907234192,
"epoch": 2.0689655172413794,
"grad_norm": 1102.1948086047603,
"learning_rate": 7.273486101616056e-07,
"logits": -1.2934725284576416,
"logps": -83.87660217285156,
"loss": 4.2559,
"objective": 4.076398849487305,
"ranking_idealized": 0.5041666626930237,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5208333134651184,
"regularize": 4.076398849487305,
"step": 365
},
{
"dpo_loss": 1.9069340229034424,
"epoch": 2.0973075106282475,
"grad_norm": 1148.422426893069,
"learning_rate": 7.184722561846797e-07,
"logits": -1.2647373676300049,
"logps": -80.90840911865234,
"loss": 3.9351,
"objective": 3.6877379417419434,
"ranking_idealized": 0.4833333194255829,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.49166667461395264,
"regularize": 3.6877379417419434,
"step": 370
},
{
"dpo_loss": 1.905211329460144,
"epoch": 2.1256495040151155,
"grad_norm": 982.12339135048,
"learning_rate": 7.095099669372443e-07,
"logits": -1.2502344846725464,
"logps": -83.15164947509766,
"loss": 4.1889,
"objective": 3.8048832416534424,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.5874999761581421,
"ranking_simple": 0.6000000238418579,
"regularize": 3.8048832416534424,
"step": 375
},
{
"dpo_loss": 2.2980270385742188,
"epoch": 2.153991497401984,
"grad_norm": 1002.9053178903335,
"learning_rate": 7.004652677033068e-07,
"logits": -1.1937479972839355,
"logps": -82.11659240722656,
"loss": 4.0113,
"objective": 4.026199817657471,
"ranking_idealized": 0.5291666388511658,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5208333134651184,
"regularize": 4.026199817657471,
"step": 380
},
{
"dpo_loss": 2.430462121963501,
"epoch": 2.182333490788852,
"grad_norm": 1002.2263691968883,
"learning_rate": 6.913417161825449e-07,
"logits": -1.2855180501937866,
"logps": -84.00334930419922,
"loss": 4.1443,
"objective": 4.582634925842285,
"ranking_idealized": 0.5041666626930237,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5041666626930237,
"regularize": 4.582634925842285,
"step": 385
},
{
"dpo_loss": 2.034668445587158,
"epoch": 2.21067548417572,
"grad_norm": 1073.0050710345477,
"learning_rate": 6.821429010908971e-07,
"logits": -1.183647632598877,
"logps": -82.46015930175781,
"loss": 3.9544,
"objective": 3.7511041164398193,
"ranking_idealized": 0.4833333194255829,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.4791666567325592,
"regularize": 3.7511041164398193,
"step": 390
},
{
"dpo_loss": 2.1025197505950928,
"epoch": 2.2390174775625886,
"grad_norm": 928.4936074724073,
"learning_rate": 6.728724407489553e-07,
"logits": -1.176824688911438,
"logps": -82.696044921875,
"loss": 4.0116,
"objective": 4.03180456161499,
"ranking_idealized": 0.5208333134651184,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5249999761581421,
"regularize": 4.03180456161499,
"step": 395
},
{
"dpo_loss": 1.8622020483016968,
"epoch": 2.2673594709494567,
"grad_norm": 950.7422986439022,
"learning_rate": 6.635339816587108e-07,
"logits": -1.242794394493103,
"logps": -83.7727279663086,
"loss": 3.8148,
"objective": 3.835386037826538,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.574999988079071,
"ranking_simple": 0.574999988079071,
"regularize": 3.835386037826538,
"step": 400
},
{
"epoch": 2.2673594709494567,
"eval_dpo_loss": 2.9398233890533447,
"eval_logits": -1.2679872512817383,
"eval_logps": -88.25416564941406,
"eval_loss": 5.762566089630127,
"eval_objective": 5.820021152496338,
"eval_ranking_idealized": 0.5092975497245789,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5092975497245789,
"eval_regularize": 5.820021152496338,
"eval_runtime": 259.3166,
"eval_samples_per_second": 22.328,
"eval_steps_per_second": 0.933,
"step": 400
},
{
"dpo_loss": 1.8469058275222778,
"epoch": 2.295701464336325,
"grad_norm": 963.0280264659817,
"learning_rate": 6.541311970692162e-07,
"logits": -1.301633358001709,
"logps": -81.62474060058594,
"loss": 3.8783,
"objective": 3.7539381980895996,
"ranking_idealized": 0.5583333373069763,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.5708333253860474,
"regularize": 3.7539381980895996,
"step": 405
},
{
"dpo_loss": 1.6361950635910034,
"epoch": 2.324043457723193,
"grad_norm": 1074.5619431455382,
"learning_rate": 6.446677855317264e-07,
"logits": -1.2071573734283447,
"logps": -80.61900329589844,
"loss": 3.9829,
"objective": 3.7228012084960938,
"ranking_idealized": 0.4833333194255829,
"ranking_idealized_expo": 0.4749999940395355,
"ranking_simple": 0.47083333134651184,
"regularize": 3.7228012084960938,
"step": 410
},
{
"dpo_loss": 1.8135488033294678,
"epoch": 2.3523854511100613,
"grad_norm": 1058.2549464280587,
"learning_rate": 6.351474694448864e-07,
"logits": -1.2399203777313232,
"logps": -81.5149917602539,
"loss": 3.748,
"objective": 3.9710586071014404,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5333333611488342,
"regularize": 3.9710586071014404,
"step": 415
},
{
"dpo_loss": 2.120755434036255,
"epoch": 2.3807274444969297,
"grad_norm": 1114.426082281688,
"learning_rate": 6.255739935905395e-07,
"logits": -1.1594359874725342,
"logps": -83.35548400878906,
"loss": 3.7118,
"objective": 3.681281089782715,
"ranking_idealized": 0.4749999940395355,
"ranking_idealized_expo": 0.47083333134651184,
"ranking_simple": 0.4583333432674408,
"regularize": 3.681281089782715,
"step": 420
},
{
"dpo_loss": 1.7397000789642334,
"epoch": 2.409069437883798,
"grad_norm": 1024.8329703830655,
"learning_rate": 6.159511236607315e-07,
"logits": -1.2232387065887451,
"logps": -81.95706939697266,
"loss": 3.5923,
"objective": 3.648602247238159,
"ranking_idealized": 0.5,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5166666507720947,
"regularize": 3.648602247238159,
"step": 425
},
{
"dpo_loss": 1.804638385772705,
"epoch": 2.4374114312706663,
"grad_norm": 979.544871113056,
"learning_rate": 6.062826447764883e-07,
"logits": -1.2574443817138672,
"logps": -82.60694122314453,
"loss": 3.4778,
"objective": 3.4914093017578125,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5249999761581421,
"regularize": 3.4914093017578125,
"step": 430
},
{
"dpo_loss": 1.6648954153060913,
"epoch": 2.4657534246575343,
"grad_norm": 1000.8401446346558,
"learning_rate": 5.965723599989528e-07,
"logits": -1.300902009010315,
"logps": -82.3549575805664,
"loss": 3.5217,
"objective": 3.345120668411255,
"ranking_idealized": 0.5083333253860474,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5,
"regularize": 3.345120668411255,
"step": 435
},
{
"dpo_loss": 2.0332047939300537,
"epoch": 2.4940954180444024,
"grad_norm": 1031.1441408735554,
"learning_rate": 5.868240888334652e-07,
"logits": -1.2219815254211426,
"logps": -82.15070343017578,
"loss": 3.7915,
"objective": 3.9336984157562256,
"ranking_idealized": 0.5375000238418579,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5333333611488342,
"regularize": 3.9336984157562256,
"step": 440
},
{
"dpo_loss": 2.0011653900146484,
"epoch": 2.5224374114312704,
"grad_norm": 970.4125487560452,
"learning_rate": 5.770416657271728e-07,
"logits": -1.2273285388946533,
"logps": -80.1659164428711,
"loss": 3.4124,
"objective": 3.168755292892456,
"ranking_idealized": 0.5166666507720947,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5208333134651184,
"regularize": 3.168755292892456,
"step": 445
},
{
"dpo_loss": 1.613458275794983,
"epoch": 2.550779404818139,
"grad_norm": 1061.4659212212357,
"learning_rate": 5.67228938560766e-07,
"logits": -1.2801109552383423,
"logps": -81.43175506591797,
"loss": 3.4169,
"objective": 3.3297555446624756,
"ranking_idealized": 0.5,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.49166667461395264,
"regularize": 3.3297555446624756,
"step": 450
},
{
"epoch": 2.550779404818139,
"eval_dpo_loss": 3.138432502746582,
"eval_logits": -1.2897279262542725,
"eval_logps": -88.01155853271484,
"eval_loss": 5.953909397125244,
"eval_objective": 6.106462478637695,
"eval_ranking_idealized": 0.5092975497245789,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5144628286361694,
"eval_regularize": 6.106462478637695,
"eval_runtime": 259.8077,
"eval_samples_per_second": 22.286,
"eval_steps_per_second": 0.931,
"step": 450
},
{
"dpo_loss": 1.8731194734573364,
"epoch": 2.579121398205007,
"grad_norm": 1156.0005265988177,
"learning_rate": 5.573897671349268e-07,
"logits": -1.300366759300232,
"logps": -82.42855834960938,
"loss": 3.5224,
"objective": 3.593839168548584,
"ranking_idealized": 0.5041666626930237,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5041666626930237,
"regularize": 3.593839168548584,
"step": 455
},
{
"dpo_loss": 1.952242136001587,
"epoch": 2.6074633915918755,
"grad_norm": 1095.9802997680708,
"learning_rate": 5.475280216520912e-07,
"logits": -1.3715617656707764,
"logps": -82.51021575927734,
"loss": 3.3771,
"objective": 3.524890422821045,
"ranking_idealized": 0.512499988079071,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5208333134651184,
"regularize": 3.524890422821045,
"step": 460
},
{
"dpo_loss": 1.5585062503814697,
"epoch": 2.6358053849787435,
"grad_norm": 935.1251434271745,
"learning_rate": 5.376475811941191e-07,
"logits": -1.2529839277267456,
"logps": -82.1951904296875,
"loss": 3.4445,
"objective": 3.3441879749298096,
"ranking_idealized": 0.4958333373069763,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5,
"regularize": 3.3441879749298096,
"step": 465
},
{
"dpo_loss": 1.6525613069534302,
"epoch": 2.6641473783656116,
"grad_norm": 977.6740437303708,
"learning_rate": 5.277523321964701e-07,
"logits": -1.2638176679611206,
"logps": -81.61119079589844,
"loss": 3.3653,
"objective": 3.1554312705993652,
"ranking_idealized": 0.5541666746139526,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.5625,
"regularize": 3.1554312705993652,
"step": 470
},
{
"dpo_loss": 1.4508802890777588,
"epoch": 2.69248937175248,
"grad_norm": 993.1353802099517,
"learning_rate": 5.178461669194903e-07,
"logits": -1.228849172592163,
"logps": -81.23489379882812,
"loss": 2.9912,
"objective": 2.843146562576294,
"ranking_idealized": 0.4958333373069763,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5,
"regularize": 2.843146562576294,
"step": 475
},
{
"dpo_loss": 1.4309269189834595,
"epoch": 2.720831365139348,
"grad_norm": 1013.9109275016526,
"learning_rate": 5.07932981917404e-07,
"logits": -1.3067494630813599,
"logps": -80.33541107177734,
"loss": 3.1599,
"objective": 3.0493505001068115,
"ranking_idealized": 0.5,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.512499988079071,
"regularize": 3.0493505001068115,
"step": 480
},
{
"dpo_loss": 1.6128212213516235,
"epoch": 2.7491733585262166,
"grad_norm": 935.3777113198429,
"learning_rate": 4.980166765056193e-07,
"logits": -1.2488080263137817,
"logps": -82.94316101074219,
"loss": 3.2342,
"objective": 3.1210060119628906,
"ranking_idealized": 0.5249999761581421,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5166666507720947,
"regularize": 3.1210060119628906,
"step": 485
},
{
"dpo_loss": 1.2609761953353882,
"epoch": 2.7775153519130846,
"grad_norm": 1021.9995000614038,
"learning_rate": 4.881011512269463e-07,
"logits": -1.224744439125061,
"logps": -80.88065338134766,
"loss": 3.0434,
"objective": 2.964614152908325,
"ranking_idealized": 0.4749999940395355,
"ranking_idealized_expo": 0.47083333134651184,
"ranking_simple": 0.46666666865348816,
"regularize": 2.964614152908325,
"step": 490
},
{
"dpo_loss": 1.3648375272750854,
"epoch": 2.8058573452999527,
"grad_norm": 957.4547050967395,
"learning_rate": 4.78190306317332e-07,
"logits": -1.2405409812927246,
"logps": -79.93798065185547,
"loss": 2.9176,
"objective": 2.8268253803253174,
"ranking_idealized": 0.4791666567325592,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.47083333134651184,
"regularize": 2.8268253803253174,
"step": 495
},
{
"dpo_loss": 1.4600600004196167,
"epoch": 2.8341993386868207,
"grad_norm": 956.91251387349,
"learning_rate": 4.682880401717177e-07,
"logits": -1.2869919538497925,
"logps": -79.13894653320312,
"loss": 2.988,
"objective": 2.750535488128662,
"ranking_idealized": 0.5249999761581421,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5208333134651184,
"regularize": 2.750535488128662,
"step": 500
},
{
"epoch": 2.8341993386868207,
"eval_dpo_loss": 3.1318445205688477,
"eval_logits": -1.2855963706970215,
"eval_logps": -87.95059967041016,
"eval_loss": 5.985367298126221,
"eval_objective": 6.018334865570068,
"eval_ranking_idealized": 0.5092975497245789,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5092975497245789,
"eval_regularize": 6.018334865570068,
"eval_runtime": 259.5234,
"eval_samples_per_second": 22.31,
"eval_steps_per_second": 0.932,
"step": 500
},
{
"dpo_loss": 1.2522823810577393,
"epoch": 2.862541332073689,
"grad_norm": 990.3820386306984,
"learning_rate": 4.5839824781061886e-07,
"logits": -1.238278865814209,
"logps": -80.7296371459961,
"loss": 2.9937,
"objective": 2.6271092891693115,
"ranking_idealized": 0.5208333134651184,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.512499988079071,
"regularize": 2.6271092891693115,
"step": 505
},
{
"dpo_loss": 1.5270129442214966,
"epoch": 2.8908833254605573,
"grad_norm": 943.7310548857196,
"learning_rate": 4.4852481934803277e-07,
"logits": -1.1962122917175293,
"logps": -81.28018951416016,
"loss": 2.8508,
"objective": 3.192195177078247,
"ranking_idealized": 0.5291666388511658,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5416666865348816,
"regularize": 3.192195177078247,
"step": 510
},
{
"dpo_loss": 1.3985689878463745,
"epoch": 2.9192253188474258,
"grad_norm": 1066.3526544144515,
"learning_rate": 4.3867163846127674e-07,
"logits": -1.3405725955963135,
"logps": -81.18116760253906,
"loss": 2.8001,
"objective": 2.7043962478637695,
"ranking_idealized": 0.4791666567325592,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.4791666567325592,
"regularize": 2.7043962478637695,
"step": 515
},
{
"dpo_loss": 1.549082636833191,
"epoch": 2.947567312234294,
"grad_norm": 976.1195004276424,
"learning_rate": 4.2884258086335745e-07,
"logits": -1.2520852088928223,
"logps": -83.24747467041016,
"loss": 2.7691,
"objective": 2.9670374393463135,
"ranking_idealized": 0.512499988079071,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.512499988079071,
"regularize": 2.9670374393463135,
"step": 520
},
{
"dpo_loss": 1.1514889001846313,
"epoch": 2.975909305621162,
"grad_norm": 993.1767181389736,
"learning_rate": 4.1904151277847305e-07,
"logits": -1.2369369268417358,
"logps": -82.1258316040039,
"loss": 2.7535,
"objective": 2.7600042819976807,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.574999988079071,
"regularize": 2.7600042819976807,
"step": 525
},
{
"dpo_loss": 1.234760046005249,
"epoch": 3.0042512990080303,
"grad_norm": 954.3168391295485,
"learning_rate": 4.092722894212487e-07,
"logits": -1.2384470701217651,
"logps": -81.53229522705078,
"loss": 2.772,
"objective": 2.536973237991333,
"ranking_idealized": 0.512499988079071,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5166666507720947,
"regularize": 2.536973237991333,
"step": 530
},
{
"dpo_loss": 1.2055299282073975,
"epoch": 3.0325932923948984,
"grad_norm": 1192.8110305376617,
"learning_rate": 3.995387534803005e-07,
"logits": -1.2691408395767212,
"logps": -82.96357727050781,
"loss": 2.6555,
"objective": 2.663506031036377,
"ranking_idealized": 0.5458333492279053,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.550000011920929,
"regularize": 2.663506031036377,
"step": 535
},
{
"dpo_loss": 1.3903727531433105,
"epoch": 3.0609352857817664,
"grad_norm": 949.2725322799425,
"learning_rate": 3.8984473360672967e-07,
"logits": -1.3405746221542358,
"logps": -81.4103012084961,
"loss": 2.5599,
"objective": 2.611825704574585,
"ranking_idealized": 0.5375000238418579,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5291666388511658,
"regularize": 2.611825704574585,
"step": 540
},
{
"dpo_loss": 1.3084332942962646,
"epoch": 3.089277279168635,
"grad_norm": 999.2271817343484,
"learning_rate": 3.801940429081345e-07,
"logits": -1.2964000701904297,
"logps": -82.18136596679688,
"loss": 2.504,
"objective": 2.4276323318481445,
"ranking_idealized": 0.49166667461395264,
"ranking_idealized_expo": 0.49166667461395264,
"ranking_simple": 0.4833333194255829,
"regularize": 2.4276323318481445,
"step": 545
},
{
"dpo_loss": 1.0889129638671875,
"epoch": 3.117619272555503,
"grad_norm": 934.3758046690474,
"learning_rate": 3.7059047744873955e-07,
"logits": -1.1509153842926025,
"logps": -82.78107452392578,
"loss": 2.4859,
"objective": 2.5204343795776367,
"ranking_idealized": 0.5458333492279053,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5458333492279053,
"regularize": 2.5204343795776367,
"step": 550
},
{
"epoch": 3.117619272555503,
"eval_dpo_loss": 3.178955316543579,
"eval_logits": -1.2804911136627197,
"eval_logps": -88.50298309326172,
"eval_loss": 6.194624423980713,
"eval_objective": 6.202889919281006,
"eval_ranking_idealized": 0.5092975497245789,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5103305578231812,
"eval_regularize": 6.202889919281006,
"eval_runtime": 260.2352,
"eval_samples_per_second": 22.249,
"eval_steps_per_second": 0.93,
"step": 550
},
{
"dpo_loss": 1.148443579673767,
"epoch": 3.1459612659423715,
"grad_norm": 977.893659928589,
"learning_rate": 3.6103781475622786e-07,
"logits": -1.2158228158950806,
"logps": -83.3880844116211,
"loss": 2.4258,
"objective": 2.2721104621887207,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.5458333492279053,
"regularize": 2.2721104621887207,
"step": 555
},
{
"dpo_loss": 1.2861641645431519,
"epoch": 3.1743032593292395,
"grad_norm": 912.4395782657139,
"learning_rate": 3.5153981233586274e-07,
"logits": -1.2103866338729858,
"logps": -80.51148223876953,
"loss": 2.4037,
"objective": 2.3816921710968018,
"ranking_idealized": 0.5041666626930237,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5041666626930237,
"regularize": 2.3816921710968018,
"step": 560
},
{
"dpo_loss": 1.0393404960632324,
"epoch": 3.2026452527161076,
"grad_norm": 959.0200932296882,
"learning_rate": 3.421002061924876e-07,
"logits": -1.262898325920105,
"logps": -82.25037384033203,
"loss": 2.3058,
"objective": 2.267256259918213,
"ranking_idealized": 0.5458333492279053,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5583333373069763,
"regularize": 2.267256259918213,
"step": 565
},
{
"dpo_loss": 1.2455191612243652,
"epoch": 3.230987246102976,
"grad_norm": 937.4751934096442,
"learning_rate": 3.327227093609824e-07,
"logits": -1.1880606412887573,
"logps": -81.5679702758789,
"loss": 2.2052,
"objective": 2.3692660331726074,
"ranking_idealized": 0.4791666567325592,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.4833333194255829,
"regularize": 2.3692660331726074,
"step": 570
},
{
"dpo_loss": 1.228649616241455,
"epoch": 3.259329239489844,
"grad_norm": 996.5558020139559,
"learning_rate": 3.234110104457536e-07,
"logits": -1.2556049823760986,
"logps": -81.8873519897461,
"loss": 2.2515,
"objective": 2.29750657081604,
"ranking_idealized": 0.5,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.49166667461395264,
"regularize": 2.29750657081604,
"step": 575
},
{
"dpo_loss": 1.3365857601165771,
"epoch": 3.287671232876712,
"grad_norm": 996.2512645672206,
"learning_rate": 3.141687721698363e-07,
"logits": -1.2736408710479736,
"logps": -82.88424682617188,
"loss": 2.3155,
"objective": 2.366077423095703,
"ranking_idealized": 0.5541666746139526,
"ranking_idealized_expo": 0.5541666746139526,
"ranking_simple": 0.5458333492279053,
"regularize": 2.366077423095703,
"step": 580
},
{
"dpo_loss": 1.1693744659423828,
"epoch": 3.3160132262635806,
"grad_norm": 990.5330100186713,
"learning_rate": 3.049996299341742e-07,
"logits": -1.289427638053894,
"logps": -83.39188385009766,
"loss": 2.1009,
"objective": 2.1158106327056885,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5249999761581421,
"regularize": 2.1158106327056885,
"step": 585
},
{
"dpo_loss": 1.1631190776824951,
"epoch": 3.3443552196504487,
"grad_norm": 986.1485755213372,
"learning_rate": 2.959071903876486e-07,
"logits": -1.3035333156585693,
"logps": -83.53241729736328,
"loss": 2.1276,
"objective": 2.4290575981140137,
"ranking_idealized": 0.49166667461395264,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.5083333253860474,
"regularize": 2.4290575981140137,
"step": 590
},
{
"dpo_loss": 1.3233096599578857,
"epoch": 3.372697213037317,
"grad_norm": 930.1393629687922,
"learning_rate": 2.86895030008416e-07,
"logits": -1.2197285890579224,
"logps": -82.90579223632812,
"loss": 2.1207,
"objective": 2.1894338130950928,
"ranking_idealized": 0.5249999761581421,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5291666388511658,
"regularize": 2.1894338130950928,
"step": 595
},
{
"dpo_loss": 1.007699728012085,
"epoch": 3.4010392064241852,
"grad_norm": 935.3828921290786,
"learning_rate": 2.779666936971129e-07,
"logits": -1.230286717414856,
"logps": -81.8963851928711,
"loss": 2.0539,
"objective": 2.0761570930480957,
"ranking_idealized": 0.5041666626930237,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.4958333373069763,
"regularize": 2.0761570930480957,
"step": 600
},
{
"epoch": 3.4010392064241852,
"eval_dpo_loss": 3.1110734939575195,
"eval_logits": -1.2650625705718994,
"eval_logps": -88.16163635253906,
"eval_loss": 5.933178901672363,
"eval_objective": 6.031818866729736,
"eval_ranking_idealized": 0.5092975497245789,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5113636255264282,
"eval_regularize": 6.031818866729736,
"eval_runtime": 258.4717,
"eval_samples_per_second": 22.401,
"eval_steps_per_second": 0.936,
"step": 600
},
{
"dpo_loss": 1.11422860622406,
"epoch": 3.4293811998110533,
"grad_norm": 962.8376859443554,
"learning_rate": 2.6912569338248315e-07,
"logits": -1.2667381763458252,
"logps": -83.77916717529297,
"loss": 2.0057,
"objective": 1.9590004682540894,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5333333611488342,
"regularize": 1.9590004682540894,
"step": 605
},
{
"dpo_loss": 1.1715530157089233,
"epoch": 3.4577231931979218,
"grad_norm": 934.6534827687339,
"learning_rate": 2.603755066399718e-07,
"logits": -1.2390044927597046,
"logps": -82.3260269165039,
"loss": 2.1286,
"objective": 2.050309658050537,
"ranking_idealized": 0.5208333134651184,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.512499988079071,
"regularize": 2.050309658050537,
"step": 610
},
{
"dpo_loss": 1.1228054761886597,
"epoch": 3.48606518658479,
"grad_norm": 924.4634252845681,
"learning_rate": 2.517195753238345e-07,
"logits": -1.2509876489639282,
"logps": -82.427001953125,
"loss": 1.9336,
"objective": 1.9248236417770386,
"ranking_idealized": 0.47083333134651184,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.4625000059604645,
"regularize": 1.9248236417770386,
"step": 615
},
{
"dpo_loss": 1.0055824518203735,
"epoch": 3.514407179971658,
"grad_norm": 996.235630609532,
"learning_rate": 2.4316130421329696e-07,
"logits": -1.1457880735397339,
"logps": -81.78346252441406,
"loss": 1.8569,
"objective": 1.8841525316238403,
"ranking_idealized": 0.4958333373069763,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.49166667461395264,
"regularize": 1.8841525316238403,
"step": 620
},
{
"dpo_loss": 1.0400264263153076,
"epoch": 3.5427491733585263,
"grad_norm": 988.2054187208093,
"learning_rate": 2.3470405967329604e-07,
"logits": -1.202520489692688,
"logps": -81.60346984863281,
"loss": 1.8727,
"objective": 2.022357702255249,
"ranking_idealized": 0.5249999761581421,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5166666507720947,
"regularize": 2.022357702255249,
"step": 625
},
{
"dpo_loss": 0.9165257215499878,
"epoch": 3.5710911667453944,
"grad_norm": 970.3302350643374,
"learning_rate": 2.2635116833033392e-07,
"logits": -1.2847248315811157,
"logps": -81.50933074951172,
"loss": 1.8387,
"objective": 1.6823768615722656,
"ranking_idealized": 0.512499988079071,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5291666388511658,
"regularize": 1.6823768615722656,
"step": 630
},
{
"dpo_loss": 0.8153350949287415,
"epoch": 3.5994331601322624,
"grad_norm": 964.0631369889124,
"learning_rate": 2.181059157639598e-07,
"logits": -1.2249428033828735,
"logps": -82.01227569580078,
"loss": 1.7136,
"objective": 1.8080626726150513,
"ranking_idealized": 0.48750001192092896,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.48750001192092896,
"regularize": 1.8080624341964722,
"step": 635
},
{
"dpo_loss": 0.9864614605903625,
"epoch": 3.627775153519131,
"grad_norm": 1000.0610851173434,
"learning_rate": 2.0997154521440097e-07,
"logits": -1.1798667907714844,
"logps": -82.78071594238281,
"loss": 1.7312,
"objective": 1.868208646774292,
"ranking_idealized": 0.4625000059604645,
"ranking_idealized_expo": 0.4625000059604645,
"ranking_simple": 0.46666666865348816,
"regularize": 1.868208646774292,
"step": 640
},
{
"dpo_loss": 1.0488131046295166,
"epoch": 3.656117146905999,
"grad_norm": 1049.7832510532623,
"learning_rate": 2.0195125630684428e-07,
"logits": -1.2025647163391113,
"logps": -81.37299346923828,
"loss": 1.7885,
"objective": 1.6037089824676514,
"ranking_idealized": 0.5249999761581421,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5249999761581421,
"regularize": 1.6037089824676514,
"step": 645
},
{
"dpo_loss": 1.022878646850586,
"epoch": 3.6844591402928675,
"grad_norm": 919.1271980482485,
"learning_rate": 1.9404820379287672e-07,
"logits": -1.1927168369293213,
"logps": -82.80690002441406,
"loss": 1.664,
"objective": 1.7033976316452026,
"ranking_idealized": 0.5041666626930237,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5041666626930237,
"regularize": 1.7033976316452026,
"step": 650
},
{
"epoch": 3.6844591402928675,
"eval_dpo_loss": 3.096806764602661,
"eval_logits": -1.2607561349868774,
"eval_logps": -88.69917297363281,
"eval_loss": 5.923920631408691,
"eval_objective": 5.985104084014893,
"eval_ranking_idealized": 0.5092975497245789,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5113636255264282,
"eval_regularize": 5.985104084014893,
"eval_runtime": 258.729,
"eval_samples_per_second": 22.379,
"eval_steps_per_second": 0.935,
"step": 650
},
{
"dpo_loss": 0.9369535446166992,
"epoch": 3.7128011336797355,
"grad_norm": 961.5116821455488,
"learning_rate": 1.8626549630957395e-07,
"logits": -1.2329158782958984,
"logps": -81.57703399658203,
"loss": 1.6411,
"objective": 1.523728847503662,
"ranking_idealized": 0.5208333134651184,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5208333134651184,
"regularize": 1.523728847503662,
"step": 655
},
{
"dpo_loss": 0.8511099219322205,
"epoch": 3.7411431270666036,
"grad_norm": 1089.6596694384646,
"learning_rate": 1.7860619515673032e-07,
"logits": -1.3094429969787598,
"logps": -82.70073699951172,
"loss": 1.6123,
"objective": 1.475422739982605,
"ranking_idealized": 0.5541666746139526,
"ranking_idealized_expo": 0.5541666746139526,
"ranking_simple": 0.5541666746139526,
"regularize": 1.475422739982605,
"step": 660
},
{
"dpo_loss": 0.9054993987083435,
"epoch": 3.769485120453472,
"grad_norm": 1093.6896679613712,
"learning_rate": 1.7107331309270684e-07,
"logits": -1.2157032489776611,
"logps": -82.55271911621094,
"loss": 1.561,
"objective": 1.6129040718078613,
"ranking_idealized": 0.5166666507720947,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.512499988079071,
"regularize": 1.6129040718078613,
"step": 665
},
{
"dpo_loss": 0.8416043519973755,
"epoch": 3.79782711384034,
"grad_norm": 1024.9155399545134,
"learning_rate": 1.6366981314937372e-07,
"logits": -1.3233702182769775,
"logps": -82.39900970458984,
"loss": 1.5252,
"objective": 1.4049646854400635,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.5916666388511658,
"ranking_simple": 0.6000000238418579,
"regularize": 1.4049646854400635,
"step": 670
},
{
"dpo_loss": 0.8841171264648438,
"epoch": 3.826169107227208,
"grad_norm": 971.974542125377,
"learning_rate": 1.5639860746661338e-07,
"logits": -1.288584589958191,
"logps": -81.51100158691406,
"loss": 1.5256,
"objective": 1.549899935722351,
"ranking_idealized": 0.5458333492279053,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5458333492279053,
"regularize": 1.549899935722351,
"step": 675
},
{
"dpo_loss": 0.9599818587303162,
"epoch": 3.8545111006140766,
"grad_norm": 930.812509420293,
"learning_rate": 1.492625561468393e-07,
"logits": -1.2116204500198364,
"logps": -81.91743469238281,
"loss": 1.4834,
"objective": 1.6812348365783691,
"ranking_idealized": 0.5083333253860474,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5083333253860474,
"regularize": 1.6812348365783691,
"step": 680
},
{
"dpo_loss": 0.9112765192985535,
"epoch": 3.8828530940009447,
"grad_norm": 1163.329406971879,
"learning_rate": 1.4226446612998671e-07,
"logits": -1.273834228515625,
"logps": -81.50404357910156,
"loss": 1.4897,
"objective": 1.4795509576797485,
"ranking_idealized": 0.5166666507720947,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.512499988079071,
"regularize": 1.4795509576797485,
"step": 685
},
{
"dpo_loss": 0.8565966486930847,
"epoch": 3.9111950873878127,
"grad_norm": 938.1098675569013,
"learning_rate": 1.3540709008941147e-07,
"logits": -1.2776970863342285,
"logps": -81.18965911865234,
"loss": 1.4652,
"objective": 1.3701138496398926,
"ranking_idealized": 0.5458333492279053,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.550000011920929,
"regularize": 1.3701138496398926,
"step": 690
},
{
"dpo_loss": 1.0229411125183105,
"epoch": 3.9395370807746812,
"grad_norm": 1013.5958054606544,
"learning_rate": 1.2869312534913685e-07,
"logits": -1.191641092300415,
"logps": -82.60445404052734,
"loss": 1.3376,
"objective": 1.4829381704330444,
"ranking_idealized": 0.46666666865348816,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.4625000059604645,
"regularize": 1.4829381704330444,
"step": 695
},
{
"dpo_loss": 0.8406078815460205,
"epoch": 3.9678790741615493,
"grad_norm": 1063.5432850972618,
"learning_rate": 1.2212521282287093e-07,
"logits": -1.2275745868682861,
"logps": -82.30086517333984,
"loss": 1.3502,
"objective": 1.3136423826217651,
"ranking_idealized": 0.48750001192092896,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.48750001192092896,
"regularize": 1.3136423826217651,
"step": 700
},
{
"epoch": 3.9678790741615493,
"eval_dpo_loss": 3.089524269104004,
"eval_logits": -1.2646851539611816,
"eval_logps": -88.52361297607422,
"eval_loss": 5.917611122131348,
"eval_objective": 5.957097053527832,
"eval_ranking_idealized": 0.5092975497245789,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5134297609329224,
"eval_regularize": 5.957097053527832,
"eval_runtime": 258.9235,
"eval_samples_per_second": 22.362,
"eval_steps_per_second": 0.935,
"step": 700
},
{
"dpo_loss": 0.8173587918281555,
"epoch": 3.9962210675484178,
"grad_norm": 985.2767504669318,
"learning_rate": 1.15705935975212e-07,
"logits": -1.1528183221817017,
"logps": -80.9789810180664,
"loss": 1.3032,
"objective": 1.2147972583770752,
"ranking_idealized": 0.5291666388511658,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5291666388511658,
"regularize": 1.2147972583770752,
"step": 705
},
{
"dpo_loss": 0.7131792902946472,
"epoch": 4.024563060935286,
"grad_norm": 1011.8698807645903,
"learning_rate": 1.094378198054533e-07,
"logits": -1.2421104907989502,
"logps": -82.51912689208984,
"loss": 1.1035,
"objective": 1.1794158220291138,
"ranking_idealized": 0.4791666567325592,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.4833333194255829,
"regularize": 1.1794158220291138,
"step": 710
},
{
"dpo_loss": 0.7108194231987,
"epoch": 4.052905054322154,
"grad_norm": 940.6537803599663,
"learning_rate": 1.0332332985438247e-07,
"logits": -1.1998772621154785,
"logps": -81.95320892333984,
"loss": 1.0461,
"objective": 0.9881690740585327,
"ranking_idealized": 0.5375000238418579,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5333333611488342,
"regularize": 0.9881690740585327,
"step": 715
},
{
"dpo_loss": 0.6712031960487366,
"epoch": 4.081247047709022,
"grad_norm": 960.2681465991928,
"learning_rate": 9.736487123447068e-08,
"logits": -1.15168035030365,
"logps": -83.81200408935547,
"loss": 1.0468,
"objective": 1.1416888236999512,
"ranking_idealized": 0.5208333134651184,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5249999761581421,
"regularize": 1.1416888236999512,
"step": 720
},
{
"dpo_loss": 0.6989570260047913,
"epoch": 4.109589041095891,
"grad_norm": 935.0432434834537,
"learning_rate": 9.156478768383058e-08,
"logits": -1.2237892150878906,
"logps": -81.69219207763672,
"loss": 1.074,
"objective": 0.9603613615036011,
"ranking_idealized": 0.5291666388511658,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5291666388511658,
"regularize": 0.9603613615036011,
"step": 725
},
{
"dpo_loss": 0.7995573878288269,
"epoch": 4.137931034482759,
"grad_norm": 980.3733885345745,
"learning_rate": 8.592536064431466e-08,
"logits": -1.2934256792068481,
"logps": -81.94860076904297,
"loss": 1.0596,
"objective": 1.06425142288208,
"ranking_idealized": 0.5375000238418579,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5375000238418579,
"regularize": 1.06425142288208,
"step": 730
},
{
"dpo_loss": 0.7126405835151672,
"epoch": 4.166273027869627,
"grad_norm": 1007.1386546772866,
"learning_rate": 8.044880836411888e-08,
"logits": -1.1827558279037476,
"logps": -82.89656066894531,
"loss": 1.0028,
"objective": 0.9806023240089417,
"ranking_idealized": 0.5249999761581421,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5208333134651184,
"regularize": 0.9806023240089417,
"step": 735
},
{
"dpo_loss": 0.7121079564094543,
"epoch": 4.194615021256495,
"grad_norm": 940.1459069064864,
"learning_rate": 7.513728502524286e-08,
"logits": -1.2114641666412354,
"logps": -82.5162582397461,
"loss": 1.0102,
"objective": 0.9698477387428284,
"ranking_idealized": 0.5208333134651184,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5208333134651184,
"regularize": 0.9698477387428284,
"step": 740
},
{
"dpo_loss": 0.7223809957504272,
"epoch": 4.222957014643363,
"grad_norm": 956.701027117306,
"learning_rate": 6.999287989614971e-08,
"logits": -1.274079442024231,
"logps": -80.06112670898438,
"loss": 0.9886,
"objective": 1.010962724685669,
"ranking_idealized": 0.48750001192092896,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.4958333373069763,
"regularize": 1.010962724685669,
"step": 745
},
{
"dpo_loss": 0.6834555268287659,
"epoch": 4.251299008030231,
"grad_norm": 994.5271297786792,
"learning_rate": 6.501761650996052e-08,
"logits": -1.3588440418243408,
"logps": -82.0845947265625,
"loss": 1.0052,
"objective": 1.0897630453109741,
"ranking_idealized": 0.5249999761581421,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5291666388511658,
"regularize": 1.0897630453109741,
"step": 750
},
{
"epoch": 4.251299008030231,
"eval_dpo_loss": 3.103642702102661,
"eval_logits": -1.2629750967025757,
"eval_logps": -88.36180877685547,
"eval_loss": 5.9642486572265625,
"eval_objective": 6.00606632232666,
"eval_ranking_idealized": 0.5092975497245789,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5134297609329224,
"eval_regularize": 6.00606632232666,
"eval_runtime": 258.8045,
"eval_samples_per_second": 22.372,
"eval_steps_per_second": 0.935,
"step": 750
},
{
"dpo_loss": 0.6240705251693726,
"epoch": 4.2796410014171,
"grad_norm": 928.8030739675138,
"learning_rate": 6.021345186850418e-08,
"logits": -1.2852017879486084,
"logps": -82.10335540771484,
"loss": 0.972,
"objective": 1.01486337184906,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5333333611488342,
"regularize": 1.01486337184906,
"step": 755
},
{
"dpo_loss": 0.7065821886062622,
"epoch": 4.307982994803968,
"grad_norm": 929.3861266719559,
"learning_rate": 5.5582275672538316e-08,
"logits": -1.2621345520019531,
"logps": -82.05753326416016,
"loss": 0.8992,
"objective": 0.8614501953125,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.5625,
"ranking_simple": 0.5666666626930237,
"regularize": 0.8614501953125,
"step": 760
},
{
"dpo_loss": 0.6562435030937195,
"epoch": 4.336324988190836,
"grad_norm": 977.0057816667661,
"learning_rate": 5.112590957844232e-08,
"logits": -1.2853424549102783,
"logps": -82.94247436523438,
"loss": 0.9593,
"objective": 0.8854343295097351,
"ranking_idealized": 0.5291666388511658,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5333333611488342,
"regularize": 0.8854343295097351,
"step": 765
},
{
"dpo_loss": 0.7083169221878052,
"epoch": 4.364666981577704,
"grad_norm": 932.8658870761088,
"learning_rate": 4.684610648167503e-08,
"logits": -1.1741254329681396,
"logps": -81.14856719970703,
"loss": 0.9364,
"objective": 0.9174091815948486,
"ranking_idealized": 0.5083333253860474,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5083333253860474,
"regularize": 0.9174091815948486,
"step": 770
},
{
"dpo_loss": 0.6173561811447144,
"epoch": 4.393008974964572,
"grad_norm": 973.1322872774431,
"learning_rate": 4.274454982728032e-08,
"logits": -1.2410908937454224,
"logps": -81.94947052001953,
"loss": 0.8846,
"objective": 0.7543167471885681,
"ranking_idealized": 0.5083333253860474,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5083333253860474,
"regularize": 0.7543167471885681,
"step": 775
},
{
"dpo_loss": 0.6880941390991211,
"epoch": 4.42135096835144,
"grad_norm": 934.0673376048558,
"learning_rate": 3.882285294770937e-08,
"logits": -1.2685768604278564,
"logps": -80.74879455566406,
"loss": 0.8891,
"objective": 0.9145664572715759,
"ranking_idealized": 0.5041666626930237,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5041666626930237,
"regularize": 0.9145664572715759,
"step": 780
},
{
"dpo_loss": 0.6857554316520691,
"epoch": 4.449692961738309,
"grad_norm": 906.6964623786657,
"learning_rate": 3.508255842822255e-08,
"logits": -1.286802887916565,
"logps": -80.75093841552734,
"loss": 0.8708,
"objective": 0.865050196647644,
"ranking_idealized": 0.5291666388511658,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5249999761581421,
"regularize": 0.865050196647644,
"step": 785
},
{
"dpo_loss": 0.6727093458175659,
"epoch": 4.478034955125177,
"grad_norm": 954.6542045385403,
"learning_rate": 3.15251375001192e-08,
"logits": -1.248421549797058,
"logps": -83.45836639404297,
"loss": 0.8584,
"objective": 0.825614869594574,
"ranking_idealized": 0.5583333373069763,
"ranking_idealized_expo": 0.5541666746139526,
"ranking_simple": 0.5541666746139526,
"regularize": 0.825614869594574,
"step": 790
},
{
"dpo_loss": 0.7268415689468384,
"epoch": 4.506376948512045,
"grad_norm": 940.0246507195186,
"learning_rate": 2.8151989462033787e-08,
"logits": -1.1517890691757202,
"logps": -82.83182525634766,
"loss": 0.8256,
"objective": 0.8579443693161011,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.550000011920929,
"regularize": 0.8579443693161011,
"step": 795
},
{
"dpo_loss": 0.6633224487304688,
"epoch": 4.534718941898913,
"grad_norm": 974.8383451116964,
"learning_rate": 2.4964441129527335e-08,
"logits": -1.148630976676941,
"logps": -82.155029296875,
"loss": 0.8548,
"objective": 0.8071673512458801,
"ranking_idealized": 0.512499988079071,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.512499988079071,
"regularize": 0.8071673512458801,
"step": 800
},
{
"epoch": 4.534718941898913,
"eval_dpo_loss": 3.0852909088134766,
"eval_logits": -1.2662479877471924,
"eval_logps": -88.35336303710938,
"eval_loss": 5.92377233505249,
"eval_objective": 5.97105073928833,
"eval_ranking_idealized": 0.5092975497245789,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5134297609329224,
"eval_regularize": 5.97105073928833,
"eval_runtime": 258.7722,
"eval_samples_per_second": 22.375,
"eval_steps_per_second": 0.935,
"step": 800
},
{
"dpo_loss": 0.6257317662239075,
"epoch": 4.563060935285781,
"grad_norm": 904.8852856060951,
"learning_rate": 2.1963746313188757e-08,
"logits": -1.2208141088485718,
"logps": -81.56495666503906,
"loss": 0.8065,
"objective": 0.8274087905883789,
"ranking_idealized": 0.46666666865348816,
"ranking_idealized_expo": 0.4583333432674408,
"ranking_simple": 0.46666666865348816,
"regularize": 0.8274087905883789,
"step": 805
},
{
"dpo_loss": 0.6851524710655212,
"epoch": 4.59140292867265,
"grad_norm": 950.7745711731907,
"learning_rate": 1.915108532545351e-08,
"logits": -1.284138798713684,
"logps": -80.96287536621094,
"loss": 0.7829,
"objective": 0.8247645497322083,
"ranking_idealized": 0.4583333432674408,
"ranking_idealized_expo": 0.4583333432674408,
"ranking_simple": 0.4625000059604645,
"regularize": 0.8247645497322083,
"step": 810
},
{
"dpo_loss": 0.6184301972389221,
"epoch": 4.619744922059518,
"grad_norm": 910.9864796470945,
"learning_rate": 1.6527564516331638e-08,
"logits": -1.1581400632858276,
"logps": -82.53880310058594,
"loss": 0.7758,
"objective": 0.8514427542686462,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.5874999761581421,
"ranking_simple": 0.5916666388511658,
"regularize": 0.8514427542686462,
"step": 815
},
{
"dpo_loss": 0.7527021765708923,
"epoch": 4.648086915446386,
"grad_norm": 990.2454056593273,
"learning_rate": 1.4094215838229172e-08,
"logits": -1.2537620067596436,
"logps": -82.07845306396484,
"loss": 0.8496,
"objective": 0.8979706168174744,
"ranking_idealized": 0.5625,
"ranking_idealized_expo": 0.5625,
"ranking_simple": 0.5583333373069763,
"regularize": 0.8979706168174744,
"step": 820
},
{
"dpo_loss": 0.6094750165939331,
"epoch": 4.6764289088332545,
"grad_norm": 933.278247470779,
"learning_rate": 1.1851996440033318e-08,
"logits": -1.1612147092819214,
"logps": -80.19402313232422,
"loss": 0.7784,
"objective": 0.6954202651977539,
"ranking_idealized": 0.5,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5,
"regularize": 0.6954202651977539,
"step": 825
},
{
"dpo_loss": 0.7630098462104797,
"epoch": 4.7047709022201225,
"grad_norm": 1034.6630104905564,
"learning_rate": 9.801788290621505e-09,
"logits": -1.3119471073150635,
"logps": -82.28199768066406,
"loss": 0.8393,
"objective": 0.9094979166984558,
"ranking_idealized": 0.5166666507720947,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5208333134651184,
"regularize": 0.9094979166984558,
"step": 830
},
{
"dpo_loss": 0.6389487385749817,
"epoch": 4.733112895606991,
"grad_norm": 919.014869305993,
"learning_rate": 7.944397831941951e-09,
"logits": -1.2169098854064941,
"logps": -81.839111328125,
"loss": 0.7883,
"objective": 0.7863165140151978,
"ranking_idealized": 0.49166667461395264,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.4958333373069763,
"regularize": 0.7863165140151978,
"step": 835
},
{
"dpo_loss": 0.6490210890769958,
"epoch": 4.7614548889938595,
"grad_norm": 984.0918792700685,
"learning_rate": 6.280555661802856e-09,
"logits": -1.2279409170150757,
"logps": -82.31600189208984,
"loss": 0.7359,
"objective": 0.7738173007965088,
"ranking_idealized": 0.5541666746139526,
"ranking_idealized_expo": 0.5541666746139526,
"ranking_simple": 0.5541666746139526,
"regularize": 0.7738173007965088,
"step": 840
},
{
"dpo_loss": 0.6469201445579529,
"epoch": 4.7897968823807275,
"grad_norm": 943.5599388069303,
"learning_rate": 4.810916246494157e-09,
"logits": -1.2585629224777222,
"logps": -82.3524398803711,
"loss": 0.7615,
"objective": 0.7981647849082947,
"ranking_idealized": 0.4583333432674408,
"ranking_idealized_expo": 0.4541666805744171,
"ranking_simple": 0.4625000059604645,
"regularize": 0.7981647849082947,
"step": 845
},
{
"dpo_loss": 0.6333919167518616,
"epoch": 4.818138875767596,
"grad_norm": 953.2591490386869,
"learning_rate": 3.5360576633558513e-09,
"logits": -1.2325036525726318,
"logps": -81.02188873291016,
"loss": 0.7765,
"objective": 0.7740827798843384,
"ranking_idealized": 0.5291666388511658,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5291666388511658,
"regularize": 0.7740827798843384,
"step": 850
},
{
"epoch": 4.818138875767596,
"eval_dpo_loss": 3.091606378555298,
"eval_logits": -1.266028642654419,
"eval_logps": -88.38737487792969,
"eval_loss": 5.932301044464111,
"eval_objective": 5.976984977722168,
"eval_ranking_idealized": 0.5092975497245789,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5134297609329224,
"eval_regularize": 5.976984977722168,
"eval_runtime": 259.1307,
"eval_samples_per_second": 22.344,
"eval_steps_per_second": 0.934,
"step": 850
},
{
"dpo_loss": 0.7061713337898254,
"epoch": 4.846480869154464,
"grad_norm": 938.2798417262723,
"learning_rate": 2.4564813733932155e-09,
"logits": -1.2008044719696045,
"logps": -81.2861328125,
"loss": 0.7477,
"objective": 0.7695434093475342,
"ranking_idealized": 0.5375000238418579,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5375000238418579,
"regularize": 0.7695434093475342,
"step": 855
},
{
"dpo_loss": 0.6985270380973816,
"epoch": 4.874822862541333,
"grad_norm": 978.3840453547333,
"learning_rate": 1.5726120240288631e-09,
"logits": -1.1891288757324219,
"logps": -81.2249984741211,
"loss": 0.787,
"objective": 0.8799866437911987,
"ranking_idealized": 0.5541666746139526,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.550000011920929,
"regularize": 0.8799866437911987,
"step": 860
},
{
"dpo_loss": 0.6121171116828918,
"epoch": 4.903164855928201,
"grad_norm": 903.3650703276869,
"learning_rate": 8.847972820693051e-10,
"logits": -1.255650281906128,
"logps": -80.4308090209961,
"loss": 0.6987,
"objective": 0.6325153708457947,
"ranking_idealized": 0.4958333373069763,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.4958333373069763,
"regularize": 0.6325153708457947,
"step": 865
},
{
"dpo_loss": 0.6306089162826538,
"epoch": 4.931506849315069,
"grad_norm": 920.898527815901,
"learning_rate": 3.933076969516724e-10,
"logits": -1.288960576057434,
"logps": -81.9923095703125,
"loss": 0.6978,
"objective": 0.6587303876876831,
"ranking_idealized": 0.512499988079071,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.512499988079071,
"regularize": 0.6587303876876831,
"step": 870
},
{
"dpo_loss": 0.72224360704422,
"epoch": 4.959848842701937,
"grad_norm": 973.4560843637579,
"learning_rate": 9.833659432367803e-11,
"logits": -1.2122024297714233,
"logps": -81.87159729003906,
"loss": 0.7712,
"objective": 0.9232720136642456,
"ranking_idealized": 0.5458333492279053,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.550000011920929,
"regularize": 0.9232720136642456,
"step": 875
},
{
"dpo_loss": 0.6564382910728455,
"epoch": 4.988190836088805,
"grad_norm": 950.1802722480554,
"learning_rate": 0.0,
"logits": -1.284375786781311,
"logps": -82.72828674316406,
"loss": 0.7367,
"objective": 0.7989345788955688,
"ranking_idealized": 0.5291666388511658,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5291666388511658,
"regularize": 0.7989345788955688,
"step": 880
},
{
"epoch": 4.988190836088805,
"step": 880,
"total_flos": 0.0,
"train_loss": 2.940262828902765,
"train_runtime": 35074.095,
"train_samples_per_second": 7.242,
"train_steps_per_second": 0.025
}
],
"logging_steps": 5,
"max_steps": 880,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}