{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.988190836088805, "eval_steps": 50, "global_step": 880, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "dpo_loss": 0.6931471824645996, "epoch": 0.005668398677373642, "grad_norm": 1341.3653828621927, "learning_rate": 1.1363636363636363e-08, "logits": -1.3147305250167847, "logps": -88.0877456665039, "loss": 0.4113, "objective": 0.41588976979255676, "ranking_idealized": 0.5208333134651184, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5208333134651184, "regularize": 0.41588976979255676, "step": 1 }, { "dpo_loss": 0.6927290558815002, "epoch": 0.02834199338686821, "grad_norm": 1318.7970843939129, "learning_rate": 5.6818181818181815e-08, "logits": -1.367867350578308, "logps": -84.43714141845703, "loss": 0.4128, "objective": 0.3797340393066406, "ranking_idealized": 0.546875, "ranking_idealized_expo": 0.546875, "ranking_simple": 0.546875, "regularize": 0.3797340393066406, "step": 5 }, { "dpo_loss": 0.66615891456604, "epoch": 0.05668398677373642, "grad_norm": 1529.7102214452402, "learning_rate": 1.1363636363636363e-07, "logits": -1.446859359741211, "logps": -83.48344421386719, "loss": 0.4289, "objective": 0.4494988024234772, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.5083333253860474, "regularize": 0.4494988024234772, "step": 10 }, { "dpo_loss": 0.6892092823982239, "epoch": 0.08502598016060463, "grad_norm": 1398.9545573108187, "learning_rate": 1.7045454545454543e-07, "logits": -1.410345196723938, "logps": -83.83523559570312, "loss": 0.5083, "objective": 0.5087102055549622, "ranking_idealized": 0.5041666626930237, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.5083333253860474, "regularize": 0.5087102055549622, "step": 15 }, { "dpo_loss": 0.737316906452179, "epoch": 0.11336797354747284, "grad_norm": 1480.9989705702396, "learning_rate": 2.2727272727272726e-07, "logits": -1.397745966911316, "logps": -84.64732360839844, "loss": 0.5834, "objective": 0.6373786330223083, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5375000238418579, "regularize": 0.6373786330223083, "step": 20 }, { "dpo_loss": 0.6649187803268433, "epoch": 0.14170996693434104, "grad_norm": 1589.2205546755508, "learning_rate": 2.840909090909091e-07, "logits": -1.4405299425125122, "logps": -84.5063705444336, "loss": 0.7072, "objective": 0.7172243595123291, "ranking_idealized": 0.4958333373069763, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.4958333373069763, "regularize": 0.7172243595123291, "step": 25 }, { "dpo_loss": 0.7319389581680298, "epoch": 0.17005196032120926, "grad_norm": 1608.2967040181402, "learning_rate": 3.4090909090909085e-07, "logits": -1.4063345193862915, "logps": -85.10441589355469, "loss": 0.9251, "objective": 0.919275164604187, "ranking_idealized": 0.5416666865348816, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.5416666865348816, "regularize": 0.919275164604187, "step": 30 }, { "dpo_loss": 0.7343574166297913, "epoch": 0.19839395370807747, "grad_norm": 1660.500800571558, "learning_rate": 3.977272727272727e-07, "logits": -1.3780211210250854, "logps": -83.85320281982422, "loss": 1.1082, "objective": 1.1600453853607178, "ranking_idealized": 0.5041666626930237, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.5, "regularize": 1.1600453853607178, "step": 35 }, { "dpo_loss": 0.7147431969642639, "epoch": 0.22673594709494568, "grad_norm": 1529.1766380039583, "learning_rate": 4.545454545454545e-07, "logits": -1.3637299537658691, "logps": -84.27665710449219, "loss": 1.1991, "objective": 1.0680582523345947, "ranking_idealized": 0.5208333134651184, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5208333134651184, "regularize": 1.0680582523345947, "step": 40 }, { "dpo_loss": 0.8464261889457703, "epoch": 0.25507794048181387, "grad_norm": 1547.4803845815225, "learning_rate": 5.113636363636363e-07, "logits": -1.483019471168518, "logps": -85.49544525146484, "loss": 1.4787, "objective": 1.7124279737472534, "ranking_idealized": 0.5416666865348816, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5416666865348816, "regularize": 1.7124279737472534, "step": 45 }, { "dpo_loss": 0.9592286348342896, "epoch": 0.2834199338686821, "grad_norm": 1190.1245065397072, "learning_rate": 5.681818181818182e-07, "logits": -1.3799251317977905, "logps": -83.56061553955078, "loss": 1.7171, "objective": 1.6765538454055786, "ranking_idealized": 0.42500001192092896, "ranking_idealized_expo": 0.42500001192092896, "ranking_simple": 0.42500001192092896, "regularize": 1.6765538454055786, "step": 50 }, { "epoch": 0.2834199338686821, "eval_dpo_loss": 0.8390654921531677, "eval_logits": -1.3979839086532593, "eval_logps": -91.42163848876953, "eval_loss": 0.9451757073402405, "eval_objective": 0.9804208874702454, "eval_ranking_idealized": 0.5092975497245789, "eval_ranking_idealized_expo": 0.5092975497245789, "eval_ranking_simple": 0.5113636255264282, "eval_regularize": 0.9804208874702454, "eval_runtime": 260.041, "eval_samples_per_second": 22.266, "eval_steps_per_second": 0.931, "step": 50 }, { "dpo_loss": 1.1626336574554443, "epoch": 0.3117619272555503, "grad_norm": 1379.607675790991, "learning_rate": 6.249999999999999e-07, "logits": -1.4113659858703613, "logps": -85.0433578491211, "loss": 1.963, "objective": 2.1214394569396973, "ranking_idealized": 0.5416666865348816, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.5416666865348816, "regularize": 2.1214394569396973, "step": 55 }, { "dpo_loss": 1.1583107709884644, "epoch": 0.3401039206424185, "grad_norm": 1583.6531529945255, "learning_rate": 6.818181818181817e-07, "logits": -1.387775182723999, "logps": -83.92105102539062, "loss": 2.1311, "objective": 2.187351703643799, "ranking_idealized": 0.5166666507720947, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5041666626930237, "regularize": 2.187351703643799, "step": 60 }, { "dpo_loss": 1.1024636030197144, "epoch": 0.3684459140292867, "grad_norm": 1400.9200642265332, "learning_rate": 7.386363636363636e-07, "logits": -1.3549267053604126, "logps": -84.21534729003906, "loss": 2.5118, "objective": 2.5817580223083496, "ranking_idealized": 0.5083333253860474, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.5041666626930237, "regularize": 2.5817580223083496, "step": 65 }, { "dpo_loss": 1.401374340057373, "epoch": 0.39678790741615494, "grad_norm": 1397.7577328959405, "learning_rate": 7.954545454545454e-07, "logits": -1.434369444847107, "logps": -85.42965698242188, "loss": 2.8155, "objective": 2.7807960510253906, "ranking_idealized": 0.5166666507720947, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5083333253860474, "regularize": 2.7807960510253906, "step": 70 }, { "dpo_loss": 1.3847529888153076, "epoch": 0.42512990080302315, "grad_norm": 1574.992862602621, "learning_rate": 8.522727272727273e-07, "logits": -1.4534552097320557, "logps": -85.975341796875, "loss": 3.0239, "objective": 2.7482104301452637, "ranking_idealized": 0.5458333492279053, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.5458333492279053, "regularize": 2.7482104301452637, "step": 75 }, { "dpo_loss": 1.6628150939941406, "epoch": 0.45347189418989137, "grad_norm": 1317.6328063663461, "learning_rate": 9.09090909090909e-07, "logits": -1.4975560903549194, "logps": -85.59205627441406, "loss": 3.0523, "objective": 3.2058732509613037, "ranking_idealized": 0.5166666507720947, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5166666507720947, "regularize": 3.2058732509613037, "step": 80 }, { "dpo_loss": 1.5282264947891235, "epoch": 0.4818138875767596, "grad_norm": 1409.5000185848141, "learning_rate": 9.65909090909091e-07, "logits": -1.4272305965423584, "logps": -83.47660827636719, "loss": 3.6628, "objective": 3.7103846073150635, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5208333134651184, "regularize": 3.7103846073150635, "step": 85 }, { "dpo_loss": 2.017771005630493, "epoch": 0.5101558809636277, "grad_norm": 1413.0881700978623, "learning_rate": 9.999842657116664e-07, "logits": -1.2261414527893066, "logps": -84.69164276123047, "loss": 3.9801, "objective": 3.7396154403686523, "ranking_idealized": 0.4791666567325592, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.4749999940395355, "regularize": 3.7396154403686523, "step": 90 }, { "dpo_loss": 2.145413875579834, "epoch": 0.538497874350496, "grad_norm": 1451.2644391659398, "learning_rate": 9.998072663403656e-07, "logits": -1.3078831434249878, "logps": -83.98829650878906, "loss": 4.1074, "objective": 4.225299835205078, "ranking_idealized": 0.5458333492279053, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.550000011920929, "regularize": 4.225299835205078, "step": 95 }, { "dpo_loss": 2.2973792552948, "epoch": 0.5668398677373642, "grad_norm": 1458.4336577249642, "learning_rate": 9.99433669591504e-07, "logits": -1.3813899755477905, "logps": -85.42733001708984, "loss": 4.4116, "objective": 4.421018600463867, "ranking_idealized": 0.5416666865348816, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.5416666865348816, "regularize": 4.421018600463867, "step": 100 }, { "epoch": 0.5668398677373642, "eval_dpo_loss": 1.3937046527862549, "eval_logits": -1.3645607233047485, "eval_logps": -91.35843658447266, "eval_loss": 2.288909912109375, "eval_objective": 2.2847275733947754, "eval_ranking_idealized": 0.5092975497245789, "eval_ranking_idealized_expo": 0.5092975497245789, "eval_ranking_simple": 0.5144628286361694, "eval_regularize": 2.2847275733947754, "eval_runtime": 259.4011, "eval_samples_per_second": 22.321, "eval_steps_per_second": 0.933, "step": 100 }, { "dpo_loss": 2.3384857177734375, "epoch": 0.5951818611242324, "grad_norm": 1377.8614811370987, "learning_rate": 9.988636224180095e-07, "logits": -1.2764217853546143, "logps": -85.17194366455078, "loss": 4.8397, "objective": 4.943901062011719, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5541666746139526, "regularize": 4.943901062011719, "step": 105 }, { "dpo_loss": 2.419879198074341, "epoch": 0.6235238545111006, "grad_norm": 1709.2314342435861, "learning_rate": 9.980973490458728e-07, "logits": -1.4455102682113647, "logps": -84.0779037475586, "loss": 4.9241, "objective": 4.433398723602295, "ranking_idealized": 0.4541666805744171, "ranking_idealized_expo": 0.44999998807907104, "ranking_simple": 0.44583332538604736, "regularize": 4.433398723602295, "step": 110 }, { "dpo_loss": 2.3997786045074463, "epoch": 0.6518658478979689, "grad_norm": 1267.7408034843309, "learning_rate": 9.971351508859486e-07, "logits": -1.403380036354065, "logps": -83.38719940185547, "loss": 4.8313, "objective": 4.724060535430908, "ranking_idealized": 0.4833333194255829, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.48750001192092896, "regularize": 4.724060535430908, "step": 115 }, { "dpo_loss": 2.01283597946167, "epoch": 0.680207841284837, "grad_norm": 1334.5055640243738, "learning_rate": 9.959774064153975e-07, "logits": -1.3471440076828003, "logps": -84.59120178222656, "loss": 4.9092, "objective": 4.763902187347412, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5333333611488342, "regularize": 4.763902187347412, "step": 120 }, { "dpo_loss": 2.8711302280426025, "epoch": 0.7085498346717053, "grad_norm": 1217.4260816961178, "learning_rate": 9.94624571028813e-07, "logits": -1.2994908094406128, "logps": -83.49886322021484, "loss": 5.2406, "objective": 5.268767356872559, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.550000011920929, "regularize": 5.268767356872559, "step": 125 }, { "dpo_loss": 2.1492836475372314, "epoch": 0.7368918280585735, "grad_norm": 1181.5936213316104, "learning_rate": 9.930771768590933e-07, "logits": -1.4135076999664307, "logps": -82.80963897705078, "loss": 4.7897, "objective": 4.472428798675537, "ranking_idealized": 0.44999998807907104, "ranking_idealized_expo": 0.44999998807907104, "ranking_simple": 0.44999998807907104, "regularize": 4.472428798675537, "step": 130 }, { "dpo_loss": 2.715928554534912, "epoch": 0.7652338214454416, "grad_norm": 1143.7481041860115, "learning_rate": 9.91335832568129e-07, "logits": -1.387623906135559, "logps": -84.99431610107422, "loss": 5.1988, "objective": 5.63712215423584, "ranking_idealized": 0.5291666388511658, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.5291666388511658, "regularize": 5.63712215423584, "step": 135 }, { "dpo_loss": 2.748910665512085, "epoch": 0.7935758148323099, "grad_norm": 1147.9091652909822, "learning_rate": 9.894012231073895e-07, "logits": -1.2905735969543457, "logps": -84.26557922363281, "loss": 5.4168, "objective": 5.596283912658691, "ranking_idealized": 0.5041666626930237, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.512499988079071, "regularize": 5.596283912658691, "step": 140 }, { "dpo_loss": 2.3077232837677, "epoch": 0.821917808219178, "grad_norm": 1202.3074237963756, "learning_rate": 9.872741094484964e-07, "logits": -1.3657087087631226, "logps": -83.96611785888672, "loss": 5.3248, "objective": 4.7676682472229, "ranking_idealized": 0.4833333194255829, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.48750001192092896, "regularize": 4.7676682472229, "step": 145 }, { "dpo_loss": 2.2885777950286865, "epoch": 0.8502598016060463, "grad_norm": 1261.3028504954063, "learning_rate": 9.849553282839024e-07, "logits": -1.350846767425537, "logps": -82.00806427001953, "loss": 5.641, "objective": 5.300591945648193, "ranking_idealized": 0.5416666865348816, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5416666865348816, "regularize": 5.300591945648193, "step": 150 }, { "epoch": 0.8502598016060463, "eval_dpo_loss": 1.8989161252975464, "eval_logits": -1.361150860786438, "eval_logps": -89.60128784179688, "eval_loss": 3.659212827682495, "eval_objective": 3.6993324756622314, "eval_ranking_idealized": 0.5092975497245789, "eval_ranking_idealized_expo": 0.5092975497245789, "eval_ranking_simple": 0.5123966932296753, "eval_regularize": 3.6993324756622314, "eval_runtime": 258.9031, "eval_samples_per_second": 22.364, "eval_steps_per_second": 0.935, "step": 150 }, { "dpo_loss": 2.7537026405334473, "epoch": 0.8786017949929145, "grad_norm": 1297.1182521614555, "learning_rate": 9.824457916977784e-07, "logits": -1.358762264251709, "logps": -81.92320251464844, "loss": 5.4151, "objective": 5.554434776306152, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5249999761581421, "regularize": 5.554434776306152, "step": 155 }, { "dpo_loss": 2.6335387229919434, "epoch": 0.9069437883797827, "grad_norm": 1187.7670981291164, "learning_rate": 9.797464868072486e-07, "logits": -1.2611459493637085, "logps": -83.41938018798828, "loss": 5.5854, "objective": 5.695128917694092, "ranking_idealized": 0.4958333373069763, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.48750001192092896, "regularize": 5.695128917694092, "step": 160 }, { "dpo_loss": 2.661656618118286, "epoch": 0.9352857817666509, "grad_norm": 1120.1403651445853, "learning_rate": 9.768584753741134e-07, "logits": -1.2767577171325684, "logps": -84.16160583496094, "loss": 5.4701, "objective": 5.190924644470215, "ranking_idealized": 0.5208333134651184, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5208333134651184, "regularize": 5.190924644470215, "step": 165 }, { "dpo_loss": 3.1157445907592773, "epoch": 0.9636277751535192, "grad_norm": 1093.9866208390724, "learning_rate": 9.737828933872073e-07, "logits": -1.3006123304367065, "logps": -84.45008087158203, "loss": 5.3421, "objective": 5.613492488861084, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.5, "regularize": 5.613492488861084, "step": 170 }, { "dpo_loss": 2.3348023891448975, "epoch": 0.9919697685403873, "grad_norm": 1151.3293772515624, "learning_rate": 9.705209506155634e-07, "logits": -1.2408747673034668, "logps": -84.64601135253906, "loss": 5.2759, "objective": 4.945895671844482, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.574999988079071, "regularize": 4.945895671844482, "step": 175 }, { "dpo_loss": 2.864588737487793, "epoch": 1.0203117619272555, "grad_norm": 1140.0762636078218, "learning_rate": 9.670739301325534e-07, "logits": -1.3873549699783325, "logps": -83.78386688232422, "loss": 5.4744, "objective": 5.529939651489258, "ranking_idealized": 0.5416666865348816, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.5375000238418579, "regularize": 5.529939651489258, "step": 180 }, { "dpo_loss": 2.8053672313690186, "epoch": 1.0486537553141237, "grad_norm": 1077.7726094731238, "learning_rate": 9.63443187811197e-07, "logits": -1.2356277704238892, "logps": -84.0350341796875, "loss": 5.2551, "objective": 5.298059463500977, "ranking_idealized": 0.4833333194255829, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.48750001192092896, "regularize": 5.298059463500977, "step": 185 }, { "dpo_loss": 2.420074462890625, "epoch": 1.076995748700992, "grad_norm": 1190.6128199162288, "learning_rate": 9.596301517908328e-07, "logits": -1.3124566078186035, "logps": -84.89928436279297, "loss": 5.3934, "objective": 5.610664367675781, "ranking_idealized": 0.5541666746139526, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.5458333492279053, "regularize": 5.610664367675781, "step": 190 }, { "dpo_loss": 2.3999674320220947, "epoch": 1.10533774208786, "grad_norm": 1115.8540591706928, "learning_rate": 9.556363219153662e-07, "logits": -1.2911279201507568, "logps": -84.42256927490234, "loss": 5.3254, "objective": 5.321680545806885, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.5666666626930237, "regularize": 5.321680545806885, "step": 195 }, { "dpo_loss": 2.4562041759490967, "epoch": 1.1336797354747283, "grad_norm": 1209.8487619605175, "learning_rate": 9.514632691433106e-07, "logits": -1.357124924659729, "logps": -84.73216247558594, "loss": 5.6662, "objective": 5.265989303588867, "ranking_idealized": 0.5083333253860474, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.5083333253860474, "regularize": 5.265989303588867, "step": 200 }, { "epoch": 1.1336797354747283, "eval_dpo_loss": 2.5621941089630127, "eval_logits": -1.312853455543518, "eval_logps": -91.82030487060547, "eval_loss": 4.9017205238342285, "eval_objective": 5.1433634757995605, "eval_ranking_idealized": 0.5092975497245789, "eval_ranking_idealized_expo": 0.5092975497245789, "eval_ranking_simple": 0.5134297609329224, "eval_regularize": 5.1433634757995605, "eval_runtime": 259.021, "eval_samples_per_second": 22.353, "eval_steps_per_second": 0.934, "step": 200 }, { "dpo_loss": 2.70652174949646, "epoch": 1.1620217288615966, "grad_norm": 1176.9552446858397, "learning_rate": 9.471126349298556e-07, "logits": -1.3222142457962036, "logps": -84.78860473632812, "loss": 5.5162, "objective": 5.902441501617432, "ranking_idealized": 0.48750001192092896, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.5, "regularize": 5.902441501617432, "step": 205 }, { "dpo_loss": 2.1185081005096436, "epoch": 1.1903637222484649, "grad_norm": 1095.8529561562762, "learning_rate": 9.425861305812081e-07, "logits": -1.302480936050415, "logps": -84.719482421875, "loss": 5.425, "objective": 5.670342922210693, "ranking_idealized": 0.5, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.5, "regularize": 5.670342922210693, "step": 210 }, { "dpo_loss": 2.6837000846862793, "epoch": 1.2187057156353331, "grad_norm": 1062.9514542837012, "learning_rate": 9.378855365814557e-07, "logits": -1.273558497428894, "logps": -84.21690368652344, "loss": 5.3289, "objective": 5.31361722946167, "ranking_idealized": 0.48750001192092896, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.48750001192092896, "regularize": 5.31361722946167, "step": 215 }, { "dpo_loss": 2.5074095726013184, "epoch": 1.2470477090222012, "grad_norm": 1053.1396201008674, "learning_rate": 9.330127018922193e-07, "logits": -1.1912199258804321, "logps": -83.59181213378906, "loss": 5.2229, "objective": 5.1353912353515625, "ranking_idealized": 0.6041666865348816, "ranking_idealized_expo": 0.6000000238418579, "ranking_simple": 0.5958333611488342, "regularize": 5.1353912353515625, "step": 220 }, { "dpo_loss": 2.4020681381225586, "epoch": 1.2753897024090695, "grad_norm": 1047.2312940257925, "learning_rate": 9.279695432253708e-07, "logits": -1.2806742191314697, "logps": -84.57674407958984, "loss": 5.1377, "objective": 5.066871643066406, "ranking_idealized": 0.5291666388511658, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.5166666507720947, "regularize": 5.066871643066406, "step": 225 }, { "dpo_loss": 2.466576337814331, "epoch": 1.3037316957959377, "grad_norm": 1030.5617485584146, "learning_rate": 9.227580442891021e-07, "logits": -1.2669168710708618, "logps": -82.93086242675781, "loss": 4.8601, "objective": 4.961721420288086, "ranking_idealized": 0.5083333253860474, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.512499988079071, "regularize": 4.961721420288086, "step": 230 }, { "dpo_loss": 2.1171398162841797, "epoch": 1.3320736891828058, "grad_norm": 1061.077062757036, "learning_rate": 9.173802550076401e-07, "logits": -1.3311480283737183, "logps": -81.57727813720703, "loss": 4.9903, "objective": 4.573681354522705, "ranking_idealized": 0.4833333194255829, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.4958333373069763, "regularize": 4.573681354522705, "step": 235 }, { "dpo_loss": 2.8134536743164062, "epoch": 1.360415682569674, "grad_norm": 970.3779104307249, "learning_rate": 9.118382907149163e-07, "logits": -1.267702579498291, "logps": -83.0981216430664, "loss": 5.0505, "objective": 5.230247974395752, "ranking_idealized": 0.4791666567325592, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.4791666567325592, "regularize": 5.230247974395752, "step": 240 }, { "dpo_loss": 2.3336708545684814, "epoch": 1.3887576759565423, "grad_norm": 1037.7314557451798, "learning_rate": 9.061343313225087e-07, "logits": -1.2927136421203613, "logps": -82.71648406982422, "loss": 4.97, "objective": 4.810959815979004, "ranking_idealized": 0.5208333134651184, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5208333134651184, "regularize": 4.810959815979004, "step": 245 }, { "dpo_loss": 1.7325116395950317, "epoch": 1.4170996693434104, "grad_norm": 1022.8469212904224, "learning_rate": 9.002706204621802e-07, "logits": -1.2380987405776978, "logps": -82.20935821533203, "loss": 5.0544, "objective": 4.453593730926514, "ranking_idealized": 0.47083333134651184, "ranking_idealized_expo": 0.47083333134651184, "ranking_simple": 0.47083333134651184, "regularize": 4.453593730926514, "step": 250 }, { "epoch": 1.4170996693434104, "eval_dpo_loss": 2.388360023498535, "eval_logits": -1.2957789897918701, "eval_logps": -89.6596450805664, "eval_loss": 4.645730495452881, "eval_objective": 4.698073387145996, "eval_ranking_idealized": 0.5092975497245789, "eval_ranking_idealized_expo": 0.5092975497245789, "eval_ranking_simple": 0.5092975497245789, "eval_regularize": 4.698073387145996, "eval_runtime": 258.9175, "eval_samples_per_second": 22.362, "eval_steps_per_second": 0.935, "step": 250 }, { "dpo_loss": 2.6409242153167725, "epoch": 1.4454416627302786, "grad_norm": 1045.3372739075203, "learning_rate": 8.942494646033554e-07, "logits": -1.248718500137329, "logps": -83.6023941040039, "loss": 5.3714, "objective": 5.307827949523926, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.5375000238418579, "regularize": 5.307827949523926, "step": 255 }, { "dpo_loss": 2.64563250541687, "epoch": 1.473783656117147, "grad_norm": 1197.986844648104, "learning_rate": 8.880732321458784e-07, "logits": -1.2879093885421753, "logps": -84.90482330322266, "loss": 5.0662, "objective": 5.523282051086426, "ranking_idealized": 0.48750001192092896, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.4791666567325592, "regularize": 5.523282051086426, "step": 260 }, { "dpo_loss": 2.4120934009552, "epoch": 1.5021256495040152, "grad_norm": 1129.3043637151231, "learning_rate": 8.817443524884117e-07, "logits": -1.2617005109786987, "logps": -83.68741607666016, "loss": 5.09, "objective": 4.771634578704834, "ranking_idealized": 0.5041666626930237, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.5, "regularize": 4.771634578704834, "step": 265 }, { "dpo_loss": 3.012559175491333, "epoch": 1.5304676428908834, "grad_norm": 1023.223627517731, "learning_rate": 8.752653150728411e-07, "logits": -1.284982442855835, "logps": -84.35843658447266, "loss": 5.3376, "objective": 5.6197028160095215, "ranking_idealized": 0.5083333253860474, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.5041666626930237, "regularize": 5.6197028160095215, "step": 270 }, { "dpo_loss": 2.465777635574341, "epoch": 1.5588096362777515, "grad_norm": 1088.952583153439, "learning_rate": 8.68638668405062e-07, "logits": -1.2928842306137085, "logps": -84.76825714111328, "loss": 5.0697, "objective": 4.782662868499756, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.512499988079071, "regularize": 4.782662868499756, "step": 275 }, { "dpo_loss": 2.743584156036377, "epoch": 1.5871516296646198, "grad_norm": 1199.9945832988806, "learning_rate": 8.61867019052535e-07, "logits": -1.212363362312317, "logps": -82.49223327636719, "loss": 4.971, "objective": 5.158485412597656, "ranking_idealized": 0.5041666626930237, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.5041666626930237, "regularize": 5.158485412597656, "step": 280 }, { "dpo_loss": 2.19926381111145, "epoch": 1.615493623051488, "grad_norm": 1072.4621792134556, "learning_rate": 8.549530306190014e-07, "logits": -1.3124572038650513, "logps": -84.11770629882812, "loss": 4.7979, "objective": 4.820002555847168, "ranking_idealized": 0.5208333134651184, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5249999761581421, "regularize": 4.820002555847168, "step": 285 }, { "dpo_loss": 2.2039902210235596, "epoch": 1.643835616438356, "grad_norm": 1037.1720137680302, "learning_rate": 8.478994226967638e-07, "logits": -1.331601619720459, "logps": -82.66283416748047, "loss": 4.7089, "objective": 4.694666385650635, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.5625, "ranking_simple": 0.5583333373069763, "regularize": 4.694666385650635, "step": 290 }, { "dpo_loss": 2.586439847946167, "epoch": 1.6721776098252243, "grad_norm": 1019.0647957946281, "learning_rate": 8.407089697969456e-07, "logits": -1.2327104806900024, "logps": -81.66007995605469, "loss": 4.8644, "objective": 4.790833473205566, "ranking_idealized": 0.4833333194255829, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.49166667461395264, "regularize": 4.790833473205566, "step": 295 }, { "dpo_loss": 2.176333427429199, "epoch": 1.7005196032120926, "grad_norm": 1054.8932248051876, "learning_rate": 8.333845002581458e-07, "logits": -1.3139069080352783, "logps": -83.0549087524414, "loss": 4.799, "objective": 4.67409610748291, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.5708333253860474, "regularize": 4.67409610748291, "step": 300 }, { "epoch": 1.7005196032120926, "eval_dpo_loss": 2.5370917320251465, "eval_logits": -1.312793254852295, "eval_logps": -89.6458969116211, "eval_loss": 5.069702625274658, "eval_objective": 5.148064136505127, "eval_ranking_idealized": 0.5092975497245789, "eval_ranking_idealized_expo": 0.5092975497245789, "eval_ranking_simple": 0.5113636255264282, "eval_regularize": 5.148064136505127, "eval_runtime": 259.1048, "eval_samples_per_second": 22.346, "eval_steps_per_second": 0.934, "step": 300 }, { "dpo_loss": 2.535557985305786, "epoch": 1.7288615965989607, "grad_norm": 1059.8267179953498, "learning_rate": 8.259288951339232e-07, "logits": -1.2971898317337036, "logps": -83.48696899414062, "loss": 4.7904, "objective": 5.117157459259033, "ranking_idealized": 0.5083333253860474, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.512499988079071, "regularize": 5.117157459259033, "step": 305 }, { "dpo_loss": 2.743206024169922, "epoch": 1.7572035899858292, "grad_norm": 1062.5530460229647, "learning_rate": 8.183450870595441e-07, "logits": -1.3307418823242188, "logps": -81.59162902832031, "loss": 4.7455, "objective": 4.8372015953063965, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5166666507720947, "regularize": 4.8372015953063965, "step": 310 }, { "dpo_loss": 2.1805214881896973, "epoch": 1.7855455833726972, "grad_norm": 967.1673859822481, "learning_rate": 8.106360590984404e-07, "logits": -1.2439404726028442, "logps": -82.7205810546875, "loss": 4.5159, "objective": 4.586319923400879, "ranking_idealized": 0.5083333253860474, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.512499988079071, "regularize": 4.586319923400879, "step": 315 }, { "dpo_loss": 2.943455219268799, "epoch": 1.8138875767595655, "grad_norm": 1040.3088755706913, "learning_rate": 8.028048435688333e-07, "logits": -1.3003054857254028, "logps": -83.96493530273438, "loss": 4.8496, "objective": 5.258904933929443, "ranking_idealized": 0.5041666626930237, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.5083333253860474, "regularize": 5.258904933929443, "step": 320 }, { "dpo_loss": 1.8657586574554443, "epoch": 1.8422295701464337, "grad_norm": 1028.4810077841332, "learning_rate": 7.948545208509811e-07, "logits": -1.3532642126083374, "logps": -85.16321563720703, "loss": 4.6322, "objective": 4.409180164337158, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5291666388511658, "regularize": 4.409180164337158, "step": 325 }, { "dpo_loss": 2.035325050354004, "epoch": 1.8705715635333018, "grad_norm": 981.4379644106098, "learning_rate": 7.86788218175523e-07, "logits": -1.124271035194397, "logps": -81.62163543701172, "loss": 4.5108, "objective": 4.548847675323486, "ranking_idealized": 0.4749999940395355, "ranking_idealized_expo": 0.47083333134651184, "ranking_simple": 0.4791666567325592, "regularize": 4.548847675323486, "step": 330 }, { "dpo_loss": 2.2992029190063477, "epoch": 1.89891355692017, "grad_norm": 1060.3744283391652, "learning_rate": 7.786091083933949e-07, "logits": -1.2721890211105347, "logps": -81.50038146972656, "loss": 4.574, "objective": 4.452338218688965, "ranking_idealized": 0.4958333373069763, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.5041666626930237, "regularize": 4.452338218688965, "step": 335 }, { "dpo_loss": 2.214193820953369, "epoch": 1.9272555503070383, "grad_norm": 1066.9038354372583, "learning_rate": 7.703204087277988e-07, "logits": -1.2717024087905884, "logps": -82.89705657958984, "loss": 4.1923, "objective": 4.533308029174805, "ranking_idealized": 0.5416666865348816, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.5416666865348816, "regularize": 4.533308029174805, "step": 340 }, { "dpo_loss": 1.864801287651062, "epoch": 1.9555975436939064, "grad_norm": 1068.1959237278584, "learning_rate": 7.619253795087208e-07, "logits": -1.2882568836212158, "logps": -81.01626586914062, "loss": 4.3847, "objective": 4.10886812210083, "ranking_idealized": 0.5333333611488342, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5333333611488342, "regularize": 4.10886812210083, "step": 345 }, { "dpo_loss": 1.7610963582992554, "epoch": 1.9839395370807746, "grad_norm": 1065.3539167654192, "learning_rate": 7.534273228904915e-07, "logits": -1.2216317653656006, "logps": -82.78074645996094, "loss": 4.3968, "objective": 4.256522178649902, "ranking_idealized": 0.48750001192092896, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.5, "regularize": 4.256522178649902, "step": 350 }, { "epoch": 1.9839395370807746, "eval_dpo_loss": 2.7970640659332275, "eval_logits": -1.2878926992416382, "eval_logps": -88.54591369628906, "eval_loss": 5.4044508934021, "eval_objective": 5.363577365875244, "eval_ranking_idealized": 0.5092975497245789, "eval_ranking_idealized_expo": 0.5092975497245789, "eval_ranking_simple": 0.5103305578231812, "eval_regularize": 5.363577365875244, "eval_runtime": 258.9945, "eval_samples_per_second": 22.356, "eval_steps_per_second": 0.934, "step": 350 }, { "dpo_loss": 2.0882179737091064, "epoch": 2.012281530467643, "grad_norm": 1012.7784752507871, "learning_rate": 7.448295815528956e-07, "logits": -1.180530309677124, "logps": -82.18423461914062, "loss": 4.222, "objective": 4.049466609954834, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.49166667461395264, "regularize": 4.049466609954834, "step": 355 }, { "dpo_loss": 2.207961082458496, "epoch": 2.040623523854511, "grad_norm": 991.7225128541588, "learning_rate": 7.361355373863413e-07, "logits": -1.2234774827957153, "logps": -80.75540161132812, "loss": 4.1947, "objective": 4.063844680786133, "ranking_idealized": 0.5333333611488342, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.5375000238418579, "regularize": 4.063844680786133, "step": 360 }, { "dpo_loss": 1.986746907234192, "epoch": 2.0689655172413794, "grad_norm": 1102.1948086047603, "learning_rate": 7.273486101616056e-07, "logits": -1.2934725284576416, "logps": -83.87660217285156, "loss": 4.2559, "objective": 4.076398849487305, "ranking_idealized": 0.5041666626930237, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.5208333134651184, "regularize": 4.076398849487305, "step": 365 }, { "dpo_loss": 1.9069340229034424, "epoch": 2.0973075106282475, "grad_norm": 1148.422426893069, "learning_rate": 7.184722561846797e-07, "logits": -1.2647373676300049, "logps": -80.90840911865234, "loss": 3.9351, "objective": 3.6877379417419434, "ranking_idealized": 0.4833333194255829, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.49166667461395264, "regularize": 3.6877379417419434, "step": 370 }, { "dpo_loss": 1.905211329460144, "epoch": 2.1256495040151155, "grad_norm": 982.12339135048, "learning_rate": 7.095099669372443e-07, "logits": -1.2502344846725464, "logps": -83.15164947509766, "loss": 4.1889, "objective": 3.8048832416534424, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.5874999761581421, "ranking_simple": 0.6000000238418579, "regularize": 3.8048832416534424, "step": 375 }, { "dpo_loss": 2.2980270385742188, "epoch": 2.153991497401984, "grad_norm": 1002.9053178903335, "learning_rate": 7.004652677033068e-07, "logits": -1.1937479972839355, "logps": -82.11659240722656, "loss": 4.0113, "objective": 4.026199817657471, "ranking_idealized": 0.5291666388511658, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.5208333134651184, "regularize": 4.026199817657471, "step": 380 }, { "dpo_loss": 2.430462121963501, "epoch": 2.182333490788852, "grad_norm": 1002.2263691968883, "learning_rate": 6.913417161825449e-07, "logits": -1.2855180501937866, "logps": -84.00334930419922, "loss": 4.1443, "objective": 4.582634925842285, "ranking_idealized": 0.5041666626930237, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5041666626930237, "regularize": 4.582634925842285, "step": 385 }, { "dpo_loss": 2.034668445587158, "epoch": 2.21067548417572, "grad_norm": 1073.0050710345477, "learning_rate": 6.821429010908971e-07, "logits": -1.183647632598877, "logps": -82.46015930175781, "loss": 3.9544, "objective": 3.7511041164398193, "ranking_idealized": 0.4833333194255829, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.4791666567325592, "regularize": 3.7511041164398193, "step": 390 }, { "dpo_loss": 2.1025197505950928, "epoch": 2.2390174775625886, "grad_norm": 928.4936074724073, "learning_rate": 6.728724407489553e-07, "logits": -1.176824688911438, "logps": -82.696044921875, "loss": 4.0116, "objective": 4.03180456161499, "ranking_idealized": 0.5208333134651184, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5249999761581421, "regularize": 4.03180456161499, "step": 395 }, { "dpo_loss": 1.8622020483016968, "epoch": 2.2673594709494567, "grad_norm": 950.7422986439022, "learning_rate": 6.635339816587108e-07, "logits": -1.242794394493103, "logps": -83.7727279663086, "loss": 3.8148, "objective": 3.835386037826538, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.574999988079071, "regularize": 3.835386037826538, "step": 400 }, { "epoch": 2.2673594709494567, "eval_dpo_loss": 2.9398233890533447, "eval_logits": -1.2679872512817383, "eval_logps": -88.25416564941406, "eval_loss": 5.762566089630127, "eval_objective": 5.820021152496338, "eval_ranking_idealized": 0.5092975497245789, "eval_ranking_idealized_expo": 0.5092975497245789, "eval_ranking_simple": 0.5092975497245789, "eval_regularize": 5.820021152496338, "eval_runtime": 259.3166, "eval_samples_per_second": 22.328, "eval_steps_per_second": 0.933, "step": 400 }, { "dpo_loss": 1.8469058275222778, "epoch": 2.295701464336325, "grad_norm": 963.0280264659817, "learning_rate": 6.541311970692162e-07, "logits": -1.301633358001709, "logps": -81.62474060058594, "loss": 3.8783, "objective": 3.7539381980895996, "ranking_idealized": 0.5583333373069763, "ranking_idealized_expo": 0.5583333373069763, "ranking_simple": 0.5708333253860474, "regularize": 3.7539381980895996, "step": 405 }, { "dpo_loss": 1.6361950635910034, "epoch": 2.324043457723193, "grad_norm": 1074.5619431455382, "learning_rate": 6.446677855317264e-07, "logits": -1.2071573734283447, "logps": -80.61900329589844, "loss": 3.9829, "objective": 3.7228012084960938, "ranking_idealized": 0.4833333194255829, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.47083333134651184, "regularize": 3.7228012084960938, "step": 410 }, { "dpo_loss": 1.8135488033294678, "epoch": 2.3523854511100613, "grad_norm": 1058.2549464280587, "learning_rate": 6.351474694448864e-07, "logits": -1.2399203777313232, "logps": -81.5149917602539, "loss": 3.748, "objective": 3.9710586071014404, "ranking_idealized": 0.5333333611488342, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5333333611488342, "regularize": 3.9710586071014404, "step": 415 }, { "dpo_loss": 2.120755434036255, "epoch": 2.3807274444969297, "grad_norm": 1114.426082281688, "learning_rate": 6.255739935905395e-07, "logits": -1.1594359874725342, "logps": -83.35548400878906, "loss": 3.7118, "objective": 3.681281089782715, "ranking_idealized": 0.4749999940395355, "ranking_idealized_expo": 0.47083333134651184, "ranking_simple": 0.4583333432674408, "regularize": 3.681281089782715, "step": 420 }, { "dpo_loss": 1.7397000789642334, "epoch": 2.409069437883798, "grad_norm": 1024.8329703830655, "learning_rate": 6.159511236607315e-07, "logits": -1.2232387065887451, "logps": -81.95706939697266, "loss": 3.5923, "objective": 3.648602247238159, "ranking_idealized": 0.5, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.5166666507720947, "regularize": 3.648602247238159, "step": 425 }, { "dpo_loss": 1.804638385772705, "epoch": 2.4374114312706663, "grad_norm": 979.544871113056, "learning_rate": 6.062826447764883e-07, "logits": -1.2574443817138672, "logps": -82.60694122314453, "loss": 3.4778, "objective": 3.4914093017578125, "ranking_idealized": 0.5333333611488342, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.5249999761581421, "regularize": 3.4914093017578125, "step": 430 }, { "dpo_loss": 1.6648954153060913, "epoch": 2.4657534246575343, "grad_norm": 1000.8401446346558, "learning_rate": 5.965723599989528e-07, "logits": -1.300902009010315, "logps": -82.3549575805664, "loss": 3.5217, "objective": 3.345120668411255, "ranking_idealized": 0.5083333253860474, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.5, "regularize": 3.345120668411255, "step": 435 }, { "dpo_loss": 2.0332047939300537, "epoch": 2.4940954180444024, "grad_norm": 1031.1441408735554, "learning_rate": 5.868240888334652e-07, "logits": -1.2219815254211426, "logps": -82.15070343017578, "loss": 3.7915, "objective": 3.9336984157562256, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5333333611488342, "regularize": 3.9336984157562256, "step": 440 }, { "dpo_loss": 2.0011653900146484, "epoch": 2.5224374114312704, "grad_norm": 970.4125487560452, "learning_rate": 5.770416657271728e-07, "logits": -1.2273285388946533, "logps": -80.1659164428711, "loss": 3.4124, "objective": 3.168755292892456, "ranking_idealized": 0.5166666507720947, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5208333134651184, "regularize": 3.168755292892456, "step": 445 }, { "dpo_loss": 1.613458275794983, "epoch": 2.550779404818139, "grad_norm": 1061.4659212212357, "learning_rate": 5.67228938560766e-07, "logits": -1.2801109552383423, "logps": -81.43175506591797, "loss": 3.4169, "objective": 3.3297555446624756, "ranking_idealized": 0.5, "ranking_idealized_expo": 0.5, "ranking_simple": 0.49166667461395264, "regularize": 3.3297555446624756, "step": 450 }, { "epoch": 2.550779404818139, "eval_dpo_loss": 3.138432502746582, "eval_logits": -1.2897279262542725, "eval_logps": -88.01155853271484, "eval_loss": 5.953909397125244, "eval_objective": 6.106462478637695, "eval_ranking_idealized": 0.5092975497245789, "eval_ranking_idealized_expo": 0.5092975497245789, "eval_ranking_simple": 0.5144628286361694, "eval_regularize": 6.106462478637695, "eval_runtime": 259.8077, "eval_samples_per_second": 22.286, "eval_steps_per_second": 0.931, "step": 450 }, { "dpo_loss": 1.8731194734573364, "epoch": 2.579121398205007, "grad_norm": 1156.0005265988177, "learning_rate": 5.573897671349268e-07, "logits": -1.300366759300232, "logps": -82.42855834960938, "loss": 3.5224, "objective": 3.593839168548584, "ranking_idealized": 0.5041666626930237, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5041666626930237, "regularize": 3.593839168548584, "step": 455 }, { "dpo_loss": 1.952242136001587, "epoch": 2.6074633915918755, "grad_norm": 1095.9802997680708, "learning_rate": 5.475280216520912e-07, "logits": -1.3715617656707764, "logps": -82.51021575927734, "loss": 3.3771, "objective": 3.524890422821045, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5208333134651184, "regularize": 3.524890422821045, "step": 460 }, { "dpo_loss": 1.5585062503814697, "epoch": 2.6358053849787435, "grad_norm": 935.1251434271745, "learning_rate": 5.376475811941191e-07, "logits": -1.2529839277267456, "logps": -82.1951904296875, "loss": 3.4445, "objective": 3.3441879749298096, "ranking_idealized": 0.4958333373069763, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.5, "regularize": 3.3441879749298096, "step": 465 }, { "dpo_loss": 1.6525613069534302, "epoch": 2.6641473783656116, "grad_norm": 977.6740437303708, "learning_rate": 5.277523321964701e-07, "logits": -1.2638176679611206, "logps": -81.61119079589844, "loss": 3.3653, "objective": 3.1554312705993652, "ranking_idealized": 0.5541666746139526, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.5625, "regularize": 3.1554312705993652, "step": 470 }, { "dpo_loss": 1.4508802890777588, "epoch": 2.69248937175248, "grad_norm": 993.1353802099517, "learning_rate": 5.178461669194903e-07, "logits": -1.228849172592163, "logps": -81.23489379882812, "loss": 2.9912, "objective": 2.843146562576294, "ranking_idealized": 0.4958333373069763, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.5, "regularize": 2.843146562576294, "step": 475 }, { "dpo_loss": 1.4309269189834595, "epoch": 2.720831365139348, "grad_norm": 1013.9109275016526, "learning_rate": 5.07932981917404e-07, "logits": -1.3067494630813599, "logps": -80.33541107177734, "loss": 3.1599, "objective": 3.0493505001068115, "ranking_idealized": 0.5, "ranking_idealized_expo": 0.5, "ranking_simple": 0.512499988079071, "regularize": 3.0493505001068115, "step": 480 }, { "dpo_loss": 1.6128212213516235, "epoch": 2.7491733585262166, "grad_norm": 935.3777113198429, "learning_rate": 4.980166765056193e-07, "logits": -1.2488080263137817, "logps": -82.94316101074219, "loss": 3.2342, "objective": 3.1210060119628906, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5166666507720947, "regularize": 3.1210060119628906, "step": 485 }, { "dpo_loss": 1.2609761953353882, "epoch": 2.7775153519130846, "grad_norm": 1021.9995000614038, "learning_rate": 4.881011512269463e-07, "logits": -1.224744439125061, "logps": -80.88065338134766, "loss": 3.0434, "objective": 2.964614152908325, "ranking_idealized": 0.4749999940395355, "ranking_idealized_expo": 0.47083333134651184, "ranking_simple": 0.46666666865348816, "regularize": 2.964614152908325, "step": 490 }, { "dpo_loss": 1.3648375272750854, "epoch": 2.8058573452999527, "grad_norm": 957.4547050967395, "learning_rate": 4.78190306317332e-07, "logits": -1.2405409812927246, "logps": -79.93798065185547, "loss": 2.9176, "objective": 2.8268253803253174, "ranking_idealized": 0.4791666567325592, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.47083333134651184, "regularize": 2.8268253803253174, "step": 495 }, { "dpo_loss": 1.4600600004196167, "epoch": 2.8341993386868207, "grad_norm": 956.91251387349, "learning_rate": 4.682880401717177e-07, "logits": -1.2869919538497925, "logps": -79.13894653320312, "loss": 2.988, "objective": 2.750535488128662, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5208333134651184, "regularize": 2.750535488128662, "step": 500 }, { "epoch": 2.8341993386868207, "eval_dpo_loss": 3.1318445205688477, "eval_logits": -1.2855963706970215, "eval_logps": -87.95059967041016, "eval_loss": 5.985367298126221, "eval_objective": 6.018334865570068, "eval_ranking_idealized": 0.5092975497245789, "eval_ranking_idealized_expo": 0.5092975497245789, "eval_ranking_simple": 0.5092975497245789, "eval_regularize": 6.018334865570068, "eval_runtime": 259.5234, "eval_samples_per_second": 22.31, "eval_steps_per_second": 0.932, "step": 500 }, { "dpo_loss": 1.2522823810577393, "epoch": 2.862541332073689, "grad_norm": 990.3820386306984, "learning_rate": 4.5839824781061886e-07, "logits": -1.238278865814209, "logps": -80.7296371459961, "loss": 2.9937, "objective": 2.6271092891693115, "ranking_idealized": 0.5208333134651184, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.512499988079071, "regularize": 2.6271092891693115, "step": 505 }, { "dpo_loss": 1.5270129442214966, "epoch": 2.8908833254605573, "grad_norm": 943.7310548857196, "learning_rate": 4.4852481934803277e-07, "logits": -1.1962122917175293, "logps": -81.28018951416016, "loss": 2.8508, "objective": 3.192195177078247, "ranking_idealized": 0.5291666388511658, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5416666865348816, "regularize": 3.192195177078247, "step": 510 }, { "dpo_loss": 1.3985689878463745, "epoch": 2.9192253188474258, "grad_norm": 1066.3526544144515, "learning_rate": 4.3867163846127674e-07, "logits": -1.3405725955963135, "logps": -81.18116760253906, "loss": 2.8001, "objective": 2.7043962478637695, "ranking_idealized": 0.4791666567325592, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.4791666567325592, "regularize": 2.7043962478637695, "step": 515 }, { "dpo_loss": 1.549082636833191, "epoch": 2.947567312234294, "grad_norm": 976.1195004276424, "learning_rate": 4.2884258086335745e-07, "logits": -1.2520852088928223, "logps": -83.24747467041016, "loss": 2.7691, "objective": 2.9670374393463135, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.512499988079071, "regularize": 2.9670374393463135, "step": 520 }, { "dpo_loss": 1.1514889001846313, "epoch": 2.975909305621162, "grad_norm": 993.1767181389736, "learning_rate": 4.1904151277847305e-07, "logits": -1.2369369268417358, "logps": -82.1258316040039, "loss": 2.7535, "objective": 2.7600042819976807, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.574999988079071, "regularize": 2.7600042819976807, "step": 525 }, { "dpo_loss": 1.234760046005249, "epoch": 3.0042512990080303, "grad_norm": 954.3168391295485, "learning_rate": 4.092722894212487e-07, "logits": -1.2384470701217651, "logps": -81.53229522705078, "loss": 2.772, "objective": 2.536973237991333, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5166666507720947, "regularize": 2.536973237991333, "step": 530 }, { "dpo_loss": 1.2055299282073975, "epoch": 3.0325932923948984, "grad_norm": 1192.8110305376617, "learning_rate": 3.995387534803005e-07, "logits": -1.2691408395767212, "logps": -82.96357727050781, "loss": 2.6555, "objective": 2.663506031036377, "ranking_idealized": 0.5458333492279053, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.550000011920929, "regularize": 2.663506031036377, "step": 535 }, { "dpo_loss": 1.3903727531433105, "epoch": 3.0609352857817664, "grad_norm": 949.2725322799425, "learning_rate": 3.8984473360672967e-07, "logits": -1.3405746221542358, "logps": -81.4103012084961, "loss": 2.5599, "objective": 2.611825704574585, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5291666388511658, "regularize": 2.611825704574585, "step": 540 }, { "dpo_loss": 1.3084332942962646, "epoch": 3.089277279168635, "grad_norm": 999.2271817343484, "learning_rate": 3.801940429081345e-07, "logits": -1.2964000701904297, "logps": -82.18136596679688, "loss": 2.504, "objective": 2.4276323318481445, "ranking_idealized": 0.49166667461395264, "ranking_idealized_expo": 0.49166667461395264, "ranking_simple": 0.4833333194255829, "regularize": 2.4276323318481445, "step": 545 }, { "dpo_loss": 1.0889129638671875, "epoch": 3.117619272555503, "grad_norm": 934.3758046690474, "learning_rate": 3.7059047744873955e-07, "logits": -1.1509153842926025, "logps": -82.78107452392578, "loss": 2.4859, "objective": 2.5204343795776367, "ranking_idealized": 0.5458333492279053, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5458333492279053, "regularize": 2.5204343795776367, "step": 550 }, { "epoch": 3.117619272555503, "eval_dpo_loss": 3.178955316543579, "eval_logits": -1.2804911136627197, "eval_logps": -88.50298309326172, "eval_loss": 6.194624423980713, "eval_objective": 6.202889919281006, "eval_ranking_idealized": 0.5092975497245789, "eval_ranking_idealized_expo": 0.5092975497245789, "eval_ranking_simple": 0.5103305578231812, "eval_regularize": 6.202889919281006, "eval_runtime": 260.2352, "eval_samples_per_second": 22.249, "eval_steps_per_second": 0.93, "step": 550 }, { "dpo_loss": 1.148443579673767, "epoch": 3.1459612659423715, "grad_norm": 977.893659928589, "learning_rate": 3.6103781475622786e-07, "logits": -1.2158228158950806, "logps": -83.3880844116211, "loss": 2.4258, "objective": 2.2721104621887207, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.5458333492279053, "regularize": 2.2721104621887207, "step": 555 }, { "dpo_loss": 1.2861641645431519, "epoch": 3.1743032593292395, "grad_norm": 912.4395782657139, "learning_rate": 3.5153981233586274e-07, "logits": -1.2103866338729858, "logps": -80.51148223876953, "loss": 2.4037, "objective": 2.3816921710968018, "ranking_idealized": 0.5041666626930237, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.5041666626930237, "regularize": 2.3816921710968018, "step": 560 }, { "dpo_loss": 1.0393404960632324, "epoch": 3.2026452527161076, "grad_norm": 959.0200932296882, "learning_rate": 3.421002061924876e-07, "logits": -1.262898325920105, "logps": -82.25037384033203, "loss": 2.3058, "objective": 2.267256259918213, "ranking_idealized": 0.5458333492279053, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.5583333373069763, "regularize": 2.267256259918213, "step": 565 }, { "dpo_loss": 1.2455191612243652, "epoch": 3.230987246102976, "grad_norm": 937.4751934096442, "learning_rate": 3.327227093609824e-07, "logits": -1.1880606412887573, "logps": -81.5679702758789, "loss": 2.2052, "objective": 2.3692660331726074, "ranking_idealized": 0.4791666567325592, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.4833333194255829, "regularize": 2.3692660331726074, "step": 570 }, { "dpo_loss": 1.228649616241455, "epoch": 3.259329239489844, "grad_norm": 996.5558020139559, "learning_rate": 3.234110104457536e-07, "logits": -1.2556049823760986, "logps": -81.8873519897461, "loss": 2.2515, "objective": 2.29750657081604, "ranking_idealized": 0.5, "ranking_idealized_expo": 0.5, "ranking_simple": 0.49166667461395264, "regularize": 2.29750657081604, "step": 575 }, { "dpo_loss": 1.3365857601165771, "epoch": 3.287671232876712, "grad_norm": 996.2512645672206, "learning_rate": 3.141687721698363e-07, "logits": -1.2736408710479736, "logps": -82.88424682617188, "loss": 2.3155, "objective": 2.366077423095703, "ranking_idealized": 0.5541666746139526, "ranking_idealized_expo": 0.5541666746139526, "ranking_simple": 0.5458333492279053, "regularize": 2.366077423095703, "step": 580 }, { "dpo_loss": 1.1693744659423828, "epoch": 3.3160132262635806, "grad_norm": 990.5330100186713, "learning_rate": 3.049996299341742e-07, "logits": -1.289427638053894, "logps": -83.39188385009766, "loss": 2.1009, "objective": 2.1158106327056885, "ranking_idealized": 0.5333333611488342, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.5249999761581421, "regularize": 2.1158106327056885, "step": 585 }, { "dpo_loss": 1.1631190776824951, "epoch": 3.3443552196504487, "grad_norm": 986.1485755213372, "learning_rate": 2.959071903876486e-07, "logits": -1.3035333156585693, "logps": -83.53241729736328, "loss": 2.1276, "objective": 2.4290575981140137, "ranking_idealized": 0.49166667461395264, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.5083333253860474, "regularize": 2.4290575981140137, "step": 590 }, { "dpo_loss": 1.3233096599578857, "epoch": 3.372697213037317, "grad_norm": 930.1393629687922, "learning_rate": 2.86895030008416e-07, "logits": -1.2197285890579224, "logps": -82.90579223632812, "loss": 2.1207, "objective": 2.1894338130950928, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5291666388511658, "regularize": 2.1894338130950928, "step": 595 }, { "dpo_loss": 1.007699728012085, "epoch": 3.4010392064241852, "grad_norm": 935.3828921290786, "learning_rate": 2.779666936971129e-07, "logits": -1.230286717414856, "logps": -81.8963851928711, "loss": 2.0539, "objective": 2.0761570930480957, "ranking_idealized": 0.5041666626930237, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.4958333373069763, "regularize": 2.0761570930480957, "step": 600 }, { "epoch": 3.4010392064241852, "eval_dpo_loss": 3.1110734939575195, "eval_logits": -1.2650625705718994, "eval_logps": -88.16163635253906, "eval_loss": 5.933178901672363, "eval_objective": 6.031818866729736, "eval_ranking_idealized": 0.5092975497245789, "eval_ranking_idealized_expo": 0.5092975497245789, "eval_ranking_simple": 0.5113636255264282, "eval_regularize": 6.031818866729736, "eval_runtime": 258.4717, "eval_samples_per_second": 22.401, "eval_steps_per_second": 0.936, "step": 600 }, { "dpo_loss": 1.11422860622406, "epoch": 3.4293811998110533, "grad_norm": 962.8376859443554, "learning_rate": 2.6912569338248315e-07, "logits": -1.2667381763458252, "logps": -83.77916717529297, "loss": 2.0057, "objective": 1.9590004682540894, "ranking_idealized": 0.5333333611488342, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5333333611488342, "regularize": 1.9590004682540894, "step": 605 }, { "dpo_loss": 1.1715530157089233, "epoch": 3.4577231931979218, "grad_norm": 934.6534827687339, "learning_rate": 2.603755066399718e-07, "logits": -1.2390044927597046, "logps": -82.3260269165039, "loss": 2.1286, "objective": 2.050309658050537, "ranking_idealized": 0.5208333134651184, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.512499988079071, "regularize": 2.050309658050537, "step": 610 }, { "dpo_loss": 1.1228054761886597, "epoch": 3.48606518658479, "grad_norm": 924.4634252845681, "learning_rate": 2.517195753238345e-07, "logits": -1.2509876489639282, "logps": -82.427001953125, "loss": 1.9336, "objective": 1.9248236417770386, "ranking_idealized": 0.47083333134651184, "ranking_idealized_expo": 0.46666666865348816, "ranking_simple": 0.4625000059604645, "regularize": 1.9248236417770386, "step": 615 }, { "dpo_loss": 1.0055824518203735, "epoch": 3.514407179971658, "grad_norm": 996.235630609532, "learning_rate": 2.4316130421329696e-07, "logits": -1.1457880735397339, "logps": -81.78346252441406, "loss": 1.8569, "objective": 1.8841525316238403, "ranking_idealized": 0.4958333373069763, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.49166667461395264, "regularize": 1.8841525316238403, "step": 620 }, { "dpo_loss": 1.0400264263153076, "epoch": 3.5427491733585263, "grad_norm": 988.2054187208093, "learning_rate": 2.3470405967329604e-07, "logits": -1.202520489692688, "logps": -81.60346984863281, "loss": 1.8727, "objective": 2.022357702255249, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5166666507720947, "regularize": 2.022357702255249, "step": 625 }, { "dpo_loss": 0.9165257215499878, "epoch": 3.5710911667453944, "grad_norm": 970.3302350643374, "learning_rate": 2.2635116833033392e-07, "logits": -1.2847248315811157, "logps": -81.50933074951172, "loss": 1.8387, "objective": 1.6823768615722656, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5291666388511658, "regularize": 1.6823768615722656, "step": 630 }, { "dpo_loss": 0.8153350949287415, "epoch": 3.5994331601322624, "grad_norm": 964.0631369889124, "learning_rate": 2.181059157639598e-07, "logits": -1.2249428033828735, "logps": -82.01227569580078, "loss": 1.7136, "objective": 1.8080626726150513, "ranking_idealized": 0.48750001192092896, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.48750001192092896, "regularize": 1.8080624341964722, "step": 635 }, { "dpo_loss": 0.9864614605903625, "epoch": 3.627775153519131, "grad_norm": 1000.0610851173434, "learning_rate": 2.0997154521440097e-07, "logits": -1.1798667907714844, "logps": -82.78071594238281, "loss": 1.7312, "objective": 1.868208646774292, "ranking_idealized": 0.4625000059604645, "ranking_idealized_expo": 0.4625000059604645, "ranking_simple": 0.46666666865348816, "regularize": 1.868208646774292, "step": 640 }, { "dpo_loss": 1.0488131046295166, "epoch": 3.656117146905999, "grad_norm": 1049.7832510532623, "learning_rate": 2.0195125630684428e-07, "logits": -1.2025647163391113, "logps": -81.37299346923828, "loss": 1.7885, "objective": 1.6037089824676514, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5249999761581421, "regularize": 1.6037089824676514, "step": 645 }, { "dpo_loss": 1.022878646850586, "epoch": 3.6844591402928675, "grad_norm": 919.1271980482485, "learning_rate": 1.9404820379287672e-07, "logits": -1.1927168369293213, "logps": -82.80690002441406, "loss": 1.664, "objective": 1.7033976316452026, "ranking_idealized": 0.5041666626930237, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.5041666626930237, "regularize": 1.7033976316452026, "step": 650 }, { "epoch": 3.6844591402928675, "eval_dpo_loss": 3.096806764602661, "eval_logits": -1.2607561349868774, "eval_logps": -88.69917297363281, "eval_loss": 5.923920631408691, "eval_objective": 5.985104084014893, "eval_ranking_idealized": 0.5092975497245789, "eval_ranking_idealized_expo": 0.5092975497245789, "eval_ranking_simple": 0.5113636255264282, "eval_regularize": 5.985104084014893, "eval_runtime": 258.729, "eval_samples_per_second": 22.379, "eval_steps_per_second": 0.935, "step": 650 }, { "dpo_loss": 0.9369535446166992, "epoch": 3.7128011336797355, "grad_norm": 961.5116821455488, "learning_rate": 1.8626549630957395e-07, "logits": -1.2329158782958984, "logps": -81.57703399658203, "loss": 1.6411, "objective": 1.523728847503662, "ranking_idealized": 0.5208333134651184, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5208333134651184, "regularize": 1.523728847503662, "step": 655 }, { "dpo_loss": 0.8511099219322205, "epoch": 3.7411431270666036, "grad_norm": 1089.6596694384646, "learning_rate": 1.7860619515673032e-07, "logits": -1.3094429969787598, "logps": -82.70073699951172, "loss": 1.6123, "objective": 1.475422739982605, "ranking_idealized": 0.5541666746139526, "ranking_idealized_expo": 0.5541666746139526, "ranking_simple": 0.5541666746139526, "regularize": 1.475422739982605, "step": 660 }, { "dpo_loss": 0.9054993987083435, "epoch": 3.769485120453472, "grad_norm": 1093.6896679613712, "learning_rate": 1.7107331309270684e-07, "logits": -1.2157032489776611, "logps": -82.55271911621094, "loss": 1.561, "objective": 1.6129040718078613, "ranking_idealized": 0.5166666507720947, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.512499988079071, "regularize": 1.6129040718078613, "step": 665 }, { "dpo_loss": 0.8416043519973755, "epoch": 3.79782711384034, "grad_norm": 1024.9155399545134, "learning_rate": 1.6366981314937372e-07, "logits": -1.3233702182769775, "logps": -82.39900970458984, "loss": 1.5252, "objective": 1.4049646854400635, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.5916666388511658, "ranking_simple": 0.6000000238418579, "regularize": 1.4049646854400635, "step": 670 }, { "dpo_loss": 0.8841171264648438, "epoch": 3.826169107227208, "grad_norm": 971.974542125377, "learning_rate": 1.5639860746661338e-07, "logits": -1.288584589958191, "logps": -81.51100158691406, "loss": 1.5256, "objective": 1.549899935722351, "ranking_idealized": 0.5458333492279053, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5458333492279053, "regularize": 1.549899935722351, "step": 675 }, { "dpo_loss": 0.9599818587303162, "epoch": 3.8545111006140766, "grad_norm": 930.812509420293, "learning_rate": 1.492625561468393e-07, "logits": -1.2116204500198364, "logps": -81.91743469238281, "loss": 1.4834, "objective": 1.6812348365783691, "ranking_idealized": 0.5083333253860474, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.5083333253860474, "regularize": 1.6812348365783691, "step": 680 }, { "dpo_loss": 0.9112765192985535, "epoch": 3.8828530940009447, "grad_norm": 1163.329406971879, "learning_rate": 1.4226446612998671e-07, "logits": -1.273834228515625, "logps": -81.50404357910156, "loss": 1.4897, "objective": 1.4795509576797485, "ranking_idealized": 0.5166666507720947, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.512499988079071, "regularize": 1.4795509576797485, "step": 685 }, { "dpo_loss": 0.8565966486930847, "epoch": 3.9111950873878127, "grad_norm": 938.1098675569013, "learning_rate": 1.3540709008941147e-07, "logits": -1.2776970863342285, "logps": -81.18965911865234, "loss": 1.4652, "objective": 1.3701138496398926, "ranking_idealized": 0.5458333492279053, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.550000011920929, "regularize": 1.3701138496398926, "step": 690 }, { "dpo_loss": 1.0229411125183105, "epoch": 3.9395370807746812, "grad_norm": 1013.5958054606544, "learning_rate": 1.2869312534913685e-07, "logits": -1.191641092300415, "logps": -82.60445404052734, "loss": 1.3376, "objective": 1.4829381704330444, "ranking_idealized": 0.46666666865348816, "ranking_idealized_expo": 0.46666666865348816, "ranking_simple": 0.4625000059604645, "regularize": 1.4829381704330444, "step": 695 }, { "dpo_loss": 0.8406078815460205, "epoch": 3.9678790741615493, "grad_norm": 1063.5432850972618, "learning_rate": 1.2212521282287093e-07, "logits": -1.2275745868682861, "logps": -82.30086517333984, "loss": 1.3502, "objective": 1.3136423826217651, "ranking_idealized": 0.48750001192092896, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.48750001192092896, "regularize": 1.3136423826217651, "step": 700 }, { "epoch": 3.9678790741615493, "eval_dpo_loss": 3.089524269104004, "eval_logits": -1.2646851539611816, "eval_logps": -88.52361297607422, "eval_loss": 5.917611122131348, "eval_objective": 5.957097053527832, "eval_ranking_idealized": 0.5092975497245789, "eval_ranking_idealized_expo": 0.5092975497245789, "eval_ranking_simple": 0.5134297609329224, "eval_regularize": 5.957097053527832, "eval_runtime": 258.9235, "eval_samples_per_second": 22.362, "eval_steps_per_second": 0.935, "step": 700 }, { "dpo_loss": 0.8173587918281555, "epoch": 3.9962210675484178, "grad_norm": 985.2767504669318, "learning_rate": 1.15705935975212e-07, "logits": -1.1528183221817017, "logps": -80.9789810180664, "loss": 1.3032, "objective": 1.2147972583770752, "ranking_idealized": 0.5291666388511658, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5291666388511658, "regularize": 1.2147972583770752, "step": 705 }, { "dpo_loss": 0.7131792902946472, "epoch": 4.024563060935286, "grad_norm": 1011.8698807645903, "learning_rate": 1.094378198054533e-07, "logits": -1.2421104907989502, "logps": -82.51912689208984, "loss": 1.1035, "objective": 1.1794158220291138, "ranking_idealized": 0.4791666567325592, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.4833333194255829, "regularize": 1.1794158220291138, "step": 710 }, { "dpo_loss": 0.7108194231987, "epoch": 4.052905054322154, "grad_norm": 940.6537803599663, "learning_rate": 1.0332332985438247e-07, "logits": -1.1998772621154785, "logps": -81.95320892333984, "loss": 1.0461, "objective": 0.9881690740585327, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5333333611488342, "regularize": 0.9881690740585327, "step": 715 }, { "dpo_loss": 0.6712031960487366, "epoch": 4.081247047709022, "grad_norm": 960.2681465991928, "learning_rate": 9.736487123447068e-08, "logits": -1.15168035030365, "logps": -83.81200408935547, "loss": 1.0468, "objective": 1.1416888236999512, "ranking_idealized": 0.5208333134651184, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5249999761581421, "regularize": 1.1416888236999512, "step": 720 }, { "dpo_loss": 0.6989570260047913, "epoch": 4.109589041095891, "grad_norm": 935.0432434834537, "learning_rate": 9.156478768383058e-08, "logits": -1.2237892150878906, "logps": -81.69219207763672, "loss": 1.074, "objective": 0.9603613615036011, "ranking_idealized": 0.5291666388511658, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.5291666388511658, "regularize": 0.9603613615036011, "step": 725 }, { "dpo_loss": 0.7995573878288269, "epoch": 4.137931034482759, "grad_norm": 980.3733885345745, "learning_rate": 8.592536064431466e-08, "logits": -1.2934256792068481, "logps": -81.94860076904297, "loss": 1.0596, "objective": 1.06425142288208, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5375000238418579, "regularize": 1.06425142288208, "step": 730 }, { "dpo_loss": 0.7126405835151672, "epoch": 4.166273027869627, "grad_norm": 1007.1386546772866, "learning_rate": 8.044880836411888e-08, "logits": -1.1827558279037476, "logps": -82.89656066894531, "loss": 1.0028, "objective": 0.9806023240089417, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5208333134651184, "regularize": 0.9806023240089417, "step": 735 }, { "dpo_loss": 0.7121079564094543, "epoch": 4.194615021256495, "grad_norm": 940.1459069064864, "learning_rate": 7.513728502524286e-08, "logits": -1.2114641666412354, "logps": -82.5162582397461, "loss": 1.0102, "objective": 0.9698477387428284, "ranking_idealized": 0.5208333134651184, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5208333134651184, "regularize": 0.9698477387428284, "step": 740 }, { "dpo_loss": 0.7223809957504272, "epoch": 4.222957014643363, "grad_norm": 956.701027117306, "learning_rate": 6.999287989614971e-08, "logits": -1.274079442024231, "logps": -80.06112670898438, "loss": 0.9886, "objective": 1.010962724685669, "ranking_idealized": 0.48750001192092896, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.4958333373069763, "regularize": 1.010962724685669, "step": 745 }, { "dpo_loss": 0.6834555268287659, "epoch": 4.251299008030231, "grad_norm": 994.5271297786792, "learning_rate": 6.501761650996052e-08, "logits": -1.3588440418243408, "logps": -82.0845947265625, "loss": 1.0052, "objective": 1.0897630453109741, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5291666388511658, "regularize": 1.0897630453109741, "step": 750 }, { "epoch": 4.251299008030231, "eval_dpo_loss": 3.103642702102661, "eval_logits": -1.2629750967025757, "eval_logps": -88.36180877685547, "eval_loss": 5.9642486572265625, "eval_objective": 6.00606632232666, "eval_ranking_idealized": 0.5092975497245789, "eval_ranking_idealized_expo": 0.5092975497245789, "eval_ranking_simple": 0.5134297609329224, "eval_regularize": 6.00606632232666, "eval_runtime": 258.8045, "eval_samples_per_second": 22.372, "eval_steps_per_second": 0.935, "step": 750 }, { "dpo_loss": 0.6240705251693726, "epoch": 4.2796410014171, "grad_norm": 928.8030739675138, "learning_rate": 6.021345186850418e-08, "logits": -1.2852017879486084, "logps": -82.10335540771484, "loss": 0.972, "objective": 1.01486337184906, "ranking_idealized": 0.5333333611488342, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5333333611488342, "regularize": 1.01486337184906, "step": 755 }, { "dpo_loss": 0.7065821886062622, "epoch": 4.307982994803968, "grad_norm": 929.3861266719559, "learning_rate": 5.5582275672538316e-08, "logits": -1.2621345520019531, "logps": -82.05753326416016, "loss": 0.8992, "objective": 0.8614501953125, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.5625, "ranking_simple": 0.5666666626930237, "regularize": 0.8614501953125, "step": 760 }, { "dpo_loss": 0.6562435030937195, "epoch": 4.336324988190836, "grad_norm": 977.0057816667661, "learning_rate": 5.112590957844232e-08, "logits": -1.2853424549102783, "logps": -82.94247436523438, "loss": 0.9593, "objective": 0.8854343295097351, "ranking_idealized": 0.5291666388511658, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5333333611488342, "regularize": 0.8854343295097351, "step": 765 }, { "dpo_loss": 0.7083169221878052, "epoch": 4.364666981577704, "grad_norm": 932.8658870761088, "learning_rate": 4.684610648167503e-08, "logits": -1.1741254329681396, "logps": -81.14856719970703, "loss": 0.9364, "objective": 0.9174091815948486, "ranking_idealized": 0.5083333253860474, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.5083333253860474, "regularize": 0.9174091815948486, "step": 770 }, { "dpo_loss": 0.6173561811447144, "epoch": 4.393008974964572, "grad_norm": 973.1322872774431, "learning_rate": 4.274454982728032e-08, "logits": -1.2410908937454224, "logps": -81.94947052001953, "loss": 0.8846, "objective": 0.7543167471885681, "ranking_idealized": 0.5083333253860474, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.5083333253860474, "regularize": 0.7543167471885681, "step": 775 }, { "dpo_loss": 0.6880941390991211, "epoch": 4.42135096835144, "grad_norm": 934.0673376048558, "learning_rate": 3.882285294770937e-08, "logits": -1.2685768604278564, "logps": -80.74879455566406, "loss": 0.8891, "objective": 0.9145664572715759, "ranking_idealized": 0.5041666626930237, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5041666626930237, "regularize": 0.9145664572715759, "step": 780 }, { "dpo_loss": 0.6857554316520691, "epoch": 4.449692961738309, "grad_norm": 906.6964623786657, "learning_rate": 3.508255842822255e-08, "logits": -1.286802887916565, "logps": -80.75093841552734, "loss": 0.8708, "objective": 0.865050196647644, "ranking_idealized": 0.5291666388511658, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.5249999761581421, "regularize": 0.865050196647644, "step": 785 }, { "dpo_loss": 0.6727093458175659, "epoch": 4.478034955125177, "grad_norm": 954.6542045385403, "learning_rate": 3.15251375001192e-08, "logits": -1.248421549797058, "logps": -83.45836639404297, "loss": 0.8584, "objective": 0.825614869594574, "ranking_idealized": 0.5583333373069763, "ranking_idealized_expo": 0.5541666746139526, "ranking_simple": 0.5541666746139526, "regularize": 0.825614869594574, "step": 790 }, { "dpo_loss": 0.7268415689468384, "epoch": 4.506376948512045, "grad_norm": 940.0246507195186, "learning_rate": 2.8151989462033787e-08, "logits": -1.1517890691757202, "logps": -82.83182525634766, "loss": 0.8256, "objective": 0.8579443693161011, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.550000011920929, "regularize": 0.8579443693161011, "step": 795 }, { "dpo_loss": 0.6633224487304688, "epoch": 4.534718941898913, "grad_norm": 974.8383451116964, "learning_rate": 2.4964441129527335e-08, "logits": -1.148630976676941, "logps": -82.155029296875, "loss": 0.8548, "objective": 0.8071673512458801, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.512499988079071, "regularize": 0.8071673512458801, "step": 800 }, { "epoch": 4.534718941898913, "eval_dpo_loss": 3.0852909088134766, "eval_logits": -1.2662479877471924, "eval_logps": -88.35336303710938, "eval_loss": 5.92377233505249, "eval_objective": 5.97105073928833, "eval_ranking_idealized": 0.5092975497245789, "eval_ranking_idealized_expo": 0.5092975497245789, "eval_ranking_simple": 0.5134297609329224, "eval_regularize": 5.97105073928833, "eval_runtime": 258.7722, "eval_samples_per_second": 22.375, "eval_steps_per_second": 0.935, "step": 800 }, { "dpo_loss": 0.6257317662239075, "epoch": 4.563060935285781, "grad_norm": 904.8852856060951, "learning_rate": 2.1963746313188757e-08, "logits": -1.2208141088485718, "logps": -81.56495666503906, "loss": 0.8065, "objective": 0.8274087905883789, "ranking_idealized": 0.46666666865348816, "ranking_idealized_expo": 0.4583333432674408, "ranking_simple": 0.46666666865348816, "regularize": 0.8274087905883789, "step": 805 }, { "dpo_loss": 0.6851524710655212, "epoch": 4.59140292867265, "grad_norm": 950.7745711731907, "learning_rate": 1.915108532545351e-08, "logits": -1.284138798713684, "logps": -80.96287536621094, "loss": 0.7829, "objective": 0.8247645497322083, "ranking_idealized": 0.4583333432674408, "ranking_idealized_expo": 0.4583333432674408, "ranking_simple": 0.4625000059604645, "regularize": 0.8247645497322083, "step": 810 }, { "dpo_loss": 0.6184301972389221, "epoch": 4.619744922059518, "grad_norm": 910.9864796470945, "learning_rate": 1.6527564516331638e-08, "logits": -1.1581400632858276, "logps": -82.53880310058594, "loss": 0.7758, "objective": 0.8514427542686462, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.5874999761581421, "ranking_simple": 0.5916666388511658, "regularize": 0.8514427542686462, "step": 815 }, { "dpo_loss": 0.7527021765708923, "epoch": 4.648086915446386, "grad_norm": 990.2454056593273, "learning_rate": 1.4094215838229172e-08, "logits": -1.2537620067596436, "logps": -82.07845306396484, "loss": 0.8496, "objective": 0.8979706168174744, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.5625, "ranking_simple": 0.5583333373069763, "regularize": 0.8979706168174744, "step": 820 }, { "dpo_loss": 0.6094750165939331, "epoch": 4.6764289088332545, "grad_norm": 933.278247470779, "learning_rate": 1.1851996440033318e-08, "logits": -1.1612147092819214, "logps": -80.19402313232422, "loss": 0.7784, "objective": 0.6954202651977539, "ranking_idealized": 0.5, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.6954202651977539, "step": 825 }, { "dpo_loss": 0.7630098462104797, "epoch": 4.7047709022201225, "grad_norm": 1034.6630104905564, "learning_rate": 9.801788290621505e-09, "logits": -1.3119471073150635, "logps": -82.28199768066406, "loss": 0.8393, "objective": 0.9094979166984558, "ranking_idealized": 0.5166666507720947, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5208333134651184, "regularize": 0.9094979166984558, "step": 830 }, { "dpo_loss": 0.6389487385749817, "epoch": 4.733112895606991, "grad_norm": 919.014869305993, "learning_rate": 7.944397831941951e-09, "logits": -1.2169098854064941, "logps": -81.839111328125, "loss": 0.7883, "objective": 0.7863165140151978, "ranking_idealized": 0.49166667461395264, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.4958333373069763, "regularize": 0.7863165140151978, "step": 835 }, { "dpo_loss": 0.6490210890769958, "epoch": 4.7614548889938595, "grad_norm": 984.0918792700685, "learning_rate": 6.280555661802856e-09, "logits": -1.2279409170150757, "logps": -82.31600189208984, "loss": 0.7359, "objective": 0.7738173007965088, "ranking_idealized": 0.5541666746139526, "ranking_idealized_expo": 0.5541666746139526, "ranking_simple": 0.5541666746139526, "regularize": 0.7738173007965088, "step": 840 }, { "dpo_loss": 0.6469201445579529, "epoch": 4.7897968823807275, "grad_norm": 943.5599388069303, "learning_rate": 4.810916246494157e-09, "logits": -1.2585629224777222, "logps": -82.3524398803711, "loss": 0.7615, "objective": 0.7981647849082947, "ranking_idealized": 0.4583333432674408, "ranking_idealized_expo": 0.4541666805744171, "ranking_simple": 0.4625000059604645, "regularize": 0.7981647849082947, "step": 845 }, { "dpo_loss": 0.6333919167518616, "epoch": 4.818138875767596, "grad_norm": 953.2591490386869, "learning_rate": 3.5360576633558513e-09, "logits": -1.2325036525726318, "logps": -81.02188873291016, "loss": 0.7765, "objective": 0.7740827798843384, "ranking_idealized": 0.5291666388511658, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.5291666388511658, "regularize": 0.7740827798843384, "step": 850 }, { "epoch": 4.818138875767596, "eval_dpo_loss": 3.091606378555298, "eval_logits": -1.266028642654419, "eval_logps": -88.38737487792969, "eval_loss": 5.932301044464111, "eval_objective": 5.976984977722168, "eval_ranking_idealized": 0.5092975497245789, "eval_ranking_idealized_expo": 0.5092975497245789, "eval_ranking_simple": 0.5134297609329224, "eval_regularize": 5.976984977722168, "eval_runtime": 259.1307, "eval_samples_per_second": 22.344, "eval_steps_per_second": 0.934, "step": 850 }, { "dpo_loss": 0.7061713337898254, "epoch": 4.846480869154464, "grad_norm": 938.2798417262723, "learning_rate": 2.4564813733932155e-09, "logits": -1.2008044719696045, "logps": -81.2861328125, "loss": 0.7477, "objective": 0.7695434093475342, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5375000238418579, "regularize": 0.7695434093475342, "step": 855 }, { "dpo_loss": 0.6985270380973816, "epoch": 4.874822862541333, "grad_norm": 978.3840453547333, "learning_rate": 1.5726120240288631e-09, "logits": -1.1891288757324219, "logps": -81.2249984741211, "loss": 0.787, "objective": 0.8799866437911987, "ranking_idealized": 0.5541666746139526, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.550000011920929, "regularize": 0.8799866437911987, "step": 860 }, { "dpo_loss": 0.6121171116828918, "epoch": 4.903164855928201, "grad_norm": 903.3650703276869, "learning_rate": 8.847972820693051e-10, "logits": -1.255650281906128, "logps": -80.4308090209961, "loss": 0.6987, "objective": 0.6325153708457947, "ranking_idealized": 0.4958333373069763, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.4958333373069763, "regularize": 0.6325153708457947, "step": 865 }, { "dpo_loss": 0.6306089162826538, "epoch": 4.931506849315069, "grad_norm": 920.898527815901, "learning_rate": 3.933076969516724e-10, "logits": -1.288960576057434, "logps": -81.9923095703125, "loss": 0.6978, "objective": 0.6587303876876831, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.512499988079071, "regularize": 0.6587303876876831, "step": 870 }, { "dpo_loss": 0.72224360704422, "epoch": 4.959848842701937, "grad_norm": 973.4560843637579, "learning_rate": 9.833659432367803e-11, "logits": -1.2122024297714233, "logps": -81.87159729003906, "loss": 0.7712, "objective": 0.9232720136642456, "ranking_idealized": 0.5458333492279053, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.550000011920929, "regularize": 0.9232720136642456, "step": 875 }, { "dpo_loss": 0.6564382910728455, "epoch": 4.988190836088805, "grad_norm": 950.1802722480554, "learning_rate": 0.0, "logits": -1.284375786781311, "logps": -82.72828674316406, "loss": 0.7367, "objective": 0.7989345788955688, "ranking_idealized": 0.5291666388511658, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.5291666388511658, "regularize": 0.7989345788955688, "step": 880 }, { "epoch": 4.988190836088805, "step": 880, "total_flos": 0.0, "train_loss": 2.940262828902765, "train_runtime": 35074.095, "train_samples_per_second": 7.242, "train_steps_per_second": 0.025 } ], "logging_steps": 5, "max_steps": 880, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }