diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,2969 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 4.988190836088805, + "eval_steps": 50, + "global_step": 880, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "dpo_loss": 0.6931471824645996, + "epoch": 0.005668398677373642, + "grad_norm": 1341.3653828621927, + "learning_rate": 1.1363636363636363e-08, + "logits": -1.3147305250167847, + "logps": -88.0877456665039, + "loss": 0.4113, + "objective": 0.41588976979255676, + "ranking_idealized": 0.5208333134651184, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.5208333134651184, + "regularize": 0.41588976979255676, + "step": 1 + }, + { + "dpo_loss": 0.6927290558815002, + "epoch": 0.02834199338686821, + "grad_norm": 1318.7970843939129, + "learning_rate": 5.6818181818181815e-08, + "logits": -1.367867350578308, + "logps": -84.43714141845703, + "loss": 0.4128, + "objective": 0.3797340393066406, + "ranking_idealized": 0.546875, + "ranking_idealized_expo": 0.546875, + "ranking_simple": 0.546875, + "regularize": 0.3797340393066406, + "step": 5 + }, + { + "dpo_loss": 0.66615891456604, + "epoch": 0.05668398677373642, + "grad_norm": 1529.7102214452402, + "learning_rate": 1.1363636363636363e-07, + "logits": -1.446859359741211, + "logps": -83.48344421386719, + "loss": 0.4289, + "objective": 0.4494988024234772, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.5083333253860474, + "regularize": 0.4494988024234772, + "step": 10 + }, + { + "dpo_loss": 0.6892092823982239, + "epoch": 0.08502598016060463, + "grad_norm": 1398.9545573108187, + "learning_rate": 1.7045454545454543e-07, + "logits": -1.410345196723938, + "logps": -83.83523559570312, + "loss": 0.5083, + "objective": 0.5087102055549622, + "ranking_idealized": 0.5041666626930237, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.5083333253860474, + "regularize": 0.5087102055549622, + "step": 15 + }, + { + "dpo_loss": 0.737316906452179, + "epoch": 0.11336797354747284, + "grad_norm": 1480.9989705702396, + "learning_rate": 2.2727272727272726e-07, + "logits": -1.397745966911316, + "logps": -84.64732360839844, + "loss": 0.5834, + "objective": 0.6373786330223083, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5375000238418579, + "regularize": 0.6373786330223083, + "step": 20 + }, + { + "dpo_loss": 0.6649187803268433, + "epoch": 0.14170996693434104, + "grad_norm": 1589.2205546755508, + "learning_rate": 2.840909090909091e-07, + "logits": -1.4405299425125122, + "logps": -84.5063705444336, + "loss": 0.7072, + "objective": 0.7172243595123291, + "ranking_idealized": 0.4958333373069763, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.4958333373069763, + "regularize": 0.7172243595123291, + "step": 25 + }, + { + "dpo_loss": 0.7319389581680298, + "epoch": 0.17005196032120926, + "grad_norm": 1608.2967040181402, + "learning_rate": 3.4090909090909085e-07, + "logits": -1.4063345193862915, + "logps": -85.10441589355469, + "loss": 0.9251, + "objective": 0.919275164604187, + "ranking_idealized": 0.5416666865348816, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.5416666865348816, + "regularize": 0.919275164604187, + "step": 30 + }, + { + "dpo_loss": 0.7343574166297913, + "epoch": 0.19839395370807747, + "grad_norm": 1660.500800571558, + "learning_rate": 3.977272727272727e-07, + "logits": -1.3780211210250854, + "logps": -83.85320281982422, + "loss": 1.1082, + "objective": 1.1600453853607178, + "ranking_idealized": 0.5041666626930237, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.5, + "regularize": 1.1600453853607178, + "step": 35 + }, + { + "dpo_loss": 0.7147431969642639, + "epoch": 0.22673594709494568, + "grad_norm": 1529.1766380039583, + "learning_rate": 4.545454545454545e-07, + "logits": -1.3637299537658691, + "logps": -84.27665710449219, + "loss": 1.1991, + "objective": 1.0680582523345947, + "ranking_idealized": 0.5208333134651184, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.5208333134651184, + "regularize": 1.0680582523345947, + "step": 40 + }, + { + "dpo_loss": 0.8464261889457703, + "epoch": 0.25507794048181387, + "grad_norm": 1547.4803845815225, + "learning_rate": 5.113636363636363e-07, + "logits": -1.483019471168518, + "logps": -85.49544525146484, + "loss": 1.4787, + "objective": 1.7124279737472534, + "ranking_idealized": 0.5416666865348816, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5416666865348816, + "regularize": 1.7124279737472534, + "step": 45 + }, + { + "dpo_loss": 0.9592286348342896, + "epoch": 0.2834199338686821, + "grad_norm": 1190.1245065397072, + "learning_rate": 5.681818181818182e-07, + "logits": -1.3799251317977905, + "logps": -83.56061553955078, + "loss": 1.7171, + "objective": 1.6765538454055786, + "ranking_idealized": 0.42500001192092896, + "ranking_idealized_expo": 0.42500001192092896, + "ranking_simple": 0.42500001192092896, + "regularize": 1.6765538454055786, + "step": 50 + }, + { + "epoch": 0.2834199338686821, + "eval_dpo_loss": 0.8390654921531677, + "eval_logits": -1.3979839086532593, + "eval_logps": -91.42163848876953, + "eval_loss": 0.9451757073402405, + "eval_objective": 0.9804208874702454, + "eval_ranking_idealized": 0.5092975497245789, + "eval_ranking_idealized_expo": 0.5092975497245789, + "eval_ranking_simple": 0.5113636255264282, + "eval_regularize": 0.9804208874702454, + "eval_runtime": 260.041, + "eval_samples_per_second": 22.266, + "eval_steps_per_second": 0.931, + "step": 50 + }, + { + "dpo_loss": 1.1626336574554443, + "epoch": 0.3117619272555503, + "grad_norm": 1379.607675790991, + "learning_rate": 6.249999999999999e-07, + "logits": -1.4113659858703613, + "logps": -85.0433578491211, + "loss": 1.963, + "objective": 2.1214394569396973, + "ranking_idealized": 0.5416666865348816, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.5416666865348816, + "regularize": 2.1214394569396973, + "step": 55 + }, + { + "dpo_loss": 1.1583107709884644, + "epoch": 0.3401039206424185, + "grad_norm": 1583.6531529945255, + "learning_rate": 6.818181818181817e-07, + "logits": -1.387775182723999, + "logps": -83.92105102539062, + "loss": 2.1311, + "objective": 2.187351703643799, + "ranking_idealized": 0.5166666507720947, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5041666626930237, + "regularize": 2.187351703643799, + "step": 60 + }, + { + "dpo_loss": 1.1024636030197144, + "epoch": 0.3684459140292867, + "grad_norm": 1400.9200642265332, + "learning_rate": 7.386363636363636e-07, + "logits": -1.3549267053604126, + "logps": -84.21534729003906, + "loss": 2.5118, + "objective": 2.5817580223083496, + "ranking_idealized": 0.5083333253860474, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.5041666626930237, + "regularize": 2.5817580223083496, + "step": 65 + }, + { + "dpo_loss": 1.401374340057373, + "epoch": 0.39678790741615494, + "grad_norm": 1397.7577328959405, + "learning_rate": 7.954545454545454e-07, + "logits": -1.434369444847107, + "logps": -85.42965698242188, + "loss": 2.8155, + "objective": 2.7807960510253906, + "ranking_idealized": 0.5166666507720947, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.5083333253860474, + "regularize": 2.7807960510253906, + "step": 70 + }, + { + "dpo_loss": 1.3847529888153076, + "epoch": 0.42512990080302315, + "grad_norm": 1574.992862602621, + "learning_rate": 8.522727272727273e-07, + "logits": -1.4534552097320557, + "logps": -85.975341796875, + "loss": 3.0239, + "objective": 2.7482104301452637, + "ranking_idealized": 0.5458333492279053, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.5458333492279053, + "regularize": 2.7482104301452637, + "step": 75 + }, + { + "dpo_loss": 1.6628150939941406, + "epoch": 0.45347189418989137, + "grad_norm": 1317.6328063663461, + "learning_rate": 9.09090909090909e-07, + "logits": -1.4975560903549194, + "logps": -85.59205627441406, + "loss": 3.0523, + "objective": 3.2058732509613037, + "ranking_idealized": 0.5166666507720947, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.5166666507720947, + "regularize": 3.2058732509613037, + "step": 80 + }, + { + "dpo_loss": 1.5282264947891235, + "epoch": 0.4818138875767596, + "grad_norm": 1409.5000185848141, + "learning_rate": 9.65909090909091e-07, + "logits": -1.4272305965423584, + "logps": -83.47660827636719, + "loss": 3.6628, + "objective": 3.7103846073150635, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5208333134651184, + "regularize": 3.7103846073150635, + "step": 85 + }, + { + "dpo_loss": 2.017771005630493, + "epoch": 0.5101558809636277, + "grad_norm": 1413.0881700978623, + "learning_rate": 9.999842657116664e-07, + "logits": -1.2261414527893066, + "logps": -84.69164276123047, + "loss": 3.9801, + "objective": 3.7396154403686523, + "ranking_idealized": 0.4791666567325592, + "ranking_idealized_expo": 0.4791666567325592, + "ranking_simple": 0.4749999940395355, + "regularize": 3.7396154403686523, + "step": 90 + }, + { + "dpo_loss": 2.145413875579834, + "epoch": 0.538497874350496, + "grad_norm": 1451.2644391659398, + "learning_rate": 9.998072663403656e-07, + "logits": -1.3078831434249878, + "logps": -83.98829650878906, + "loss": 4.1074, + "objective": 4.225299835205078, + "ranking_idealized": 0.5458333492279053, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.550000011920929, + "regularize": 4.225299835205078, + "step": 95 + }, + { + "dpo_loss": 2.2973792552948, + "epoch": 0.5668398677373642, + "grad_norm": 1458.4336577249642, + "learning_rate": 9.99433669591504e-07, + "logits": -1.3813899755477905, + "logps": -85.42733001708984, + "loss": 4.4116, + "objective": 4.421018600463867, + "ranking_idealized": 0.5416666865348816, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.5416666865348816, + "regularize": 4.421018600463867, + "step": 100 + }, + { + "epoch": 0.5668398677373642, + "eval_dpo_loss": 1.3937046527862549, + "eval_logits": -1.3645607233047485, + "eval_logps": -91.35843658447266, + "eval_loss": 2.288909912109375, + "eval_objective": 2.2847275733947754, + "eval_ranking_idealized": 0.5092975497245789, + "eval_ranking_idealized_expo": 0.5092975497245789, + "eval_ranking_simple": 0.5144628286361694, + "eval_regularize": 2.2847275733947754, + "eval_runtime": 259.4011, + "eval_samples_per_second": 22.321, + "eval_steps_per_second": 0.933, + "step": 100 + }, + { + "dpo_loss": 2.3384857177734375, + "epoch": 0.5951818611242324, + "grad_norm": 1377.8614811370987, + "learning_rate": 9.988636224180095e-07, + "logits": -1.2764217853546143, + "logps": -85.17194366455078, + "loss": 4.8397, + "objective": 4.943901062011719, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.5541666746139526, + "regularize": 4.943901062011719, + "step": 105 + }, + { + "dpo_loss": 2.419879198074341, + "epoch": 0.6235238545111006, + "grad_norm": 1709.2314342435861, + "learning_rate": 9.980973490458728e-07, + "logits": -1.4455102682113647, + "logps": -84.0779037475586, + "loss": 4.9241, + "objective": 4.433398723602295, + "ranking_idealized": 0.4541666805744171, + "ranking_idealized_expo": 0.44999998807907104, + "ranking_simple": 0.44583332538604736, + "regularize": 4.433398723602295, + "step": 110 + }, + { + "dpo_loss": 2.3997786045074463, + "epoch": 0.6518658478979689, + "grad_norm": 1267.7408034843309, + "learning_rate": 9.971351508859486e-07, + "logits": -1.403380036354065, + "logps": -83.38719940185547, + "loss": 4.8313, + "objective": 4.724060535430908, + "ranking_idealized": 0.4833333194255829, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.48750001192092896, + "regularize": 4.724060535430908, + "step": 115 + }, + { + "dpo_loss": 2.01283597946167, + "epoch": 0.680207841284837, + "grad_norm": 1334.5055640243738, + "learning_rate": 9.959774064153975e-07, + "logits": -1.3471440076828003, + "logps": -84.59120178222656, + "loss": 4.9092, + "objective": 4.763902187347412, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.5333333611488342, + "regularize": 4.763902187347412, + "step": 120 + }, + { + "dpo_loss": 2.8711302280426025, + "epoch": 0.7085498346717053, + "grad_norm": 1217.4260816961178, + "learning_rate": 9.94624571028813e-07, + "logits": -1.2994908094406128, + "logps": -83.49886322021484, + "loss": 5.2406, + "objective": 5.268767356872559, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.550000011920929, + "regularize": 5.268767356872559, + "step": 125 + }, + { + "dpo_loss": 2.1492836475372314, + "epoch": 0.7368918280585735, + "grad_norm": 1181.5936213316104, + "learning_rate": 9.930771768590933e-07, + "logits": -1.4135076999664307, + "logps": -82.80963897705078, + "loss": 4.7897, + "objective": 4.472428798675537, + "ranking_idealized": 0.44999998807907104, + "ranking_idealized_expo": 0.44999998807907104, + "ranking_simple": 0.44999998807907104, + "regularize": 4.472428798675537, + "step": 130 + }, + { + "dpo_loss": 2.715928554534912, + "epoch": 0.7652338214454416, + "grad_norm": 1143.7481041860115, + "learning_rate": 9.91335832568129e-07, + "logits": -1.387623906135559, + "logps": -84.99431610107422, + "loss": 5.1988, + "objective": 5.63712215423584, + "ranking_idealized": 0.5291666388511658, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.5291666388511658, + "regularize": 5.63712215423584, + "step": 135 + }, + { + "dpo_loss": 2.748910665512085, + "epoch": 0.7935758148323099, + "grad_norm": 1147.9091652909822, + "learning_rate": 9.894012231073895e-07, + "logits": -1.2905735969543457, + "logps": -84.26557922363281, + "loss": 5.4168, + "objective": 5.596283912658691, + "ranking_idealized": 0.5041666626930237, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.512499988079071, + "regularize": 5.596283912658691, + "step": 140 + }, + { + "dpo_loss": 2.3077232837677, + "epoch": 0.821917808219178, + "grad_norm": 1202.3074237963756, + "learning_rate": 9.872741094484964e-07, + "logits": -1.3657087087631226, + "logps": -83.96611785888672, + "loss": 5.3248, + "objective": 4.7676682472229, + "ranking_idealized": 0.4833333194255829, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.48750001192092896, + "regularize": 4.7676682472229, + "step": 145 + }, + { + "dpo_loss": 2.2885777950286865, + "epoch": 0.8502598016060463, + "grad_norm": 1261.3028504954063, + "learning_rate": 9.849553282839024e-07, + "logits": -1.350846767425537, + "logps": -82.00806427001953, + "loss": 5.641, + "objective": 5.300591945648193, + "ranking_idealized": 0.5416666865348816, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5416666865348816, + "regularize": 5.300591945648193, + "step": 150 + }, + { + "epoch": 0.8502598016060463, + "eval_dpo_loss": 1.8989161252975464, + "eval_logits": -1.361150860786438, + "eval_logps": -89.60128784179688, + "eval_loss": 3.659212827682495, + "eval_objective": 3.6993324756622314, + "eval_ranking_idealized": 0.5092975497245789, + "eval_ranking_idealized_expo": 0.5092975497245789, + "eval_ranking_simple": 0.5123966932296753, + "eval_regularize": 3.6993324756622314, + "eval_runtime": 258.9031, + "eval_samples_per_second": 22.364, + "eval_steps_per_second": 0.935, + "step": 150 + }, + { + "dpo_loss": 2.7537026405334473, + "epoch": 0.8786017949929145, + "grad_norm": 1297.1182521614555, + "learning_rate": 9.824457916977784e-07, + "logits": -1.358762264251709, + "logps": -81.92320251464844, + "loss": 5.4151, + "objective": 5.554434776306152, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5249999761581421, + "regularize": 5.554434776306152, + "step": 155 + }, + { + "dpo_loss": 2.6335387229919434, + "epoch": 0.9069437883797827, + "grad_norm": 1187.7670981291164, + "learning_rate": 9.797464868072486e-07, + "logits": -1.2611459493637085, + "logps": -83.41938018798828, + "loss": 5.5854, + "objective": 5.695128917694092, + "ranking_idealized": 0.4958333373069763, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.48750001192092896, + "regularize": 5.695128917694092, + "step": 160 + }, + { + "dpo_loss": 2.661656618118286, + "epoch": 0.9352857817666509, + "grad_norm": 1120.1403651445853, + "learning_rate": 9.768584753741134e-07, + "logits": -1.2767577171325684, + "logps": -84.16160583496094, + "loss": 5.4701, + "objective": 5.190924644470215, + "ranking_idealized": 0.5208333134651184, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.5208333134651184, + "regularize": 5.190924644470215, + "step": 165 + }, + { + "dpo_loss": 3.1157445907592773, + "epoch": 0.9636277751535192, + "grad_norm": 1093.9866208390724, + "learning_rate": 9.737828933872073e-07, + "logits": -1.3006123304367065, + "logps": -84.45008087158203, + "loss": 5.3421, + "objective": 5.613492488861084, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.5, + "regularize": 5.613492488861084, + "step": 170 + }, + { + "dpo_loss": 2.3348023891448975, + "epoch": 0.9919697685403873, + "grad_norm": 1151.3293772515624, + "learning_rate": 9.705209506155634e-07, + "logits": -1.2408747673034668, + "logps": -84.64601135253906, + "loss": 5.2759, + "objective": 4.945895671844482, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.574999988079071, + "regularize": 4.945895671844482, + "step": 175 + }, + { + "dpo_loss": 2.864588737487793, + "epoch": 1.0203117619272555, + "grad_norm": 1140.0762636078218, + "learning_rate": 9.670739301325534e-07, + "logits": -1.3873549699783325, + "logps": -83.78386688232422, + "loss": 5.4744, + "objective": 5.529939651489258, + "ranking_idealized": 0.5416666865348816, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.5375000238418579, + "regularize": 5.529939651489258, + "step": 180 + }, + { + "dpo_loss": 2.8053672313690186, + "epoch": 1.0486537553141237, + "grad_norm": 1077.7726094731238, + "learning_rate": 9.63443187811197e-07, + "logits": -1.2356277704238892, + "logps": -84.0350341796875, + "loss": 5.2551, + "objective": 5.298059463500977, + "ranking_idealized": 0.4833333194255829, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.48750001192092896, + "regularize": 5.298059463500977, + "step": 185 + }, + { + "dpo_loss": 2.420074462890625, + "epoch": 1.076995748700992, + "grad_norm": 1190.6128199162288, + "learning_rate": 9.596301517908328e-07, + "logits": -1.3124566078186035, + "logps": -84.89928436279297, + "loss": 5.3934, + "objective": 5.610664367675781, + "ranking_idealized": 0.5541666746139526, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.5458333492279053, + "regularize": 5.610664367675781, + "step": 190 + }, + { + "dpo_loss": 2.3999674320220947, + "epoch": 1.10533774208786, + "grad_norm": 1115.8540591706928, + "learning_rate": 9.556363219153662e-07, + "logits": -1.2911279201507568, + "logps": -84.42256927490234, + "loss": 5.3254, + "objective": 5.321680545806885, + "ranking_idealized": 0.5666666626930237, + "ranking_idealized_expo": 0.5666666626930237, + "ranking_simple": 0.5666666626930237, + "regularize": 5.321680545806885, + "step": 195 + }, + { + "dpo_loss": 2.4562041759490967, + "epoch": 1.1336797354747283, + "grad_norm": 1209.8487619605175, + "learning_rate": 9.514632691433106e-07, + "logits": -1.357124924659729, + "logps": -84.73216247558594, + "loss": 5.6662, + "objective": 5.265989303588867, + "ranking_idealized": 0.5083333253860474, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.5083333253860474, + "regularize": 5.265989303588867, + "step": 200 + }, + { + "epoch": 1.1336797354747283, + "eval_dpo_loss": 2.5621941089630127, + "eval_logits": -1.312853455543518, + "eval_logps": -91.82030487060547, + "eval_loss": 4.9017205238342285, + "eval_objective": 5.1433634757995605, + "eval_ranking_idealized": 0.5092975497245789, + "eval_ranking_idealized_expo": 0.5092975497245789, + "eval_ranking_simple": 0.5134297609329224, + "eval_regularize": 5.1433634757995605, + "eval_runtime": 259.021, + "eval_samples_per_second": 22.353, + "eval_steps_per_second": 0.934, + "step": 200 + }, + { + "dpo_loss": 2.70652174949646, + "epoch": 1.1620217288615966, + "grad_norm": 1176.9552446858397, + "learning_rate": 9.471126349298556e-07, + "logits": -1.3222142457962036, + "logps": -84.78860473632812, + "loss": 5.5162, + "objective": 5.902441501617432, + "ranking_idealized": 0.48750001192092896, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.5, + "regularize": 5.902441501617432, + "step": 205 + }, + { + "dpo_loss": 2.1185081005096436, + "epoch": 1.1903637222484649, + "grad_norm": 1095.8529561562762, + "learning_rate": 9.425861305812081e-07, + "logits": -1.302480936050415, + "logps": -84.719482421875, + "loss": 5.425, + "objective": 5.670342922210693, + "ranking_idealized": 0.5, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.5, + "regularize": 5.670342922210693, + "step": 210 + }, + { + "dpo_loss": 2.6837000846862793, + "epoch": 1.2187057156353331, + "grad_norm": 1062.9514542837012, + "learning_rate": 9.378855365814557e-07, + "logits": -1.273558497428894, + "logps": -84.21690368652344, + "loss": 5.3289, + "objective": 5.31361722946167, + "ranking_idealized": 0.48750001192092896, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.48750001192092896, + "regularize": 5.31361722946167, + "step": 215 + }, + { + "dpo_loss": 2.5074095726013184, + "epoch": 1.2470477090222012, + "grad_norm": 1053.1396201008674, + "learning_rate": 9.330127018922193e-07, + "logits": -1.1912199258804321, + "logps": -83.59181213378906, + "loss": 5.2229, + "objective": 5.1353912353515625, + "ranking_idealized": 0.6041666865348816, + "ranking_idealized_expo": 0.6000000238418579, + "ranking_simple": 0.5958333611488342, + "regularize": 5.1353912353515625, + "step": 220 + }, + { + "dpo_loss": 2.4020681381225586, + "epoch": 1.2753897024090695, + "grad_norm": 1047.2312940257925, + "learning_rate": 9.279695432253708e-07, + "logits": -1.2806742191314697, + "logps": -84.57674407958984, + "loss": 5.1377, + "objective": 5.066871643066406, + "ranking_idealized": 0.5291666388511658, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.5166666507720947, + "regularize": 5.066871643066406, + "step": 225 + }, + { + "dpo_loss": 2.466576337814331, + "epoch": 1.3037316957959377, + "grad_norm": 1030.5617485584146, + "learning_rate": 9.227580442891021e-07, + "logits": -1.2669168710708618, + "logps": -82.93086242675781, + "loss": 4.8601, + "objective": 4.961721420288086, + "ranking_idealized": 0.5083333253860474, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.512499988079071, + "regularize": 4.961721420288086, + "step": 230 + }, + { + "dpo_loss": 2.1171398162841797, + "epoch": 1.3320736891828058, + "grad_norm": 1061.077062757036, + "learning_rate": 9.173802550076401e-07, + "logits": -1.3311480283737183, + "logps": -81.57727813720703, + "loss": 4.9903, + "objective": 4.573681354522705, + "ranking_idealized": 0.4833333194255829, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.4958333373069763, + "regularize": 4.573681354522705, + "step": 235 + }, + { + "dpo_loss": 2.8134536743164062, + "epoch": 1.360415682569674, + "grad_norm": 970.3779104307249, + "learning_rate": 9.118382907149163e-07, + "logits": -1.267702579498291, + "logps": -83.0981216430664, + "loss": 5.0505, + "objective": 5.230247974395752, + "ranking_idealized": 0.4791666567325592, + "ranking_idealized_expo": 0.4791666567325592, + "ranking_simple": 0.4791666567325592, + "regularize": 5.230247974395752, + "step": 240 + }, + { + "dpo_loss": 2.3336708545684814, + "epoch": 1.3887576759565423, + "grad_norm": 1037.7314557451798, + "learning_rate": 9.061343313225087e-07, + "logits": -1.2927136421203613, + "logps": -82.71648406982422, + "loss": 4.97, + "objective": 4.810959815979004, + "ranking_idealized": 0.5208333134651184, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.5208333134651184, + "regularize": 4.810959815979004, + "step": 245 + }, + { + "dpo_loss": 1.7325116395950317, + "epoch": 1.4170996693434104, + "grad_norm": 1022.8469212904224, + "learning_rate": 9.002706204621802e-07, + "logits": -1.2380987405776978, + "logps": -82.20935821533203, + "loss": 5.0544, + "objective": 4.453593730926514, + "ranking_idealized": 0.47083333134651184, + "ranking_idealized_expo": 0.47083333134651184, + "ranking_simple": 0.47083333134651184, + "regularize": 4.453593730926514, + "step": 250 + }, + { + "epoch": 1.4170996693434104, + "eval_dpo_loss": 2.388360023498535, + "eval_logits": -1.2957789897918701, + "eval_logps": -89.6596450805664, + "eval_loss": 4.645730495452881, + "eval_objective": 4.698073387145996, + "eval_ranking_idealized": 0.5092975497245789, + "eval_ranking_idealized_expo": 0.5092975497245789, + "eval_ranking_simple": 0.5092975497245789, + "eval_regularize": 4.698073387145996, + "eval_runtime": 258.9175, + "eval_samples_per_second": 22.362, + "eval_steps_per_second": 0.935, + "step": 250 + }, + { + "dpo_loss": 2.6409242153167725, + "epoch": 1.4454416627302786, + "grad_norm": 1045.3372739075203, + "learning_rate": 8.942494646033554e-07, + "logits": -1.248718500137329, + "logps": -83.6023941040039, + "loss": 5.3714, + "objective": 5.307827949523926, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.5375000238418579, + "regularize": 5.307827949523926, + "step": 255 + }, + { + "dpo_loss": 2.64563250541687, + "epoch": 1.473783656117147, + "grad_norm": 1197.986844648104, + "learning_rate": 8.880732321458784e-07, + "logits": -1.2879093885421753, + "logps": -84.90482330322266, + "loss": 5.0662, + "objective": 5.523282051086426, + "ranking_idealized": 0.48750001192092896, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.4791666567325592, + "regularize": 5.523282051086426, + "step": 260 + }, + { + "dpo_loss": 2.4120934009552, + "epoch": 1.5021256495040152, + "grad_norm": 1129.3043637151231, + "learning_rate": 8.817443524884117e-07, + "logits": -1.2617005109786987, + "logps": -83.68741607666016, + "loss": 5.09, + "objective": 4.771634578704834, + "ranking_idealized": 0.5041666626930237, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.5, + "regularize": 4.771634578704834, + "step": 265 + }, + { + "dpo_loss": 3.012559175491333, + "epoch": 1.5304676428908834, + "grad_norm": 1023.223627517731, + "learning_rate": 8.752653150728411e-07, + "logits": -1.284982442855835, + "logps": -84.35843658447266, + "loss": 5.3376, + "objective": 5.6197028160095215, + "ranking_idealized": 0.5083333253860474, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.5041666626930237, + "regularize": 5.6197028160095215, + "step": 270 + }, + { + "dpo_loss": 2.465777635574341, + "epoch": 1.5588096362777515, + "grad_norm": 1088.952583153439, + "learning_rate": 8.68638668405062e-07, + "logits": -1.2928842306137085, + "logps": -84.76825714111328, + "loss": 5.0697, + "objective": 4.782662868499756, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.512499988079071, + "regularize": 4.782662868499756, + "step": 275 + }, + { + "dpo_loss": 2.743584156036377, + "epoch": 1.5871516296646198, + "grad_norm": 1199.9945832988806, + "learning_rate": 8.61867019052535e-07, + "logits": -1.212363362312317, + "logps": -82.49223327636719, + "loss": 4.971, + "objective": 5.158485412597656, + "ranking_idealized": 0.5041666626930237, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.5041666626930237, + "regularize": 5.158485412597656, + "step": 280 + }, + { + "dpo_loss": 2.19926381111145, + "epoch": 1.615493623051488, + "grad_norm": 1072.4621792134556, + "learning_rate": 8.549530306190014e-07, + "logits": -1.3124572038650513, + "logps": -84.11770629882812, + "loss": 4.7979, + "objective": 4.820002555847168, + "ranking_idealized": 0.5208333134651184, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.5249999761581421, + "regularize": 4.820002555847168, + "step": 285 + }, + { + "dpo_loss": 2.2039902210235596, + "epoch": 1.643835616438356, + "grad_norm": 1037.1720137680302, + "learning_rate": 8.478994226967638e-07, + "logits": -1.331601619720459, + "logps": -82.66283416748047, + "loss": 4.7089, + "objective": 4.694666385650635, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.5625, + "ranking_simple": 0.5583333373069763, + "regularize": 4.694666385650635, + "step": 290 + }, + { + "dpo_loss": 2.586439847946167, + "epoch": 1.6721776098252243, + "grad_norm": 1019.0647957946281, + "learning_rate": 8.407089697969456e-07, + "logits": -1.2327104806900024, + "logps": -81.66007995605469, + "loss": 4.8644, + "objective": 4.790833473205566, + "ranking_idealized": 0.4833333194255829, + "ranking_idealized_expo": 0.4791666567325592, + "ranking_simple": 0.49166667461395264, + "regularize": 4.790833473205566, + "step": 295 + }, + { + "dpo_loss": 2.176333427429199, + "epoch": 1.7005196032120926, + "grad_norm": 1054.8932248051876, + "learning_rate": 8.333845002581458e-07, + "logits": -1.3139069080352783, + "logps": -83.0549087524414, + "loss": 4.799, + "objective": 4.67409610748291, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.5708333253860474, + "regularize": 4.67409610748291, + "step": 300 + }, + { + "epoch": 1.7005196032120926, + "eval_dpo_loss": 2.5370917320251465, + "eval_logits": -1.312793254852295, + "eval_logps": -89.6458969116211, + "eval_loss": 5.069702625274658, + "eval_objective": 5.148064136505127, + "eval_ranking_idealized": 0.5092975497245789, + "eval_ranking_idealized_expo": 0.5092975497245789, + "eval_ranking_simple": 0.5113636255264282, + "eval_regularize": 5.148064136505127, + "eval_runtime": 259.1048, + "eval_samples_per_second": 22.346, + "eval_steps_per_second": 0.934, + "step": 300 + }, + { + "dpo_loss": 2.535557985305786, + "epoch": 1.7288615965989607, + "grad_norm": 1059.8267179953498, + "learning_rate": 8.259288951339232e-07, + "logits": -1.2971898317337036, + "logps": -83.48696899414062, + "loss": 4.7904, + "objective": 5.117157459259033, + "ranking_idealized": 0.5083333253860474, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.512499988079071, + "regularize": 5.117157459259033, + "step": 305 + }, + { + "dpo_loss": 2.743206024169922, + "epoch": 1.7572035899858292, + "grad_norm": 1062.5530460229647, + "learning_rate": 8.183450870595441e-07, + "logits": -1.3307418823242188, + "logps": -81.59162902832031, + "loss": 4.7455, + "objective": 4.8372015953063965, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.5166666507720947, + "regularize": 4.8372015953063965, + "step": 310 + }, + { + "dpo_loss": 2.1805214881896973, + "epoch": 1.7855455833726972, + "grad_norm": 967.1673859822481, + "learning_rate": 8.106360590984404e-07, + "logits": -1.2439404726028442, + "logps": -82.7205810546875, + "loss": 4.5159, + "objective": 4.586319923400879, + "ranking_idealized": 0.5083333253860474, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.512499988079071, + "regularize": 4.586319923400879, + "step": 315 + }, + { + "dpo_loss": 2.943455219268799, + "epoch": 1.8138875767595655, + "grad_norm": 1040.3088755706913, + "learning_rate": 8.028048435688333e-07, + "logits": -1.3003054857254028, + "logps": -83.96493530273438, + "loss": 4.8496, + "objective": 5.258904933929443, + "ranking_idealized": 0.5041666626930237, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.5083333253860474, + "regularize": 5.258904933929443, + "step": 320 + }, + { + "dpo_loss": 1.8657586574554443, + "epoch": 1.8422295701464337, + "grad_norm": 1028.4810077841332, + "learning_rate": 7.948545208509811e-07, + "logits": -1.3532642126083374, + "logps": -85.16321563720703, + "loss": 4.6322, + "objective": 4.409180164337158, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.5291666388511658, + "regularize": 4.409180164337158, + "step": 325 + }, + { + "dpo_loss": 2.035325050354004, + "epoch": 1.8705715635333018, + "grad_norm": 981.4379644106098, + "learning_rate": 7.86788218175523e-07, + "logits": -1.124271035194397, + "logps": -81.62163543701172, + "loss": 4.5108, + "objective": 4.548847675323486, + "ranking_idealized": 0.4749999940395355, + "ranking_idealized_expo": 0.47083333134651184, + "ranking_simple": 0.4791666567325592, + "regularize": 4.548847675323486, + "step": 330 + }, + { + "dpo_loss": 2.2992029190063477, + "epoch": 1.89891355692017, + "grad_norm": 1060.3744283391652, + "learning_rate": 7.786091083933949e-07, + "logits": -1.2721890211105347, + "logps": -81.50038146972656, + "loss": 4.574, + "objective": 4.452338218688965, + "ranking_idealized": 0.4958333373069763, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.5041666626930237, + "regularize": 4.452338218688965, + "step": 335 + }, + { + "dpo_loss": 2.214193820953369, + "epoch": 1.9272555503070383, + "grad_norm": 1066.9038354372583, + "learning_rate": 7.703204087277988e-07, + "logits": -1.2717024087905884, + "logps": -82.89705657958984, + "loss": 4.1923, + "objective": 4.533308029174805, + "ranking_idealized": 0.5416666865348816, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.5416666865348816, + "regularize": 4.533308029174805, + "step": 340 + }, + { + "dpo_loss": 1.864801287651062, + "epoch": 1.9555975436939064, + "grad_norm": 1068.1959237278584, + "learning_rate": 7.619253795087208e-07, + "logits": -1.2882568836212158, + "logps": -81.01626586914062, + "loss": 4.3847, + "objective": 4.10886812210083, + "ranking_idealized": 0.5333333611488342, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.5333333611488342, + "regularize": 4.10886812210083, + "step": 345 + }, + { + "dpo_loss": 1.7610963582992554, + "epoch": 1.9839395370807746, + "grad_norm": 1065.3539167654192, + "learning_rate": 7.534273228904915e-07, + "logits": -1.2216317653656006, + "logps": -82.78074645996094, + "loss": 4.3968, + "objective": 4.256522178649902, + "ranking_idealized": 0.48750001192092896, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.5, + "regularize": 4.256522178649902, + "step": 350 + }, + { + "epoch": 1.9839395370807746, + "eval_dpo_loss": 2.7970640659332275, + "eval_logits": -1.2878926992416382, + "eval_logps": -88.54591369628906, + "eval_loss": 5.4044508934021, + "eval_objective": 5.363577365875244, + "eval_ranking_idealized": 0.5092975497245789, + "eval_ranking_idealized_expo": 0.5092975497245789, + "eval_ranking_simple": 0.5103305578231812, + "eval_regularize": 5.363577365875244, + "eval_runtime": 258.9945, + "eval_samples_per_second": 22.356, + "eval_steps_per_second": 0.934, + "step": 350 + }, + { + "dpo_loss": 2.0882179737091064, + "epoch": 2.012281530467643, + "grad_norm": 1012.7784752507871, + "learning_rate": 7.448295815528956e-07, + "logits": -1.180530309677124, + "logps": -82.18423461914062, + "loss": 4.222, + "objective": 4.049466609954834, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.49166667461395264, + "regularize": 4.049466609954834, + "step": 355 + }, + { + "dpo_loss": 2.207961082458496, + "epoch": 2.040623523854511, + "grad_norm": 991.7225128541588, + "learning_rate": 7.361355373863413e-07, + "logits": -1.2234774827957153, + "logps": -80.75540161132812, + "loss": 4.1947, + "objective": 4.063844680786133, + "ranking_idealized": 0.5333333611488342, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.5375000238418579, + "regularize": 4.063844680786133, + "step": 360 + }, + { + "dpo_loss": 1.986746907234192, + "epoch": 2.0689655172413794, + "grad_norm": 1102.1948086047603, + "learning_rate": 7.273486101616056e-07, + "logits": -1.2934725284576416, + "logps": -83.87660217285156, + "loss": 4.2559, + "objective": 4.076398849487305, + "ranking_idealized": 0.5041666626930237, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.5208333134651184, + "regularize": 4.076398849487305, + "step": 365 + }, + { + "dpo_loss": 1.9069340229034424, + "epoch": 2.0973075106282475, + "grad_norm": 1148.422426893069, + "learning_rate": 7.184722561846797e-07, + "logits": -1.2647373676300049, + "logps": -80.90840911865234, + "loss": 3.9351, + "objective": 3.6877379417419434, + "ranking_idealized": 0.4833333194255829, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.49166667461395264, + "regularize": 3.6877379417419434, + "step": 370 + }, + { + "dpo_loss": 1.905211329460144, + "epoch": 2.1256495040151155, + "grad_norm": 982.12339135048, + "learning_rate": 7.095099669372443e-07, + "logits": -1.2502344846725464, + "logps": -83.15164947509766, + "loss": 4.1889, + "objective": 3.8048832416534424, + "ranking_idealized": 0.5916666388511658, + "ranking_idealized_expo": 0.5874999761581421, + "ranking_simple": 0.6000000238418579, + "regularize": 3.8048832416534424, + "step": 375 + }, + { + "dpo_loss": 2.2980270385742188, + "epoch": 2.153991497401984, + "grad_norm": 1002.9053178903335, + "learning_rate": 7.004652677033068e-07, + "logits": -1.1937479972839355, + "logps": -82.11659240722656, + "loss": 4.0113, + "objective": 4.026199817657471, + "ranking_idealized": 0.5291666388511658, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.5208333134651184, + "regularize": 4.026199817657471, + "step": 380 + }, + { + "dpo_loss": 2.430462121963501, + "epoch": 2.182333490788852, + "grad_norm": 1002.2263691968883, + "learning_rate": 6.913417161825449e-07, + "logits": -1.2855180501937866, + "logps": -84.00334930419922, + "loss": 4.1443, + "objective": 4.582634925842285, + "ranking_idealized": 0.5041666626930237, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5041666626930237, + "regularize": 4.582634925842285, + "step": 385 + }, + { + "dpo_loss": 2.034668445587158, + "epoch": 2.21067548417572, + "grad_norm": 1073.0050710345477, + "learning_rate": 6.821429010908971e-07, + "logits": -1.183647632598877, + "logps": -82.46015930175781, + "loss": 3.9544, + "objective": 3.7511041164398193, + "ranking_idealized": 0.4833333194255829, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.4791666567325592, + "regularize": 3.7511041164398193, + "step": 390 + }, + { + "dpo_loss": 2.1025197505950928, + "epoch": 2.2390174775625886, + "grad_norm": 928.4936074724073, + "learning_rate": 6.728724407489553e-07, + "logits": -1.176824688911438, + "logps": -82.696044921875, + "loss": 4.0116, + "objective": 4.03180456161499, + "ranking_idealized": 0.5208333134651184, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.5249999761581421, + "regularize": 4.03180456161499, + "step": 395 + }, + { + "dpo_loss": 1.8622020483016968, + "epoch": 2.2673594709494567, + "grad_norm": 950.7422986439022, + "learning_rate": 6.635339816587108e-07, + "logits": -1.242794394493103, + "logps": -83.7727279663086, + "loss": 3.8148, + "objective": 3.835386037826538, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.574999988079071, + "regularize": 3.835386037826538, + "step": 400 + }, + { + "epoch": 2.2673594709494567, + "eval_dpo_loss": 2.9398233890533447, + "eval_logits": -1.2679872512817383, + "eval_logps": -88.25416564941406, + "eval_loss": 5.762566089630127, + "eval_objective": 5.820021152496338, + "eval_ranking_idealized": 0.5092975497245789, + "eval_ranking_idealized_expo": 0.5092975497245789, + "eval_ranking_simple": 0.5092975497245789, + "eval_regularize": 5.820021152496338, + "eval_runtime": 259.3166, + "eval_samples_per_second": 22.328, + "eval_steps_per_second": 0.933, + "step": 400 + }, + { + "dpo_loss": 1.8469058275222778, + "epoch": 2.295701464336325, + "grad_norm": 963.0280264659817, + "learning_rate": 6.541311970692162e-07, + "logits": -1.301633358001709, + "logps": -81.62474060058594, + "loss": 3.8783, + "objective": 3.7539381980895996, + "ranking_idealized": 0.5583333373069763, + "ranking_idealized_expo": 0.5583333373069763, + "ranking_simple": 0.5708333253860474, + "regularize": 3.7539381980895996, + "step": 405 + }, + { + "dpo_loss": 1.6361950635910034, + "epoch": 2.324043457723193, + "grad_norm": 1074.5619431455382, + "learning_rate": 6.446677855317264e-07, + "logits": -1.2071573734283447, + "logps": -80.61900329589844, + "loss": 3.9829, + "objective": 3.7228012084960938, + "ranking_idealized": 0.4833333194255829, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.47083333134651184, + "regularize": 3.7228012084960938, + "step": 410 + }, + { + "dpo_loss": 1.8135488033294678, + "epoch": 2.3523854511100613, + "grad_norm": 1058.2549464280587, + "learning_rate": 6.351474694448864e-07, + "logits": -1.2399203777313232, + "logps": -81.5149917602539, + "loss": 3.748, + "objective": 3.9710586071014404, + "ranking_idealized": 0.5333333611488342, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.5333333611488342, + "regularize": 3.9710586071014404, + "step": 415 + }, + { + "dpo_loss": 2.120755434036255, + "epoch": 2.3807274444969297, + "grad_norm": 1114.426082281688, + "learning_rate": 6.255739935905395e-07, + "logits": -1.1594359874725342, + "logps": -83.35548400878906, + "loss": 3.7118, + "objective": 3.681281089782715, + "ranking_idealized": 0.4749999940395355, + "ranking_idealized_expo": 0.47083333134651184, + "ranking_simple": 0.4583333432674408, + "regularize": 3.681281089782715, + "step": 420 + }, + { + "dpo_loss": 1.7397000789642334, + "epoch": 2.409069437883798, + "grad_norm": 1024.8329703830655, + "learning_rate": 6.159511236607315e-07, + "logits": -1.2232387065887451, + "logps": -81.95706939697266, + "loss": 3.5923, + "objective": 3.648602247238159, + "ranking_idealized": 0.5, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.5166666507720947, + "regularize": 3.648602247238159, + "step": 425 + }, + { + "dpo_loss": 1.804638385772705, + "epoch": 2.4374114312706663, + "grad_norm": 979.544871113056, + "learning_rate": 6.062826447764883e-07, + "logits": -1.2574443817138672, + "logps": -82.60694122314453, + "loss": 3.4778, + "objective": 3.4914093017578125, + "ranking_idealized": 0.5333333611488342, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.5249999761581421, + "regularize": 3.4914093017578125, + "step": 430 + }, + { + "dpo_loss": 1.6648954153060913, + "epoch": 2.4657534246575343, + "grad_norm": 1000.8401446346558, + "learning_rate": 5.965723599989528e-07, + "logits": -1.300902009010315, + "logps": -82.3549575805664, + "loss": 3.5217, + "objective": 3.345120668411255, + "ranking_idealized": 0.5083333253860474, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.5, + "regularize": 3.345120668411255, + "step": 435 + }, + { + "dpo_loss": 2.0332047939300537, + "epoch": 2.4940954180444024, + "grad_norm": 1031.1441408735554, + "learning_rate": 5.868240888334652e-07, + "logits": -1.2219815254211426, + "logps": -82.15070343017578, + "loss": 3.7915, + "objective": 3.9336984157562256, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5333333611488342, + "regularize": 3.9336984157562256, + "step": 440 + }, + { + "dpo_loss": 2.0011653900146484, + "epoch": 2.5224374114312704, + "grad_norm": 970.4125487560452, + "learning_rate": 5.770416657271728e-07, + "logits": -1.2273285388946533, + "logps": -80.1659164428711, + "loss": 3.4124, + "objective": 3.168755292892456, + "ranking_idealized": 0.5166666507720947, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.5208333134651184, + "regularize": 3.168755292892456, + "step": 445 + }, + { + "dpo_loss": 1.613458275794983, + "epoch": 2.550779404818139, + "grad_norm": 1061.4659212212357, + "learning_rate": 5.67228938560766e-07, + "logits": -1.2801109552383423, + "logps": -81.43175506591797, + "loss": 3.4169, + "objective": 3.3297555446624756, + "ranking_idealized": 0.5, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.49166667461395264, + "regularize": 3.3297555446624756, + "step": 450 + }, + { + "epoch": 2.550779404818139, + "eval_dpo_loss": 3.138432502746582, + "eval_logits": -1.2897279262542725, + "eval_logps": -88.01155853271484, + "eval_loss": 5.953909397125244, + "eval_objective": 6.106462478637695, + "eval_ranking_idealized": 0.5092975497245789, + "eval_ranking_idealized_expo": 0.5092975497245789, + "eval_ranking_simple": 0.5144628286361694, + "eval_regularize": 6.106462478637695, + "eval_runtime": 259.8077, + "eval_samples_per_second": 22.286, + "eval_steps_per_second": 0.931, + "step": 450 + }, + { + "dpo_loss": 1.8731194734573364, + "epoch": 2.579121398205007, + "grad_norm": 1156.0005265988177, + "learning_rate": 5.573897671349268e-07, + "logits": -1.300366759300232, + "logps": -82.42855834960938, + "loss": 3.5224, + "objective": 3.593839168548584, + "ranking_idealized": 0.5041666626930237, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5041666626930237, + "regularize": 3.593839168548584, + "step": 455 + }, + { + "dpo_loss": 1.952242136001587, + "epoch": 2.6074633915918755, + "grad_norm": 1095.9802997680708, + "learning_rate": 5.475280216520912e-07, + "logits": -1.3715617656707764, + "logps": -82.51021575927734, + "loss": 3.3771, + "objective": 3.524890422821045, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5208333134651184, + "regularize": 3.524890422821045, + "step": 460 + }, + { + "dpo_loss": 1.5585062503814697, + "epoch": 2.6358053849787435, + "grad_norm": 935.1251434271745, + "learning_rate": 5.376475811941191e-07, + "logits": -1.2529839277267456, + "logps": -82.1951904296875, + "loss": 3.4445, + "objective": 3.3441879749298096, + "ranking_idealized": 0.4958333373069763, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.5, + "regularize": 3.3441879749298096, + "step": 465 + }, + { + "dpo_loss": 1.6525613069534302, + "epoch": 2.6641473783656116, + "grad_norm": 977.6740437303708, + "learning_rate": 5.277523321964701e-07, + "logits": -1.2638176679611206, + "logps": -81.61119079589844, + "loss": 3.3653, + "objective": 3.1554312705993652, + "ranking_idealized": 0.5541666746139526, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.5625, + "regularize": 3.1554312705993652, + "step": 470 + }, + { + "dpo_loss": 1.4508802890777588, + "epoch": 2.69248937175248, + "grad_norm": 993.1353802099517, + "learning_rate": 5.178461669194903e-07, + "logits": -1.228849172592163, + "logps": -81.23489379882812, + "loss": 2.9912, + "objective": 2.843146562576294, + "ranking_idealized": 0.4958333373069763, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.5, + "regularize": 2.843146562576294, + "step": 475 + }, + { + "dpo_loss": 1.4309269189834595, + "epoch": 2.720831365139348, + "grad_norm": 1013.9109275016526, + "learning_rate": 5.07932981917404e-07, + "logits": -1.3067494630813599, + "logps": -80.33541107177734, + "loss": 3.1599, + "objective": 3.0493505001068115, + "ranking_idealized": 0.5, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.512499988079071, + "regularize": 3.0493505001068115, + "step": 480 + }, + { + "dpo_loss": 1.6128212213516235, + "epoch": 2.7491733585262166, + "grad_norm": 935.3777113198429, + "learning_rate": 4.980166765056193e-07, + "logits": -1.2488080263137817, + "logps": -82.94316101074219, + "loss": 3.2342, + "objective": 3.1210060119628906, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.5166666507720947, + "regularize": 3.1210060119628906, + "step": 485 + }, + { + "dpo_loss": 1.2609761953353882, + "epoch": 2.7775153519130846, + "grad_norm": 1021.9995000614038, + "learning_rate": 4.881011512269463e-07, + "logits": -1.224744439125061, + "logps": -80.88065338134766, + "loss": 3.0434, + "objective": 2.964614152908325, + "ranking_idealized": 0.4749999940395355, + "ranking_idealized_expo": 0.47083333134651184, + "ranking_simple": 0.46666666865348816, + "regularize": 2.964614152908325, + "step": 490 + }, + { + "dpo_loss": 1.3648375272750854, + "epoch": 2.8058573452999527, + "grad_norm": 957.4547050967395, + "learning_rate": 4.78190306317332e-07, + "logits": -1.2405409812927246, + "logps": -79.93798065185547, + "loss": 2.9176, + "objective": 2.8268253803253174, + "ranking_idealized": 0.4791666567325592, + "ranking_idealized_expo": 0.4791666567325592, + "ranking_simple": 0.47083333134651184, + "regularize": 2.8268253803253174, + "step": 495 + }, + { + "dpo_loss": 1.4600600004196167, + "epoch": 2.8341993386868207, + "grad_norm": 956.91251387349, + "learning_rate": 4.682880401717177e-07, + "logits": -1.2869919538497925, + "logps": -79.13894653320312, + "loss": 2.988, + "objective": 2.750535488128662, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.5208333134651184, + "regularize": 2.750535488128662, + "step": 500 + }, + { + "epoch": 2.8341993386868207, + "eval_dpo_loss": 3.1318445205688477, + "eval_logits": -1.2855963706970215, + "eval_logps": -87.95059967041016, + "eval_loss": 5.985367298126221, + "eval_objective": 6.018334865570068, + "eval_ranking_idealized": 0.5092975497245789, + "eval_ranking_idealized_expo": 0.5092975497245789, + "eval_ranking_simple": 0.5092975497245789, + "eval_regularize": 6.018334865570068, + "eval_runtime": 259.5234, + "eval_samples_per_second": 22.31, + "eval_steps_per_second": 0.932, + "step": 500 + }, + { + "dpo_loss": 1.2522823810577393, + "epoch": 2.862541332073689, + "grad_norm": 990.3820386306984, + "learning_rate": 4.5839824781061886e-07, + "logits": -1.238278865814209, + "logps": -80.7296371459961, + "loss": 2.9937, + "objective": 2.6271092891693115, + "ranking_idealized": 0.5208333134651184, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.512499988079071, + "regularize": 2.6271092891693115, + "step": 505 + }, + { + "dpo_loss": 1.5270129442214966, + "epoch": 2.8908833254605573, + "grad_norm": 943.7310548857196, + "learning_rate": 4.4852481934803277e-07, + "logits": -1.1962122917175293, + "logps": -81.28018951416016, + "loss": 2.8508, + "objective": 3.192195177078247, + "ranking_idealized": 0.5291666388511658, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5416666865348816, + "regularize": 3.192195177078247, + "step": 510 + }, + { + "dpo_loss": 1.3985689878463745, + "epoch": 2.9192253188474258, + "grad_norm": 1066.3526544144515, + "learning_rate": 4.3867163846127674e-07, + "logits": -1.3405725955963135, + "logps": -81.18116760253906, + "loss": 2.8001, + "objective": 2.7043962478637695, + "ranking_idealized": 0.4791666567325592, + "ranking_idealized_expo": 0.4791666567325592, + "ranking_simple": 0.4791666567325592, + "regularize": 2.7043962478637695, + "step": 515 + }, + { + "dpo_loss": 1.549082636833191, + "epoch": 2.947567312234294, + "grad_norm": 976.1195004276424, + "learning_rate": 4.2884258086335745e-07, + "logits": -1.2520852088928223, + "logps": -83.24747467041016, + "loss": 2.7691, + "objective": 2.9670374393463135, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.512499988079071, + "regularize": 2.9670374393463135, + "step": 520 + }, + { + "dpo_loss": 1.1514889001846313, + "epoch": 2.975909305621162, + "grad_norm": 993.1767181389736, + "learning_rate": 4.1904151277847305e-07, + "logits": -1.2369369268417358, + "logps": -82.1258316040039, + "loss": 2.7535, + "objective": 2.7600042819976807, + "ranking_idealized": 0.5666666626930237, + "ranking_idealized_expo": 0.5666666626930237, + "ranking_simple": 0.574999988079071, + "regularize": 2.7600042819976807, + "step": 525 + }, + { + "dpo_loss": 1.234760046005249, + "epoch": 3.0042512990080303, + "grad_norm": 954.3168391295485, + "learning_rate": 4.092722894212487e-07, + "logits": -1.2384470701217651, + "logps": -81.53229522705078, + "loss": 2.772, + "objective": 2.536973237991333, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5166666507720947, + "regularize": 2.536973237991333, + "step": 530 + }, + { + "dpo_loss": 1.2055299282073975, + "epoch": 3.0325932923948984, + "grad_norm": 1192.8110305376617, + "learning_rate": 3.995387534803005e-07, + "logits": -1.2691408395767212, + "logps": -82.96357727050781, + "loss": 2.6555, + "objective": 2.663506031036377, + "ranking_idealized": 0.5458333492279053, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.550000011920929, + "regularize": 2.663506031036377, + "step": 535 + }, + { + "dpo_loss": 1.3903727531433105, + "epoch": 3.0609352857817664, + "grad_norm": 949.2725322799425, + "learning_rate": 3.8984473360672967e-07, + "logits": -1.3405746221542358, + "logps": -81.4103012084961, + "loss": 2.5599, + "objective": 2.611825704574585, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.5291666388511658, + "regularize": 2.611825704574585, + "step": 540 + }, + { + "dpo_loss": 1.3084332942962646, + "epoch": 3.089277279168635, + "grad_norm": 999.2271817343484, + "learning_rate": 3.801940429081345e-07, + "logits": -1.2964000701904297, + "logps": -82.18136596679688, + "loss": 2.504, + "objective": 2.4276323318481445, + "ranking_idealized": 0.49166667461395264, + "ranking_idealized_expo": 0.49166667461395264, + "ranking_simple": 0.4833333194255829, + "regularize": 2.4276323318481445, + "step": 545 + }, + { + "dpo_loss": 1.0889129638671875, + "epoch": 3.117619272555503, + "grad_norm": 934.3758046690474, + "learning_rate": 3.7059047744873955e-07, + "logits": -1.1509153842926025, + "logps": -82.78107452392578, + "loss": 2.4859, + "objective": 2.5204343795776367, + "ranking_idealized": 0.5458333492279053, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5458333492279053, + "regularize": 2.5204343795776367, + "step": 550 + }, + { + "epoch": 3.117619272555503, + "eval_dpo_loss": 3.178955316543579, + "eval_logits": -1.2804911136627197, + "eval_logps": -88.50298309326172, + "eval_loss": 6.194624423980713, + "eval_objective": 6.202889919281006, + "eval_ranking_idealized": 0.5092975497245789, + "eval_ranking_idealized_expo": 0.5092975497245789, + "eval_ranking_simple": 0.5103305578231812, + "eval_regularize": 6.202889919281006, + "eval_runtime": 260.2352, + "eval_samples_per_second": 22.249, + "eval_steps_per_second": 0.93, + "step": 550 + }, + { + "dpo_loss": 1.148443579673767, + "epoch": 3.1459612659423715, + "grad_norm": 977.893659928589, + "learning_rate": 3.6103781475622786e-07, + "logits": -1.2158228158950806, + "logps": -83.3880844116211, + "loss": 2.4258, + "objective": 2.2721104621887207, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.5458333492279053, + "regularize": 2.2721104621887207, + "step": 555 + }, + { + "dpo_loss": 1.2861641645431519, + "epoch": 3.1743032593292395, + "grad_norm": 912.4395782657139, + "learning_rate": 3.5153981233586274e-07, + "logits": -1.2103866338729858, + "logps": -80.51148223876953, + "loss": 2.4037, + "objective": 2.3816921710968018, + "ranking_idealized": 0.5041666626930237, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.5041666626930237, + "regularize": 2.3816921710968018, + "step": 560 + }, + { + "dpo_loss": 1.0393404960632324, + "epoch": 3.2026452527161076, + "grad_norm": 959.0200932296882, + "learning_rate": 3.421002061924876e-07, + "logits": -1.262898325920105, + "logps": -82.25037384033203, + "loss": 2.3058, + "objective": 2.267256259918213, + "ranking_idealized": 0.5458333492279053, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.5583333373069763, + "regularize": 2.267256259918213, + "step": 565 + }, + { + "dpo_loss": 1.2455191612243652, + "epoch": 3.230987246102976, + "grad_norm": 937.4751934096442, + "learning_rate": 3.327227093609824e-07, + "logits": -1.1880606412887573, + "logps": -81.5679702758789, + "loss": 2.2052, + "objective": 2.3692660331726074, + "ranking_idealized": 0.4791666567325592, + "ranking_idealized_expo": 0.4791666567325592, + "ranking_simple": 0.4833333194255829, + "regularize": 2.3692660331726074, + "step": 570 + }, + { + "dpo_loss": 1.228649616241455, + "epoch": 3.259329239489844, + "grad_norm": 996.5558020139559, + "learning_rate": 3.234110104457536e-07, + "logits": -1.2556049823760986, + "logps": -81.8873519897461, + "loss": 2.2515, + "objective": 2.29750657081604, + "ranking_idealized": 0.5, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.49166667461395264, + "regularize": 2.29750657081604, + "step": 575 + }, + { + "dpo_loss": 1.3365857601165771, + "epoch": 3.287671232876712, + "grad_norm": 996.2512645672206, + "learning_rate": 3.141687721698363e-07, + "logits": -1.2736408710479736, + "logps": -82.88424682617188, + "loss": 2.3155, + "objective": 2.366077423095703, + "ranking_idealized": 0.5541666746139526, + "ranking_idealized_expo": 0.5541666746139526, + "ranking_simple": 0.5458333492279053, + "regularize": 2.366077423095703, + "step": 580 + }, + { + "dpo_loss": 1.1693744659423828, + "epoch": 3.3160132262635806, + "grad_norm": 990.5330100186713, + "learning_rate": 3.049996299341742e-07, + "logits": -1.289427638053894, + "logps": -83.39188385009766, + "loss": 2.1009, + "objective": 2.1158106327056885, + "ranking_idealized": 0.5333333611488342, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.5249999761581421, + "regularize": 2.1158106327056885, + "step": 585 + }, + { + "dpo_loss": 1.1631190776824951, + "epoch": 3.3443552196504487, + "grad_norm": 986.1485755213372, + "learning_rate": 2.959071903876486e-07, + "logits": -1.3035333156585693, + "logps": -83.53241729736328, + "loss": 2.1276, + "objective": 2.4290575981140137, + "ranking_idealized": 0.49166667461395264, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.5083333253860474, + "regularize": 2.4290575981140137, + "step": 590 + }, + { + "dpo_loss": 1.3233096599578857, + "epoch": 3.372697213037317, + "grad_norm": 930.1393629687922, + "learning_rate": 2.86895030008416e-07, + "logits": -1.2197285890579224, + "logps": -82.90579223632812, + "loss": 2.1207, + "objective": 2.1894338130950928, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5291666388511658, + "regularize": 2.1894338130950928, + "step": 595 + }, + { + "dpo_loss": 1.007699728012085, + "epoch": 3.4010392064241852, + "grad_norm": 935.3828921290786, + "learning_rate": 2.779666936971129e-07, + "logits": -1.230286717414856, + "logps": -81.8963851928711, + "loss": 2.0539, + "objective": 2.0761570930480957, + "ranking_idealized": 0.5041666626930237, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.4958333373069763, + "regularize": 2.0761570930480957, + "step": 600 + }, + { + "epoch": 3.4010392064241852, + "eval_dpo_loss": 3.1110734939575195, + "eval_logits": -1.2650625705718994, + "eval_logps": -88.16163635253906, + "eval_loss": 5.933178901672363, + "eval_objective": 6.031818866729736, + "eval_ranking_idealized": 0.5092975497245789, + "eval_ranking_idealized_expo": 0.5092975497245789, + "eval_ranking_simple": 0.5113636255264282, + "eval_regularize": 6.031818866729736, + "eval_runtime": 258.4717, + "eval_samples_per_second": 22.401, + "eval_steps_per_second": 0.936, + "step": 600 + }, + { + "dpo_loss": 1.11422860622406, + "epoch": 3.4293811998110533, + "grad_norm": 962.8376859443554, + "learning_rate": 2.6912569338248315e-07, + "logits": -1.2667381763458252, + "logps": -83.77916717529297, + "loss": 2.0057, + "objective": 1.9590004682540894, + "ranking_idealized": 0.5333333611488342, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.5333333611488342, + "regularize": 1.9590004682540894, + "step": 605 + }, + { + "dpo_loss": 1.1715530157089233, + "epoch": 3.4577231931979218, + "grad_norm": 934.6534827687339, + "learning_rate": 2.603755066399718e-07, + "logits": -1.2390044927597046, + "logps": -82.3260269165039, + "loss": 2.1286, + "objective": 2.050309658050537, + "ranking_idealized": 0.5208333134651184, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.512499988079071, + "regularize": 2.050309658050537, + "step": 610 + }, + { + "dpo_loss": 1.1228054761886597, + "epoch": 3.48606518658479, + "grad_norm": 924.4634252845681, + "learning_rate": 2.517195753238345e-07, + "logits": -1.2509876489639282, + "logps": -82.427001953125, + "loss": 1.9336, + "objective": 1.9248236417770386, + "ranking_idealized": 0.47083333134651184, + "ranking_idealized_expo": 0.46666666865348816, + "ranking_simple": 0.4625000059604645, + "regularize": 1.9248236417770386, + "step": 615 + }, + { + "dpo_loss": 1.0055824518203735, + "epoch": 3.514407179971658, + "grad_norm": 996.235630609532, + "learning_rate": 2.4316130421329696e-07, + "logits": -1.1457880735397339, + "logps": -81.78346252441406, + "loss": 1.8569, + "objective": 1.8841525316238403, + "ranking_idealized": 0.4958333373069763, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.49166667461395264, + "regularize": 1.8841525316238403, + "step": 620 + }, + { + "dpo_loss": 1.0400264263153076, + "epoch": 3.5427491733585263, + "grad_norm": 988.2054187208093, + "learning_rate": 2.3470405967329604e-07, + "logits": -1.202520489692688, + "logps": -81.60346984863281, + "loss": 1.8727, + "objective": 2.022357702255249, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.5166666507720947, + "regularize": 2.022357702255249, + "step": 625 + }, + { + "dpo_loss": 0.9165257215499878, + "epoch": 3.5710911667453944, + "grad_norm": 970.3302350643374, + "learning_rate": 2.2635116833033392e-07, + "logits": -1.2847248315811157, + "logps": -81.50933074951172, + "loss": 1.8387, + "objective": 1.6823768615722656, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5291666388511658, + "regularize": 1.6823768615722656, + "step": 630 + }, + { + "dpo_loss": 0.8153350949287415, + "epoch": 3.5994331601322624, + "grad_norm": 964.0631369889124, + "learning_rate": 2.181059157639598e-07, + "logits": -1.2249428033828735, + "logps": -82.01227569580078, + "loss": 1.7136, + "objective": 1.8080626726150513, + "ranking_idealized": 0.48750001192092896, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.48750001192092896, + "regularize": 1.8080624341964722, + "step": 635 + }, + { + "dpo_loss": 0.9864614605903625, + "epoch": 3.627775153519131, + "grad_norm": 1000.0610851173434, + "learning_rate": 2.0997154521440097e-07, + "logits": -1.1798667907714844, + "logps": -82.78071594238281, + "loss": 1.7312, + "objective": 1.868208646774292, + "ranking_idealized": 0.4625000059604645, + "ranking_idealized_expo": 0.4625000059604645, + "ranking_simple": 0.46666666865348816, + "regularize": 1.868208646774292, + "step": 640 + }, + { + "dpo_loss": 1.0488131046295166, + "epoch": 3.656117146905999, + "grad_norm": 1049.7832510532623, + "learning_rate": 2.0195125630684428e-07, + "logits": -1.2025647163391113, + "logps": -81.37299346923828, + "loss": 1.7885, + "objective": 1.6037089824676514, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5249999761581421, + "regularize": 1.6037089824676514, + "step": 645 + }, + { + "dpo_loss": 1.022878646850586, + "epoch": 3.6844591402928675, + "grad_norm": 919.1271980482485, + "learning_rate": 1.9404820379287672e-07, + "logits": -1.1927168369293213, + "logps": -82.80690002441406, + "loss": 1.664, + "objective": 1.7033976316452026, + "ranking_idealized": 0.5041666626930237, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.5041666626930237, + "regularize": 1.7033976316452026, + "step": 650 + }, + { + "epoch": 3.6844591402928675, + "eval_dpo_loss": 3.096806764602661, + "eval_logits": -1.2607561349868774, + "eval_logps": -88.69917297363281, + "eval_loss": 5.923920631408691, + "eval_objective": 5.985104084014893, + "eval_ranking_idealized": 0.5092975497245789, + "eval_ranking_idealized_expo": 0.5092975497245789, + "eval_ranking_simple": 0.5113636255264282, + "eval_regularize": 5.985104084014893, + "eval_runtime": 258.729, + "eval_samples_per_second": 22.379, + "eval_steps_per_second": 0.935, + "step": 650 + }, + { + "dpo_loss": 0.9369535446166992, + "epoch": 3.7128011336797355, + "grad_norm": 961.5116821455488, + "learning_rate": 1.8626549630957395e-07, + "logits": -1.2329158782958984, + "logps": -81.57703399658203, + "loss": 1.6411, + "objective": 1.523728847503662, + "ranking_idealized": 0.5208333134651184, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.5208333134651184, + "regularize": 1.523728847503662, + "step": 655 + }, + { + "dpo_loss": 0.8511099219322205, + "epoch": 3.7411431270666036, + "grad_norm": 1089.6596694384646, + "learning_rate": 1.7860619515673032e-07, + "logits": -1.3094429969787598, + "logps": -82.70073699951172, + "loss": 1.6123, + "objective": 1.475422739982605, + "ranking_idealized": 0.5541666746139526, + "ranking_idealized_expo": 0.5541666746139526, + "ranking_simple": 0.5541666746139526, + "regularize": 1.475422739982605, + "step": 660 + }, + { + "dpo_loss": 0.9054993987083435, + "epoch": 3.769485120453472, + "grad_norm": 1093.6896679613712, + "learning_rate": 1.7107331309270684e-07, + "logits": -1.2157032489776611, + "logps": -82.55271911621094, + "loss": 1.561, + "objective": 1.6129040718078613, + "ranking_idealized": 0.5166666507720947, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.512499988079071, + "regularize": 1.6129040718078613, + "step": 665 + }, + { + "dpo_loss": 0.8416043519973755, + "epoch": 3.79782711384034, + "grad_norm": 1024.9155399545134, + "learning_rate": 1.6366981314937372e-07, + "logits": -1.3233702182769775, + "logps": -82.39900970458984, + "loss": 1.5252, + "objective": 1.4049646854400635, + "ranking_idealized": 0.5916666388511658, + "ranking_idealized_expo": 0.5916666388511658, + "ranking_simple": 0.6000000238418579, + "regularize": 1.4049646854400635, + "step": 670 + }, + { + "dpo_loss": 0.8841171264648438, + "epoch": 3.826169107227208, + "grad_norm": 971.974542125377, + "learning_rate": 1.5639860746661338e-07, + "logits": -1.288584589958191, + "logps": -81.51100158691406, + "loss": 1.5256, + "objective": 1.549899935722351, + "ranking_idealized": 0.5458333492279053, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5458333492279053, + "regularize": 1.549899935722351, + "step": 675 + }, + { + "dpo_loss": 0.9599818587303162, + "epoch": 3.8545111006140766, + "grad_norm": 930.812509420293, + "learning_rate": 1.492625561468393e-07, + "logits": -1.2116204500198364, + "logps": -81.91743469238281, + "loss": 1.4834, + "objective": 1.6812348365783691, + "ranking_idealized": 0.5083333253860474, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.5083333253860474, + "regularize": 1.6812348365783691, + "step": 680 + }, + { + "dpo_loss": 0.9112765192985535, + "epoch": 3.8828530940009447, + "grad_norm": 1163.329406971879, + "learning_rate": 1.4226446612998671e-07, + "logits": -1.273834228515625, + "logps": -81.50404357910156, + "loss": 1.4897, + "objective": 1.4795509576797485, + "ranking_idealized": 0.5166666507720947, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.512499988079071, + "regularize": 1.4795509576797485, + "step": 685 + }, + { + "dpo_loss": 0.8565966486930847, + "epoch": 3.9111950873878127, + "grad_norm": 938.1098675569013, + "learning_rate": 1.3540709008941147e-07, + "logits": -1.2776970863342285, + "logps": -81.18965911865234, + "loss": 1.4652, + "objective": 1.3701138496398926, + "ranking_idealized": 0.5458333492279053, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.550000011920929, + "regularize": 1.3701138496398926, + "step": 690 + }, + { + "dpo_loss": 1.0229411125183105, + "epoch": 3.9395370807746812, + "grad_norm": 1013.5958054606544, + "learning_rate": 1.2869312534913685e-07, + "logits": -1.191641092300415, + "logps": -82.60445404052734, + "loss": 1.3376, + "objective": 1.4829381704330444, + "ranking_idealized": 0.46666666865348816, + "ranking_idealized_expo": 0.46666666865348816, + "ranking_simple": 0.4625000059604645, + "regularize": 1.4829381704330444, + "step": 695 + }, + { + "dpo_loss": 0.8406078815460205, + "epoch": 3.9678790741615493, + "grad_norm": 1063.5432850972618, + "learning_rate": 1.2212521282287093e-07, + "logits": -1.2275745868682861, + "logps": -82.30086517333984, + "loss": 1.3502, + "objective": 1.3136423826217651, + "ranking_idealized": 0.48750001192092896, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.48750001192092896, + "regularize": 1.3136423826217651, + "step": 700 + }, + { + "epoch": 3.9678790741615493, + "eval_dpo_loss": 3.089524269104004, + "eval_logits": -1.2646851539611816, + "eval_logps": -88.52361297607422, + "eval_loss": 5.917611122131348, + "eval_objective": 5.957097053527832, + "eval_ranking_idealized": 0.5092975497245789, + "eval_ranking_idealized_expo": 0.5092975497245789, + "eval_ranking_simple": 0.5134297609329224, + "eval_regularize": 5.957097053527832, + "eval_runtime": 258.9235, + "eval_samples_per_second": 22.362, + "eval_steps_per_second": 0.935, + "step": 700 + }, + { + "dpo_loss": 0.8173587918281555, + "epoch": 3.9962210675484178, + "grad_norm": 985.2767504669318, + "learning_rate": 1.15705935975212e-07, + "logits": -1.1528183221817017, + "logps": -80.9789810180664, + "loss": 1.3032, + "objective": 1.2147972583770752, + "ranking_idealized": 0.5291666388511658, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5291666388511658, + "regularize": 1.2147972583770752, + "step": 705 + }, + { + "dpo_loss": 0.7131792902946472, + "epoch": 4.024563060935286, + "grad_norm": 1011.8698807645903, + "learning_rate": 1.094378198054533e-07, + "logits": -1.2421104907989502, + "logps": -82.51912689208984, + "loss": 1.1035, + "objective": 1.1794158220291138, + "ranking_idealized": 0.4791666567325592, + "ranking_idealized_expo": 0.4791666567325592, + "ranking_simple": 0.4833333194255829, + "regularize": 1.1794158220291138, + "step": 710 + }, + { + "dpo_loss": 0.7108194231987, + "epoch": 4.052905054322154, + "grad_norm": 940.6537803599663, + "learning_rate": 1.0332332985438247e-07, + "logits": -1.1998772621154785, + "logps": -81.95320892333984, + "loss": 1.0461, + "objective": 0.9881690740585327, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.5333333611488342, + "regularize": 0.9881690740585327, + "step": 715 + }, + { + "dpo_loss": 0.6712031960487366, + "epoch": 4.081247047709022, + "grad_norm": 960.2681465991928, + "learning_rate": 9.736487123447068e-08, + "logits": -1.15168035030365, + "logps": -83.81200408935547, + "loss": 1.0468, + "objective": 1.1416888236999512, + "ranking_idealized": 0.5208333134651184, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.5249999761581421, + "regularize": 1.1416888236999512, + "step": 720 + }, + { + "dpo_loss": 0.6989570260047913, + "epoch": 4.109589041095891, + "grad_norm": 935.0432434834537, + "learning_rate": 9.156478768383058e-08, + "logits": -1.2237892150878906, + "logps": -81.69219207763672, + "loss": 1.074, + "objective": 0.9603613615036011, + "ranking_idealized": 0.5291666388511658, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.5291666388511658, + "regularize": 0.9603613615036011, + "step": 725 + }, + { + "dpo_loss": 0.7995573878288269, + "epoch": 4.137931034482759, + "grad_norm": 980.3733885345745, + "learning_rate": 8.592536064431466e-08, + "logits": -1.2934256792068481, + "logps": -81.94860076904297, + "loss": 1.0596, + "objective": 1.06425142288208, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5375000238418579, + "regularize": 1.06425142288208, + "step": 730 + }, + { + "dpo_loss": 0.7126405835151672, + "epoch": 4.166273027869627, + "grad_norm": 1007.1386546772866, + "learning_rate": 8.044880836411888e-08, + "logits": -1.1827558279037476, + "logps": -82.89656066894531, + "loss": 1.0028, + "objective": 0.9806023240089417, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5208333134651184, + "regularize": 0.9806023240089417, + "step": 735 + }, + { + "dpo_loss": 0.7121079564094543, + "epoch": 4.194615021256495, + "grad_norm": 940.1459069064864, + "learning_rate": 7.513728502524286e-08, + "logits": -1.2114641666412354, + "logps": -82.5162582397461, + "loss": 1.0102, + "objective": 0.9698477387428284, + "ranking_idealized": 0.5208333134651184, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.5208333134651184, + "regularize": 0.9698477387428284, + "step": 740 + }, + { + "dpo_loss": 0.7223809957504272, + "epoch": 4.222957014643363, + "grad_norm": 956.701027117306, + "learning_rate": 6.999287989614971e-08, + "logits": -1.274079442024231, + "logps": -80.06112670898438, + "loss": 0.9886, + "objective": 1.010962724685669, + "ranking_idealized": 0.48750001192092896, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.4958333373069763, + "regularize": 1.010962724685669, + "step": 745 + }, + { + "dpo_loss": 0.6834555268287659, + "epoch": 4.251299008030231, + "grad_norm": 994.5271297786792, + "learning_rate": 6.501761650996052e-08, + "logits": -1.3588440418243408, + "logps": -82.0845947265625, + "loss": 1.0052, + "objective": 1.0897630453109741, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5291666388511658, + "regularize": 1.0897630453109741, + "step": 750 + }, + { + "epoch": 4.251299008030231, + "eval_dpo_loss": 3.103642702102661, + "eval_logits": -1.2629750967025757, + "eval_logps": -88.36180877685547, + "eval_loss": 5.9642486572265625, + "eval_objective": 6.00606632232666, + "eval_ranking_idealized": 0.5092975497245789, + "eval_ranking_idealized_expo": 0.5092975497245789, + "eval_ranking_simple": 0.5134297609329224, + "eval_regularize": 6.00606632232666, + "eval_runtime": 258.8045, + "eval_samples_per_second": 22.372, + "eval_steps_per_second": 0.935, + "step": 750 + }, + { + "dpo_loss": 0.6240705251693726, + "epoch": 4.2796410014171, + "grad_norm": 928.8030739675138, + "learning_rate": 6.021345186850418e-08, + "logits": -1.2852017879486084, + "logps": -82.10335540771484, + "loss": 0.972, + "objective": 1.01486337184906, + "ranking_idealized": 0.5333333611488342, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.5333333611488342, + "regularize": 1.01486337184906, + "step": 755 + }, + { + "dpo_loss": 0.7065821886062622, + "epoch": 4.307982994803968, + "grad_norm": 929.3861266719559, + "learning_rate": 5.5582275672538316e-08, + "logits": -1.2621345520019531, + "logps": -82.05753326416016, + "loss": 0.8992, + "objective": 0.8614501953125, + "ranking_idealized": 0.5666666626930237, + "ranking_idealized_expo": 0.5625, + "ranking_simple": 0.5666666626930237, + "regularize": 0.8614501953125, + "step": 760 + }, + { + "dpo_loss": 0.6562435030937195, + "epoch": 4.336324988190836, + "grad_norm": 977.0057816667661, + "learning_rate": 5.112590957844232e-08, + "logits": -1.2853424549102783, + "logps": -82.94247436523438, + "loss": 0.9593, + "objective": 0.8854343295097351, + "ranking_idealized": 0.5291666388511658, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5333333611488342, + "regularize": 0.8854343295097351, + "step": 765 + }, + { + "dpo_loss": 0.7083169221878052, + "epoch": 4.364666981577704, + "grad_norm": 932.8658870761088, + "learning_rate": 4.684610648167503e-08, + "logits": -1.1741254329681396, + "logps": -81.14856719970703, + "loss": 0.9364, + "objective": 0.9174091815948486, + "ranking_idealized": 0.5083333253860474, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.5083333253860474, + "regularize": 0.9174091815948486, + "step": 770 + }, + { + "dpo_loss": 0.6173561811447144, + "epoch": 4.393008974964572, + "grad_norm": 973.1322872774431, + "learning_rate": 4.274454982728032e-08, + "logits": -1.2410908937454224, + "logps": -81.94947052001953, + "loss": 0.8846, + "objective": 0.7543167471885681, + "ranking_idealized": 0.5083333253860474, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.5083333253860474, + "regularize": 0.7543167471885681, + "step": 775 + }, + { + "dpo_loss": 0.6880941390991211, + "epoch": 4.42135096835144, + "grad_norm": 934.0673376048558, + "learning_rate": 3.882285294770937e-08, + "logits": -1.2685768604278564, + "logps": -80.74879455566406, + "loss": 0.8891, + "objective": 0.9145664572715759, + "ranking_idealized": 0.5041666626930237, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5041666626930237, + "regularize": 0.9145664572715759, + "step": 780 + }, + { + "dpo_loss": 0.6857554316520691, + "epoch": 4.449692961738309, + "grad_norm": 906.6964623786657, + "learning_rate": 3.508255842822255e-08, + "logits": -1.286802887916565, + "logps": -80.75093841552734, + "loss": 0.8708, + "objective": 0.865050196647644, + "ranking_idealized": 0.5291666388511658, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.5249999761581421, + "regularize": 0.865050196647644, + "step": 785 + }, + { + "dpo_loss": 0.6727093458175659, + "epoch": 4.478034955125177, + "grad_norm": 954.6542045385403, + "learning_rate": 3.15251375001192e-08, + "logits": -1.248421549797058, + "logps": -83.45836639404297, + "loss": 0.8584, + "objective": 0.825614869594574, + "ranking_idealized": 0.5583333373069763, + "ranking_idealized_expo": 0.5541666746139526, + "ranking_simple": 0.5541666746139526, + "regularize": 0.825614869594574, + "step": 790 + }, + { + "dpo_loss": 0.7268415689468384, + "epoch": 4.506376948512045, + "grad_norm": 940.0246507195186, + "learning_rate": 2.8151989462033787e-08, + "logits": -1.1517890691757202, + "logps": -82.83182525634766, + "loss": 0.8256, + "objective": 0.8579443693161011, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.550000011920929, + "regularize": 0.8579443693161011, + "step": 795 + }, + { + "dpo_loss": 0.6633224487304688, + "epoch": 4.534718941898913, + "grad_norm": 974.8383451116964, + "learning_rate": 2.4964441129527335e-08, + "logits": -1.148630976676941, + "logps": -82.155029296875, + "loss": 0.8548, + "objective": 0.8071673512458801, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.512499988079071, + "regularize": 0.8071673512458801, + "step": 800 + }, + { + "epoch": 4.534718941898913, + "eval_dpo_loss": 3.0852909088134766, + "eval_logits": -1.2662479877471924, + "eval_logps": -88.35336303710938, + "eval_loss": 5.92377233505249, + "eval_objective": 5.97105073928833, + "eval_ranking_idealized": 0.5092975497245789, + "eval_ranking_idealized_expo": 0.5092975497245789, + "eval_ranking_simple": 0.5134297609329224, + "eval_regularize": 5.97105073928833, + "eval_runtime": 258.7722, + "eval_samples_per_second": 22.375, + "eval_steps_per_second": 0.935, + "step": 800 + }, + { + "dpo_loss": 0.6257317662239075, + "epoch": 4.563060935285781, + "grad_norm": 904.8852856060951, + "learning_rate": 2.1963746313188757e-08, + "logits": -1.2208141088485718, + "logps": -81.56495666503906, + "loss": 0.8065, + "objective": 0.8274087905883789, + "ranking_idealized": 0.46666666865348816, + "ranking_idealized_expo": 0.4583333432674408, + "ranking_simple": 0.46666666865348816, + "regularize": 0.8274087905883789, + "step": 805 + }, + { + "dpo_loss": 0.6851524710655212, + "epoch": 4.59140292867265, + "grad_norm": 950.7745711731907, + "learning_rate": 1.915108532545351e-08, + "logits": -1.284138798713684, + "logps": -80.96287536621094, + "loss": 0.7829, + "objective": 0.8247645497322083, + "ranking_idealized": 0.4583333432674408, + "ranking_idealized_expo": 0.4583333432674408, + "ranking_simple": 0.4625000059604645, + "regularize": 0.8247645497322083, + "step": 810 + }, + { + "dpo_loss": 0.6184301972389221, + "epoch": 4.619744922059518, + "grad_norm": 910.9864796470945, + "learning_rate": 1.6527564516331638e-08, + "logits": -1.1581400632858276, + "logps": -82.53880310058594, + "loss": 0.7758, + "objective": 0.8514427542686462, + "ranking_idealized": 0.5916666388511658, + "ranking_idealized_expo": 0.5874999761581421, + "ranking_simple": 0.5916666388511658, + "regularize": 0.8514427542686462, + "step": 815 + }, + { + "dpo_loss": 0.7527021765708923, + "epoch": 4.648086915446386, + "grad_norm": 990.2454056593273, + "learning_rate": 1.4094215838229172e-08, + "logits": -1.2537620067596436, + "logps": -82.07845306396484, + "loss": 0.8496, + "objective": 0.8979706168174744, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.5625, + "ranking_simple": 0.5583333373069763, + "regularize": 0.8979706168174744, + "step": 820 + }, + { + "dpo_loss": 0.6094750165939331, + "epoch": 4.6764289088332545, + "grad_norm": 933.278247470779, + "learning_rate": 1.1851996440033318e-08, + "logits": -1.1612147092819214, + "logps": -80.19402313232422, + "loss": 0.7784, + "objective": 0.6954202651977539, + "ranking_idealized": 0.5, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5, + "regularize": 0.6954202651977539, + "step": 825 + }, + { + "dpo_loss": 0.7630098462104797, + "epoch": 4.7047709022201225, + "grad_norm": 1034.6630104905564, + "learning_rate": 9.801788290621505e-09, + "logits": -1.3119471073150635, + "logps": -82.28199768066406, + "loss": 0.8393, + "objective": 0.9094979166984558, + "ranking_idealized": 0.5166666507720947, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5208333134651184, + "regularize": 0.9094979166984558, + "step": 830 + }, + { + "dpo_loss": 0.6389487385749817, + "epoch": 4.733112895606991, + "grad_norm": 919.014869305993, + "learning_rate": 7.944397831941951e-09, + "logits": -1.2169098854064941, + "logps": -81.839111328125, + "loss": 0.7883, + "objective": 0.7863165140151978, + "ranking_idealized": 0.49166667461395264, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.4958333373069763, + "regularize": 0.7863165140151978, + "step": 835 + }, + { + "dpo_loss": 0.6490210890769958, + "epoch": 4.7614548889938595, + "grad_norm": 984.0918792700685, + "learning_rate": 6.280555661802856e-09, + "logits": -1.2279409170150757, + "logps": -82.31600189208984, + "loss": 0.7359, + "objective": 0.7738173007965088, + "ranking_idealized": 0.5541666746139526, + "ranking_idealized_expo": 0.5541666746139526, + "ranking_simple": 0.5541666746139526, + "regularize": 0.7738173007965088, + "step": 840 + }, + { + "dpo_loss": 0.6469201445579529, + "epoch": 4.7897968823807275, + "grad_norm": 943.5599388069303, + "learning_rate": 4.810916246494157e-09, + "logits": -1.2585629224777222, + "logps": -82.3524398803711, + "loss": 0.7615, + "objective": 0.7981647849082947, + "ranking_idealized": 0.4583333432674408, + "ranking_idealized_expo": 0.4541666805744171, + "ranking_simple": 0.4625000059604645, + "regularize": 0.7981647849082947, + "step": 845 + }, + { + "dpo_loss": 0.6333919167518616, + "epoch": 4.818138875767596, + "grad_norm": 953.2591490386869, + "learning_rate": 3.5360576633558513e-09, + "logits": -1.2325036525726318, + "logps": -81.02188873291016, + "loss": 0.7765, + "objective": 0.7740827798843384, + "ranking_idealized": 0.5291666388511658, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.5291666388511658, + "regularize": 0.7740827798843384, + "step": 850 + }, + { + "epoch": 4.818138875767596, + "eval_dpo_loss": 3.091606378555298, + "eval_logits": -1.266028642654419, + "eval_logps": -88.38737487792969, + "eval_loss": 5.932301044464111, + "eval_objective": 5.976984977722168, + "eval_ranking_idealized": 0.5092975497245789, + "eval_ranking_idealized_expo": 0.5092975497245789, + "eval_ranking_simple": 0.5134297609329224, + "eval_regularize": 5.976984977722168, + "eval_runtime": 259.1307, + "eval_samples_per_second": 22.344, + "eval_steps_per_second": 0.934, + "step": 850 + }, + { + "dpo_loss": 0.7061713337898254, + "epoch": 4.846480869154464, + "grad_norm": 938.2798417262723, + "learning_rate": 2.4564813733932155e-09, + "logits": -1.2008044719696045, + "logps": -81.2861328125, + "loss": 0.7477, + "objective": 0.7695434093475342, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5375000238418579, + "regularize": 0.7695434093475342, + "step": 855 + }, + { + "dpo_loss": 0.6985270380973816, + "epoch": 4.874822862541333, + "grad_norm": 978.3840453547333, + "learning_rate": 1.5726120240288631e-09, + "logits": -1.1891288757324219, + "logps": -81.2249984741211, + "loss": 0.787, + "objective": 0.8799866437911987, + "ranking_idealized": 0.5541666746139526, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.550000011920929, + "regularize": 0.8799866437911987, + "step": 860 + }, + { + "dpo_loss": 0.6121171116828918, + "epoch": 4.903164855928201, + "grad_norm": 903.3650703276869, + "learning_rate": 8.847972820693051e-10, + "logits": -1.255650281906128, + "logps": -80.4308090209961, + "loss": 0.6987, + "objective": 0.6325153708457947, + "ranking_idealized": 0.4958333373069763, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.4958333373069763, + "regularize": 0.6325153708457947, + "step": 865 + }, + { + "dpo_loss": 0.6306089162826538, + "epoch": 4.931506849315069, + "grad_norm": 920.898527815901, + "learning_rate": 3.933076969516724e-10, + "logits": -1.288960576057434, + "logps": -81.9923095703125, + "loss": 0.6978, + "objective": 0.6587303876876831, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.512499988079071, + "regularize": 0.6587303876876831, + "step": 870 + }, + { + "dpo_loss": 0.72224360704422, + "epoch": 4.959848842701937, + "grad_norm": 973.4560843637579, + "learning_rate": 9.833659432367803e-11, + "logits": -1.2122024297714233, + "logps": -81.87159729003906, + "loss": 0.7712, + "objective": 0.9232720136642456, + "ranking_idealized": 0.5458333492279053, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.550000011920929, + "regularize": 0.9232720136642456, + "step": 875 + }, + { + "dpo_loss": 0.6564382910728455, + "epoch": 4.988190836088805, + "grad_norm": 950.1802722480554, + "learning_rate": 0.0, + "logits": -1.284375786781311, + "logps": -82.72828674316406, + "loss": 0.7367, + "objective": 0.7989345788955688, + "ranking_idealized": 0.5291666388511658, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.5291666388511658, + "regularize": 0.7989345788955688, + "step": 880 + }, + { + "epoch": 4.988190836088805, + "step": 880, + "total_flos": 0.0, + "train_loss": 2.940262828902765, + "train_runtime": 35074.095, + "train_samples_per_second": 7.242, + "train_steps_per_second": 0.025 + } + ], + "logging_steps": 5, + "max_steps": 880, + "num_input_tokens_seen": 0, + "num_train_epochs": 5, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +}