|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 4.988190836088805, |
|
"eval_steps": 50, |
|
"global_step": 880, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"dpo_loss": 0.6931471824645996, |
|
"epoch": 0.005668398677373642, |
|
"grad_norm": 1341.3653828621927, |
|
"learning_rate": 1.1363636363636363e-08, |
|
"logits": -1.3147305250167847, |
|
"logps": -88.0877456665039, |
|
"loss": 0.4113, |
|
"objective": 0.41588976979255676, |
|
"ranking_idealized": 0.5208333134651184, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.41588976979255676, |
|
"step": 1 |
|
}, |
|
{ |
|
"dpo_loss": 0.6927290558815002, |
|
"epoch": 0.02834199338686821, |
|
"grad_norm": 1318.7970843939129, |
|
"learning_rate": 5.6818181818181815e-08, |
|
"logits": -1.367867350578308, |
|
"logps": -84.43714141845703, |
|
"loss": 0.4128, |
|
"objective": 0.3797340393066406, |
|
"ranking_idealized": 0.546875, |
|
"ranking_idealized_expo": 0.546875, |
|
"ranking_simple": 0.546875, |
|
"regularize": 0.3797340393066406, |
|
"step": 5 |
|
}, |
|
{ |
|
"dpo_loss": 0.66615891456604, |
|
"epoch": 0.05668398677373642, |
|
"grad_norm": 1529.7102214452402, |
|
"learning_rate": 1.1363636363636363e-07, |
|
"logits": -1.446859359741211, |
|
"logps": -83.48344421386719, |
|
"loss": 0.4289, |
|
"objective": 0.4494988024234772, |
|
"ranking_idealized": 0.512499988079071, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 0.4494988024234772, |
|
"step": 10 |
|
}, |
|
{ |
|
"dpo_loss": 0.6892092823982239, |
|
"epoch": 0.08502598016060463, |
|
"grad_norm": 1398.9545573108187, |
|
"learning_rate": 1.7045454545454543e-07, |
|
"logits": -1.410345196723938, |
|
"logps": -83.83523559570312, |
|
"loss": 0.5083, |
|
"objective": 0.5087102055549622, |
|
"ranking_idealized": 0.5041666626930237, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 0.5087102055549622, |
|
"step": 15 |
|
}, |
|
{ |
|
"dpo_loss": 0.737316906452179, |
|
"epoch": 0.11336797354747284, |
|
"grad_norm": 1480.9989705702396, |
|
"learning_rate": 2.2727272727272726e-07, |
|
"logits": -1.397745966911316, |
|
"logps": -84.64732360839844, |
|
"loss": 0.5834, |
|
"objective": 0.6373786330223083, |
|
"ranking_idealized": 0.5375000238418579, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.6373786330223083, |
|
"step": 20 |
|
}, |
|
{ |
|
"dpo_loss": 0.6649187803268433, |
|
"epoch": 0.14170996693434104, |
|
"grad_norm": 1589.2205546755508, |
|
"learning_rate": 2.840909090909091e-07, |
|
"logits": -1.4405299425125122, |
|
"logps": -84.5063705444336, |
|
"loss": 0.7072, |
|
"objective": 0.7172243595123291, |
|
"ranking_idealized": 0.4958333373069763, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.4958333373069763, |
|
"regularize": 0.7172243595123291, |
|
"step": 25 |
|
}, |
|
{ |
|
"dpo_loss": 0.7319389581680298, |
|
"epoch": 0.17005196032120926, |
|
"grad_norm": 1608.2967040181402, |
|
"learning_rate": 3.4090909090909085e-07, |
|
"logits": -1.4063345193862915, |
|
"logps": -85.10441589355469, |
|
"loss": 0.9251, |
|
"objective": 0.919275164604187, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.919275164604187, |
|
"step": 30 |
|
}, |
|
{ |
|
"dpo_loss": 0.7343574166297913, |
|
"epoch": 0.19839395370807747, |
|
"grad_norm": 1660.500800571558, |
|
"learning_rate": 3.977272727272727e-07, |
|
"logits": -1.3780211210250854, |
|
"logps": -83.85320281982422, |
|
"loss": 1.1082, |
|
"objective": 1.1600453853607178, |
|
"ranking_idealized": 0.5041666626930237, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5, |
|
"regularize": 1.1600453853607178, |
|
"step": 35 |
|
}, |
|
{ |
|
"dpo_loss": 0.7147431969642639, |
|
"epoch": 0.22673594709494568, |
|
"grad_norm": 1529.1766380039583, |
|
"learning_rate": 4.545454545454545e-07, |
|
"logits": -1.3637299537658691, |
|
"logps": -84.27665710449219, |
|
"loss": 1.1991, |
|
"objective": 1.0680582523345947, |
|
"ranking_idealized": 0.5208333134651184, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 1.0680582523345947, |
|
"step": 40 |
|
}, |
|
{ |
|
"dpo_loss": 0.8464261889457703, |
|
"epoch": 0.25507794048181387, |
|
"grad_norm": 1547.4803845815225, |
|
"learning_rate": 5.113636363636363e-07, |
|
"logits": -1.483019471168518, |
|
"logps": -85.49544525146484, |
|
"loss": 1.4787, |
|
"objective": 1.7124279737472534, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 1.7124279737472534, |
|
"step": 45 |
|
}, |
|
{ |
|
"dpo_loss": 0.9592286348342896, |
|
"epoch": 0.2834199338686821, |
|
"grad_norm": 1190.1245065397072, |
|
"learning_rate": 5.681818181818182e-07, |
|
"logits": -1.3799251317977905, |
|
"logps": -83.56061553955078, |
|
"loss": 1.7171, |
|
"objective": 1.6765538454055786, |
|
"ranking_idealized": 0.42500001192092896, |
|
"ranking_idealized_expo": 0.42500001192092896, |
|
"ranking_simple": 0.42500001192092896, |
|
"regularize": 1.6765538454055786, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.2834199338686821, |
|
"eval_dpo_loss": 0.8390654921531677, |
|
"eval_logits": -1.3979839086532593, |
|
"eval_logps": -91.42163848876953, |
|
"eval_loss": 0.9451757073402405, |
|
"eval_objective": 0.9804208874702454, |
|
"eval_ranking_idealized": 0.5092975497245789, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5113636255264282, |
|
"eval_regularize": 0.9804208874702454, |
|
"eval_runtime": 260.041, |
|
"eval_samples_per_second": 22.266, |
|
"eval_steps_per_second": 0.931, |
|
"step": 50 |
|
}, |
|
{ |
|
"dpo_loss": 1.1626336574554443, |
|
"epoch": 0.3117619272555503, |
|
"grad_norm": 1379.607675790991, |
|
"learning_rate": 6.249999999999999e-07, |
|
"logits": -1.4113659858703613, |
|
"logps": -85.0433578491211, |
|
"loss": 1.963, |
|
"objective": 2.1214394569396973, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 2.1214394569396973, |
|
"step": 55 |
|
}, |
|
{ |
|
"dpo_loss": 1.1583107709884644, |
|
"epoch": 0.3401039206424185, |
|
"grad_norm": 1583.6531529945255, |
|
"learning_rate": 6.818181818181817e-07, |
|
"logits": -1.387775182723999, |
|
"logps": -83.92105102539062, |
|
"loss": 2.1311, |
|
"objective": 2.187351703643799, |
|
"ranking_idealized": 0.5166666507720947, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5041666626930237, |
|
"regularize": 2.187351703643799, |
|
"step": 60 |
|
}, |
|
{ |
|
"dpo_loss": 1.1024636030197144, |
|
"epoch": 0.3684459140292867, |
|
"grad_norm": 1400.9200642265332, |
|
"learning_rate": 7.386363636363636e-07, |
|
"logits": -1.3549267053604126, |
|
"logps": -84.21534729003906, |
|
"loss": 2.5118, |
|
"objective": 2.5817580223083496, |
|
"ranking_idealized": 0.5083333253860474, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5041666626930237, |
|
"regularize": 2.5817580223083496, |
|
"step": 65 |
|
}, |
|
{ |
|
"dpo_loss": 1.401374340057373, |
|
"epoch": 0.39678790741615494, |
|
"grad_norm": 1397.7577328959405, |
|
"learning_rate": 7.954545454545454e-07, |
|
"logits": -1.434369444847107, |
|
"logps": -85.42965698242188, |
|
"loss": 2.8155, |
|
"objective": 2.7807960510253906, |
|
"ranking_idealized": 0.5166666507720947, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 2.7807960510253906, |
|
"step": 70 |
|
}, |
|
{ |
|
"dpo_loss": 1.3847529888153076, |
|
"epoch": 0.42512990080302315, |
|
"grad_norm": 1574.992862602621, |
|
"learning_rate": 8.522727272727273e-07, |
|
"logits": -1.4534552097320557, |
|
"logps": -85.975341796875, |
|
"loss": 3.0239, |
|
"objective": 2.7482104301452637, |
|
"ranking_idealized": 0.5458333492279053, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 2.7482104301452637, |
|
"step": 75 |
|
}, |
|
{ |
|
"dpo_loss": 1.6628150939941406, |
|
"epoch": 0.45347189418989137, |
|
"grad_norm": 1317.6328063663461, |
|
"learning_rate": 9.09090909090909e-07, |
|
"logits": -1.4975560903549194, |
|
"logps": -85.59205627441406, |
|
"loss": 3.0523, |
|
"objective": 3.2058732509613037, |
|
"ranking_idealized": 0.5166666507720947, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 3.2058732509613037, |
|
"step": 80 |
|
}, |
|
{ |
|
"dpo_loss": 1.5282264947891235, |
|
"epoch": 0.4818138875767596, |
|
"grad_norm": 1409.5000185848141, |
|
"learning_rate": 9.65909090909091e-07, |
|
"logits": -1.4272305965423584, |
|
"logps": -83.47660827636719, |
|
"loss": 3.6628, |
|
"objective": 3.7103846073150635, |
|
"ranking_idealized": 0.5249999761581421, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 3.7103846073150635, |
|
"step": 85 |
|
}, |
|
{ |
|
"dpo_loss": 2.017771005630493, |
|
"epoch": 0.5101558809636277, |
|
"grad_norm": 1413.0881700978623, |
|
"learning_rate": 9.999842657116664e-07, |
|
"logits": -1.2261414527893066, |
|
"logps": -84.69164276123047, |
|
"loss": 3.9801, |
|
"objective": 3.7396154403686523, |
|
"ranking_idealized": 0.4791666567325592, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.4749999940395355, |
|
"regularize": 3.7396154403686523, |
|
"step": 90 |
|
}, |
|
{ |
|
"dpo_loss": 2.145413875579834, |
|
"epoch": 0.538497874350496, |
|
"grad_norm": 1451.2644391659398, |
|
"learning_rate": 9.998072663403656e-07, |
|
"logits": -1.3078831434249878, |
|
"logps": -83.98829650878906, |
|
"loss": 4.1074, |
|
"objective": 4.225299835205078, |
|
"ranking_idealized": 0.5458333492279053, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 4.225299835205078, |
|
"step": 95 |
|
}, |
|
{ |
|
"dpo_loss": 2.2973792552948, |
|
"epoch": 0.5668398677373642, |
|
"grad_norm": 1458.4336577249642, |
|
"learning_rate": 9.99433669591504e-07, |
|
"logits": -1.3813899755477905, |
|
"logps": -85.42733001708984, |
|
"loss": 4.4116, |
|
"objective": 4.421018600463867, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 4.421018600463867, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.5668398677373642, |
|
"eval_dpo_loss": 1.3937046527862549, |
|
"eval_logits": -1.3645607233047485, |
|
"eval_logps": -91.35843658447266, |
|
"eval_loss": 2.288909912109375, |
|
"eval_objective": 2.2847275733947754, |
|
"eval_ranking_idealized": 0.5092975497245789, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5144628286361694, |
|
"eval_regularize": 2.2847275733947754, |
|
"eval_runtime": 259.4011, |
|
"eval_samples_per_second": 22.321, |
|
"eval_steps_per_second": 0.933, |
|
"step": 100 |
|
}, |
|
{ |
|
"dpo_loss": 2.3384857177734375, |
|
"epoch": 0.5951818611242324, |
|
"grad_norm": 1377.8614811370987, |
|
"learning_rate": 9.988636224180095e-07, |
|
"logits": -1.2764217853546143, |
|
"logps": -85.17194366455078, |
|
"loss": 4.8397, |
|
"objective": 4.943901062011719, |
|
"ranking_idealized": 0.5375000238418579, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 4.943901062011719, |
|
"step": 105 |
|
}, |
|
{ |
|
"dpo_loss": 2.419879198074341, |
|
"epoch": 0.6235238545111006, |
|
"grad_norm": 1709.2314342435861, |
|
"learning_rate": 9.980973490458728e-07, |
|
"logits": -1.4455102682113647, |
|
"logps": -84.0779037475586, |
|
"loss": 4.9241, |
|
"objective": 4.433398723602295, |
|
"ranking_idealized": 0.4541666805744171, |
|
"ranking_idealized_expo": 0.44999998807907104, |
|
"ranking_simple": 0.44583332538604736, |
|
"regularize": 4.433398723602295, |
|
"step": 110 |
|
}, |
|
{ |
|
"dpo_loss": 2.3997786045074463, |
|
"epoch": 0.6518658478979689, |
|
"grad_norm": 1267.7408034843309, |
|
"learning_rate": 9.971351508859486e-07, |
|
"logits": -1.403380036354065, |
|
"logps": -83.38719940185547, |
|
"loss": 4.8313, |
|
"objective": 4.724060535430908, |
|
"ranking_idealized": 0.4833333194255829, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.48750001192092896, |
|
"regularize": 4.724060535430908, |
|
"step": 115 |
|
}, |
|
{ |
|
"dpo_loss": 2.01283597946167, |
|
"epoch": 0.680207841284837, |
|
"grad_norm": 1334.5055640243738, |
|
"learning_rate": 9.959774064153975e-07, |
|
"logits": -1.3471440076828003, |
|
"logps": -84.59120178222656, |
|
"loss": 4.9092, |
|
"objective": 4.763902187347412, |
|
"ranking_idealized": 0.5375000238418579, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 4.763902187347412, |
|
"step": 120 |
|
}, |
|
{ |
|
"dpo_loss": 2.8711302280426025, |
|
"epoch": 0.7085498346717053, |
|
"grad_norm": 1217.4260816961178, |
|
"learning_rate": 9.94624571028813e-07, |
|
"logits": -1.2994908094406128, |
|
"logps": -83.49886322021484, |
|
"loss": 5.2406, |
|
"objective": 5.268767356872559, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 5.268767356872559, |
|
"step": 125 |
|
}, |
|
{ |
|
"dpo_loss": 2.1492836475372314, |
|
"epoch": 0.7368918280585735, |
|
"grad_norm": 1181.5936213316104, |
|
"learning_rate": 9.930771768590933e-07, |
|
"logits": -1.4135076999664307, |
|
"logps": -82.80963897705078, |
|
"loss": 4.7897, |
|
"objective": 4.472428798675537, |
|
"ranking_idealized": 0.44999998807907104, |
|
"ranking_idealized_expo": 0.44999998807907104, |
|
"ranking_simple": 0.44999998807907104, |
|
"regularize": 4.472428798675537, |
|
"step": 130 |
|
}, |
|
{ |
|
"dpo_loss": 2.715928554534912, |
|
"epoch": 0.7652338214454416, |
|
"grad_norm": 1143.7481041860115, |
|
"learning_rate": 9.91335832568129e-07, |
|
"logits": -1.387623906135559, |
|
"logps": -84.99431610107422, |
|
"loss": 5.1988, |
|
"objective": 5.63712215423584, |
|
"ranking_idealized": 0.5291666388511658, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 5.63712215423584, |
|
"step": 135 |
|
}, |
|
{ |
|
"dpo_loss": 2.748910665512085, |
|
"epoch": 0.7935758148323099, |
|
"grad_norm": 1147.9091652909822, |
|
"learning_rate": 9.894012231073895e-07, |
|
"logits": -1.2905735969543457, |
|
"logps": -84.26557922363281, |
|
"loss": 5.4168, |
|
"objective": 5.596283912658691, |
|
"ranking_idealized": 0.5041666626930237, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 5.596283912658691, |
|
"step": 140 |
|
}, |
|
{ |
|
"dpo_loss": 2.3077232837677, |
|
"epoch": 0.821917808219178, |
|
"grad_norm": 1202.3074237963756, |
|
"learning_rate": 9.872741094484964e-07, |
|
"logits": -1.3657087087631226, |
|
"logps": -83.96611785888672, |
|
"loss": 5.3248, |
|
"objective": 4.7676682472229, |
|
"ranking_idealized": 0.4833333194255829, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.48750001192092896, |
|
"regularize": 4.7676682472229, |
|
"step": 145 |
|
}, |
|
{ |
|
"dpo_loss": 2.2885777950286865, |
|
"epoch": 0.8502598016060463, |
|
"grad_norm": 1261.3028504954063, |
|
"learning_rate": 9.849553282839024e-07, |
|
"logits": -1.350846767425537, |
|
"logps": -82.00806427001953, |
|
"loss": 5.641, |
|
"objective": 5.300591945648193, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 5.300591945648193, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.8502598016060463, |
|
"eval_dpo_loss": 1.8989161252975464, |
|
"eval_logits": -1.361150860786438, |
|
"eval_logps": -89.60128784179688, |
|
"eval_loss": 3.659212827682495, |
|
"eval_objective": 3.6993324756622314, |
|
"eval_ranking_idealized": 0.5092975497245789, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5123966932296753, |
|
"eval_regularize": 3.6993324756622314, |
|
"eval_runtime": 258.9031, |
|
"eval_samples_per_second": 22.364, |
|
"eval_steps_per_second": 0.935, |
|
"step": 150 |
|
}, |
|
{ |
|
"dpo_loss": 2.7537026405334473, |
|
"epoch": 0.8786017949929145, |
|
"grad_norm": 1297.1182521614555, |
|
"learning_rate": 9.824457916977784e-07, |
|
"logits": -1.358762264251709, |
|
"logps": -81.92320251464844, |
|
"loss": 5.4151, |
|
"objective": 5.554434776306152, |
|
"ranking_idealized": 0.512499988079071, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 5.554434776306152, |
|
"step": 155 |
|
}, |
|
{ |
|
"dpo_loss": 2.6335387229919434, |
|
"epoch": 0.9069437883797827, |
|
"grad_norm": 1187.7670981291164, |
|
"learning_rate": 9.797464868072486e-07, |
|
"logits": -1.2611459493637085, |
|
"logps": -83.41938018798828, |
|
"loss": 5.5854, |
|
"objective": 5.695128917694092, |
|
"ranking_idealized": 0.4958333373069763, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.48750001192092896, |
|
"regularize": 5.695128917694092, |
|
"step": 160 |
|
}, |
|
{ |
|
"dpo_loss": 2.661656618118286, |
|
"epoch": 0.9352857817666509, |
|
"grad_norm": 1120.1403651445853, |
|
"learning_rate": 9.768584753741134e-07, |
|
"logits": -1.2767577171325684, |
|
"logps": -84.16160583496094, |
|
"loss": 5.4701, |
|
"objective": 5.190924644470215, |
|
"ranking_idealized": 0.5208333134651184, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 5.190924644470215, |
|
"step": 165 |
|
}, |
|
{ |
|
"dpo_loss": 3.1157445907592773, |
|
"epoch": 0.9636277751535192, |
|
"grad_norm": 1093.9866208390724, |
|
"learning_rate": 9.737828933872073e-07, |
|
"logits": -1.3006123304367065, |
|
"logps": -84.45008087158203, |
|
"loss": 5.3421, |
|
"objective": 5.613492488861084, |
|
"ranking_idealized": 0.512499988079071, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5, |
|
"regularize": 5.613492488861084, |
|
"step": 170 |
|
}, |
|
{ |
|
"dpo_loss": 2.3348023891448975, |
|
"epoch": 0.9919697685403873, |
|
"grad_norm": 1151.3293772515624, |
|
"learning_rate": 9.705209506155634e-07, |
|
"logits": -1.2408747673034668, |
|
"logps": -84.64601135253906, |
|
"loss": 5.2759, |
|
"objective": 4.945895671844482, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.574999988079071, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 4.945895671844482, |
|
"step": 175 |
|
}, |
|
{ |
|
"dpo_loss": 2.864588737487793, |
|
"epoch": 1.0203117619272555, |
|
"grad_norm": 1140.0762636078218, |
|
"learning_rate": 9.670739301325534e-07, |
|
"logits": -1.3873549699783325, |
|
"logps": -83.78386688232422, |
|
"loss": 5.4744, |
|
"objective": 5.529939651489258, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 5.529939651489258, |
|
"step": 180 |
|
}, |
|
{ |
|
"dpo_loss": 2.8053672313690186, |
|
"epoch": 1.0486537553141237, |
|
"grad_norm": 1077.7726094731238, |
|
"learning_rate": 9.63443187811197e-07, |
|
"logits": -1.2356277704238892, |
|
"logps": -84.0350341796875, |
|
"loss": 5.2551, |
|
"objective": 5.298059463500977, |
|
"ranking_idealized": 0.4833333194255829, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.48750001192092896, |
|
"regularize": 5.298059463500977, |
|
"step": 185 |
|
}, |
|
{ |
|
"dpo_loss": 2.420074462890625, |
|
"epoch": 1.076995748700992, |
|
"grad_norm": 1190.6128199162288, |
|
"learning_rate": 9.596301517908328e-07, |
|
"logits": -1.3124566078186035, |
|
"logps": -84.89928436279297, |
|
"loss": 5.3934, |
|
"objective": 5.610664367675781, |
|
"ranking_idealized": 0.5541666746139526, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 5.610664367675781, |
|
"step": 190 |
|
}, |
|
{ |
|
"dpo_loss": 2.3999674320220947, |
|
"epoch": 1.10533774208786, |
|
"grad_norm": 1115.8540591706928, |
|
"learning_rate": 9.556363219153662e-07, |
|
"logits": -1.2911279201507568, |
|
"logps": -84.42256927490234, |
|
"loss": 5.3254, |
|
"objective": 5.321680545806885, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 5.321680545806885, |
|
"step": 195 |
|
}, |
|
{ |
|
"dpo_loss": 2.4562041759490967, |
|
"epoch": 1.1336797354747283, |
|
"grad_norm": 1209.8487619605175, |
|
"learning_rate": 9.514632691433106e-07, |
|
"logits": -1.357124924659729, |
|
"logps": -84.73216247558594, |
|
"loss": 5.6662, |
|
"objective": 5.265989303588867, |
|
"ranking_idealized": 0.5083333253860474, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 5.265989303588867, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.1336797354747283, |
|
"eval_dpo_loss": 2.5621941089630127, |
|
"eval_logits": -1.312853455543518, |
|
"eval_logps": -91.82030487060547, |
|
"eval_loss": 4.9017205238342285, |
|
"eval_objective": 5.1433634757995605, |
|
"eval_ranking_idealized": 0.5092975497245789, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5134297609329224, |
|
"eval_regularize": 5.1433634757995605, |
|
"eval_runtime": 259.021, |
|
"eval_samples_per_second": 22.353, |
|
"eval_steps_per_second": 0.934, |
|
"step": 200 |
|
}, |
|
{ |
|
"dpo_loss": 2.70652174949646, |
|
"epoch": 1.1620217288615966, |
|
"grad_norm": 1176.9552446858397, |
|
"learning_rate": 9.471126349298556e-07, |
|
"logits": -1.3222142457962036, |
|
"logps": -84.78860473632812, |
|
"loss": 5.5162, |
|
"objective": 5.902441501617432, |
|
"ranking_idealized": 0.48750001192092896, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.5, |
|
"regularize": 5.902441501617432, |
|
"step": 205 |
|
}, |
|
{ |
|
"dpo_loss": 2.1185081005096436, |
|
"epoch": 1.1903637222484649, |
|
"grad_norm": 1095.8529561562762, |
|
"learning_rate": 9.425861305812081e-07, |
|
"logits": -1.302480936050415, |
|
"logps": -84.719482421875, |
|
"loss": 5.425, |
|
"objective": 5.670342922210693, |
|
"ranking_idealized": 0.5, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5, |
|
"regularize": 5.670342922210693, |
|
"step": 210 |
|
}, |
|
{ |
|
"dpo_loss": 2.6837000846862793, |
|
"epoch": 1.2187057156353331, |
|
"grad_norm": 1062.9514542837012, |
|
"learning_rate": 9.378855365814557e-07, |
|
"logits": -1.273558497428894, |
|
"logps": -84.21690368652344, |
|
"loss": 5.3289, |
|
"objective": 5.31361722946167, |
|
"ranking_idealized": 0.48750001192092896, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.48750001192092896, |
|
"regularize": 5.31361722946167, |
|
"step": 215 |
|
}, |
|
{ |
|
"dpo_loss": 2.5074095726013184, |
|
"epoch": 1.2470477090222012, |
|
"grad_norm": 1053.1396201008674, |
|
"learning_rate": 9.330127018922193e-07, |
|
"logits": -1.1912199258804321, |
|
"logps": -83.59181213378906, |
|
"loss": 5.2229, |
|
"objective": 5.1353912353515625, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.6000000238418579, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 5.1353912353515625, |
|
"step": 220 |
|
}, |
|
{ |
|
"dpo_loss": 2.4020681381225586, |
|
"epoch": 1.2753897024090695, |
|
"grad_norm": 1047.2312940257925, |
|
"learning_rate": 9.279695432253708e-07, |
|
"logits": -1.2806742191314697, |
|
"logps": -84.57674407958984, |
|
"loss": 5.1377, |
|
"objective": 5.066871643066406, |
|
"ranking_idealized": 0.5291666388511658, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 5.066871643066406, |
|
"step": 225 |
|
}, |
|
{ |
|
"dpo_loss": 2.466576337814331, |
|
"epoch": 1.3037316957959377, |
|
"grad_norm": 1030.5617485584146, |
|
"learning_rate": 9.227580442891021e-07, |
|
"logits": -1.2669168710708618, |
|
"logps": -82.93086242675781, |
|
"loss": 4.8601, |
|
"objective": 4.961721420288086, |
|
"ranking_idealized": 0.5083333253860474, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 4.961721420288086, |
|
"step": 230 |
|
}, |
|
{ |
|
"dpo_loss": 2.1171398162841797, |
|
"epoch": 1.3320736891828058, |
|
"grad_norm": 1061.077062757036, |
|
"learning_rate": 9.173802550076401e-07, |
|
"logits": -1.3311480283737183, |
|
"logps": -81.57727813720703, |
|
"loss": 4.9903, |
|
"objective": 4.573681354522705, |
|
"ranking_idealized": 0.4833333194255829, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.4958333373069763, |
|
"regularize": 4.573681354522705, |
|
"step": 235 |
|
}, |
|
{ |
|
"dpo_loss": 2.8134536743164062, |
|
"epoch": 1.360415682569674, |
|
"grad_norm": 970.3779104307249, |
|
"learning_rate": 9.118382907149163e-07, |
|
"logits": -1.267702579498291, |
|
"logps": -83.0981216430664, |
|
"loss": 5.0505, |
|
"objective": 5.230247974395752, |
|
"ranking_idealized": 0.4791666567325592, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.4791666567325592, |
|
"regularize": 5.230247974395752, |
|
"step": 240 |
|
}, |
|
{ |
|
"dpo_loss": 2.3336708545684814, |
|
"epoch": 1.3887576759565423, |
|
"grad_norm": 1037.7314557451798, |
|
"learning_rate": 9.061343313225087e-07, |
|
"logits": -1.2927136421203613, |
|
"logps": -82.71648406982422, |
|
"loss": 4.97, |
|
"objective": 4.810959815979004, |
|
"ranking_idealized": 0.5208333134651184, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 4.810959815979004, |
|
"step": 245 |
|
}, |
|
{ |
|
"dpo_loss": 1.7325116395950317, |
|
"epoch": 1.4170996693434104, |
|
"grad_norm": 1022.8469212904224, |
|
"learning_rate": 9.002706204621802e-07, |
|
"logits": -1.2380987405776978, |
|
"logps": -82.20935821533203, |
|
"loss": 5.0544, |
|
"objective": 4.453593730926514, |
|
"ranking_idealized": 0.47083333134651184, |
|
"ranking_idealized_expo": 0.47083333134651184, |
|
"ranking_simple": 0.47083333134651184, |
|
"regularize": 4.453593730926514, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.4170996693434104, |
|
"eval_dpo_loss": 2.388360023498535, |
|
"eval_logits": -1.2957789897918701, |
|
"eval_logps": -89.6596450805664, |
|
"eval_loss": 4.645730495452881, |
|
"eval_objective": 4.698073387145996, |
|
"eval_ranking_idealized": 0.5092975497245789, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5092975497245789, |
|
"eval_regularize": 4.698073387145996, |
|
"eval_runtime": 258.9175, |
|
"eval_samples_per_second": 22.362, |
|
"eval_steps_per_second": 0.935, |
|
"step": 250 |
|
}, |
|
{ |
|
"dpo_loss": 2.6409242153167725, |
|
"epoch": 1.4454416627302786, |
|
"grad_norm": 1045.3372739075203, |
|
"learning_rate": 8.942494646033554e-07, |
|
"logits": -1.248718500137329, |
|
"logps": -83.6023941040039, |
|
"loss": 5.3714, |
|
"objective": 5.307827949523926, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 5.307827949523926, |
|
"step": 255 |
|
}, |
|
{ |
|
"dpo_loss": 2.64563250541687, |
|
"epoch": 1.473783656117147, |
|
"grad_norm": 1197.986844648104, |
|
"learning_rate": 8.880732321458784e-07, |
|
"logits": -1.2879093885421753, |
|
"logps": -84.90482330322266, |
|
"loss": 5.0662, |
|
"objective": 5.523282051086426, |
|
"ranking_idealized": 0.48750001192092896, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.4791666567325592, |
|
"regularize": 5.523282051086426, |
|
"step": 260 |
|
}, |
|
{ |
|
"dpo_loss": 2.4120934009552, |
|
"epoch": 1.5021256495040152, |
|
"grad_norm": 1129.3043637151231, |
|
"learning_rate": 8.817443524884117e-07, |
|
"logits": -1.2617005109786987, |
|
"logps": -83.68741607666016, |
|
"loss": 5.09, |
|
"objective": 4.771634578704834, |
|
"ranking_idealized": 0.5041666626930237, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5, |
|
"regularize": 4.771634578704834, |
|
"step": 265 |
|
}, |
|
{ |
|
"dpo_loss": 3.012559175491333, |
|
"epoch": 1.5304676428908834, |
|
"grad_norm": 1023.223627517731, |
|
"learning_rate": 8.752653150728411e-07, |
|
"logits": -1.284982442855835, |
|
"logps": -84.35843658447266, |
|
"loss": 5.3376, |
|
"objective": 5.6197028160095215, |
|
"ranking_idealized": 0.5083333253860474, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5041666626930237, |
|
"regularize": 5.6197028160095215, |
|
"step": 270 |
|
}, |
|
{ |
|
"dpo_loss": 2.465777635574341, |
|
"epoch": 1.5588096362777515, |
|
"grad_norm": 1088.952583153439, |
|
"learning_rate": 8.68638668405062e-07, |
|
"logits": -1.2928842306137085, |
|
"logps": -84.76825714111328, |
|
"loss": 5.0697, |
|
"objective": 4.782662868499756, |
|
"ranking_idealized": 0.5249999761581421, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 4.782662868499756, |
|
"step": 275 |
|
}, |
|
{ |
|
"dpo_loss": 2.743584156036377, |
|
"epoch": 1.5871516296646198, |
|
"grad_norm": 1199.9945832988806, |
|
"learning_rate": 8.61867019052535e-07, |
|
"logits": -1.212363362312317, |
|
"logps": -82.49223327636719, |
|
"loss": 4.971, |
|
"objective": 5.158485412597656, |
|
"ranking_idealized": 0.5041666626930237, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5041666626930237, |
|
"regularize": 5.158485412597656, |
|
"step": 280 |
|
}, |
|
{ |
|
"dpo_loss": 2.19926381111145, |
|
"epoch": 1.615493623051488, |
|
"grad_norm": 1072.4621792134556, |
|
"learning_rate": 8.549530306190014e-07, |
|
"logits": -1.3124572038650513, |
|
"logps": -84.11770629882812, |
|
"loss": 4.7979, |
|
"objective": 4.820002555847168, |
|
"ranking_idealized": 0.5208333134651184, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 4.820002555847168, |
|
"step": 285 |
|
}, |
|
{ |
|
"dpo_loss": 2.2039902210235596, |
|
"epoch": 1.643835616438356, |
|
"grad_norm": 1037.1720137680302, |
|
"learning_rate": 8.478994226967638e-07, |
|
"logits": -1.331601619720459, |
|
"logps": -82.66283416748047, |
|
"loss": 4.7089, |
|
"objective": 4.694666385650635, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.5625, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 4.694666385650635, |
|
"step": 290 |
|
}, |
|
{ |
|
"dpo_loss": 2.586439847946167, |
|
"epoch": 1.6721776098252243, |
|
"grad_norm": 1019.0647957946281, |
|
"learning_rate": 8.407089697969456e-07, |
|
"logits": -1.2327104806900024, |
|
"logps": -81.66007995605469, |
|
"loss": 4.8644, |
|
"objective": 4.790833473205566, |
|
"ranking_idealized": 0.4833333194255829, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.49166667461395264, |
|
"regularize": 4.790833473205566, |
|
"step": 295 |
|
}, |
|
{ |
|
"dpo_loss": 2.176333427429199, |
|
"epoch": 1.7005196032120926, |
|
"grad_norm": 1054.8932248051876, |
|
"learning_rate": 8.333845002581458e-07, |
|
"logits": -1.3139069080352783, |
|
"logps": -83.0549087524414, |
|
"loss": 4.799, |
|
"objective": 4.67409610748291, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.574999988079071, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 4.67409610748291, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.7005196032120926, |
|
"eval_dpo_loss": 2.5370917320251465, |
|
"eval_logits": -1.312793254852295, |
|
"eval_logps": -89.6458969116211, |
|
"eval_loss": 5.069702625274658, |
|
"eval_objective": 5.148064136505127, |
|
"eval_ranking_idealized": 0.5092975497245789, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5113636255264282, |
|
"eval_regularize": 5.148064136505127, |
|
"eval_runtime": 259.1048, |
|
"eval_samples_per_second": 22.346, |
|
"eval_steps_per_second": 0.934, |
|
"step": 300 |
|
}, |
|
{ |
|
"dpo_loss": 2.535557985305786, |
|
"epoch": 1.7288615965989607, |
|
"grad_norm": 1059.8267179953498, |
|
"learning_rate": 8.259288951339232e-07, |
|
"logits": -1.2971898317337036, |
|
"logps": -83.48696899414062, |
|
"loss": 4.7904, |
|
"objective": 5.117157459259033, |
|
"ranking_idealized": 0.5083333253860474, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 5.117157459259033, |
|
"step": 305 |
|
}, |
|
{ |
|
"dpo_loss": 2.743206024169922, |
|
"epoch": 1.7572035899858292, |
|
"grad_norm": 1062.5530460229647, |
|
"learning_rate": 8.183450870595441e-07, |
|
"logits": -1.3307418823242188, |
|
"logps": -81.59162902832031, |
|
"loss": 4.7455, |
|
"objective": 4.8372015953063965, |
|
"ranking_idealized": 0.5249999761581421, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 4.8372015953063965, |
|
"step": 310 |
|
}, |
|
{ |
|
"dpo_loss": 2.1805214881896973, |
|
"epoch": 1.7855455833726972, |
|
"grad_norm": 967.1673859822481, |
|
"learning_rate": 8.106360590984404e-07, |
|
"logits": -1.2439404726028442, |
|
"logps": -82.7205810546875, |
|
"loss": 4.5159, |
|
"objective": 4.586319923400879, |
|
"ranking_idealized": 0.5083333253860474, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 4.586319923400879, |
|
"step": 315 |
|
}, |
|
{ |
|
"dpo_loss": 2.943455219268799, |
|
"epoch": 1.8138875767595655, |
|
"grad_norm": 1040.3088755706913, |
|
"learning_rate": 8.028048435688333e-07, |
|
"logits": -1.3003054857254028, |
|
"logps": -83.96493530273438, |
|
"loss": 4.8496, |
|
"objective": 5.258904933929443, |
|
"ranking_idealized": 0.5041666626930237, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 5.258904933929443, |
|
"step": 320 |
|
}, |
|
{ |
|
"dpo_loss": 1.8657586574554443, |
|
"epoch": 1.8422295701464337, |
|
"grad_norm": 1028.4810077841332, |
|
"learning_rate": 7.948545208509811e-07, |
|
"logits": -1.3532642126083374, |
|
"logps": -85.16321563720703, |
|
"loss": 4.6322, |
|
"objective": 4.409180164337158, |
|
"ranking_idealized": 0.5249999761581421, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 4.409180164337158, |
|
"step": 325 |
|
}, |
|
{ |
|
"dpo_loss": 2.035325050354004, |
|
"epoch": 1.8705715635333018, |
|
"grad_norm": 981.4379644106098, |
|
"learning_rate": 7.86788218175523e-07, |
|
"logits": -1.124271035194397, |
|
"logps": -81.62163543701172, |
|
"loss": 4.5108, |
|
"objective": 4.548847675323486, |
|
"ranking_idealized": 0.4749999940395355, |
|
"ranking_idealized_expo": 0.47083333134651184, |
|
"ranking_simple": 0.4791666567325592, |
|
"regularize": 4.548847675323486, |
|
"step": 330 |
|
}, |
|
{ |
|
"dpo_loss": 2.2992029190063477, |
|
"epoch": 1.89891355692017, |
|
"grad_norm": 1060.3744283391652, |
|
"learning_rate": 7.786091083933949e-07, |
|
"logits": -1.2721890211105347, |
|
"logps": -81.50038146972656, |
|
"loss": 4.574, |
|
"objective": 4.452338218688965, |
|
"ranking_idealized": 0.4958333373069763, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5041666626930237, |
|
"regularize": 4.452338218688965, |
|
"step": 335 |
|
}, |
|
{ |
|
"dpo_loss": 2.214193820953369, |
|
"epoch": 1.9272555503070383, |
|
"grad_norm": 1066.9038354372583, |
|
"learning_rate": 7.703204087277988e-07, |
|
"logits": -1.2717024087905884, |
|
"logps": -82.89705657958984, |
|
"loss": 4.1923, |
|
"objective": 4.533308029174805, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 4.533308029174805, |
|
"step": 340 |
|
}, |
|
{ |
|
"dpo_loss": 1.864801287651062, |
|
"epoch": 1.9555975436939064, |
|
"grad_norm": 1068.1959237278584, |
|
"learning_rate": 7.619253795087208e-07, |
|
"logits": -1.2882568836212158, |
|
"logps": -81.01626586914062, |
|
"loss": 4.3847, |
|
"objective": 4.10886812210083, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 4.10886812210083, |
|
"step": 345 |
|
}, |
|
{ |
|
"dpo_loss": 1.7610963582992554, |
|
"epoch": 1.9839395370807746, |
|
"grad_norm": 1065.3539167654192, |
|
"learning_rate": 7.534273228904915e-07, |
|
"logits": -1.2216317653656006, |
|
"logps": -82.78074645996094, |
|
"loss": 4.3968, |
|
"objective": 4.256522178649902, |
|
"ranking_idealized": 0.48750001192092896, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.5, |
|
"regularize": 4.256522178649902, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.9839395370807746, |
|
"eval_dpo_loss": 2.7970640659332275, |
|
"eval_logits": -1.2878926992416382, |
|
"eval_logps": -88.54591369628906, |
|
"eval_loss": 5.4044508934021, |
|
"eval_objective": 5.363577365875244, |
|
"eval_ranking_idealized": 0.5092975497245789, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5103305578231812, |
|
"eval_regularize": 5.363577365875244, |
|
"eval_runtime": 258.9945, |
|
"eval_samples_per_second": 22.356, |
|
"eval_steps_per_second": 0.934, |
|
"step": 350 |
|
}, |
|
{ |
|
"dpo_loss": 2.0882179737091064, |
|
"epoch": 2.012281530467643, |
|
"grad_norm": 1012.7784752507871, |
|
"learning_rate": 7.448295815528956e-07, |
|
"logits": -1.180530309677124, |
|
"logps": -82.18423461914062, |
|
"loss": 4.222, |
|
"objective": 4.049466609954834, |
|
"ranking_idealized": 0.512499988079071, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.49166667461395264, |
|
"regularize": 4.049466609954834, |
|
"step": 355 |
|
}, |
|
{ |
|
"dpo_loss": 2.207961082458496, |
|
"epoch": 2.040623523854511, |
|
"grad_norm": 991.7225128541588, |
|
"learning_rate": 7.361355373863413e-07, |
|
"logits": -1.2234774827957153, |
|
"logps": -80.75540161132812, |
|
"loss": 4.1947, |
|
"objective": 4.063844680786133, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 4.063844680786133, |
|
"step": 360 |
|
}, |
|
{ |
|
"dpo_loss": 1.986746907234192, |
|
"epoch": 2.0689655172413794, |
|
"grad_norm": 1102.1948086047603, |
|
"learning_rate": 7.273486101616056e-07, |
|
"logits": -1.2934725284576416, |
|
"logps": -83.87660217285156, |
|
"loss": 4.2559, |
|
"objective": 4.076398849487305, |
|
"ranking_idealized": 0.5041666626930237, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 4.076398849487305, |
|
"step": 365 |
|
}, |
|
{ |
|
"dpo_loss": 1.9069340229034424, |
|
"epoch": 2.0973075106282475, |
|
"grad_norm": 1148.422426893069, |
|
"learning_rate": 7.184722561846797e-07, |
|
"logits": -1.2647373676300049, |
|
"logps": -80.90840911865234, |
|
"loss": 3.9351, |
|
"objective": 3.6877379417419434, |
|
"ranking_idealized": 0.4833333194255829, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.49166667461395264, |
|
"regularize": 3.6877379417419434, |
|
"step": 370 |
|
}, |
|
{ |
|
"dpo_loss": 1.905211329460144, |
|
"epoch": 2.1256495040151155, |
|
"grad_norm": 982.12339135048, |
|
"learning_rate": 7.095099669372443e-07, |
|
"logits": -1.2502344846725464, |
|
"logps": -83.15164947509766, |
|
"loss": 4.1889, |
|
"objective": 3.8048832416534424, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5874999761581421, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 3.8048832416534424, |
|
"step": 375 |
|
}, |
|
{ |
|
"dpo_loss": 2.2980270385742188, |
|
"epoch": 2.153991497401984, |
|
"grad_norm": 1002.9053178903335, |
|
"learning_rate": 7.004652677033068e-07, |
|
"logits": -1.1937479972839355, |
|
"logps": -82.11659240722656, |
|
"loss": 4.0113, |
|
"objective": 4.026199817657471, |
|
"ranking_idealized": 0.5291666388511658, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 4.026199817657471, |
|
"step": 380 |
|
}, |
|
{ |
|
"dpo_loss": 2.430462121963501, |
|
"epoch": 2.182333490788852, |
|
"grad_norm": 1002.2263691968883, |
|
"learning_rate": 6.913417161825449e-07, |
|
"logits": -1.2855180501937866, |
|
"logps": -84.00334930419922, |
|
"loss": 4.1443, |
|
"objective": 4.582634925842285, |
|
"ranking_idealized": 0.5041666626930237, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5041666626930237, |
|
"regularize": 4.582634925842285, |
|
"step": 385 |
|
}, |
|
{ |
|
"dpo_loss": 2.034668445587158, |
|
"epoch": 2.21067548417572, |
|
"grad_norm": 1073.0050710345477, |
|
"learning_rate": 6.821429010908971e-07, |
|
"logits": -1.183647632598877, |
|
"logps": -82.46015930175781, |
|
"loss": 3.9544, |
|
"objective": 3.7511041164398193, |
|
"ranking_idealized": 0.4833333194255829, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.4791666567325592, |
|
"regularize": 3.7511041164398193, |
|
"step": 390 |
|
}, |
|
{ |
|
"dpo_loss": 2.1025197505950928, |
|
"epoch": 2.2390174775625886, |
|
"grad_norm": 928.4936074724073, |
|
"learning_rate": 6.728724407489553e-07, |
|
"logits": -1.176824688911438, |
|
"logps": -82.696044921875, |
|
"loss": 4.0116, |
|
"objective": 4.03180456161499, |
|
"ranking_idealized": 0.5208333134651184, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 4.03180456161499, |
|
"step": 395 |
|
}, |
|
{ |
|
"dpo_loss": 1.8622020483016968, |
|
"epoch": 2.2673594709494567, |
|
"grad_norm": 950.7422986439022, |
|
"learning_rate": 6.635339816587108e-07, |
|
"logits": -1.242794394493103, |
|
"logps": -83.7727279663086, |
|
"loss": 3.8148, |
|
"objective": 3.835386037826538, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.574999988079071, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 3.835386037826538, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.2673594709494567, |
|
"eval_dpo_loss": 2.9398233890533447, |
|
"eval_logits": -1.2679872512817383, |
|
"eval_logps": -88.25416564941406, |
|
"eval_loss": 5.762566089630127, |
|
"eval_objective": 5.820021152496338, |
|
"eval_ranking_idealized": 0.5092975497245789, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5092975497245789, |
|
"eval_regularize": 5.820021152496338, |
|
"eval_runtime": 259.3166, |
|
"eval_samples_per_second": 22.328, |
|
"eval_steps_per_second": 0.933, |
|
"step": 400 |
|
}, |
|
{ |
|
"dpo_loss": 1.8469058275222778, |
|
"epoch": 2.295701464336325, |
|
"grad_norm": 963.0280264659817, |
|
"learning_rate": 6.541311970692162e-07, |
|
"logits": -1.301633358001709, |
|
"logps": -81.62474060058594, |
|
"loss": 3.8783, |
|
"objective": 3.7539381980895996, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.5583333373069763, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 3.7539381980895996, |
|
"step": 405 |
|
}, |
|
{ |
|
"dpo_loss": 1.6361950635910034, |
|
"epoch": 2.324043457723193, |
|
"grad_norm": 1074.5619431455382, |
|
"learning_rate": 6.446677855317264e-07, |
|
"logits": -1.2071573734283447, |
|
"logps": -80.61900329589844, |
|
"loss": 3.9829, |
|
"objective": 3.7228012084960938, |
|
"ranking_idealized": 0.4833333194255829, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.47083333134651184, |
|
"regularize": 3.7228012084960938, |
|
"step": 410 |
|
}, |
|
{ |
|
"dpo_loss": 1.8135488033294678, |
|
"epoch": 2.3523854511100613, |
|
"grad_norm": 1058.2549464280587, |
|
"learning_rate": 6.351474694448864e-07, |
|
"logits": -1.2399203777313232, |
|
"logps": -81.5149917602539, |
|
"loss": 3.748, |
|
"objective": 3.9710586071014404, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 3.9710586071014404, |
|
"step": 415 |
|
}, |
|
{ |
|
"dpo_loss": 2.120755434036255, |
|
"epoch": 2.3807274444969297, |
|
"grad_norm": 1114.426082281688, |
|
"learning_rate": 6.255739935905395e-07, |
|
"logits": -1.1594359874725342, |
|
"logps": -83.35548400878906, |
|
"loss": 3.7118, |
|
"objective": 3.681281089782715, |
|
"ranking_idealized": 0.4749999940395355, |
|
"ranking_idealized_expo": 0.47083333134651184, |
|
"ranking_simple": 0.4583333432674408, |
|
"regularize": 3.681281089782715, |
|
"step": 420 |
|
}, |
|
{ |
|
"dpo_loss": 1.7397000789642334, |
|
"epoch": 2.409069437883798, |
|
"grad_norm": 1024.8329703830655, |
|
"learning_rate": 6.159511236607315e-07, |
|
"logits": -1.2232387065887451, |
|
"logps": -81.95706939697266, |
|
"loss": 3.5923, |
|
"objective": 3.648602247238159, |
|
"ranking_idealized": 0.5, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 3.648602247238159, |
|
"step": 425 |
|
}, |
|
{ |
|
"dpo_loss": 1.804638385772705, |
|
"epoch": 2.4374114312706663, |
|
"grad_norm": 979.544871113056, |
|
"learning_rate": 6.062826447764883e-07, |
|
"logits": -1.2574443817138672, |
|
"logps": -82.60694122314453, |
|
"loss": 3.4778, |
|
"objective": 3.4914093017578125, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 3.4914093017578125, |
|
"step": 430 |
|
}, |
|
{ |
|
"dpo_loss": 1.6648954153060913, |
|
"epoch": 2.4657534246575343, |
|
"grad_norm": 1000.8401446346558, |
|
"learning_rate": 5.965723599989528e-07, |
|
"logits": -1.300902009010315, |
|
"logps": -82.3549575805664, |
|
"loss": 3.5217, |
|
"objective": 3.345120668411255, |
|
"ranking_idealized": 0.5083333253860474, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5, |
|
"regularize": 3.345120668411255, |
|
"step": 435 |
|
}, |
|
{ |
|
"dpo_loss": 2.0332047939300537, |
|
"epoch": 2.4940954180444024, |
|
"grad_norm": 1031.1441408735554, |
|
"learning_rate": 5.868240888334652e-07, |
|
"logits": -1.2219815254211426, |
|
"logps": -82.15070343017578, |
|
"loss": 3.7915, |
|
"objective": 3.9336984157562256, |
|
"ranking_idealized": 0.5375000238418579, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 3.9336984157562256, |
|
"step": 440 |
|
}, |
|
{ |
|
"dpo_loss": 2.0011653900146484, |
|
"epoch": 2.5224374114312704, |
|
"grad_norm": 970.4125487560452, |
|
"learning_rate": 5.770416657271728e-07, |
|
"logits": -1.2273285388946533, |
|
"logps": -80.1659164428711, |
|
"loss": 3.4124, |
|
"objective": 3.168755292892456, |
|
"ranking_idealized": 0.5166666507720947, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 3.168755292892456, |
|
"step": 445 |
|
}, |
|
{ |
|
"dpo_loss": 1.613458275794983, |
|
"epoch": 2.550779404818139, |
|
"grad_norm": 1061.4659212212357, |
|
"learning_rate": 5.67228938560766e-07, |
|
"logits": -1.2801109552383423, |
|
"logps": -81.43175506591797, |
|
"loss": 3.4169, |
|
"objective": 3.3297555446624756, |
|
"ranking_idealized": 0.5, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.49166667461395264, |
|
"regularize": 3.3297555446624756, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.550779404818139, |
|
"eval_dpo_loss": 3.138432502746582, |
|
"eval_logits": -1.2897279262542725, |
|
"eval_logps": -88.01155853271484, |
|
"eval_loss": 5.953909397125244, |
|
"eval_objective": 6.106462478637695, |
|
"eval_ranking_idealized": 0.5092975497245789, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5144628286361694, |
|
"eval_regularize": 6.106462478637695, |
|
"eval_runtime": 259.8077, |
|
"eval_samples_per_second": 22.286, |
|
"eval_steps_per_second": 0.931, |
|
"step": 450 |
|
}, |
|
{ |
|
"dpo_loss": 1.8731194734573364, |
|
"epoch": 2.579121398205007, |
|
"grad_norm": 1156.0005265988177, |
|
"learning_rate": 5.573897671349268e-07, |
|
"logits": -1.300366759300232, |
|
"logps": -82.42855834960938, |
|
"loss": 3.5224, |
|
"objective": 3.593839168548584, |
|
"ranking_idealized": 0.5041666626930237, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5041666626930237, |
|
"regularize": 3.593839168548584, |
|
"step": 455 |
|
}, |
|
{ |
|
"dpo_loss": 1.952242136001587, |
|
"epoch": 2.6074633915918755, |
|
"grad_norm": 1095.9802997680708, |
|
"learning_rate": 5.475280216520912e-07, |
|
"logits": -1.3715617656707764, |
|
"logps": -82.51021575927734, |
|
"loss": 3.3771, |
|
"objective": 3.524890422821045, |
|
"ranking_idealized": 0.512499988079071, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 3.524890422821045, |
|
"step": 460 |
|
}, |
|
{ |
|
"dpo_loss": 1.5585062503814697, |
|
"epoch": 2.6358053849787435, |
|
"grad_norm": 935.1251434271745, |
|
"learning_rate": 5.376475811941191e-07, |
|
"logits": -1.2529839277267456, |
|
"logps": -82.1951904296875, |
|
"loss": 3.4445, |
|
"objective": 3.3441879749298096, |
|
"ranking_idealized": 0.4958333373069763, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5, |
|
"regularize": 3.3441879749298096, |
|
"step": 465 |
|
}, |
|
{ |
|
"dpo_loss": 1.6525613069534302, |
|
"epoch": 2.6641473783656116, |
|
"grad_norm": 977.6740437303708, |
|
"learning_rate": 5.277523321964701e-07, |
|
"logits": -1.2638176679611206, |
|
"logps": -81.61119079589844, |
|
"loss": 3.3653, |
|
"objective": 3.1554312705993652, |
|
"ranking_idealized": 0.5541666746139526, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.5625, |
|
"regularize": 3.1554312705993652, |
|
"step": 470 |
|
}, |
|
{ |
|
"dpo_loss": 1.4508802890777588, |
|
"epoch": 2.69248937175248, |
|
"grad_norm": 993.1353802099517, |
|
"learning_rate": 5.178461669194903e-07, |
|
"logits": -1.228849172592163, |
|
"logps": -81.23489379882812, |
|
"loss": 2.9912, |
|
"objective": 2.843146562576294, |
|
"ranking_idealized": 0.4958333373069763, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5, |
|
"regularize": 2.843146562576294, |
|
"step": 475 |
|
}, |
|
{ |
|
"dpo_loss": 1.4309269189834595, |
|
"epoch": 2.720831365139348, |
|
"grad_norm": 1013.9109275016526, |
|
"learning_rate": 5.07932981917404e-07, |
|
"logits": -1.3067494630813599, |
|
"logps": -80.33541107177734, |
|
"loss": 3.1599, |
|
"objective": 3.0493505001068115, |
|
"ranking_idealized": 0.5, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 3.0493505001068115, |
|
"step": 480 |
|
}, |
|
{ |
|
"dpo_loss": 1.6128212213516235, |
|
"epoch": 2.7491733585262166, |
|
"grad_norm": 935.3777113198429, |
|
"learning_rate": 4.980166765056193e-07, |
|
"logits": -1.2488080263137817, |
|
"logps": -82.94316101074219, |
|
"loss": 3.2342, |
|
"objective": 3.1210060119628906, |
|
"ranking_idealized": 0.5249999761581421, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 3.1210060119628906, |
|
"step": 485 |
|
}, |
|
{ |
|
"dpo_loss": 1.2609761953353882, |
|
"epoch": 2.7775153519130846, |
|
"grad_norm": 1021.9995000614038, |
|
"learning_rate": 4.881011512269463e-07, |
|
"logits": -1.224744439125061, |
|
"logps": -80.88065338134766, |
|
"loss": 3.0434, |
|
"objective": 2.964614152908325, |
|
"ranking_idealized": 0.4749999940395355, |
|
"ranking_idealized_expo": 0.47083333134651184, |
|
"ranking_simple": 0.46666666865348816, |
|
"regularize": 2.964614152908325, |
|
"step": 490 |
|
}, |
|
{ |
|
"dpo_loss": 1.3648375272750854, |
|
"epoch": 2.8058573452999527, |
|
"grad_norm": 957.4547050967395, |
|
"learning_rate": 4.78190306317332e-07, |
|
"logits": -1.2405409812927246, |
|
"logps": -79.93798065185547, |
|
"loss": 2.9176, |
|
"objective": 2.8268253803253174, |
|
"ranking_idealized": 0.4791666567325592, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.47083333134651184, |
|
"regularize": 2.8268253803253174, |
|
"step": 495 |
|
}, |
|
{ |
|
"dpo_loss": 1.4600600004196167, |
|
"epoch": 2.8341993386868207, |
|
"grad_norm": 956.91251387349, |
|
"learning_rate": 4.682880401717177e-07, |
|
"logits": -1.2869919538497925, |
|
"logps": -79.13894653320312, |
|
"loss": 2.988, |
|
"objective": 2.750535488128662, |
|
"ranking_idealized": 0.5249999761581421, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 2.750535488128662, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.8341993386868207, |
|
"eval_dpo_loss": 3.1318445205688477, |
|
"eval_logits": -1.2855963706970215, |
|
"eval_logps": -87.95059967041016, |
|
"eval_loss": 5.985367298126221, |
|
"eval_objective": 6.018334865570068, |
|
"eval_ranking_idealized": 0.5092975497245789, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5092975497245789, |
|
"eval_regularize": 6.018334865570068, |
|
"eval_runtime": 259.5234, |
|
"eval_samples_per_second": 22.31, |
|
"eval_steps_per_second": 0.932, |
|
"step": 500 |
|
}, |
|
{ |
|
"dpo_loss": 1.2522823810577393, |
|
"epoch": 2.862541332073689, |
|
"grad_norm": 990.3820386306984, |
|
"learning_rate": 4.5839824781061886e-07, |
|
"logits": -1.238278865814209, |
|
"logps": -80.7296371459961, |
|
"loss": 2.9937, |
|
"objective": 2.6271092891693115, |
|
"ranking_idealized": 0.5208333134651184, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 2.6271092891693115, |
|
"step": 505 |
|
}, |
|
{ |
|
"dpo_loss": 1.5270129442214966, |
|
"epoch": 2.8908833254605573, |
|
"grad_norm": 943.7310548857196, |
|
"learning_rate": 4.4852481934803277e-07, |
|
"logits": -1.1962122917175293, |
|
"logps": -81.28018951416016, |
|
"loss": 2.8508, |
|
"objective": 3.192195177078247, |
|
"ranking_idealized": 0.5291666388511658, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 3.192195177078247, |
|
"step": 510 |
|
}, |
|
{ |
|
"dpo_loss": 1.3985689878463745, |
|
"epoch": 2.9192253188474258, |
|
"grad_norm": 1066.3526544144515, |
|
"learning_rate": 4.3867163846127674e-07, |
|
"logits": -1.3405725955963135, |
|
"logps": -81.18116760253906, |
|
"loss": 2.8001, |
|
"objective": 2.7043962478637695, |
|
"ranking_idealized": 0.4791666567325592, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.4791666567325592, |
|
"regularize": 2.7043962478637695, |
|
"step": 515 |
|
}, |
|
{ |
|
"dpo_loss": 1.549082636833191, |
|
"epoch": 2.947567312234294, |
|
"grad_norm": 976.1195004276424, |
|
"learning_rate": 4.2884258086335745e-07, |
|
"logits": -1.2520852088928223, |
|
"logps": -83.24747467041016, |
|
"loss": 2.7691, |
|
"objective": 2.9670374393463135, |
|
"ranking_idealized": 0.512499988079071, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 2.9670374393463135, |
|
"step": 520 |
|
}, |
|
{ |
|
"dpo_loss": 1.1514889001846313, |
|
"epoch": 2.975909305621162, |
|
"grad_norm": 993.1767181389736, |
|
"learning_rate": 4.1904151277847305e-07, |
|
"logits": -1.2369369268417358, |
|
"logps": -82.1258316040039, |
|
"loss": 2.7535, |
|
"objective": 2.7600042819976807, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 2.7600042819976807, |
|
"step": 525 |
|
}, |
|
{ |
|
"dpo_loss": 1.234760046005249, |
|
"epoch": 3.0042512990080303, |
|
"grad_norm": 954.3168391295485, |
|
"learning_rate": 4.092722894212487e-07, |
|
"logits": -1.2384470701217651, |
|
"logps": -81.53229522705078, |
|
"loss": 2.772, |
|
"objective": 2.536973237991333, |
|
"ranking_idealized": 0.512499988079071, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 2.536973237991333, |
|
"step": 530 |
|
}, |
|
{ |
|
"dpo_loss": 1.2055299282073975, |
|
"epoch": 3.0325932923948984, |
|
"grad_norm": 1192.8110305376617, |
|
"learning_rate": 3.995387534803005e-07, |
|
"logits": -1.2691408395767212, |
|
"logps": -82.96357727050781, |
|
"loss": 2.6555, |
|
"objective": 2.663506031036377, |
|
"ranking_idealized": 0.5458333492279053, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 2.663506031036377, |
|
"step": 535 |
|
}, |
|
{ |
|
"dpo_loss": 1.3903727531433105, |
|
"epoch": 3.0609352857817664, |
|
"grad_norm": 949.2725322799425, |
|
"learning_rate": 3.8984473360672967e-07, |
|
"logits": -1.3405746221542358, |
|
"logps": -81.4103012084961, |
|
"loss": 2.5599, |
|
"objective": 2.611825704574585, |
|
"ranking_idealized": 0.5375000238418579, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 2.611825704574585, |
|
"step": 540 |
|
}, |
|
{ |
|
"dpo_loss": 1.3084332942962646, |
|
"epoch": 3.089277279168635, |
|
"grad_norm": 999.2271817343484, |
|
"learning_rate": 3.801940429081345e-07, |
|
"logits": -1.2964000701904297, |
|
"logps": -82.18136596679688, |
|
"loss": 2.504, |
|
"objective": 2.4276323318481445, |
|
"ranking_idealized": 0.49166667461395264, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 2.4276323318481445, |
|
"step": 545 |
|
}, |
|
{ |
|
"dpo_loss": 1.0889129638671875, |
|
"epoch": 3.117619272555503, |
|
"grad_norm": 934.3758046690474, |
|
"learning_rate": 3.7059047744873955e-07, |
|
"logits": -1.1509153842926025, |
|
"logps": -82.78107452392578, |
|
"loss": 2.4859, |
|
"objective": 2.5204343795776367, |
|
"ranking_idealized": 0.5458333492279053, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 2.5204343795776367, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 3.117619272555503, |
|
"eval_dpo_loss": 3.178955316543579, |
|
"eval_logits": -1.2804911136627197, |
|
"eval_logps": -88.50298309326172, |
|
"eval_loss": 6.194624423980713, |
|
"eval_objective": 6.202889919281006, |
|
"eval_ranking_idealized": 0.5092975497245789, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5103305578231812, |
|
"eval_regularize": 6.202889919281006, |
|
"eval_runtime": 260.2352, |
|
"eval_samples_per_second": 22.249, |
|
"eval_steps_per_second": 0.93, |
|
"step": 550 |
|
}, |
|
{ |
|
"dpo_loss": 1.148443579673767, |
|
"epoch": 3.1459612659423715, |
|
"grad_norm": 977.893659928589, |
|
"learning_rate": 3.6103781475622786e-07, |
|
"logits": -1.2158228158950806, |
|
"logps": -83.3880844116211, |
|
"loss": 2.4258, |
|
"objective": 2.2721104621887207, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 2.2721104621887207, |
|
"step": 555 |
|
}, |
|
{ |
|
"dpo_loss": 1.2861641645431519, |
|
"epoch": 3.1743032593292395, |
|
"grad_norm": 912.4395782657139, |
|
"learning_rate": 3.5153981233586274e-07, |
|
"logits": -1.2103866338729858, |
|
"logps": -80.51148223876953, |
|
"loss": 2.4037, |
|
"objective": 2.3816921710968018, |
|
"ranking_idealized": 0.5041666626930237, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5041666626930237, |
|
"regularize": 2.3816921710968018, |
|
"step": 560 |
|
}, |
|
{ |
|
"dpo_loss": 1.0393404960632324, |
|
"epoch": 3.2026452527161076, |
|
"grad_norm": 959.0200932296882, |
|
"learning_rate": 3.421002061924876e-07, |
|
"logits": -1.262898325920105, |
|
"logps": -82.25037384033203, |
|
"loss": 2.3058, |
|
"objective": 2.267256259918213, |
|
"ranking_idealized": 0.5458333492279053, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 2.267256259918213, |
|
"step": 565 |
|
}, |
|
{ |
|
"dpo_loss": 1.2455191612243652, |
|
"epoch": 3.230987246102976, |
|
"grad_norm": 937.4751934096442, |
|
"learning_rate": 3.327227093609824e-07, |
|
"logits": -1.1880606412887573, |
|
"logps": -81.5679702758789, |
|
"loss": 2.2052, |
|
"objective": 2.3692660331726074, |
|
"ranking_idealized": 0.4791666567325592, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 2.3692660331726074, |
|
"step": 570 |
|
}, |
|
{ |
|
"dpo_loss": 1.228649616241455, |
|
"epoch": 3.259329239489844, |
|
"grad_norm": 996.5558020139559, |
|
"learning_rate": 3.234110104457536e-07, |
|
"logits": -1.2556049823760986, |
|
"logps": -81.8873519897461, |
|
"loss": 2.2515, |
|
"objective": 2.29750657081604, |
|
"ranking_idealized": 0.5, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.49166667461395264, |
|
"regularize": 2.29750657081604, |
|
"step": 575 |
|
}, |
|
{ |
|
"dpo_loss": 1.3365857601165771, |
|
"epoch": 3.287671232876712, |
|
"grad_norm": 996.2512645672206, |
|
"learning_rate": 3.141687721698363e-07, |
|
"logits": -1.2736408710479736, |
|
"logps": -82.88424682617188, |
|
"loss": 2.3155, |
|
"objective": 2.366077423095703, |
|
"ranking_idealized": 0.5541666746139526, |
|
"ranking_idealized_expo": 0.5541666746139526, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 2.366077423095703, |
|
"step": 580 |
|
}, |
|
{ |
|
"dpo_loss": 1.1693744659423828, |
|
"epoch": 3.3160132262635806, |
|
"grad_norm": 990.5330100186713, |
|
"learning_rate": 3.049996299341742e-07, |
|
"logits": -1.289427638053894, |
|
"logps": -83.39188385009766, |
|
"loss": 2.1009, |
|
"objective": 2.1158106327056885, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 2.1158106327056885, |
|
"step": 585 |
|
}, |
|
{ |
|
"dpo_loss": 1.1631190776824951, |
|
"epoch": 3.3443552196504487, |
|
"grad_norm": 986.1485755213372, |
|
"learning_rate": 2.959071903876486e-07, |
|
"logits": -1.3035333156585693, |
|
"logps": -83.53241729736328, |
|
"loss": 2.1276, |
|
"objective": 2.4290575981140137, |
|
"ranking_idealized": 0.49166667461395264, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 2.4290575981140137, |
|
"step": 590 |
|
}, |
|
{ |
|
"dpo_loss": 1.3233096599578857, |
|
"epoch": 3.372697213037317, |
|
"grad_norm": 930.1393629687922, |
|
"learning_rate": 2.86895030008416e-07, |
|
"logits": -1.2197285890579224, |
|
"logps": -82.90579223632812, |
|
"loss": 2.1207, |
|
"objective": 2.1894338130950928, |
|
"ranking_idealized": 0.5249999761581421, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 2.1894338130950928, |
|
"step": 595 |
|
}, |
|
{ |
|
"dpo_loss": 1.007699728012085, |
|
"epoch": 3.4010392064241852, |
|
"grad_norm": 935.3828921290786, |
|
"learning_rate": 2.779666936971129e-07, |
|
"logits": -1.230286717414856, |
|
"logps": -81.8963851928711, |
|
"loss": 2.0539, |
|
"objective": 2.0761570930480957, |
|
"ranking_idealized": 0.5041666626930237, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.4958333373069763, |
|
"regularize": 2.0761570930480957, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.4010392064241852, |
|
"eval_dpo_loss": 3.1110734939575195, |
|
"eval_logits": -1.2650625705718994, |
|
"eval_logps": -88.16163635253906, |
|
"eval_loss": 5.933178901672363, |
|
"eval_objective": 6.031818866729736, |
|
"eval_ranking_idealized": 0.5092975497245789, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5113636255264282, |
|
"eval_regularize": 6.031818866729736, |
|
"eval_runtime": 258.4717, |
|
"eval_samples_per_second": 22.401, |
|
"eval_steps_per_second": 0.936, |
|
"step": 600 |
|
}, |
|
{ |
|
"dpo_loss": 1.11422860622406, |
|
"epoch": 3.4293811998110533, |
|
"grad_norm": 962.8376859443554, |
|
"learning_rate": 2.6912569338248315e-07, |
|
"logits": -1.2667381763458252, |
|
"logps": -83.77916717529297, |
|
"loss": 2.0057, |
|
"objective": 1.9590004682540894, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 1.9590004682540894, |
|
"step": 605 |
|
}, |
|
{ |
|
"dpo_loss": 1.1715530157089233, |
|
"epoch": 3.4577231931979218, |
|
"grad_norm": 934.6534827687339, |
|
"learning_rate": 2.603755066399718e-07, |
|
"logits": -1.2390044927597046, |
|
"logps": -82.3260269165039, |
|
"loss": 2.1286, |
|
"objective": 2.050309658050537, |
|
"ranking_idealized": 0.5208333134651184, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 2.050309658050537, |
|
"step": 610 |
|
}, |
|
{ |
|
"dpo_loss": 1.1228054761886597, |
|
"epoch": 3.48606518658479, |
|
"grad_norm": 924.4634252845681, |
|
"learning_rate": 2.517195753238345e-07, |
|
"logits": -1.2509876489639282, |
|
"logps": -82.427001953125, |
|
"loss": 1.9336, |
|
"objective": 1.9248236417770386, |
|
"ranking_idealized": 0.47083333134651184, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.4625000059604645, |
|
"regularize": 1.9248236417770386, |
|
"step": 615 |
|
}, |
|
{ |
|
"dpo_loss": 1.0055824518203735, |
|
"epoch": 3.514407179971658, |
|
"grad_norm": 996.235630609532, |
|
"learning_rate": 2.4316130421329696e-07, |
|
"logits": -1.1457880735397339, |
|
"logps": -81.78346252441406, |
|
"loss": 1.8569, |
|
"objective": 1.8841525316238403, |
|
"ranking_idealized": 0.4958333373069763, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.49166667461395264, |
|
"regularize": 1.8841525316238403, |
|
"step": 620 |
|
}, |
|
{ |
|
"dpo_loss": 1.0400264263153076, |
|
"epoch": 3.5427491733585263, |
|
"grad_norm": 988.2054187208093, |
|
"learning_rate": 2.3470405967329604e-07, |
|
"logits": -1.202520489692688, |
|
"logps": -81.60346984863281, |
|
"loss": 1.8727, |
|
"objective": 2.022357702255249, |
|
"ranking_idealized": 0.5249999761581421, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 2.022357702255249, |
|
"step": 625 |
|
}, |
|
{ |
|
"dpo_loss": 0.9165257215499878, |
|
"epoch": 3.5710911667453944, |
|
"grad_norm": 970.3302350643374, |
|
"learning_rate": 2.2635116833033392e-07, |
|
"logits": -1.2847248315811157, |
|
"logps": -81.50933074951172, |
|
"loss": 1.8387, |
|
"objective": 1.6823768615722656, |
|
"ranking_idealized": 0.512499988079071, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 1.6823768615722656, |
|
"step": 630 |
|
}, |
|
{ |
|
"dpo_loss": 0.8153350949287415, |
|
"epoch": 3.5994331601322624, |
|
"grad_norm": 964.0631369889124, |
|
"learning_rate": 2.181059157639598e-07, |
|
"logits": -1.2249428033828735, |
|
"logps": -82.01227569580078, |
|
"loss": 1.7136, |
|
"objective": 1.8080626726150513, |
|
"ranking_idealized": 0.48750001192092896, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.48750001192092896, |
|
"regularize": 1.8080624341964722, |
|
"step": 635 |
|
}, |
|
{ |
|
"dpo_loss": 0.9864614605903625, |
|
"epoch": 3.627775153519131, |
|
"grad_norm": 1000.0610851173434, |
|
"learning_rate": 2.0997154521440097e-07, |
|
"logits": -1.1798667907714844, |
|
"logps": -82.78071594238281, |
|
"loss": 1.7312, |
|
"objective": 1.868208646774292, |
|
"ranking_idealized": 0.4625000059604645, |
|
"ranking_idealized_expo": 0.4625000059604645, |
|
"ranking_simple": 0.46666666865348816, |
|
"regularize": 1.868208646774292, |
|
"step": 640 |
|
}, |
|
{ |
|
"dpo_loss": 1.0488131046295166, |
|
"epoch": 3.656117146905999, |
|
"grad_norm": 1049.7832510532623, |
|
"learning_rate": 2.0195125630684428e-07, |
|
"logits": -1.2025647163391113, |
|
"logps": -81.37299346923828, |
|
"loss": 1.7885, |
|
"objective": 1.6037089824676514, |
|
"ranking_idealized": 0.5249999761581421, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 1.6037089824676514, |
|
"step": 645 |
|
}, |
|
{ |
|
"dpo_loss": 1.022878646850586, |
|
"epoch": 3.6844591402928675, |
|
"grad_norm": 919.1271980482485, |
|
"learning_rate": 1.9404820379287672e-07, |
|
"logits": -1.1927168369293213, |
|
"logps": -82.80690002441406, |
|
"loss": 1.664, |
|
"objective": 1.7033976316452026, |
|
"ranking_idealized": 0.5041666626930237, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5041666626930237, |
|
"regularize": 1.7033976316452026, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 3.6844591402928675, |
|
"eval_dpo_loss": 3.096806764602661, |
|
"eval_logits": -1.2607561349868774, |
|
"eval_logps": -88.69917297363281, |
|
"eval_loss": 5.923920631408691, |
|
"eval_objective": 5.985104084014893, |
|
"eval_ranking_idealized": 0.5092975497245789, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5113636255264282, |
|
"eval_regularize": 5.985104084014893, |
|
"eval_runtime": 258.729, |
|
"eval_samples_per_second": 22.379, |
|
"eval_steps_per_second": 0.935, |
|
"step": 650 |
|
}, |
|
{ |
|
"dpo_loss": 0.9369535446166992, |
|
"epoch": 3.7128011336797355, |
|
"grad_norm": 961.5116821455488, |
|
"learning_rate": 1.8626549630957395e-07, |
|
"logits": -1.2329158782958984, |
|
"logps": -81.57703399658203, |
|
"loss": 1.6411, |
|
"objective": 1.523728847503662, |
|
"ranking_idealized": 0.5208333134651184, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 1.523728847503662, |
|
"step": 655 |
|
}, |
|
{ |
|
"dpo_loss": 0.8511099219322205, |
|
"epoch": 3.7411431270666036, |
|
"grad_norm": 1089.6596694384646, |
|
"learning_rate": 1.7860619515673032e-07, |
|
"logits": -1.3094429969787598, |
|
"logps": -82.70073699951172, |
|
"loss": 1.6123, |
|
"objective": 1.475422739982605, |
|
"ranking_idealized": 0.5541666746139526, |
|
"ranking_idealized_expo": 0.5541666746139526, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 1.475422739982605, |
|
"step": 660 |
|
}, |
|
{ |
|
"dpo_loss": 0.9054993987083435, |
|
"epoch": 3.769485120453472, |
|
"grad_norm": 1093.6896679613712, |
|
"learning_rate": 1.7107331309270684e-07, |
|
"logits": -1.2157032489776611, |
|
"logps": -82.55271911621094, |
|
"loss": 1.561, |
|
"objective": 1.6129040718078613, |
|
"ranking_idealized": 0.5166666507720947, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 1.6129040718078613, |
|
"step": 665 |
|
}, |
|
{ |
|
"dpo_loss": 0.8416043519973755, |
|
"epoch": 3.79782711384034, |
|
"grad_norm": 1024.9155399545134, |
|
"learning_rate": 1.6366981314937372e-07, |
|
"logits": -1.3233702182769775, |
|
"logps": -82.39900970458984, |
|
"loss": 1.5252, |
|
"objective": 1.4049646854400635, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5916666388511658, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 1.4049646854400635, |
|
"step": 670 |
|
}, |
|
{ |
|
"dpo_loss": 0.8841171264648438, |
|
"epoch": 3.826169107227208, |
|
"grad_norm": 971.974542125377, |
|
"learning_rate": 1.5639860746661338e-07, |
|
"logits": -1.288584589958191, |
|
"logps": -81.51100158691406, |
|
"loss": 1.5256, |
|
"objective": 1.549899935722351, |
|
"ranking_idealized": 0.5458333492279053, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 1.549899935722351, |
|
"step": 675 |
|
}, |
|
{ |
|
"dpo_loss": 0.9599818587303162, |
|
"epoch": 3.8545111006140766, |
|
"grad_norm": 930.812509420293, |
|
"learning_rate": 1.492625561468393e-07, |
|
"logits": -1.2116204500198364, |
|
"logps": -81.91743469238281, |
|
"loss": 1.4834, |
|
"objective": 1.6812348365783691, |
|
"ranking_idealized": 0.5083333253860474, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 1.6812348365783691, |
|
"step": 680 |
|
}, |
|
{ |
|
"dpo_loss": 0.9112765192985535, |
|
"epoch": 3.8828530940009447, |
|
"grad_norm": 1163.329406971879, |
|
"learning_rate": 1.4226446612998671e-07, |
|
"logits": -1.273834228515625, |
|
"logps": -81.50404357910156, |
|
"loss": 1.4897, |
|
"objective": 1.4795509576797485, |
|
"ranking_idealized": 0.5166666507720947, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 1.4795509576797485, |
|
"step": 685 |
|
}, |
|
{ |
|
"dpo_loss": 0.8565966486930847, |
|
"epoch": 3.9111950873878127, |
|
"grad_norm": 938.1098675569013, |
|
"learning_rate": 1.3540709008941147e-07, |
|
"logits": -1.2776970863342285, |
|
"logps": -81.18965911865234, |
|
"loss": 1.4652, |
|
"objective": 1.3701138496398926, |
|
"ranking_idealized": 0.5458333492279053, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 1.3701138496398926, |
|
"step": 690 |
|
}, |
|
{ |
|
"dpo_loss": 1.0229411125183105, |
|
"epoch": 3.9395370807746812, |
|
"grad_norm": 1013.5958054606544, |
|
"learning_rate": 1.2869312534913685e-07, |
|
"logits": -1.191641092300415, |
|
"logps": -82.60445404052734, |
|
"loss": 1.3376, |
|
"objective": 1.4829381704330444, |
|
"ranking_idealized": 0.46666666865348816, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.4625000059604645, |
|
"regularize": 1.4829381704330444, |
|
"step": 695 |
|
}, |
|
{ |
|
"dpo_loss": 0.8406078815460205, |
|
"epoch": 3.9678790741615493, |
|
"grad_norm": 1063.5432850972618, |
|
"learning_rate": 1.2212521282287093e-07, |
|
"logits": -1.2275745868682861, |
|
"logps": -82.30086517333984, |
|
"loss": 1.3502, |
|
"objective": 1.3136423826217651, |
|
"ranking_idealized": 0.48750001192092896, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.48750001192092896, |
|
"regularize": 1.3136423826217651, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 3.9678790741615493, |
|
"eval_dpo_loss": 3.089524269104004, |
|
"eval_logits": -1.2646851539611816, |
|
"eval_logps": -88.52361297607422, |
|
"eval_loss": 5.917611122131348, |
|
"eval_objective": 5.957097053527832, |
|
"eval_ranking_idealized": 0.5092975497245789, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5134297609329224, |
|
"eval_regularize": 5.957097053527832, |
|
"eval_runtime": 258.9235, |
|
"eval_samples_per_second": 22.362, |
|
"eval_steps_per_second": 0.935, |
|
"step": 700 |
|
}, |
|
{ |
|
"dpo_loss": 0.8173587918281555, |
|
"epoch": 3.9962210675484178, |
|
"grad_norm": 985.2767504669318, |
|
"learning_rate": 1.15705935975212e-07, |
|
"logits": -1.1528183221817017, |
|
"logps": -80.9789810180664, |
|
"loss": 1.3032, |
|
"objective": 1.2147972583770752, |
|
"ranking_idealized": 0.5291666388511658, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 1.2147972583770752, |
|
"step": 705 |
|
}, |
|
{ |
|
"dpo_loss": 0.7131792902946472, |
|
"epoch": 4.024563060935286, |
|
"grad_norm": 1011.8698807645903, |
|
"learning_rate": 1.094378198054533e-07, |
|
"logits": -1.2421104907989502, |
|
"logps": -82.51912689208984, |
|
"loss": 1.1035, |
|
"objective": 1.1794158220291138, |
|
"ranking_idealized": 0.4791666567325592, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 1.1794158220291138, |
|
"step": 710 |
|
}, |
|
{ |
|
"dpo_loss": 0.7108194231987, |
|
"epoch": 4.052905054322154, |
|
"grad_norm": 940.6537803599663, |
|
"learning_rate": 1.0332332985438247e-07, |
|
"logits": -1.1998772621154785, |
|
"logps": -81.95320892333984, |
|
"loss": 1.0461, |
|
"objective": 0.9881690740585327, |
|
"ranking_idealized": 0.5375000238418579, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.9881690740585327, |
|
"step": 715 |
|
}, |
|
{ |
|
"dpo_loss": 0.6712031960487366, |
|
"epoch": 4.081247047709022, |
|
"grad_norm": 960.2681465991928, |
|
"learning_rate": 9.736487123447068e-08, |
|
"logits": -1.15168035030365, |
|
"logps": -83.81200408935547, |
|
"loss": 1.0468, |
|
"objective": 1.1416888236999512, |
|
"ranking_idealized": 0.5208333134651184, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 1.1416888236999512, |
|
"step": 720 |
|
}, |
|
{ |
|
"dpo_loss": 0.6989570260047913, |
|
"epoch": 4.109589041095891, |
|
"grad_norm": 935.0432434834537, |
|
"learning_rate": 9.156478768383058e-08, |
|
"logits": -1.2237892150878906, |
|
"logps": -81.69219207763672, |
|
"loss": 1.074, |
|
"objective": 0.9603613615036011, |
|
"ranking_idealized": 0.5291666388511658, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 0.9603613615036011, |
|
"step": 725 |
|
}, |
|
{ |
|
"dpo_loss": 0.7995573878288269, |
|
"epoch": 4.137931034482759, |
|
"grad_norm": 980.3733885345745, |
|
"learning_rate": 8.592536064431466e-08, |
|
"logits": -1.2934256792068481, |
|
"logps": -81.94860076904297, |
|
"loss": 1.0596, |
|
"objective": 1.06425142288208, |
|
"ranking_idealized": 0.5375000238418579, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 1.06425142288208, |
|
"step": 730 |
|
}, |
|
{ |
|
"dpo_loss": 0.7126405835151672, |
|
"epoch": 4.166273027869627, |
|
"grad_norm": 1007.1386546772866, |
|
"learning_rate": 8.044880836411888e-08, |
|
"logits": -1.1827558279037476, |
|
"logps": -82.89656066894531, |
|
"loss": 1.0028, |
|
"objective": 0.9806023240089417, |
|
"ranking_idealized": 0.5249999761581421, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.9806023240089417, |
|
"step": 735 |
|
}, |
|
{ |
|
"dpo_loss": 0.7121079564094543, |
|
"epoch": 4.194615021256495, |
|
"grad_norm": 940.1459069064864, |
|
"learning_rate": 7.513728502524286e-08, |
|
"logits": -1.2114641666412354, |
|
"logps": -82.5162582397461, |
|
"loss": 1.0102, |
|
"objective": 0.9698477387428284, |
|
"ranking_idealized": 0.5208333134651184, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.9698477387428284, |
|
"step": 740 |
|
}, |
|
{ |
|
"dpo_loss": 0.7223809957504272, |
|
"epoch": 4.222957014643363, |
|
"grad_norm": 956.701027117306, |
|
"learning_rate": 6.999287989614971e-08, |
|
"logits": -1.274079442024231, |
|
"logps": -80.06112670898438, |
|
"loss": 0.9886, |
|
"objective": 1.010962724685669, |
|
"ranking_idealized": 0.48750001192092896, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.4958333373069763, |
|
"regularize": 1.010962724685669, |
|
"step": 745 |
|
}, |
|
{ |
|
"dpo_loss": 0.6834555268287659, |
|
"epoch": 4.251299008030231, |
|
"grad_norm": 994.5271297786792, |
|
"learning_rate": 6.501761650996052e-08, |
|
"logits": -1.3588440418243408, |
|
"logps": -82.0845947265625, |
|
"loss": 1.0052, |
|
"objective": 1.0897630453109741, |
|
"ranking_idealized": 0.5249999761581421, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 1.0897630453109741, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 4.251299008030231, |
|
"eval_dpo_loss": 3.103642702102661, |
|
"eval_logits": -1.2629750967025757, |
|
"eval_logps": -88.36180877685547, |
|
"eval_loss": 5.9642486572265625, |
|
"eval_objective": 6.00606632232666, |
|
"eval_ranking_idealized": 0.5092975497245789, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5134297609329224, |
|
"eval_regularize": 6.00606632232666, |
|
"eval_runtime": 258.8045, |
|
"eval_samples_per_second": 22.372, |
|
"eval_steps_per_second": 0.935, |
|
"step": 750 |
|
}, |
|
{ |
|
"dpo_loss": 0.6240705251693726, |
|
"epoch": 4.2796410014171, |
|
"grad_norm": 928.8030739675138, |
|
"learning_rate": 6.021345186850418e-08, |
|
"logits": -1.2852017879486084, |
|
"logps": -82.10335540771484, |
|
"loss": 0.972, |
|
"objective": 1.01486337184906, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 1.01486337184906, |
|
"step": 755 |
|
}, |
|
{ |
|
"dpo_loss": 0.7065821886062622, |
|
"epoch": 4.307982994803968, |
|
"grad_norm": 929.3861266719559, |
|
"learning_rate": 5.5582275672538316e-08, |
|
"logits": -1.2621345520019531, |
|
"logps": -82.05753326416016, |
|
"loss": 0.8992, |
|
"objective": 0.8614501953125, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.5625, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.8614501953125, |
|
"step": 760 |
|
}, |
|
{ |
|
"dpo_loss": 0.6562435030937195, |
|
"epoch": 4.336324988190836, |
|
"grad_norm": 977.0057816667661, |
|
"learning_rate": 5.112590957844232e-08, |
|
"logits": -1.2853424549102783, |
|
"logps": -82.94247436523438, |
|
"loss": 0.9593, |
|
"objective": 0.8854343295097351, |
|
"ranking_idealized": 0.5291666388511658, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.8854343295097351, |
|
"step": 765 |
|
}, |
|
{ |
|
"dpo_loss": 0.7083169221878052, |
|
"epoch": 4.364666981577704, |
|
"grad_norm": 932.8658870761088, |
|
"learning_rate": 4.684610648167503e-08, |
|
"logits": -1.1741254329681396, |
|
"logps": -81.14856719970703, |
|
"loss": 0.9364, |
|
"objective": 0.9174091815948486, |
|
"ranking_idealized": 0.5083333253860474, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 0.9174091815948486, |
|
"step": 770 |
|
}, |
|
{ |
|
"dpo_loss": 0.6173561811447144, |
|
"epoch": 4.393008974964572, |
|
"grad_norm": 973.1322872774431, |
|
"learning_rate": 4.274454982728032e-08, |
|
"logits": -1.2410908937454224, |
|
"logps": -81.94947052001953, |
|
"loss": 0.8846, |
|
"objective": 0.7543167471885681, |
|
"ranking_idealized": 0.5083333253860474, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 0.7543167471885681, |
|
"step": 775 |
|
}, |
|
{ |
|
"dpo_loss": 0.6880941390991211, |
|
"epoch": 4.42135096835144, |
|
"grad_norm": 934.0673376048558, |
|
"learning_rate": 3.882285294770937e-08, |
|
"logits": -1.2685768604278564, |
|
"logps": -80.74879455566406, |
|
"loss": 0.8891, |
|
"objective": 0.9145664572715759, |
|
"ranking_idealized": 0.5041666626930237, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5041666626930237, |
|
"regularize": 0.9145664572715759, |
|
"step": 780 |
|
}, |
|
{ |
|
"dpo_loss": 0.6857554316520691, |
|
"epoch": 4.449692961738309, |
|
"grad_norm": 906.6964623786657, |
|
"learning_rate": 3.508255842822255e-08, |
|
"logits": -1.286802887916565, |
|
"logps": -80.75093841552734, |
|
"loss": 0.8708, |
|
"objective": 0.865050196647644, |
|
"ranking_idealized": 0.5291666388511658, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 0.865050196647644, |
|
"step": 785 |
|
}, |
|
{ |
|
"dpo_loss": 0.6727093458175659, |
|
"epoch": 4.478034955125177, |
|
"grad_norm": 954.6542045385403, |
|
"learning_rate": 3.15251375001192e-08, |
|
"logits": -1.248421549797058, |
|
"logps": -83.45836639404297, |
|
"loss": 0.8584, |
|
"objective": 0.825614869594574, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.5541666746139526, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.825614869594574, |
|
"step": 790 |
|
}, |
|
{ |
|
"dpo_loss": 0.7268415689468384, |
|
"epoch": 4.506376948512045, |
|
"grad_norm": 940.0246507195186, |
|
"learning_rate": 2.8151989462033787e-08, |
|
"logits": -1.1517890691757202, |
|
"logps": -82.83182525634766, |
|
"loss": 0.8256, |
|
"objective": 0.8579443693161011, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.8579443693161011, |
|
"step": 795 |
|
}, |
|
{ |
|
"dpo_loss": 0.6633224487304688, |
|
"epoch": 4.534718941898913, |
|
"grad_norm": 974.8383451116964, |
|
"learning_rate": 2.4964441129527335e-08, |
|
"logits": -1.148630976676941, |
|
"logps": -82.155029296875, |
|
"loss": 0.8548, |
|
"objective": 0.8071673512458801, |
|
"ranking_idealized": 0.512499988079071, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 0.8071673512458801, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 4.534718941898913, |
|
"eval_dpo_loss": 3.0852909088134766, |
|
"eval_logits": -1.2662479877471924, |
|
"eval_logps": -88.35336303710938, |
|
"eval_loss": 5.92377233505249, |
|
"eval_objective": 5.97105073928833, |
|
"eval_ranking_idealized": 0.5092975497245789, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5134297609329224, |
|
"eval_regularize": 5.97105073928833, |
|
"eval_runtime": 258.7722, |
|
"eval_samples_per_second": 22.375, |
|
"eval_steps_per_second": 0.935, |
|
"step": 800 |
|
}, |
|
{ |
|
"dpo_loss": 0.6257317662239075, |
|
"epoch": 4.563060935285781, |
|
"grad_norm": 904.8852856060951, |
|
"learning_rate": 2.1963746313188757e-08, |
|
"logits": -1.2208141088485718, |
|
"logps": -81.56495666503906, |
|
"loss": 0.8065, |
|
"objective": 0.8274087905883789, |
|
"ranking_idealized": 0.46666666865348816, |
|
"ranking_idealized_expo": 0.4583333432674408, |
|
"ranking_simple": 0.46666666865348816, |
|
"regularize": 0.8274087905883789, |
|
"step": 805 |
|
}, |
|
{ |
|
"dpo_loss": 0.6851524710655212, |
|
"epoch": 4.59140292867265, |
|
"grad_norm": 950.7745711731907, |
|
"learning_rate": 1.915108532545351e-08, |
|
"logits": -1.284138798713684, |
|
"logps": -80.96287536621094, |
|
"loss": 0.7829, |
|
"objective": 0.8247645497322083, |
|
"ranking_idealized": 0.4583333432674408, |
|
"ranking_idealized_expo": 0.4583333432674408, |
|
"ranking_simple": 0.4625000059604645, |
|
"regularize": 0.8247645497322083, |
|
"step": 810 |
|
}, |
|
{ |
|
"dpo_loss": 0.6184301972389221, |
|
"epoch": 4.619744922059518, |
|
"grad_norm": 910.9864796470945, |
|
"learning_rate": 1.6527564516331638e-08, |
|
"logits": -1.1581400632858276, |
|
"logps": -82.53880310058594, |
|
"loss": 0.7758, |
|
"objective": 0.8514427542686462, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5874999761581421, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.8514427542686462, |
|
"step": 815 |
|
}, |
|
{ |
|
"dpo_loss": 0.7527021765708923, |
|
"epoch": 4.648086915446386, |
|
"grad_norm": 990.2454056593273, |
|
"learning_rate": 1.4094215838229172e-08, |
|
"logits": -1.2537620067596436, |
|
"logps": -82.07845306396484, |
|
"loss": 0.8496, |
|
"objective": 0.8979706168174744, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.5625, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.8979706168174744, |
|
"step": 820 |
|
}, |
|
{ |
|
"dpo_loss": 0.6094750165939331, |
|
"epoch": 4.6764289088332545, |
|
"grad_norm": 933.278247470779, |
|
"learning_rate": 1.1851996440033318e-08, |
|
"logits": -1.1612147092819214, |
|
"logps": -80.19402313232422, |
|
"loss": 0.7784, |
|
"objective": 0.6954202651977539, |
|
"ranking_idealized": 0.5, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.6954202651977539, |
|
"step": 825 |
|
}, |
|
{ |
|
"dpo_loss": 0.7630098462104797, |
|
"epoch": 4.7047709022201225, |
|
"grad_norm": 1034.6630104905564, |
|
"learning_rate": 9.801788290621505e-09, |
|
"logits": -1.3119471073150635, |
|
"logps": -82.28199768066406, |
|
"loss": 0.8393, |
|
"objective": 0.9094979166984558, |
|
"ranking_idealized": 0.5166666507720947, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.9094979166984558, |
|
"step": 830 |
|
}, |
|
{ |
|
"dpo_loss": 0.6389487385749817, |
|
"epoch": 4.733112895606991, |
|
"grad_norm": 919.014869305993, |
|
"learning_rate": 7.944397831941951e-09, |
|
"logits": -1.2169098854064941, |
|
"logps": -81.839111328125, |
|
"loss": 0.7883, |
|
"objective": 0.7863165140151978, |
|
"ranking_idealized": 0.49166667461395264, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.4958333373069763, |
|
"regularize": 0.7863165140151978, |
|
"step": 835 |
|
}, |
|
{ |
|
"dpo_loss": 0.6490210890769958, |
|
"epoch": 4.7614548889938595, |
|
"grad_norm": 984.0918792700685, |
|
"learning_rate": 6.280555661802856e-09, |
|
"logits": -1.2279409170150757, |
|
"logps": -82.31600189208984, |
|
"loss": 0.7359, |
|
"objective": 0.7738173007965088, |
|
"ranking_idealized": 0.5541666746139526, |
|
"ranking_idealized_expo": 0.5541666746139526, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.7738173007965088, |
|
"step": 840 |
|
}, |
|
{ |
|
"dpo_loss": 0.6469201445579529, |
|
"epoch": 4.7897968823807275, |
|
"grad_norm": 943.5599388069303, |
|
"learning_rate": 4.810916246494157e-09, |
|
"logits": -1.2585629224777222, |
|
"logps": -82.3524398803711, |
|
"loss": 0.7615, |
|
"objective": 0.7981647849082947, |
|
"ranking_idealized": 0.4583333432674408, |
|
"ranking_idealized_expo": 0.4541666805744171, |
|
"ranking_simple": 0.4625000059604645, |
|
"regularize": 0.7981647849082947, |
|
"step": 845 |
|
}, |
|
{ |
|
"dpo_loss": 0.6333919167518616, |
|
"epoch": 4.818138875767596, |
|
"grad_norm": 953.2591490386869, |
|
"learning_rate": 3.5360576633558513e-09, |
|
"logits": -1.2325036525726318, |
|
"logps": -81.02188873291016, |
|
"loss": 0.7765, |
|
"objective": 0.7740827798843384, |
|
"ranking_idealized": 0.5291666388511658, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 0.7740827798843384, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 4.818138875767596, |
|
"eval_dpo_loss": 3.091606378555298, |
|
"eval_logits": -1.266028642654419, |
|
"eval_logps": -88.38737487792969, |
|
"eval_loss": 5.932301044464111, |
|
"eval_objective": 5.976984977722168, |
|
"eval_ranking_idealized": 0.5092975497245789, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5134297609329224, |
|
"eval_regularize": 5.976984977722168, |
|
"eval_runtime": 259.1307, |
|
"eval_samples_per_second": 22.344, |
|
"eval_steps_per_second": 0.934, |
|
"step": 850 |
|
}, |
|
{ |
|
"dpo_loss": 0.7061713337898254, |
|
"epoch": 4.846480869154464, |
|
"grad_norm": 938.2798417262723, |
|
"learning_rate": 2.4564813733932155e-09, |
|
"logits": -1.2008044719696045, |
|
"logps": -81.2861328125, |
|
"loss": 0.7477, |
|
"objective": 0.7695434093475342, |
|
"ranking_idealized": 0.5375000238418579, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.7695434093475342, |
|
"step": 855 |
|
}, |
|
{ |
|
"dpo_loss": 0.6985270380973816, |
|
"epoch": 4.874822862541333, |
|
"grad_norm": 978.3840453547333, |
|
"learning_rate": 1.5726120240288631e-09, |
|
"logits": -1.1891288757324219, |
|
"logps": -81.2249984741211, |
|
"loss": 0.787, |
|
"objective": 0.8799866437911987, |
|
"ranking_idealized": 0.5541666746139526, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.8799866437911987, |
|
"step": 860 |
|
}, |
|
{ |
|
"dpo_loss": 0.6121171116828918, |
|
"epoch": 4.903164855928201, |
|
"grad_norm": 903.3650703276869, |
|
"learning_rate": 8.847972820693051e-10, |
|
"logits": -1.255650281906128, |
|
"logps": -80.4308090209961, |
|
"loss": 0.6987, |
|
"objective": 0.6325153708457947, |
|
"ranking_idealized": 0.4958333373069763, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.4958333373069763, |
|
"regularize": 0.6325153708457947, |
|
"step": 865 |
|
}, |
|
{ |
|
"dpo_loss": 0.6306089162826538, |
|
"epoch": 4.931506849315069, |
|
"grad_norm": 920.898527815901, |
|
"learning_rate": 3.933076969516724e-10, |
|
"logits": -1.288960576057434, |
|
"logps": -81.9923095703125, |
|
"loss": 0.6978, |
|
"objective": 0.6587303876876831, |
|
"ranking_idealized": 0.512499988079071, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 0.6587303876876831, |
|
"step": 870 |
|
}, |
|
{ |
|
"dpo_loss": 0.72224360704422, |
|
"epoch": 4.959848842701937, |
|
"grad_norm": 973.4560843637579, |
|
"learning_rate": 9.833659432367803e-11, |
|
"logits": -1.2122024297714233, |
|
"logps": -81.87159729003906, |
|
"loss": 0.7712, |
|
"objective": 0.9232720136642456, |
|
"ranking_idealized": 0.5458333492279053, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.9232720136642456, |
|
"step": 875 |
|
}, |
|
{ |
|
"dpo_loss": 0.6564382910728455, |
|
"epoch": 4.988190836088805, |
|
"grad_norm": 950.1802722480554, |
|
"learning_rate": 0.0, |
|
"logits": -1.284375786781311, |
|
"logps": -82.72828674316406, |
|
"loss": 0.7367, |
|
"objective": 0.7989345788955688, |
|
"ranking_idealized": 0.5291666388511658, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 0.7989345788955688, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 4.988190836088805, |
|
"step": 880, |
|
"total_flos": 0.0, |
|
"train_loss": 2.940262828902765, |
|
"train_runtime": 35074.095, |
|
"train_samples_per_second": 7.242, |
|
"train_steps_per_second": 0.025 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 880, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|