|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 4.988190836088805, |
|
"eval_steps": 50, |
|
"global_step": 880, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"dpo_loss": 0.6931471824645996, |
|
"epoch": 0.005668398677373642, |
|
"grad_norm": 26.827196619905514, |
|
"learning_rate": 5.681818181818182e-08, |
|
"logits": -1.3147305250167847, |
|
"logps": -88.0877456665039, |
|
"loss": 0.4113, |
|
"objective": 0.41588976979255676, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.41588976979255676, |
|
"step": 1 |
|
}, |
|
{ |
|
"dpo_loss": 0.6930485367774963, |
|
"epoch": 0.02834199338686821, |
|
"grad_norm": 26.779096499602865, |
|
"learning_rate": 2.840909090909091e-07, |
|
"logits": -1.3680453300476074, |
|
"logps": -84.26322937011719, |
|
"loss": 0.4128, |
|
"objective": 0.37547406554222107, |
|
"ranking_idealized": 0.6145833134651184, |
|
"ranking_idealized_expo": 0.546875, |
|
"ranking_simple": 0.546875, |
|
"regularize": 0.37547406554222107, |
|
"step": 5 |
|
}, |
|
{ |
|
"dpo_loss": 0.6873258948326111, |
|
"epoch": 0.05668398677373642, |
|
"grad_norm": 27.711550621025058, |
|
"learning_rate": 5.681818181818182e-07, |
|
"logits": -1.4466668367385864, |
|
"logps": -82.5794448852539, |
|
"loss": 0.4133, |
|
"objective": 0.4332159161567688, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.4332159161567688, |
|
"step": 10 |
|
}, |
|
{ |
|
"dpo_loss": 0.6854580044746399, |
|
"epoch": 0.08502598016060463, |
|
"grad_norm": 26.55882910438415, |
|
"learning_rate": 8.522727272727273e-07, |
|
"logits": -1.4248812198638916, |
|
"logps": -82.44363403320312, |
|
"loss": 0.4132, |
|
"objective": 0.4001390337944031, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.4001390337944031, |
|
"step": 15 |
|
}, |
|
{ |
|
"dpo_loss": 0.6893234252929688, |
|
"epoch": 0.11336797354747284, |
|
"grad_norm": 28.16437068526933, |
|
"learning_rate": 1.1363636363636364e-06, |
|
"logits": -1.4341281652450562, |
|
"logps": -83.77052307128906, |
|
"loss": 0.3955, |
|
"objective": 0.4052102267742157, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.4052102267742157, |
|
"step": 20 |
|
}, |
|
{ |
|
"dpo_loss": 0.6645582914352417, |
|
"epoch": 0.14170996693434104, |
|
"grad_norm": 24.3242807722224, |
|
"learning_rate": 1.4204545454545458e-06, |
|
"logits": -1.491589069366455, |
|
"logps": -82.93748474121094, |
|
"loss": 0.3885, |
|
"objective": 0.40111032128334045, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5041666626930237, |
|
"regularize": 0.40111032128334045, |
|
"step": 25 |
|
}, |
|
{ |
|
"dpo_loss": 0.6675031185150146, |
|
"epoch": 0.17005196032120926, |
|
"grad_norm": 26.35319215355226, |
|
"learning_rate": 1.7045454545454546e-06, |
|
"logits": -1.4577081203460693, |
|
"logps": -84.03182220458984, |
|
"loss": 0.3752, |
|
"objective": 0.382988303899765, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.382988303899765, |
|
"step": 30 |
|
}, |
|
{ |
|
"dpo_loss": 0.6479320526123047, |
|
"epoch": 0.19839395370807747, |
|
"grad_norm": 27.102387091432373, |
|
"learning_rate": 1.9886363636363638e-06, |
|
"logits": -1.4457683563232422, |
|
"logps": -82.55751037597656, |
|
"loss": 0.3652, |
|
"objective": 0.3455962538719177, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.34559622406959534, |
|
"step": 35 |
|
}, |
|
{ |
|
"dpo_loss": 0.6392762660980225, |
|
"epoch": 0.22673594709494568, |
|
"grad_norm": 27.66182181149778, |
|
"learning_rate": 2.2727272727272728e-06, |
|
"logits": -1.4526138305664062, |
|
"logps": -82.46888732910156, |
|
"loss": 0.3679, |
|
"objective": 0.38397228717803955, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.38397228717803955, |
|
"step": 40 |
|
}, |
|
{ |
|
"dpo_loss": 0.6107492446899414, |
|
"epoch": 0.25507794048181387, |
|
"grad_norm": 24.020609030227188, |
|
"learning_rate": 2.556818181818182e-06, |
|
"logits": -1.5597164630889893, |
|
"logps": -85.65888214111328, |
|
"loss": 0.3553, |
|
"objective": 0.40014997124671936, |
|
"ranking_idealized": 0.6208333373069763, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.40014997124671936, |
|
"step": 45 |
|
}, |
|
{ |
|
"dpo_loss": 0.6240749359130859, |
|
"epoch": 0.2834199338686821, |
|
"grad_norm": 22.420393674285894, |
|
"learning_rate": 2.8409090909090916e-06, |
|
"logits": -1.4485610723495483, |
|
"logps": -86.837890625, |
|
"loss": 0.3559, |
|
"objective": 0.3479757606983185, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.42500001192092896, |
|
"ranking_simple": 0.4749999940395355, |
|
"regularize": 0.3479757606983185, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.2834199338686821, |
|
"eval_dpo_loss": 0.6920796632766724, |
|
"eval_logits": -1.4739316701889038, |
|
"eval_logps": -96.0254135131836, |
|
"eval_loss": 0.42298370599746704, |
|
"eval_objective": 0.4318382143974304, |
|
"eval_ranking_idealized": 0.5888429880142212, |
|
"eval_ranking_idealized_expo": 0.5103305578231812, |
|
"eval_ranking_simple": 0.5185950398445129, |
|
"eval_regularize": 0.4318382143974304, |
|
"eval_runtime": 305.3308, |
|
"eval_samples_per_second": 18.963, |
|
"eval_steps_per_second": 0.793, |
|
"step": 50 |
|
}, |
|
{ |
|
"dpo_loss": 0.632377564907074, |
|
"epoch": 0.3117619272555503, |
|
"grad_norm": 29.297505573115142, |
|
"learning_rate": 3.125e-06, |
|
"logits": -1.499748945236206, |
|
"logps": -92.24181365966797, |
|
"loss": 0.372, |
|
"objective": 0.36379119753837585, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.36379119753837585, |
|
"step": 55 |
|
}, |
|
{ |
|
"dpo_loss": 0.6205580830574036, |
|
"epoch": 0.3401039206424185, |
|
"grad_norm": 32.111329845751825, |
|
"learning_rate": 3.409090909090909e-06, |
|
"logits": -1.4636166095733643, |
|
"logps": -88.49433135986328, |
|
"loss": 0.3756, |
|
"objective": 0.3948574662208557, |
|
"ranking_idealized": 0.637499988079071, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.3948574662208557, |
|
"step": 60 |
|
}, |
|
{ |
|
"dpo_loss": 0.6158726811408997, |
|
"epoch": 0.3684459140292867, |
|
"grad_norm": 21.125780754509353, |
|
"learning_rate": 3.6931818181818186e-06, |
|
"logits": -1.4296706914901733, |
|
"logps": -84.69975280761719, |
|
"loss": 0.3512, |
|
"objective": 0.34672078490257263, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.34672078490257263, |
|
"step": 65 |
|
}, |
|
{ |
|
"dpo_loss": 0.6078273057937622, |
|
"epoch": 0.39678790741615494, |
|
"grad_norm": 21.36738942621028, |
|
"learning_rate": 3.9772727272727275e-06, |
|
"logits": -1.4390350580215454, |
|
"logps": -84.15774536132812, |
|
"loss": 0.3661, |
|
"objective": 0.3570030629634857, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.3570030629634857, |
|
"step": 70 |
|
}, |
|
{ |
|
"dpo_loss": 0.604975700378418, |
|
"epoch": 0.42512990080302315, |
|
"grad_norm": 20.27859101420223, |
|
"learning_rate": 4.2613636363636365e-06, |
|
"logits": -1.3615275621414185, |
|
"logps": -83.67790985107422, |
|
"loss": 0.3562, |
|
"objective": 0.38403797149658203, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.38403797149658203, |
|
"step": 75 |
|
}, |
|
{ |
|
"dpo_loss": 0.6048832535743713, |
|
"epoch": 0.45347189418989137, |
|
"grad_norm": 21.18230829994196, |
|
"learning_rate": 4.5454545454545455e-06, |
|
"logits": -1.368180513381958, |
|
"logps": -81.03931427001953, |
|
"loss": 0.3692, |
|
"objective": 0.3890025019645691, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.3890025019645691, |
|
"step": 80 |
|
}, |
|
{ |
|
"dpo_loss": 0.575659990310669, |
|
"epoch": 0.4818138875767596, |
|
"grad_norm": 18.639373454342547, |
|
"learning_rate": 4.829545454545455e-06, |
|
"logits": -1.2571583986282349, |
|
"logps": -80.53406524658203, |
|
"loss": 0.3542, |
|
"objective": 0.3442578613758087, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.3442578613758087, |
|
"step": 85 |
|
}, |
|
{ |
|
"dpo_loss": 0.5988019704818726, |
|
"epoch": 0.5101558809636277, |
|
"grad_norm": 19.215896145423606, |
|
"learning_rate": 4.999921328558333e-06, |
|
"logits": -1.0600625276565552, |
|
"logps": -79.93087005615234, |
|
"loss": 0.3716, |
|
"objective": 0.3766815960407257, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.5041666626930237, |
|
"regularize": 0.3766815960407257, |
|
"step": 90 |
|
}, |
|
{ |
|
"dpo_loss": 0.5648664832115173, |
|
"epoch": 0.538497874350496, |
|
"grad_norm": 18.301401671657857, |
|
"learning_rate": 4.999036331701828e-06, |
|
"logits": -1.1114833354949951, |
|
"logps": -78.7803726196289, |
|
"loss": 0.3787, |
|
"objective": 0.3693276345729828, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.3693276345729828, |
|
"step": 95 |
|
}, |
|
{ |
|
"dpo_loss": 0.5575936436653137, |
|
"epoch": 0.5668398677373642, |
|
"grad_norm": 17.902415337800125, |
|
"learning_rate": 4.997168347957521e-06, |
|
"logits": -1.1674468517303467, |
|
"logps": -80.26119995117188, |
|
"loss": 0.3834, |
|
"objective": 0.3862568736076355, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.3862568736076355, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.5668398677373642, |
|
"eval_dpo_loss": 0.695151686668396, |
|
"eval_logits": -1.1784183979034424, |
|
"eval_logps": -85.01744079589844, |
|
"eval_loss": 0.48081931471824646, |
|
"eval_objective": 0.4771096706390381, |
|
"eval_ranking_idealized": 0.5888429880142212, |
|
"eval_ranking_idealized_expo": 0.5103305578231812, |
|
"eval_ranking_simple": 0.5154958963394165, |
|
"eval_regularize": 0.4771096706390381, |
|
"eval_runtime": 306.5313, |
|
"eval_samples_per_second": 18.889, |
|
"eval_steps_per_second": 0.789, |
|
"step": 100 |
|
}, |
|
{ |
|
"dpo_loss": 0.588794469833374, |
|
"epoch": 0.5951818611242324, |
|
"grad_norm": 18.044596123919455, |
|
"learning_rate": 4.994318112090048e-06, |
|
"logits": -1.0578794479370117, |
|
"logps": -79.78172302246094, |
|
"loss": 0.395, |
|
"objective": 0.40164127945899963, |
|
"ranking_idealized": 0.6208333373069763, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.40164127945899963, |
|
"step": 105 |
|
}, |
|
{ |
|
"dpo_loss": 0.5815469026565552, |
|
"epoch": 0.6235238545111006, |
|
"grad_norm": 21.50058623262647, |
|
"learning_rate": 4.990486745229364e-06, |
|
"logits": -1.2767162322998047, |
|
"logps": -76.8266372680664, |
|
"loss": 0.3976, |
|
"objective": 0.4256088137626648, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.4541666805744171, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 0.4256088137626648, |
|
"step": 110 |
|
}, |
|
{ |
|
"dpo_loss": 0.5960076451301575, |
|
"epoch": 0.6518658478979689, |
|
"grad_norm": 16.79094440634272, |
|
"learning_rate": 4.985675754429744e-06, |
|
"logits": -1.329805850982666, |
|
"logps": -75.79194641113281, |
|
"loss": 0.4067, |
|
"objective": 0.4033397138118744, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.4033397138118744, |
|
"step": 115 |
|
}, |
|
{ |
|
"dpo_loss": 0.6170827746391296, |
|
"epoch": 0.680207841284837, |
|
"grad_norm": 18.793022818980457, |
|
"learning_rate": 4.9798870320769884e-06, |
|
"logits": -1.1681851148605347, |
|
"logps": -76.48377990722656, |
|
"loss": 0.4066, |
|
"objective": 0.36593717336654663, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.36593717336654663, |
|
"step": 120 |
|
}, |
|
{ |
|
"dpo_loss": 0.5946592092514038, |
|
"epoch": 0.7085498346717053, |
|
"grad_norm": 17.05224950606184, |
|
"learning_rate": 4.973122855144066e-06, |
|
"logits": -1.0615798234939575, |
|
"logps": -75.89274597167969, |
|
"loss": 0.3915, |
|
"objective": 0.3765077292919159, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.3765077292919159, |
|
"step": 125 |
|
}, |
|
{ |
|
"dpo_loss": 0.6107650399208069, |
|
"epoch": 0.7368918280585735, |
|
"grad_norm": 15.723039306234742, |
|
"learning_rate": 4.965385884295467e-06, |
|
"logits": -1.1680763959884644, |
|
"logps": -74.33956909179688, |
|
"loss": 0.393, |
|
"objective": 0.39573410153388977, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.44999998807907104, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 0.39573410153388977, |
|
"step": 130 |
|
}, |
|
{ |
|
"dpo_loss": 0.5728858709335327, |
|
"epoch": 0.7652338214454416, |
|
"grad_norm": 15.626255395375802, |
|
"learning_rate": 4.956679162840646e-06, |
|
"logits": -1.1485530138015747, |
|
"logps": -75.2200698852539, |
|
"loss": 0.3823, |
|
"objective": 0.36154037714004517, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.36154037714004517, |
|
"step": 135 |
|
}, |
|
{ |
|
"dpo_loss": 0.5767256617546082, |
|
"epoch": 0.7935758148323099, |
|
"grad_norm": 15.232853103825379, |
|
"learning_rate": 4.947006115536947e-06, |
|
"logits": -0.9859077334403992, |
|
"logps": -75.89881896972656, |
|
"loss": 0.3873, |
|
"objective": 0.3745146691799164, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.3745146691799164, |
|
"step": 140 |
|
}, |
|
{ |
|
"dpo_loss": 0.5821123123168945, |
|
"epoch": 0.821917808219178, |
|
"grad_norm": 15.85191878128928, |
|
"learning_rate": 4.9363705472424825e-06, |
|
"logits": -0.9272752404212952, |
|
"logps": -76.59304809570312, |
|
"loss": 0.3707, |
|
"objective": 0.3570760488510132, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.3570760488510132, |
|
"step": 145 |
|
}, |
|
{ |
|
"dpo_loss": 0.5737297534942627, |
|
"epoch": 0.8502598016060463, |
|
"grad_norm": 16.000525131960934, |
|
"learning_rate": 4.924776641419513e-06, |
|
"logits": -0.9384148120880127, |
|
"logps": -74.83208465576172, |
|
"loss": 0.3746, |
|
"objective": 0.36504948139190674, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.36504948139190674, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.8502598016060463, |
|
"eval_dpo_loss": 0.7075234055519104, |
|
"eval_logits": -1.0147572755813599, |
|
"eval_logps": -81.63130950927734, |
|
"eval_loss": 0.5244768261909485, |
|
"eval_objective": 0.5243399143218994, |
|
"eval_ranking_idealized": 0.5888429880142212, |
|
"eval_ranking_idealized_expo": 0.5103305578231812, |
|
"eval_ranking_simple": 0.5165289044380188, |
|
"eval_regularize": 0.5243399143218994, |
|
"eval_runtime": 304.8522, |
|
"eval_samples_per_second": 18.993, |
|
"eval_steps_per_second": 0.794, |
|
"step": 150 |
|
}, |
|
{ |
|
"dpo_loss": 0.5900284051895142, |
|
"epoch": 0.8786017949929145, |
|
"grad_norm": 16.142319382249244, |
|
"learning_rate": 4.9122289584888926e-06, |
|
"logits": -0.9796825051307678, |
|
"logps": -73.79574584960938, |
|
"loss": 0.3743, |
|
"objective": 0.36068999767303467, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.36068999767303467, |
|
"step": 155 |
|
}, |
|
{ |
|
"dpo_loss": 0.581436812877655, |
|
"epoch": 0.9069437883797827, |
|
"grad_norm": 14.536280492665549, |
|
"learning_rate": 4.8987324340362445e-06, |
|
"logits": -0.9089191555976868, |
|
"logps": -74.14305877685547, |
|
"loss": 0.3561, |
|
"objective": 0.3342379629611969, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.3342379629611969, |
|
"step": 160 |
|
}, |
|
{ |
|
"dpo_loss": 0.5954117178916931, |
|
"epoch": 0.9352857817666509, |
|
"grad_norm": 14.701365211451375, |
|
"learning_rate": 4.884292376870567e-06, |
|
"logits": -0.9048901796340942, |
|
"logps": -74.482666015625, |
|
"loss": 0.3603, |
|
"objective": 0.3528175950050354, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.3528175950050354, |
|
"step": 165 |
|
}, |
|
{ |
|
"dpo_loss": 0.5802183747291565, |
|
"epoch": 0.9636277751535192, |
|
"grad_norm": 14.493481098965965, |
|
"learning_rate": 4.868914466936038e-06, |
|
"logits": -0.9519513249397278, |
|
"logps": -73.6054916381836, |
|
"loss": 0.36, |
|
"objective": 0.3644227087497711, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.3644227087497711, |
|
"step": 170 |
|
}, |
|
{ |
|
"dpo_loss": 0.5715630054473877, |
|
"epoch": 0.9919697685403873, |
|
"grad_norm": 14.890486278379207, |
|
"learning_rate": 4.8526047530778175e-06, |
|
"logits": -0.9974517822265625, |
|
"logps": -72.90505981445312, |
|
"loss": 0.3487, |
|
"objective": 0.33891138434410095, |
|
"ranking_idealized": 0.6541666388511658, |
|
"ranking_idealized_expo": 0.574999988079071, |
|
"ranking_simple": 0.6333333253860474, |
|
"regularize": 0.33891138434410095, |
|
"step": 175 |
|
}, |
|
{ |
|
"dpo_loss": 0.5492505431175232, |
|
"epoch": 1.0203117619272555, |
|
"grad_norm": 15.405845532960777, |
|
"learning_rate": 4.835369650662767e-06, |
|
"logits": -1.026891827583313, |
|
"logps": -73.60990142822266, |
|
"loss": 0.3572, |
|
"objective": 0.37355440855026245, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.37355440855026245, |
|
"step": 180 |
|
}, |
|
{ |
|
"dpo_loss": 0.5769887566566467, |
|
"epoch": 1.0486537553141237, |
|
"grad_norm": 14.375308639465139, |
|
"learning_rate": 4.817215939055984e-06, |
|
"logits": -0.8692090511322021, |
|
"logps": -74.66992950439453, |
|
"loss": 0.3467, |
|
"objective": 0.3649583160877228, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.3649583160877228, |
|
"step": 185 |
|
}, |
|
{ |
|
"dpo_loss": 0.5738885998725891, |
|
"epoch": 1.076995748700992, |
|
"grad_norm": 14.51613583082745, |
|
"learning_rate": 4.798150758954164e-06, |
|
"logits": -0.9525903463363647, |
|
"logps": -77.0860824584961, |
|
"loss": 0.326, |
|
"objective": 0.28388112783432007, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5541666746139526, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.28388112783432007, |
|
"step": 190 |
|
}, |
|
{ |
|
"dpo_loss": 0.5714147090911865, |
|
"epoch": 1.10533774208786, |
|
"grad_norm": 14.487338275298626, |
|
"learning_rate": 4.778181609576832e-06, |
|
"logits": -1.04482901096344, |
|
"logps": -76.55809020996094, |
|
"loss": 0.3359, |
|
"objective": 0.3480500280857086, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.625, |
|
"regularize": 0.3480500280857086, |
|
"step": 195 |
|
}, |
|
{ |
|
"dpo_loss": 0.5680884122848511, |
|
"epoch": 1.1336797354747283, |
|
"grad_norm": 13.467634775423322, |
|
"learning_rate": 4.757316345716554e-06, |
|
"logits": -1.0929234027862549, |
|
"logps": -74.94612121582031, |
|
"loss": 0.3365, |
|
"objective": 0.3373413681983948, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.3373413681983948, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.1336797354747283, |
|
"eval_dpo_loss": 0.7045410871505737, |
|
"eval_logits": -1.081216812133789, |
|
"eval_logps": -80.30853271484375, |
|
"eval_loss": 0.5509735941886902, |
|
"eval_objective": 0.5435125827789307, |
|
"eval_ranking_idealized": 0.5888429880142212, |
|
"eval_ranking_idealized_expo": 0.5103305578231812, |
|
"eval_ranking_simple": 0.5134297609329224, |
|
"eval_regularize": 0.5435125827789307, |
|
"eval_runtime": 306.4337, |
|
"eval_samples_per_second": 18.895, |
|
"eval_steps_per_second": 0.79, |
|
"step": 200 |
|
}, |
|
{ |
|
"dpo_loss": 0.5709416270256042, |
|
"epoch": 1.1620217288615966, |
|
"grad_norm": 14.362316497314021, |
|
"learning_rate": 4.735563174649278e-06, |
|
"logits": -0.9582468867301941, |
|
"logps": -76.56965637207031, |
|
"loss": 0.3235, |
|
"objective": 0.3568976819515228, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.3568976819515228, |
|
"step": 205 |
|
}, |
|
{ |
|
"dpo_loss": 0.5406769514083862, |
|
"epoch": 1.1903637222484649, |
|
"grad_norm": 14.379661109842203, |
|
"learning_rate": 4.7129306529060415e-06, |
|
"logits": -0.8170334696769714, |
|
"logps": -77.32323455810547, |
|
"loss": 0.3299, |
|
"objective": 0.32689639925956726, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.32689639925956726, |
|
"step": 210 |
|
}, |
|
{ |
|
"dpo_loss": 0.579511284828186, |
|
"epoch": 1.2187057156353331, |
|
"grad_norm": 13.661507608183136, |
|
"learning_rate": 4.68942768290728e-06, |
|
"logits": -0.7792264223098755, |
|
"logps": -76.88572692871094, |
|
"loss": 0.3051, |
|
"objective": 0.3139689862728119, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.3139689862728119, |
|
"step": 215 |
|
}, |
|
{ |
|
"dpo_loss": 0.5531005263328552, |
|
"epoch": 1.2470477090222012, |
|
"grad_norm": 13.52084314453953, |
|
"learning_rate": 4.665063509461098e-06, |
|
"logits": -0.6951576471328735, |
|
"logps": -75.48503875732422, |
|
"loss": 0.3088, |
|
"objective": 0.29014942049980164, |
|
"ranking_idealized": 0.6541666388511658, |
|
"ranking_idealized_expo": 0.6041666865348816, |
|
"ranking_simple": 0.6583333611488342, |
|
"regularize": 0.29014942049980164, |
|
"step": 220 |
|
}, |
|
{ |
|
"dpo_loss": 0.5622245073318481, |
|
"epoch": 1.2753897024090695, |
|
"grad_norm": 13.599594203183662, |
|
"learning_rate": 4.639847716126855e-06, |
|
"logits": -0.8319641351699829, |
|
"logps": -76.13066101074219, |
|
"loss": 0.3046, |
|
"objective": 0.2946093678474426, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.2946093678474426, |
|
"step": 225 |
|
}, |
|
{ |
|
"dpo_loss": 0.5498285889625549, |
|
"epoch": 1.3037316957959377, |
|
"grad_norm": 13.631491302105779, |
|
"learning_rate": 4.613790221445511e-06, |
|
"logits": -0.8351464867591858, |
|
"logps": -76.21858978271484, |
|
"loss": 0.2907, |
|
"objective": 0.2966916263103485, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.2966916263103485, |
|
"step": 230 |
|
}, |
|
{ |
|
"dpo_loss": 0.5353298187255859, |
|
"epoch": 1.3320736891828058, |
|
"grad_norm": 13.968269928794491, |
|
"learning_rate": 4.586901275038201e-06, |
|
"logits": -0.9569566249847412, |
|
"logps": -73.50439453125, |
|
"loss": 0.2932, |
|
"objective": 0.282253235578537, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.282253235578537, |
|
"step": 235 |
|
}, |
|
{ |
|
"dpo_loss": 0.5563216805458069, |
|
"epoch": 1.360415682569674, |
|
"grad_norm": 13.29819869933344, |
|
"learning_rate": 4.559191453574582e-06, |
|
"logits": -0.8645553588867188, |
|
"logps": -75.53768920898438, |
|
"loss": 0.2903, |
|
"objective": 0.3077048361301422, |
|
"ranking_idealized": 0.5541666746139526, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.3077048361301422, |
|
"step": 240 |
|
}, |
|
{ |
|
"dpo_loss": 0.5537334084510803, |
|
"epoch": 1.3887576759565423, |
|
"grad_norm": 12.875135429729372, |
|
"learning_rate": 4.530671656612544e-06, |
|
"logits": -0.850976824760437, |
|
"logps": -74.48047637939453, |
|
"loss": 0.2813, |
|
"objective": 0.28993913531303406, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.28993913531303406, |
|
"step": 245 |
|
}, |
|
{ |
|
"dpo_loss": 0.5563530325889587, |
|
"epoch": 1.4170996693434104, |
|
"grad_norm": 13.133098253949312, |
|
"learning_rate": 4.501353102310901e-06, |
|
"logits": -0.876754879951477, |
|
"logps": -74.63734436035156, |
|
"loss": 0.2986, |
|
"objective": 0.31398385763168335, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.47083333134651184, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.31398385763168335, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.4170996693434104, |
|
"eval_dpo_loss": 0.7114357352256775, |
|
"eval_logits": -0.9739663004875183, |
|
"eval_logps": -79.86080932617188, |
|
"eval_loss": 0.5600165128707886, |
|
"eval_objective": 0.5589736700057983, |
|
"eval_ranking_idealized": 0.5888429880142212, |
|
"eval_ranking_idealized_expo": 0.5103305578231812, |
|
"eval_ranking_simple": 0.5216942429542542, |
|
"eval_regularize": 0.5589736700057983, |
|
"eval_runtime": 311.8199, |
|
"eval_samples_per_second": 18.568, |
|
"eval_steps_per_second": 0.776, |
|
"step": 250 |
|
}, |
|
{ |
|
"dpo_loss": 0.5523940324783325, |
|
"epoch": 1.4454416627302786, |
|
"grad_norm": 13.234580872068834, |
|
"learning_rate": 4.4712473230167775e-06, |
|
"logits": -0.8489291071891785, |
|
"logps": -75.83350372314453, |
|
"loss": 0.2822, |
|
"objective": 0.26451078057289124, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.625, |
|
"regularize": 0.26451075077056885, |
|
"step": 255 |
|
}, |
|
{ |
|
"dpo_loss": 0.5551812052726746, |
|
"epoch": 1.473783656117147, |
|
"grad_norm": 13.300182649317502, |
|
"learning_rate": 4.440366160729393e-06, |
|
"logits": -0.7993748784065247, |
|
"logps": -76.90797424316406, |
|
"loss": 0.2899, |
|
"objective": 0.3137137293815613, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.3137137293815613, |
|
"step": 260 |
|
}, |
|
{ |
|
"dpo_loss": 0.5330025553703308, |
|
"epoch": 1.5021256495040152, |
|
"grad_norm": 14.207525692908433, |
|
"learning_rate": 4.4087217624420595e-06, |
|
"logits": -0.7082098722457886, |
|
"logps": -76.11457824707031, |
|
"loss": 0.2846, |
|
"objective": 0.25817468762397766, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.25817468762397766, |
|
"step": 265 |
|
}, |
|
{ |
|
"dpo_loss": 0.5525502562522888, |
|
"epoch": 1.5304676428908834, |
|
"grad_norm": 12.70772023655561, |
|
"learning_rate": 4.376326575364206e-06, |
|
"logits": -0.7001979947090149, |
|
"logps": -76.76708221435547, |
|
"loss": 0.2772, |
|
"objective": 0.2816409468650818, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.2816409468650818, |
|
"step": 270 |
|
}, |
|
{ |
|
"dpo_loss": 0.5756404399871826, |
|
"epoch": 1.5588096362777515, |
|
"grad_norm": 12.63088418395231, |
|
"learning_rate": 4.34319334202531e-06, |
|
"logits": -0.7429192066192627, |
|
"logps": -76.83454132080078, |
|
"loss": 0.2784, |
|
"objective": 0.2739499509334564, |
|
"ranking_idealized": 0.6208333373069763, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.2739499509334564, |
|
"step": 275 |
|
}, |
|
{ |
|
"dpo_loss": 0.5417617559432983, |
|
"epoch": 1.5871516296646198, |
|
"grad_norm": 12.825009100783165, |
|
"learning_rate": 4.309335095262675e-06, |
|
"logits": -0.7624490261077881, |
|
"logps": -73.92798614501953, |
|
"loss": 0.2686, |
|
"objective": 0.2632053792476654, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.2632053792476654, |
|
"step": 280 |
|
}, |
|
{ |
|
"dpo_loss": 0.5646030306816101, |
|
"epoch": 1.615493623051488, |
|
"grad_norm": 13.21737932177293, |
|
"learning_rate": 4.274765153095008e-06, |
|
"logits": -0.8555312752723694, |
|
"logps": -74.2479019165039, |
|
"loss": 0.2734, |
|
"objective": 0.28485408425331116, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.28485408425331116, |
|
"step": 285 |
|
}, |
|
{ |
|
"dpo_loss": 0.5464602112770081, |
|
"epoch": 1.643835616438356, |
|
"grad_norm": 12.446912499924908, |
|
"learning_rate": 4.239497113483819e-06, |
|
"logits": -0.8550429940223694, |
|
"logps": -72.50788879394531, |
|
"loss": 0.2488, |
|
"objective": 0.24861304461956024, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.5625, |
|
"ranking_simple": 0.6416666507720947, |
|
"regularize": 0.24861304461956024, |
|
"step": 290 |
|
}, |
|
{ |
|
"dpo_loss": 0.5289739370346069, |
|
"epoch": 1.6721776098252243, |
|
"grad_norm": 12.298985319390749, |
|
"learning_rate": 4.203544848984729e-06, |
|
"logits": -0.7495914101600647, |
|
"logps": -71.680908203125, |
|
"loss": 0.2614, |
|
"objective": 0.24754279851913452, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.24754279851913452, |
|
"step": 295 |
|
}, |
|
{ |
|
"dpo_loss": 0.5397422909736633, |
|
"epoch": 1.7005196032120926, |
|
"grad_norm": 12.732726818745434, |
|
"learning_rate": 4.16692250129073e-06, |
|
"logits": -0.7598840594291687, |
|
"logps": -72.7435531616211, |
|
"loss": 0.2571, |
|
"objective": 0.26093509793281555, |
|
"ranking_idealized": 0.6583333611488342, |
|
"ranking_idealized_expo": 0.574999988079071, |
|
"ranking_simple": 0.637499988079071, |
|
"regularize": 0.26093509793281555, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.7005196032120926, |
|
"eval_dpo_loss": 0.714857280254364, |
|
"eval_logits": -0.802342414855957, |
|
"eval_logps": -77.65937042236328, |
|
"eval_loss": 0.5773638486862183, |
|
"eval_objective": 0.5723886489868164, |
|
"eval_ranking_idealized": 0.5888429880142212, |
|
"eval_ranking_idealized_expo": 0.5103305578231812, |
|
"eval_ranking_simple": 0.5216942429542542, |
|
"eval_regularize": 0.5723886489868164, |
|
"eval_runtime": 310.9204, |
|
"eval_samples_per_second": 18.622, |
|
"eval_steps_per_second": 0.778, |
|
"step": 300 |
|
}, |
|
{ |
|
"dpo_loss": 0.544061541557312, |
|
"epoch": 1.7288615965989607, |
|
"grad_norm": 13.110616566501927, |
|
"learning_rate": 4.129644475669617e-06, |
|
"logits": -0.7123271822929382, |
|
"logps": -74.22494506835938, |
|
"loss": 0.2603, |
|
"objective": 0.26340511441230774, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.26340508460998535, |
|
"step": 305 |
|
}, |
|
{ |
|
"dpo_loss": 0.5413281917572021, |
|
"epoch": 1.7572035899858292, |
|
"grad_norm": 12.898498535434477, |
|
"learning_rate": 4.091725435297721e-06, |
|
"logits": -0.7656295895576477, |
|
"logps": -73.33552551269531, |
|
"loss": 0.2607, |
|
"objective": 0.24210144579410553, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.24210144579410553, |
|
"step": 310 |
|
}, |
|
{ |
|
"dpo_loss": 0.5535203218460083, |
|
"epoch": 1.7855455833726972, |
|
"grad_norm": 13.419557597255025, |
|
"learning_rate": 4.053180295492203e-06, |
|
"logits": -0.7532054781913757, |
|
"logps": -74.6802749633789, |
|
"loss": 0.2485, |
|
"objective": 0.2460954487323761, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.2460954487323761, |
|
"step": 315 |
|
}, |
|
{ |
|
"dpo_loss": 0.5523297190666199, |
|
"epoch": 1.8138875767595655, |
|
"grad_norm": 12.79380807435913, |
|
"learning_rate": 4.014024217844167e-06, |
|
"logits": -0.8301442861557007, |
|
"logps": -76.51591491699219, |
|
"loss": 0.2635, |
|
"objective": 0.3148113191127777, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.3148112893104553, |
|
"step": 320 |
|
}, |
|
{ |
|
"dpo_loss": 0.532356858253479, |
|
"epoch": 1.8422295701464337, |
|
"grad_norm": 13.365547750277852, |
|
"learning_rate": 3.974272604254906e-06, |
|
"logits": -0.8250628709793091, |
|
"logps": -77.59033203125, |
|
"loss": 0.2519, |
|
"objective": 0.25422483682632446, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.6208333373069763, |
|
"regularize": 0.25422483682632446, |
|
"step": 325 |
|
}, |
|
{ |
|
"dpo_loss": 0.5476362705230713, |
|
"epoch": 1.8705715635333018, |
|
"grad_norm": 12.802067588796056, |
|
"learning_rate": 3.933941090877615e-06, |
|
"logits": -0.6167819499969482, |
|
"logps": -74.30210876464844, |
|
"loss": 0.249, |
|
"objective": 0.23240336775779724, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.23240336775779724, |
|
"step": 330 |
|
}, |
|
{ |
|
"dpo_loss": 0.5612020492553711, |
|
"epoch": 1.89891355692017, |
|
"grad_norm": 12.79328820625046, |
|
"learning_rate": 3.893045541966975e-06, |
|
"logits": -0.6927453875541687, |
|
"logps": -73.87529754638672, |
|
"loss": 0.2434, |
|
"objective": 0.23465226590633392, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.23465226590633392, |
|
"step": 335 |
|
}, |
|
{ |
|
"dpo_loss": 0.5674837231636047, |
|
"epoch": 1.9272555503070383, |
|
"grad_norm": 13.023100915665985, |
|
"learning_rate": 3.8516020436389945e-06, |
|
"logits": -0.6162213683128357, |
|
"logps": -76.17884063720703, |
|
"loss": 0.2412, |
|
"objective": 0.2584584355354309, |
|
"ranking_idealized": 0.6416666507720947, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.6291666626930237, |
|
"regularize": 0.2584584355354309, |
|
"step": 340 |
|
}, |
|
{ |
|
"dpo_loss": 0.5505400896072388, |
|
"epoch": 1.9555975436939064, |
|
"grad_norm": 12.686766547892125, |
|
"learning_rate": 3.8096268975436045e-06, |
|
"logits": -0.634881854057312, |
|
"logps": -74.6928939819336, |
|
"loss": 0.227, |
|
"objective": 0.23832601308822632, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.23832601308822632, |
|
"step": 345 |
|
}, |
|
{ |
|
"dpo_loss": 0.562423825263977, |
|
"epoch": 1.9839395370807746, |
|
"grad_norm": 13.255584593626184, |
|
"learning_rate": 3.767136614452458e-06, |
|
"logits": -0.5817281603813171, |
|
"logps": -75.97590637207031, |
|
"loss": 0.2355, |
|
"objective": 0.23991867899894714, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.23991867899894714, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.9839395370807746, |
|
"eval_dpo_loss": 0.7175589203834534, |
|
"eval_logits": -0.7278391122817993, |
|
"eval_logps": -79.45545196533203, |
|
"eval_loss": 0.5796564817428589, |
|
"eval_objective": 0.5735844373703003, |
|
"eval_ranking_idealized": 0.5888429880142212, |
|
"eval_ranking_idealized_expo": 0.5103305578231812, |
|
"eval_ranking_simple": 0.5185950398445129, |
|
"eval_regularize": 0.5735844373703003, |
|
"eval_runtime": 307.9093, |
|
"eval_samples_per_second": 18.804, |
|
"eval_steps_per_second": 0.786, |
|
"step": 350 |
|
}, |
|
{ |
|
"dpo_loss": 0.5313494801521301, |
|
"epoch": 2.012281530467643, |
|
"grad_norm": 12.907638515771302, |
|
"learning_rate": 3.724147907764478e-06, |
|
"logits": -0.5674468874931335, |
|
"logps": -75.11559295654297, |
|
"loss": 0.2316, |
|
"objective": 0.23825272917747498, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.23825272917747498, |
|
"step": 355 |
|
}, |
|
{ |
|
"dpo_loss": 0.5382583737373352, |
|
"epoch": 2.040623523854511, |
|
"grad_norm": 13.693755833374205, |
|
"learning_rate": 3.6806776869317074e-06, |
|
"logits": -0.6632164120674133, |
|
"logps": -74.61618041992188, |
|
"loss": 0.2199, |
|
"objective": 0.2271762192249298, |
|
"ranking_idealized": 0.6583333611488342, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.6208333373069763, |
|
"regularize": 0.2271762192249298, |
|
"step": 360 |
|
}, |
|
{ |
|
"dpo_loss": 0.547314465045929, |
|
"epoch": 2.0689655172413794, |
|
"grad_norm": 12.48173826929469, |
|
"learning_rate": 3.6367430508080283e-06, |
|
"logits": -0.7491353154182434, |
|
"logps": -77.67138671875, |
|
"loss": 0.2218, |
|
"objective": 0.2287120372056961, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.2287120372056961, |
|
"step": 365 |
|
}, |
|
{ |
|
"dpo_loss": 0.5396644473075867, |
|
"epoch": 2.0973075106282475, |
|
"grad_norm": 12.712152014920445, |
|
"learning_rate": 3.5923612809233987e-06, |
|
"logits": -0.6872729063034058, |
|
"logps": -75.26972961425781, |
|
"loss": 0.2158, |
|
"objective": 0.20656557381153107, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.20656557381153107, |
|
"step": 370 |
|
}, |
|
{ |
|
"dpo_loss": 0.5337859988212585, |
|
"epoch": 2.1256495040151155, |
|
"grad_norm": 12.21493703143298, |
|
"learning_rate": 3.547549834686222e-06, |
|
"logits": -0.732543408870697, |
|
"logps": -76.9386978149414, |
|
"loss": 0.222, |
|
"objective": 0.23369713127613068, |
|
"ranking_idealized": 0.6708333492279053, |
|
"ranking_idealized_expo": 0.5916666388511658, |
|
"ranking_simple": 0.6583333611488342, |
|
"regularize": 0.23369713127613068, |
|
"step": 375 |
|
}, |
|
{ |
|
"dpo_loss": 0.5434145331382751, |
|
"epoch": 2.153991497401984, |
|
"grad_norm": 12.412606280858217, |
|
"learning_rate": 3.5023263385165346e-06, |
|
"logits": -0.6855267286300659, |
|
"logps": -76.3364486694336, |
|
"loss": 0.2038, |
|
"objective": 0.21526171267032623, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.21526171267032623, |
|
"step": 380 |
|
}, |
|
{ |
|
"dpo_loss": 0.5589515566825867, |
|
"epoch": 2.182333490788852, |
|
"grad_norm": 13.046894193039002, |
|
"learning_rate": 3.4567085809127247e-06, |
|
"logits": -0.6990526914596558, |
|
"logps": -78.23905944824219, |
|
"loss": 0.2127, |
|
"objective": 0.22517748177051544, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.22517748177051544, |
|
"step": 385 |
|
}, |
|
{ |
|
"dpo_loss": 0.5659069418907166, |
|
"epoch": 2.21067548417572, |
|
"grad_norm": 13.492951762790497, |
|
"learning_rate": 3.410714505454486e-06, |
|
"logits": -0.6352434754371643, |
|
"logps": -76.46235656738281, |
|
"loss": 0.2106, |
|
"objective": 0.2098178267478943, |
|
"ranking_idealized": 0.5291666388511658, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 0.2098177969455719, |
|
"step": 390 |
|
}, |
|
{ |
|
"dpo_loss": 0.520107626914978, |
|
"epoch": 2.2390174775625886, |
|
"grad_norm": 12.062788566128349, |
|
"learning_rate": 3.364362203744777e-06, |
|
"logits": -0.6107616424560547, |
|
"logps": -78.00432586669922, |
|
"loss": 0.2145, |
|
"objective": 0.21819378435611725, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.21819378435611725, |
|
"step": 395 |
|
}, |
|
{ |
|
"dpo_loss": 0.5222891569137573, |
|
"epoch": 2.2673594709494567, |
|
"grad_norm": 13.243434332838024, |
|
"learning_rate": 3.3176699082935546e-06, |
|
"logits": -0.6634539365768433, |
|
"logps": -78.29851531982422, |
|
"loss": 0.1974, |
|
"objective": 0.19920341670513153, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.574999988079071, |
|
"ranking_simple": 0.6499999761581421, |
|
"regularize": 0.19920340180397034, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.2673594709494567, |
|
"eval_dpo_loss": 0.715599775314331, |
|
"eval_logits": -0.7596362233161926, |
|
"eval_logps": -81.36702728271484, |
|
"eval_loss": 0.5802313685417175, |
|
"eval_objective": 0.5785107612609863, |
|
"eval_ranking_idealized": 0.5888429880142212, |
|
"eval_ranking_idealized_expo": 0.5103305578231812, |
|
"eval_ranking_simple": 0.5278925895690918, |
|
"eval_regularize": 0.5785107612609863, |
|
"eval_runtime": 305.2997, |
|
"eval_samples_per_second": 18.965, |
|
"eval_steps_per_second": 0.793, |
|
"step": 400 |
|
}, |
|
{ |
|
"dpo_loss": 0.5360192060470581, |
|
"epoch": 2.295701464336325, |
|
"grad_norm": 12.435630926562606, |
|
"learning_rate": 3.2706559853460818e-06, |
|
"logits": -0.7467118501663208, |
|
"logps": -76.52106475830078, |
|
"loss": 0.1969, |
|
"objective": 0.19979530572891235, |
|
"ranking_idealized": 0.6458333134651184, |
|
"ranking_idealized_expo": 0.5583333373069763, |
|
"ranking_simple": 0.637499988079071, |
|
"regularize": 0.19979530572891235, |
|
"step": 405 |
|
}, |
|
{ |
|
"dpo_loss": 0.5409468412399292, |
|
"epoch": 2.324043457723193, |
|
"grad_norm": 11.990944827281423, |
|
"learning_rate": 3.2233389276586325e-06, |
|
"logits": -0.6921295523643494, |
|
"logps": -76.02128601074219, |
|
"loss": 0.1889, |
|
"objective": 0.18640001118183136, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.18640001118183136, |
|
"step": 410 |
|
}, |
|
{ |
|
"dpo_loss": 0.5444414615631104, |
|
"epoch": 2.3523854511100613, |
|
"grad_norm": 12.656500632614845, |
|
"learning_rate": 3.1757373472244324e-06, |
|
"logits": -0.6981016993522644, |
|
"logps": -76.54192352294922, |
|
"loss": 0.191, |
|
"objective": 0.19465957581996918, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.19465957581996918, |
|
"step": 415 |
|
}, |
|
{ |
|
"dpo_loss": 0.5401104092597961, |
|
"epoch": 2.3807274444969297, |
|
"grad_norm": 11.869598225301196, |
|
"learning_rate": 3.127869967952698e-06, |
|
"logits": -0.6331411004066467, |
|
"logps": -77.9117202758789, |
|
"loss": 0.1932, |
|
"objective": 0.19117851555347443, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.47083333134651184, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.19117851555347443, |
|
"step": 420 |
|
}, |
|
{ |
|
"dpo_loss": 0.5331805944442749, |
|
"epoch": 2.409069437883798, |
|
"grad_norm": 12.370377424025024, |
|
"learning_rate": 3.0797556183036582e-06, |
|
"logits": -0.659760057926178, |
|
"logps": -77.06755065917969, |
|
"loss": 0.1996, |
|
"objective": 0.20076175034046173, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.20076175034046173, |
|
"step": 425 |
|
}, |
|
{ |
|
"dpo_loss": 0.5397772192955017, |
|
"epoch": 2.4374114312706663, |
|
"grad_norm": 11.643686599023107, |
|
"learning_rate": 3.0314132238824416e-06, |
|
"logits": -0.6464790105819702, |
|
"logps": -77.9500503540039, |
|
"loss": 0.1919, |
|
"objective": 0.19164572656154633, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.19164572656154633, |
|
"step": 430 |
|
}, |
|
{ |
|
"dpo_loss": 0.5243253111839294, |
|
"epoch": 2.4657534246575343, |
|
"grad_norm": 11.712683298007569, |
|
"learning_rate": 2.9828617999947647e-06, |
|
"logits": -0.6973146796226501, |
|
"logps": -76.83717346191406, |
|
"loss": 0.1917, |
|
"objective": 0.20314709842205048, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.20314709842205048, |
|
"step": 435 |
|
}, |
|
{ |
|
"dpo_loss": 0.5402427315711975, |
|
"epoch": 2.4940954180444024, |
|
"grad_norm": 12.183847925432286, |
|
"learning_rate": 2.9341204441673267e-06, |
|
"logits": -0.6815401315689087, |
|
"logps": -76.1678695678711, |
|
"loss": 0.1838, |
|
"objective": 0.17876936495304108, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.1787693351507187, |
|
"step": 440 |
|
}, |
|
{ |
|
"dpo_loss": 0.5401654839515686, |
|
"epoch": 2.5224374114312704, |
|
"grad_norm": 11.596641440144525, |
|
"learning_rate": 2.8852083286358647e-06, |
|
"logits": -0.6953208446502686, |
|
"logps": -74.04133605957031, |
|
"loss": 0.1837, |
|
"objective": 0.17748965322971344, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.17748965322971344, |
|
"step": 445 |
|
}, |
|
{ |
|
"dpo_loss": 0.5587974786758423, |
|
"epoch": 2.550779404818139, |
|
"grad_norm": 12.363231902921715, |
|
"learning_rate": 2.8361446928038298e-06, |
|
"logits": -0.7141574621200562, |
|
"logps": -76.00529479980469, |
|
"loss": 0.1787, |
|
"objective": 0.17918486893177032, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.17918486893177032, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.550779404818139, |
|
"eval_dpo_loss": 0.716120183467865, |
|
"eval_logits": -0.7106173038482666, |
|
"eval_logps": -80.80033111572266, |
|
"eval_loss": 0.5830134749412537, |
|
"eval_objective": 0.5798959732055664, |
|
"eval_ranking_idealized": 0.5888429880142212, |
|
"eval_ranking_idealized_expo": 0.5103305578231812, |
|
"eval_ranking_simple": 0.5227272510528564, |
|
"eval_regularize": 0.5798959732055664, |
|
"eval_runtime": 310.4005, |
|
"eval_samples_per_second": 18.653, |
|
"eval_steps_per_second": 0.78, |
|
"step": 450 |
|
}, |
|
{ |
|
"dpo_loss": 0.5358783006668091, |
|
"epoch": 2.579121398205007, |
|
"grad_norm": 12.672287662194814, |
|
"learning_rate": 2.7869488356746344e-06, |
|
"logits": -0.651078462600708, |
|
"logps": -76.96314239501953, |
|
"loss": 0.1798, |
|
"objective": 0.18865393102169037, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.18865393102169037, |
|
"step": 455 |
|
}, |
|
{ |
|
"dpo_loss": 0.525675356388092, |
|
"epoch": 2.6074633915918755, |
|
"grad_norm": 11.750098983866883, |
|
"learning_rate": 2.7376401082604563e-06, |
|
"logits": -0.6694878339767456, |
|
"logps": -76.94908905029297, |
|
"loss": 0.1726, |
|
"objective": 0.17671962082386017, |
|
"ranking_idealized": 0.6208333373069763, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.6083333492279053, |
|
"regularize": 0.17671962082386017, |
|
"step": 460 |
|
}, |
|
{ |
|
"dpo_loss": 0.5402486324310303, |
|
"epoch": 2.6358053849787435, |
|
"grad_norm": 11.832013815001902, |
|
"learning_rate": 2.6882379059705953e-06, |
|
"logits": -0.5349324345588684, |
|
"logps": -77.00886535644531, |
|
"loss": 0.1673, |
|
"objective": 0.15668810904026031, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.15668810904026031, |
|
"step": 465 |
|
}, |
|
{ |
|
"dpo_loss": 0.5375062823295593, |
|
"epoch": 2.6641473783656116, |
|
"grad_norm": 12.335063217908251, |
|
"learning_rate": 2.6387616609823506e-06, |
|
"logits": -0.5760343670845032, |
|
"logps": -76.343994140625, |
|
"loss": 0.1727, |
|
"objective": 0.15891402959823608, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5541666746139526, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.15891402959823608, |
|
"step": 470 |
|
}, |
|
{ |
|
"dpo_loss": 0.5359907150268555, |
|
"epoch": 2.69248937175248, |
|
"grad_norm": 12.267574887608895, |
|
"learning_rate": 2.5892308345974517e-06, |
|
"logits": -0.6134538054466248, |
|
"logps": -75.63439178466797, |
|
"loss": 0.1674, |
|
"objective": 0.15188990533351898, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.6083333492279053, |
|
"regularize": 0.15188990533351898, |
|
"step": 475 |
|
}, |
|
{ |
|
"dpo_loss": 0.5327687859535217, |
|
"epoch": 2.720831365139348, |
|
"grad_norm": 12.442899229326, |
|
"learning_rate": 2.53966490958702e-06, |
|
"logits": -0.6768204569816589, |
|
"logps": -74.73170471191406, |
|
"loss": 0.1658, |
|
"objective": 0.1598203480243683, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.1598203480243683, |
|
"step": 480 |
|
}, |
|
{ |
|
"dpo_loss": 0.5352267622947693, |
|
"epoch": 2.7491733585262166, |
|
"grad_norm": 11.94503762472298, |
|
"learning_rate": 2.490083382528097e-06, |
|
"logits": -0.6315467357635498, |
|
"logps": -77.08844757080078, |
|
"loss": 0.1607, |
|
"objective": 0.14373879134655, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.14373879134655, |
|
"step": 485 |
|
}, |
|
{ |
|
"dpo_loss": 0.5155076384544373, |
|
"epoch": 2.7775153519130846, |
|
"grad_norm": 11.88965868362973, |
|
"learning_rate": 2.440505756134732e-06, |
|
"logits": -0.588857889175415, |
|
"logps": -75.61197662353516, |
|
"loss": 0.1625, |
|
"objective": 0.1591319590806961, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.1591319590806961, |
|
"step": 490 |
|
}, |
|
{ |
|
"dpo_loss": 0.5332462191581726, |
|
"epoch": 2.8058573452999527, |
|
"grad_norm": 12.56886938049473, |
|
"learning_rate": 2.3909515315866606e-06, |
|
"logits": -0.6140356063842773, |
|
"logps": -74.00759887695312, |
|
"loss": 0.1536, |
|
"objective": 0.14011286199092865, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.14011286199092865, |
|
"step": 495 |
|
}, |
|
{ |
|
"dpo_loss": 0.5316342711448669, |
|
"epoch": 2.8341993386868207, |
|
"grad_norm": 13.210636360710986, |
|
"learning_rate": 2.341440200858589e-06, |
|
"logits": -0.6548908352851868, |
|
"logps": -73.46178436279297, |
|
"loss": 0.1582, |
|
"objective": 0.16330239176750183, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.16330239176750183, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.8341993386868207, |
|
"eval_dpo_loss": 0.7177305817604065, |
|
"eval_logits": -0.7271575927734375, |
|
"eval_logps": -80.30960845947266, |
|
"eval_loss": 0.5835670828819275, |
|
"eval_objective": 0.5800158977508545, |
|
"eval_ranking_idealized": 0.5888429880142212, |
|
"eval_ranking_idealized_expo": 0.5103305578231812, |
|
"eval_ranking_simple": 0.5175619721412659, |
|
"eval_regularize": 0.5800158977508545, |
|
"eval_runtime": 305.7617, |
|
"eval_samples_per_second": 18.936, |
|
"eval_steps_per_second": 0.791, |
|
"step": 500 |
|
}, |
|
{ |
|
"dpo_loss": 0.5332918763160706, |
|
"epoch": 2.862541332073689, |
|
"grad_norm": 12.029588140120659, |
|
"learning_rate": 2.2919912390530945e-06, |
|
"logits": -0.6230663061141968, |
|
"logps": -75.35047912597656, |
|
"loss": 0.1537, |
|
"objective": 0.1514635533094406, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.1514635533094406, |
|
"step": 505 |
|
}, |
|
{ |
|
"dpo_loss": 0.5396894216537476, |
|
"epoch": 2.8908833254605573, |
|
"grad_norm": 12.137559628481368, |
|
"learning_rate": 2.242624096740164e-06, |
|
"logits": -0.5799409747123718, |
|
"logps": -76.0989761352539, |
|
"loss": 0.1479, |
|
"objective": 0.135562464594841, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.135562464594841, |
|
"step": 510 |
|
}, |
|
{ |
|
"dpo_loss": 0.5431153178215027, |
|
"epoch": 2.9192253188474258, |
|
"grad_norm": 12.661269374784712, |
|
"learning_rate": 2.193358192306384e-06, |
|
"logits": -0.6524708867073059, |
|
"logps": -75.5029067993164, |
|
"loss": 0.1493, |
|
"objective": 0.15468096733093262, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.15468096733093262, |
|
"step": 515 |
|
}, |
|
{ |
|
"dpo_loss": 0.5469124913215637, |
|
"epoch": 2.947567312234294, |
|
"grad_norm": 11.884820373737863, |
|
"learning_rate": 2.1442129043167877e-06, |
|
"logits": -0.599845290184021, |
|
"logps": -77.28815460205078, |
|
"loss": 0.1425, |
|
"objective": 0.1461026966571808, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.1461026966571808, |
|
"step": 520 |
|
}, |
|
{ |
|
"dpo_loss": 0.524174690246582, |
|
"epoch": 2.975909305621162, |
|
"grad_norm": 12.08259988276891, |
|
"learning_rate": 2.0952075638923656e-06, |
|
"logits": -0.5986518859863281, |
|
"logps": -76.06282806396484, |
|
"loss": 0.1444, |
|
"objective": 0.14290857315063477, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.6333333253860474, |
|
"regularize": 0.14290857315063477, |
|
"step": 525 |
|
}, |
|
{ |
|
"dpo_loss": 0.5151727199554443, |
|
"epoch": 3.0042512990080303, |
|
"grad_norm": 12.395048106432046, |
|
"learning_rate": 2.046361447106244e-06, |
|
"logits": -0.6094143986701965, |
|
"logps": -75.57567596435547, |
|
"loss": 0.1418, |
|
"objective": 0.14209994673728943, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.14209994673728943, |
|
"step": 530 |
|
}, |
|
{ |
|
"dpo_loss": 0.5413444638252258, |
|
"epoch": 3.0325932923948984, |
|
"grad_norm": 12.113971197675227, |
|
"learning_rate": 1.997693767401503e-06, |
|
"logits": -0.6318843960762024, |
|
"logps": -77.62167358398438, |
|
"loss": 0.1364, |
|
"objective": 0.13606974482536316, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.13606974482536316, |
|
"step": 535 |
|
}, |
|
{ |
|
"dpo_loss": 0.5392124056816101, |
|
"epoch": 3.0609352857817664, |
|
"grad_norm": 12.624676506046614, |
|
"learning_rate": 1.9492236680336486e-06, |
|
"logits": -0.6600247621536255, |
|
"logps": -75.87137603759766, |
|
"loss": 0.1305, |
|
"objective": 0.12727196514606476, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.12727196514606476, |
|
"step": 540 |
|
}, |
|
{ |
|
"dpo_loss": 0.5333729982376099, |
|
"epoch": 3.089277279168635, |
|
"grad_norm": 11.94747900474122, |
|
"learning_rate": 1.9009702145406728e-06, |
|
"logits": -0.6088955402374268, |
|
"logps": -76.96524810791016, |
|
"loss": 0.1277, |
|
"objective": 0.12902715802192688, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.12902715802192688, |
|
"step": 545 |
|
}, |
|
{ |
|
"dpo_loss": 0.5270958542823792, |
|
"epoch": 3.117619272555503, |
|
"grad_norm": 12.851758065750987, |
|
"learning_rate": 1.852952387243698e-06, |
|
"logits": -0.5018327832221985, |
|
"logps": -77.49250030517578, |
|
"loss": 0.1257, |
|
"objective": 0.1227855458855629, |
|
"ranking_idealized": 0.6208333373069763, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.6083333492279053, |
|
"regularize": 0.1227855458855629, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 3.117619272555503, |
|
"eval_dpo_loss": 0.7177569270133972, |
|
"eval_logits": -0.6680934429168701, |
|
"eval_logps": -80.876708984375, |
|
"eval_loss": 0.5853214859962463, |
|
"eval_objective": 0.581609308719635, |
|
"eval_ranking_idealized": 0.5888429880142212, |
|
"eval_ranking_idealized_expo": 0.5103305578231812, |
|
"eval_ranking_simple": 0.5237603187561035, |
|
"eval_regularize": 0.581609308719635, |
|
"eval_runtime": 302.9624, |
|
"eval_samples_per_second": 19.111, |
|
"eval_steps_per_second": 0.799, |
|
"step": 550 |
|
}, |
|
{ |
|
"dpo_loss": 0.520027756690979, |
|
"epoch": 3.1459612659423715, |
|
"grad_norm": 12.026540658203501, |
|
"learning_rate": 1.8051890737811395e-06, |
|
"logits": -0.5085936188697815, |
|
"logps": -77.27413940429688, |
|
"loss": 0.1298, |
|
"objective": 0.13011744618415833, |
|
"ranking_idealized": 0.637499988079071, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.6291666626930237, |
|
"regularize": 0.13011744618415833, |
|
"step": 555 |
|
}, |
|
{ |
|
"dpo_loss": 0.5274783372879028, |
|
"epoch": 3.1743032593292395, |
|
"grad_norm": 12.49570560811601, |
|
"learning_rate": 1.7576990616793139e-06, |
|
"logits": -0.5597764253616333, |
|
"logps": -74.51499938964844, |
|
"loss": 0.1253, |
|
"objective": 0.12430194020271301, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.12430194020271301, |
|
"step": 560 |
|
}, |
|
{ |
|
"dpo_loss": 0.5323359370231628, |
|
"epoch": 3.2026452527161076, |
|
"grad_norm": 12.00538487916689, |
|
"learning_rate": 1.7105010309624381e-06, |
|
"logits": -0.5895612835884094, |
|
"logps": -76.143310546875, |
|
"loss": 0.1215, |
|
"objective": 0.13054049015045166, |
|
"ranking_idealized": 0.6416666507720947, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.6416666507720947, |
|
"regularize": 0.13054047524929047, |
|
"step": 565 |
|
}, |
|
{ |
|
"dpo_loss": 0.5244373083114624, |
|
"epoch": 3.230987246102976, |
|
"grad_norm": 11.706295916540721, |
|
"learning_rate": 1.6636135468049122e-06, |
|
"logits": -0.5602142810821533, |
|
"logps": -75.6644515991211, |
|
"loss": 0.1165, |
|
"objective": 0.10757040977478027, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.10757040977478027, |
|
"step": 570 |
|
}, |
|
{ |
|
"dpo_loss": 0.5240556001663208, |
|
"epoch": 3.259329239489844, |
|
"grad_norm": 12.017586482980144, |
|
"learning_rate": 1.617055052228768e-06, |
|
"logits": -0.6023640036582947, |
|
"logps": -76.43074798583984, |
|
"loss": 0.1137, |
|
"objective": 0.11454036831855774, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.11454036086797714, |
|
"step": 575 |
|
}, |
|
{ |
|
"dpo_loss": 0.5436674952507019, |
|
"epoch": 3.287671232876712, |
|
"grad_norm": 12.849990562743383, |
|
"learning_rate": 1.5708438608491816e-06, |
|
"logits": -0.6143894195556641, |
|
"logps": -76.68331909179688, |
|
"loss": 0.1196, |
|
"objective": 0.11335032433271408, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5541666746139526, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.11335031688213348, |
|
"step": 580 |
|
}, |
|
{ |
|
"dpo_loss": 0.5411363244056702, |
|
"epoch": 3.3160132262635806, |
|
"grad_norm": 12.545535124570625, |
|
"learning_rate": 1.524998149670871e-06, |
|
"logits": -0.6247425675392151, |
|
"logps": -77.41455841064453, |
|
"loss": 0.113, |
|
"objective": 0.11495224386453629, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.612500011920929, |
|
"regularize": 0.11495222896337509, |
|
"step": 585 |
|
}, |
|
{ |
|
"dpo_loss": 0.5388593077659607, |
|
"epoch": 3.3443552196504487, |
|
"grad_norm": 12.180484930435734, |
|
"learning_rate": 1.479535951938243e-06, |
|
"logits": -0.6511493921279907, |
|
"logps": -77.2118911743164, |
|
"loss": 0.1101, |
|
"objective": 0.11349861323833466, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.11349860578775406, |
|
"step": 590 |
|
}, |
|
{ |
|
"dpo_loss": 0.5339847207069397, |
|
"epoch": 3.372697213037317, |
|
"grad_norm": 12.019912229417695, |
|
"learning_rate": 1.43447515004208e-06, |
|
"logits": -0.5625333189964294, |
|
"logps": -76.55901336669922, |
|
"loss": 0.1063, |
|
"objective": 0.09969887137413025, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.09969887137413025, |
|
"step": 595 |
|
}, |
|
{ |
|
"dpo_loss": 0.5057954788208008, |
|
"epoch": 3.4010392064241852, |
|
"grad_norm": 12.324261804726659, |
|
"learning_rate": 1.3898334684855647e-06, |
|
"logits": -0.5732893347740173, |
|
"logps": -76.14974975585938, |
|
"loss": 0.1018, |
|
"objective": 0.10744435340166092, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.10744435340166092, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.4010392064241852, |
|
"eval_dpo_loss": 0.7155191898345947, |
|
"eval_logits": -0.6519966721534729, |
|
"eval_logps": -80.26307678222656, |
|
"eval_loss": 0.5869894027709961, |
|
"eval_objective": 0.5793389678001404, |
|
"eval_ranking_idealized": 0.5888429880142212, |
|
"eval_ranking_idealized_expo": 0.5103305578231812, |
|
"eval_ranking_simple": 0.5227272510528564, |
|
"eval_regularize": 0.5793389678001404, |
|
"eval_runtime": 304.5422, |
|
"eval_samples_per_second": 19.012, |
|
"eval_steps_per_second": 0.795, |
|
"step": 600 |
|
}, |
|
{ |
|
"dpo_loss": 0.5424041152000427, |
|
"epoch": 3.4293811998110533, |
|
"grad_norm": 12.25686443542734, |
|
"learning_rate": 1.3456284669124159e-06, |
|
"logits": -0.5684671401977539, |
|
"logps": -78.04743194580078, |
|
"loss": 0.1064, |
|
"objective": 0.09537867456674576, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.09537866711616516, |
|
"step": 605 |
|
}, |
|
{ |
|
"dpo_loss": 0.5277451872825623, |
|
"epoch": 3.4577231931979218, |
|
"grad_norm": 11.965563307417655, |
|
"learning_rate": 1.301877533199859e-06, |
|
"logits": -0.5791481733322144, |
|
"logps": -75.86957550048828, |
|
"loss": 0.105, |
|
"objective": 0.09935756027698517, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.09935753792524338, |
|
"step": 610 |
|
}, |
|
{ |
|
"dpo_loss": 0.5237266421318054, |
|
"epoch": 3.48606518658479, |
|
"grad_norm": 11.710547942970107, |
|
"learning_rate": 1.2585978766191726e-06, |
|
"logits": -0.5997554063796997, |
|
"logps": -76.46390533447266, |
|
"loss": 0.0994, |
|
"objective": 0.0968371257185936, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.47083333134651184, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.0968371257185936, |
|
"step": 615 |
|
}, |
|
{ |
|
"dpo_loss": 0.5097241997718811, |
|
"epoch": 3.514407179971658, |
|
"grad_norm": 11.439925552543285, |
|
"learning_rate": 1.2158065210664848e-06, |
|
"logits": -0.5296372175216675, |
|
"logps": -75.6750259399414, |
|
"loss": 0.1027, |
|
"objective": 0.09600085765123367, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.09600085765123367, |
|
"step": 620 |
|
}, |
|
{ |
|
"dpo_loss": 0.5289521813392639, |
|
"epoch": 3.5427491733585263, |
|
"grad_norm": 12.536978726775153, |
|
"learning_rate": 1.1735202983664803e-06, |
|
"logits": -0.570234477519989, |
|
"logps": -74.44934844970703, |
|
"loss": 0.1036, |
|
"objective": 0.10480068624019623, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.10480068624019623, |
|
"step": 625 |
|
}, |
|
{ |
|
"dpo_loss": 0.5382587313652039, |
|
"epoch": 3.5710911667453944, |
|
"grad_norm": 12.482247688036189, |
|
"learning_rate": 1.1317558416516696e-06, |
|
"logits": -0.6371855139732361, |
|
"logps": -74.74573516845703, |
|
"loss": 0.0981, |
|
"objective": 0.08699598163366318, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.08699598163366318, |
|
"step": 630 |
|
}, |
|
{ |
|
"dpo_loss": 0.5292662978172302, |
|
"epoch": 3.5994331601322624, |
|
"grad_norm": 11.724186457910758, |
|
"learning_rate": 1.0905295788197993e-06, |
|
"logits": -0.6215745210647583, |
|
"logps": -75.0141372680664, |
|
"loss": 0.0957, |
|
"objective": 0.09982422739267349, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.09982422739267349, |
|
"step": 635 |
|
}, |
|
{ |
|
"dpo_loss": 0.5090000033378601, |
|
"epoch": 3.627775153519131, |
|
"grad_norm": 12.2959167635157, |
|
"learning_rate": 1.049857726072005e-06, |
|
"logits": -0.5938432812690735, |
|
"logps": -76.37276458740234, |
|
"loss": 0.0957, |
|
"objective": 0.09877195209264755, |
|
"ranking_idealized": 0.5541666746139526, |
|
"ranking_idealized_expo": 0.4625000059604645, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.09877195209264755, |
|
"step": 640 |
|
}, |
|
{ |
|
"dpo_loss": 0.5198237299919128, |
|
"epoch": 3.656117146905999, |
|
"grad_norm": 11.877643838835896, |
|
"learning_rate": 1.0097562815342215e-06, |
|
"logits": -0.6002366542816162, |
|
"logps": -74.8946762084961, |
|
"loss": 0.0894, |
|
"objective": 0.0859779417514801, |
|
"ranking_idealized": 0.6208333373069763, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.6083333492279053, |
|
"regularize": 0.0859779417514801, |
|
"step": 645 |
|
}, |
|
{ |
|
"dpo_loss": 0.5422871708869934, |
|
"epoch": 3.6844591402928675, |
|
"grad_norm": 12.290572025250485, |
|
"learning_rate": 9.702410189643838e-07, |
|
"logits": -0.6051768660545349, |
|
"logps": -75.9346694946289, |
|
"loss": 0.0908, |
|
"objective": 0.0903468057513237, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.0903467983007431, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 3.6844591402928675, |
|
"eval_dpo_loss": 0.7142183780670166, |
|
"eval_logits": -0.6950010061264038, |
|
"eval_logps": -80.09378051757812, |
|
"eval_loss": 0.5846312046051025, |
|
"eval_objective": 0.575107753276825, |
|
"eval_ranking_idealized": 0.5888429880142212, |
|
"eval_ranking_idealized_expo": 0.5103305578231812, |
|
"eval_ranking_simple": 0.5309917330741882, |
|
"eval_regularize": 0.575107753276825, |
|
"eval_runtime": 311.7236, |
|
"eval_samples_per_second": 18.574, |
|
"eval_steps_per_second": 0.776, |
|
"step": 650 |
|
}, |
|
{ |
|
"dpo_loss": 0.5320921540260315, |
|
"epoch": 3.7128011336797355, |
|
"grad_norm": 12.073732647268816, |
|
"learning_rate": 9.313274815478698e-07, |
|
"logits": -0.5923182964324951, |
|
"logps": -75.15804290771484, |
|
"loss": 0.0911, |
|
"objective": 0.08135481178760529, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.0813547894358635, |
|
"step": 655 |
|
}, |
|
{ |
|
"dpo_loss": 0.5302092432975769, |
|
"epoch": 3.7411431270666036, |
|
"grad_norm": 12.119943535015905, |
|
"learning_rate": 8.930309757836517e-07, |
|
"logits": -0.6394330859184265, |
|
"logps": -76.03792572021484, |
|
"loss": 0.0872, |
|
"objective": 0.08334127813577652, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5541666746139526, |
|
"ranking_simple": 0.6333333253860474, |
|
"regularize": 0.08334127813577652, |
|
"step": 660 |
|
}, |
|
{ |
|
"dpo_loss": 0.5253182649612427, |
|
"epoch": 3.769485120453472, |
|
"grad_norm": 12.163074542006058, |
|
"learning_rate": 8.553665654635343e-07, |
|
"logits": -0.6006038188934326, |
|
"logps": -75.9883804321289, |
|
"loss": 0.0854, |
|
"objective": 0.08219381421804428, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.08219381421804428, |
|
"step": 665 |
|
}, |
|
{ |
|
"dpo_loss": 0.5255737900733948, |
|
"epoch": 3.79782711384034, |
|
"grad_norm": 12.572774020559422, |
|
"learning_rate": 8.183490657468687e-07, |
|
"logits": -0.6632742285728455, |
|
"logps": -76.2689437866211, |
|
"loss": 0.0866, |
|
"objective": 0.08637838065624237, |
|
"ranking_idealized": 0.6791666746139526, |
|
"ranking_idealized_expo": 0.5958333611488342, |
|
"ranking_simple": 0.6833333373069763, |
|
"regularize": 0.08637836575508118, |
|
"step": 670 |
|
}, |
|
{ |
|
"dpo_loss": 0.5349328517913818, |
|
"epoch": 3.826169107227208, |
|
"grad_norm": 11.892804895505401, |
|
"learning_rate": 7.819930373330669e-07, |
|
"logits": -0.6208025813102722, |
|
"logps": -75.36246490478516, |
|
"loss": 0.0791, |
|
"objective": 0.07929237186908722, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.612500011920929, |
|
"regularize": 0.07929236441850662, |
|
"step": 675 |
|
}, |
|
{ |
|
"dpo_loss": 0.54377681016922, |
|
"epoch": 3.8545111006140766, |
|
"grad_norm": 11.851078631402974, |
|
"learning_rate": 7.463127807341966e-07, |
|
"logits": -0.5686856508255005, |
|
"logps": -75.87733459472656, |
|
"loss": 0.0851, |
|
"objective": 0.08981513231992722, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.08981513231992722, |
|
"step": 680 |
|
}, |
|
{ |
|
"dpo_loss": 0.5212653279304504, |
|
"epoch": 3.8828530940009447, |
|
"grad_norm": 12.638205294617679, |
|
"learning_rate": 7.113223306499336e-07, |
|
"logits": -0.5982258319854736, |
|
"logps": -75.38506317138672, |
|
"loss": 0.086, |
|
"objective": 0.07463731616735458, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.07463730871677399, |
|
"step": 685 |
|
}, |
|
{ |
|
"dpo_loss": 0.5221771001815796, |
|
"epoch": 3.9111950873878127, |
|
"grad_norm": 11.687391766152553, |
|
"learning_rate": 6.770354504470575e-07, |
|
"logits": -0.6342039108276367, |
|
"logps": -75.1083755493164, |
|
"loss": 0.0781, |
|
"objective": 0.07596276700496674, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.6458333134651184, |
|
"regularize": 0.07596276700496674, |
|
"step": 690 |
|
}, |
|
{ |
|
"dpo_loss": 0.5347076654434204, |
|
"epoch": 3.9395370807746812, |
|
"grad_norm": 11.88312797363931, |
|
"learning_rate": 6.434656267456843e-07, |
|
"logits": -0.5775099992752075, |
|
"logps": -76.01984405517578, |
|
"loss": 0.0768, |
|
"objective": 0.0815543457865715, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.0815543457865715, |
|
"step": 695 |
|
}, |
|
{ |
|
"dpo_loss": 0.5384576916694641, |
|
"epoch": 3.9678790741615493, |
|
"grad_norm": 11.978357414624993, |
|
"learning_rate": 6.106260641143547e-07, |
|
"logits": -0.5876869559288025, |
|
"logps": -76.2168960571289, |
|
"loss": 0.0782, |
|
"objective": 0.08425504714250565, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.08425504714250565, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 3.9678790741615493, |
|
"eval_dpo_loss": 0.7158195376396179, |
|
"eval_logits": -0.6694127321243286, |
|
"eval_logps": -80.57975769042969, |
|
"eval_loss": 0.5832462906837463, |
|
"eval_objective": 0.5775305032730103, |
|
"eval_ranking_idealized": 0.5888429880142212, |
|
"eval_ranking_idealized_expo": 0.5103305578231812, |
|
"eval_ranking_simple": 0.5216942429542542, |
|
"eval_regularize": 0.5775305032730103, |
|
"eval_runtime": 303.6208, |
|
"eval_samples_per_second": 19.07, |
|
"eval_steps_per_second": 0.797, |
|
"step": 700 |
|
}, |
|
{ |
|
"dpo_loss": 0.5220022797584534, |
|
"epoch": 3.9962210675484178, |
|
"grad_norm": 11.865443583379125, |
|
"learning_rate": 5.785296798760601e-07, |
|
"logits": -0.5430007576942444, |
|
"logps": -74.90709686279297, |
|
"loss": 0.0746, |
|
"objective": 0.07280976325273514, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.612500011920929, |
|
"regularize": 0.07280976325273514, |
|
"step": 705 |
|
}, |
|
{ |
|
"dpo_loss": 0.5136677026748657, |
|
"epoch": 4.024563060935286, |
|
"grad_norm": 12.91211905572677, |
|
"learning_rate": 5.471890990272666e-07, |
|
"logits": -0.5883128643035889, |
|
"logps": -76.31259155273438, |
|
"loss": 0.0646, |
|
"objective": 0.06166737899184227, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.06166737526655197, |
|
"step": 710 |
|
}, |
|
{ |
|
"dpo_loss": 0.5278608202934265, |
|
"epoch": 4.052905054322154, |
|
"grad_norm": 12.122689089246824, |
|
"learning_rate": 5.166166492719124e-07, |
|
"logits": -0.5741557478904724, |
|
"logps": -75.61717987060547, |
|
"loss": 0.0617, |
|
"objective": 0.060495439916849136, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.06049543619155884, |
|
"step": 715 |
|
}, |
|
{ |
|
"dpo_loss": 0.5187323093414307, |
|
"epoch": 4.081247047709022, |
|
"grad_norm": 11.560808093959789, |
|
"learning_rate": 4.868243561723535e-07, |
|
"logits": -0.5206624865531921, |
|
"logps": -78.19841003417969, |
|
"loss": 0.0635, |
|
"objective": 0.06401447206735611, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.06401447206735611, |
|
"step": 720 |
|
}, |
|
{ |
|
"dpo_loss": 0.540945827960968, |
|
"epoch": 4.109589041095891, |
|
"grad_norm": 11.843737418563835, |
|
"learning_rate": 4.57823938419153e-07, |
|
"logits": -0.5632360577583313, |
|
"logps": -75.70304107666016, |
|
"loss": 0.0616, |
|
"objective": 0.06045746058225632, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.06045745685696602, |
|
"step": 725 |
|
}, |
|
{ |
|
"dpo_loss": 0.5236831903457642, |
|
"epoch": 4.137931034482759, |
|
"grad_norm": 12.096865413097825, |
|
"learning_rate": 4.2962680322157335e-07, |
|
"logits": -0.6435456871986389, |
|
"logps": -76.09138488769531, |
|
"loss": 0.061, |
|
"objective": 0.059013016521930695, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.059013016521930695, |
|
"step": 730 |
|
}, |
|
{ |
|
"dpo_loss": 0.533221423625946, |
|
"epoch": 4.166273027869627, |
|
"grad_norm": 12.657248993273553, |
|
"learning_rate": 4.0224404182059443e-07, |
|
"logits": -0.5540097951889038, |
|
"logps": -77.43226623535156, |
|
"loss": 0.0556, |
|
"objective": 0.05801505967974663, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.05801505222916603, |
|
"step": 735 |
|
}, |
|
{ |
|
"dpo_loss": 0.5072778463363647, |
|
"epoch": 4.194615021256495, |
|
"grad_norm": 12.763432211885254, |
|
"learning_rate": 3.756864251262143e-07, |
|
"logits": -0.5889293551445007, |
|
"logps": -76.94921875, |
|
"loss": 0.0586, |
|
"objective": 0.06059374660253525, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.6083333492279053, |
|
"regularize": 0.06059373542666435, |
|
"step": 740 |
|
}, |
|
{ |
|
"dpo_loss": 0.5195721387863159, |
|
"epoch": 4.222957014643363, |
|
"grad_norm": 13.011382942960159, |
|
"learning_rate": 3.499643994807486e-07, |
|
"logits": -0.636326789855957, |
|
"logps": -74.38599395751953, |
|
"loss": 0.0567, |
|
"objective": 0.06092626973986626, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.06092626228928566, |
|
"step": 745 |
|
}, |
|
{ |
|
"dpo_loss": 0.5143676996231079, |
|
"epoch": 4.251299008030231, |
|
"grad_norm": 12.170871057268412, |
|
"learning_rate": 3.250880825498026e-07, |
|
"logits": -0.6765701770782471, |
|
"logps": -76.2116928100586, |
|
"loss": 0.0573, |
|
"objective": 0.05863998085260391, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.05863996967673302, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 4.251299008030231, |
|
"eval_dpo_loss": 0.7155545949935913, |
|
"eval_logits": -0.6764047145843506, |
|
"eval_logps": -80.59185791015625, |
|
"eval_loss": 0.5846724510192871, |
|
"eval_objective": 0.5774604678153992, |
|
"eval_ranking_idealized": 0.5888429880142212, |
|
"eval_ranking_idealized_expo": 0.5103305578231812, |
|
"eval_ranking_simple": 0.5237603187561035, |
|
"eval_regularize": 0.5774604678153992, |
|
"eval_runtime": 306.7663, |
|
"eval_samples_per_second": 18.874, |
|
"eval_steps_per_second": 0.789, |
|
"step": 750 |
|
}, |
|
{ |
|
"dpo_loss": 0.5219383239746094, |
|
"epoch": 4.2796410014171, |
|
"grad_norm": 12.194092389260657, |
|
"learning_rate": 3.0106725934252095e-07, |
|
"logits": -0.6311543583869934, |
|
"logps": -75.78201293945312, |
|
"loss": 0.0583, |
|
"objective": 0.05387051776051521, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.6083333492279053, |
|
"regularize": 0.053870514035224915, |
|
"step": 755 |
|
}, |
|
{ |
|
"dpo_loss": 0.5232734084129333, |
|
"epoch": 4.307982994803968, |
|
"grad_norm": 11.576901998049738, |
|
"learning_rate": 2.779113783626916e-07, |
|
"logits": -0.609634280204773, |
|
"logps": -76.47386932373047, |
|
"loss": 0.0558, |
|
"objective": 0.04752067103981972, |
|
"ranking_idealized": 0.6541666388511658, |
|
"ranking_idealized_expo": 0.5625, |
|
"ranking_simple": 0.6458333134651184, |
|
"regularize": 0.04752066358923912, |
|
"step": 760 |
|
}, |
|
{ |
|
"dpo_loss": 0.5060269832611084, |
|
"epoch": 4.336324988190836, |
|
"grad_norm": 11.974550505334834, |
|
"learning_rate": 2.5562954789221164e-07, |
|
"logits": -0.6198402047157288, |
|
"logps": -76.60380554199219, |
|
"loss": 0.0546, |
|
"objective": 0.048433538526296616, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.6416666507720947, |
|
"regularize": 0.04843352735042572, |
|
"step": 765 |
|
}, |
|
{ |
|
"dpo_loss": 0.5125846862792969, |
|
"epoch": 4.364666981577704, |
|
"grad_norm": 12.13893132092522, |
|
"learning_rate": 2.3423053240837518e-07, |
|
"logits": -0.5853985548019409, |
|
"logps": -75.1782455444336, |
|
"loss": 0.0577, |
|
"objective": 0.05432061105966568, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.054320596158504486, |
|
"step": 770 |
|
}, |
|
{ |
|
"dpo_loss": 0.5186619162559509, |
|
"epoch": 4.393008974964572, |
|
"grad_norm": 12.025324933873947, |
|
"learning_rate": 2.137227491364016e-07, |
|
"logits": -0.5982651114463806, |
|
"logps": -76.23176574707031, |
|
"loss": 0.0524, |
|
"objective": 0.05267190933227539, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.05267190933227539, |
|
"step": 775 |
|
}, |
|
{ |
|
"dpo_loss": 0.5261180400848389, |
|
"epoch": 4.42135096835144, |
|
"grad_norm": 11.779490213275317, |
|
"learning_rate": 1.941142647385469e-07, |
|
"logits": -0.6180116534233093, |
|
"logps": -74.97576141357422, |
|
"loss": 0.052, |
|
"objective": 0.04746713861823082, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.04746713116765022, |
|
"step": 780 |
|
}, |
|
{ |
|
"dpo_loss": 0.5437088012695312, |
|
"epoch": 4.449692961738309, |
|
"grad_norm": 12.241647561755466, |
|
"learning_rate": 1.7541279214111277e-07, |
|
"logits": -0.6545938849449158, |
|
"logps": -75.04468536376953, |
|
"loss": 0.0549, |
|
"objective": 0.06091846525669098, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.06091845780611038, |
|
"step": 785 |
|
}, |
|
{ |
|
"dpo_loss": 0.5158849358558655, |
|
"epoch": 4.478034955125177, |
|
"grad_norm": 12.172553863147998, |
|
"learning_rate": 1.5762568750059604e-07, |
|
"logits": -0.6263752579689026, |
|
"logps": -77.42415618896484, |
|
"loss": 0.0488, |
|
"objective": 0.050752636045217514, |
|
"ranking_idealized": 0.6416666507720947, |
|
"ranking_idealized_expo": 0.5583333373069763, |
|
"ranking_simple": 0.637499988079071, |
|
"regularize": 0.05075262859463692, |
|
"step": 790 |
|
}, |
|
{ |
|
"dpo_loss": 0.523824155330658, |
|
"epoch": 4.506376948512045, |
|
"grad_norm": 11.642533263792576, |
|
"learning_rate": 1.4075994731016895e-07, |
|
"logits": -0.5398118495941162, |
|
"logps": -77.37626647949219, |
|
"loss": 0.0497, |
|
"objective": 0.049972303211688995, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.0499722920358181, |
|
"step": 795 |
|
}, |
|
{ |
|
"dpo_loss": 0.5201839804649353, |
|
"epoch": 4.534718941898913, |
|
"grad_norm": 12.033841229595977, |
|
"learning_rate": 1.2482220564763669e-07, |
|
"logits": -0.5231271982192993, |
|
"logps": -76.26326751708984, |
|
"loss": 0.0513, |
|
"objective": 0.047722525894641876, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.04772252216935158, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 4.534718941898913, |
|
"eval_dpo_loss": 0.7149068713188171, |
|
"eval_logits": -0.680606484413147, |
|
"eval_logps": -80.50379943847656, |
|
"eval_loss": 0.5835468769073486, |
|
"eval_objective": 0.5758251547813416, |
|
"eval_ranking_idealized": 0.5888429880142212, |
|
"eval_ranking_idealized_expo": 0.5103305578231812, |
|
"eval_ranking_simple": 0.5247933864593506, |
|
"eval_regularize": 0.5758251547813416, |
|
"eval_runtime": 309.1293, |
|
"eval_samples_per_second": 18.73, |
|
"eval_steps_per_second": 0.783, |
|
"step": 800 |
|
}, |
|
{ |
|
"dpo_loss": 0.5226713418960571, |
|
"epoch": 4.563060935285781, |
|
"grad_norm": 11.85308064151459, |
|
"learning_rate": 1.0981873156594381e-07, |
|
"logits": -0.5926477909088135, |
|
"logps": -75.56033325195312, |
|
"loss": 0.0501, |
|
"objective": 0.04832206293940544, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.04832205921411514, |
|
"step": 805 |
|
}, |
|
{ |
|
"dpo_loss": 0.5298218727111816, |
|
"epoch": 4.59140292867265, |
|
"grad_norm": 11.585229252604742, |
|
"learning_rate": 9.575542662726756e-08, |
|
"logits": -0.6329571008682251, |
|
"logps": -74.88050079345703, |
|
"loss": 0.0486, |
|
"objective": 0.04479978233575821, |
|
"ranking_idealized": 0.5291666388511658, |
|
"ranking_idealized_expo": 0.4583333432674408, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 0.04479977861046791, |
|
"step": 810 |
|
}, |
|
{ |
|
"dpo_loss": 0.5306591391563416, |
|
"epoch": 4.619744922059518, |
|
"grad_norm": 11.532261417058377, |
|
"learning_rate": 8.26378225816582e-08, |
|
"logits": -0.5220829844474792, |
|
"logps": -76.33390045166016, |
|
"loss": 0.049, |
|
"objective": 0.04780552163720131, |
|
"ranking_idealized": 0.675000011920929, |
|
"ranking_idealized_expo": 0.5916666388511658, |
|
"ranking_simple": 0.6791666746139526, |
|
"regularize": 0.047805510461330414, |
|
"step": 815 |
|
}, |
|
{ |
|
"dpo_loss": 0.5376265048980713, |
|
"epoch": 4.648086915446386, |
|
"grad_norm": 11.844210455280477, |
|
"learning_rate": 7.047107919114588e-08, |
|
"logits": -0.6030393838882446, |
|
"logps": -76.0525131225586, |
|
"loss": 0.0493, |
|
"objective": 0.04802839830517769, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5625, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.04802839085459709, |
|
"step": 820 |
|
}, |
|
{ |
|
"dpo_loss": 0.537010133266449, |
|
"epoch": 4.6764289088332545, |
|
"grad_norm": 11.420139730897464, |
|
"learning_rate": 5.92599822001666e-08, |
|
"logits": -0.5595048666000366, |
|
"logps": -74.4270248413086, |
|
"loss": 0.0478, |
|
"objective": 0.04515855759382248, |
|
"ranking_idealized": 0.5541666746139526, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.04515855386853218, |
|
"step": 825 |
|
}, |
|
{ |
|
"dpo_loss": 0.5216527581214905, |
|
"epoch": 4.7047709022201225, |
|
"grad_norm": 11.727721968209657, |
|
"learning_rate": 4.9008941453107527e-08, |
|
"logits": -0.6450707912445068, |
|
"logps": -76.4205322265625, |
|
"loss": 0.0531, |
|
"objective": 0.05846472084522247, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.058464717119932175, |
|
"step": 830 |
|
}, |
|
{ |
|
"dpo_loss": 0.510058581829071, |
|
"epoch": 4.733112895606991, |
|
"grad_norm": 11.762662946153132, |
|
"learning_rate": 3.972198915970976e-08, |
|
"logits": -0.5860788226127625, |
|
"logps": -76.25663757324219, |
|
"loss": 0.0512, |
|
"objective": 0.04887576773762703, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.048875752836465836, |
|
"step": 835 |
|
}, |
|
{ |
|
"dpo_loss": 0.5002175569534302, |
|
"epoch": 4.7614548889938595, |
|
"grad_norm": 12.325128711177749, |
|
"learning_rate": 3.1402778309014284e-08, |
|
"logits": -0.5927218198776245, |
|
"logps": -76.40971374511719, |
|
"loss": 0.0481, |
|
"objective": 0.04862005263566971, |
|
"ranking_idealized": 0.6208333373069763, |
|
"ranking_idealized_expo": 0.5541666746139526, |
|
"ranking_simple": 0.612500011920929, |
|
"regularize": 0.04862004518508911, |
|
"step": 840 |
|
}, |
|
{ |
|
"dpo_loss": 0.532789945602417, |
|
"epoch": 4.7897968823807275, |
|
"grad_norm": 11.538897448416794, |
|
"learning_rate": 2.4054581232470785e-08, |
|
"logits": -0.5980377793312073, |
|
"logps": -76.28124237060547, |
|
"loss": 0.0459, |
|
"objective": 0.0512334480881691, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.4541666805744171, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.0512334480881691, |
|
"step": 845 |
|
}, |
|
{ |
|
"dpo_loss": 0.5221417546272278, |
|
"epoch": 4.818138875767596, |
|
"grad_norm": 11.64226324346139, |
|
"learning_rate": 1.768028831677926e-08, |
|
"logits": -0.5999422073364258, |
|
"logps": -75.14291381835938, |
|
"loss": 0.0447, |
|
"objective": 0.039600715041160583, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.03960070386528969, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 4.818138875767596, |
|
"eval_dpo_loss": 0.7150014042854309, |
|
"eval_logits": -0.680656373500824, |
|
"eval_logps": -80.54595184326172, |
|
"eval_loss": 0.5835261940956116, |
|
"eval_objective": 0.5760576128959656, |
|
"eval_ranking_idealized": 0.5888429880142212, |
|
"eval_ranking_idealized_expo": 0.5103305578231812, |
|
"eval_ranking_simple": 0.5247933864593506, |
|
"eval_regularize": 0.5760576128959656, |
|
"eval_runtime": 308.7978, |
|
"eval_samples_per_second": 18.75, |
|
"eval_steps_per_second": 0.784, |
|
"step": 850 |
|
}, |
|
{ |
|
"dpo_loss": 0.5369904637336731, |
|
"epoch": 4.846480869154464, |
|
"grad_norm": 11.853216981436601, |
|
"learning_rate": 1.2282406866966078e-08, |
|
"logits": -0.5668095946311951, |
|
"logps": -75.38987731933594, |
|
"loss": 0.0458, |
|
"objective": 0.04888928309082985, |
|
"ranking_idealized": 0.6458333134651184, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.6333333253860474, |
|
"regularize": 0.048889271914958954, |
|
"step": 855 |
|
}, |
|
{ |
|
"dpo_loss": 0.5126673579216003, |
|
"epoch": 4.874822862541333, |
|
"grad_norm": 12.066886758890655, |
|
"learning_rate": 7.863060120144316e-09, |
|
"logits": -0.5576753616333008, |
|
"logps": -75.38652038574219, |
|
"loss": 0.0467, |
|
"objective": 0.042654138058423996, |
|
"ranking_idealized": 0.6541666388511658, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.6541666388511658, |
|
"regularize": 0.0426541231572628, |
|
"step": 860 |
|
}, |
|
{ |
|
"dpo_loss": 0.5163237452507019, |
|
"epoch": 4.903164855928201, |
|
"grad_norm": 11.881483274725076, |
|
"learning_rate": 4.423986410346526e-09, |
|
"logits": -0.6045090556144714, |
|
"logps": -74.37069702148438, |
|
"loss": 0.0434, |
|
"objective": 0.04223904386162758, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.042239028960466385, |
|
"step": 865 |
|
}, |
|
{ |
|
"dpo_loss": 0.5167383551597595, |
|
"epoch": 4.931506849315069, |
|
"grad_norm": 11.995382070296834, |
|
"learning_rate": 1.9665384847583622e-09, |
|
"logits": -0.6181827783584595, |
|
"logps": -76.19407653808594, |
|
"loss": 0.0445, |
|
"objective": 0.04629291966557503, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.04629291221499443, |
|
"step": 870 |
|
}, |
|
{ |
|
"dpo_loss": 0.5252609252929688, |
|
"epoch": 4.959848842701937, |
|
"grad_norm": 11.846766301958432, |
|
"learning_rate": 4.916829716183901e-10, |
|
"logits": -0.5762243866920471, |
|
"logps": -75.8907241821289, |
|
"loss": 0.0472, |
|
"objective": 0.04822330176830292, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.612500011920929, |
|
"regularize": 0.04822329804301262, |
|
"step": 875 |
|
}, |
|
{ |
|
"dpo_loss": 0.5235641598701477, |
|
"epoch": 4.988190836088805, |
|
"grad_norm": 11.641506789650348, |
|
"learning_rate": 0.0, |
|
"logits": -0.6528417468070984, |
|
"logps": -76.5003433227539, |
|
"loss": 0.0462, |
|
"objective": 0.047958675771951675, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.625, |
|
"regularize": 0.04795865714550018, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 4.988190836088805, |
|
"step": 880, |
|
"total_flos": 0.0, |
|
"train_loss": 0.1997517315332185, |
|
"train_runtime": 41462.5211, |
|
"train_samples_per_second": 6.126, |
|
"train_steps_per_second": 0.021 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 880, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|