|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.992914501653283, |
|
"eval_steps": 50, |
|
"global_step": 1056, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"dpo_loss": 0.6931471824645996, |
|
"epoch": 0.002834199338686821, |
|
"grad_norm": 22.07642892977258, |
|
"learning_rate": 4.716981132075472e-08, |
|
"logits": -1.2867579460144043, |
|
"logps": -84.34933471679688, |
|
"loss": 0.7323, |
|
"objective": 0.7293278574943542, |
|
"ranking_simple": 0.5833333134651184, |
|
"step": 1 |
|
}, |
|
{ |
|
"dpo_loss": 0.6926142573356628, |
|
"epoch": 0.014170996693434105, |
|
"grad_norm": 21.921066098011842, |
|
"learning_rate": 2.358490566037736e-07, |
|
"logits": -1.4302740097045898, |
|
"logps": -83.70521545410156, |
|
"loss": 0.7347, |
|
"objective": 0.7329134345054626, |
|
"ranking_simple": 0.4895833432674408, |
|
"step": 5 |
|
}, |
|
{ |
|
"dpo_loss": 0.6933749318122864, |
|
"epoch": 0.02834199338686821, |
|
"grad_norm": 23.783506033511284, |
|
"learning_rate": 4.716981132075472e-07, |
|
"logits": -1.4030728340148926, |
|
"logps": -83.88999938964844, |
|
"loss": 0.7338, |
|
"objective": 0.733830451965332, |
|
"ranking_simple": 0.5833333134651184, |
|
"step": 10 |
|
}, |
|
{ |
|
"dpo_loss": 0.687871515750885, |
|
"epoch": 0.042512990080302314, |
|
"grad_norm": 22.035924243469346, |
|
"learning_rate": 7.075471698113208e-07, |
|
"logits": -1.5375704765319824, |
|
"logps": -82.89444732666016, |
|
"loss": 0.7315, |
|
"objective": 0.7324588894844055, |
|
"ranking_simple": 0.574999988079071, |
|
"step": 15 |
|
}, |
|
{ |
|
"dpo_loss": 0.6827042102813721, |
|
"epoch": 0.05668398677373642, |
|
"grad_norm": 20.603492069244336, |
|
"learning_rate": 9.433962264150944e-07, |
|
"logits": -1.377241611480713, |
|
"logps": -81.56272888183594, |
|
"loss": 0.726, |
|
"objective": 0.7265715003013611, |
|
"ranking_simple": 0.48750001192092896, |
|
"step": 20 |
|
}, |
|
{ |
|
"dpo_loss": 0.6807191967964172, |
|
"epoch": 0.07085498346717052, |
|
"grad_norm": 19.555842775301652, |
|
"learning_rate": 1.179245283018868e-06, |
|
"logits": -1.4146537780761719, |
|
"logps": -81.54867553710938, |
|
"loss": 0.7175, |
|
"objective": 0.7202857732772827, |
|
"ranking_simple": 0.5708333253860474, |
|
"step": 25 |
|
}, |
|
{ |
|
"dpo_loss": 0.6734613180160522, |
|
"epoch": 0.08502598016060463, |
|
"grad_norm": 19.538210305976264, |
|
"learning_rate": 1.4150943396226415e-06, |
|
"logits": -1.5176372528076172, |
|
"logps": -83.47260284423828, |
|
"loss": 0.7164, |
|
"objective": 0.7164552807807922, |
|
"ranking_simple": 0.48750001192092896, |
|
"step": 30 |
|
}, |
|
{ |
|
"dpo_loss": 0.669752836227417, |
|
"epoch": 0.09919697685403873, |
|
"grad_norm": 21.78747337027822, |
|
"learning_rate": 1.650943396226415e-06, |
|
"logits": -1.5162482261657715, |
|
"logps": -83.25272369384766, |
|
"loss": 0.7048, |
|
"objective": 0.70932936668396, |
|
"ranking_simple": 0.5375000238418579, |
|
"step": 35 |
|
}, |
|
{ |
|
"dpo_loss": 0.6691684722900391, |
|
"epoch": 0.11336797354747284, |
|
"grad_norm": 22.101328341871838, |
|
"learning_rate": 1.8867924528301889e-06, |
|
"logits": -1.522802710533142, |
|
"logps": -81.73042297363281, |
|
"loss": 0.6976, |
|
"objective": 0.7093203663825989, |
|
"ranking_simple": 0.5041666626930237, |
|
"step": 40 |
|
}, |
|
{ |
|
"dpo_loss": 0.6488091945648193, |
|
"epoch": 0.12753897024090693, |
|
"grad_norm": 18.71576103393132, |
|
"learning_rate": 2.1226415094339624e-06, |
|
"logits": -1.530007004737854, |
|
"logps": -80.75597381591797, |
|
"loss": 0.6962, |
|
"objective": 0.6907859444618225, |
|
"ranking_simple": 0.5208333134651184, |
|
"step": 45 |
|
}, |
|
{ |
|
"dpo_loss": 0.6503397822380066, |
|
"epoch": 0.14170996693434104, |
|
"grad_norm": 19.644583345415946, |
|
"learning_rate": 2.358490566037736e-06, |
|
"logits": -1.5319806337356567, |
|
"logps": -83.5707015991211, |
|
"loss": 0.6826, |
|
"objective": 0.6971887946128845, |
|
"ranking_simple": 0.5166666507720947, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.14170996693434104, |
|
"eval_dpo_loss": 0.6827681660652161, |
|
"eval_logits": -1.5367929935455322, |
|
"eval_logps": -92.3091812133789, |
|
"eval_loss": 0.7254260182380676, |
|
"eval_objective": 0.7272183299064636, |
|
"eval_ranking_simple": 0.5279502868652344, |
|
"eval_runtime": 368.8841, |
|
"eval_samples_per_second": 15.696, |
|
"eval_steps_per_second": 1.309, |
|
"step": 50 |
|
}, |
|
{ |
|
"dpo_loss": 0.6309434771537781, |
|
"epoch": 0.15588096362777515, |
|
"grad_norm": 20.382972919722654, |
|
"learning_rate": 2.5943396226415095e-06, |
|
"logits": -1.5233420133590698, |
|
"logps": -86.92516326904297, |
|
"loss": 0.6735, |
|
"objective": 0.6735023260116577, |
|
"ranking_simple": 0.5291666388511658, |
|
"step": 55 |
|
}, |
|
{ |
|
"dpo_loss": 0.6313819885253906, |
|
"epoch": 0.17005196032120926, |
|
"grad_norm": 18.13044483387993, |
|
"learning_rate": 2.830188679245283e-06, |
|
"logits": -1.4574053287506104, |
|
"logps": -87.16276550292969, |
|
"loss": 0.6756, |
|
"objective": 0.680309534072876, |
|
"ranking_simple": 0.5666666626930237, |
|
"step": 60 |
|
}, |
|
{ |
|
"dpo_loss": 0.5997079610824585, |
|
"epoch": 0.18422295701464336, |
|
"grad_norm": 18.96425221102233, |
|
"learning_rate": 3.0660377358490567e-06, |
|
"logits": -1.554844617843628, |
|
"logps": -83.63428497314453, |
|
"loss": 0.6591, |
|
"objective": 0.6451537609100342, |
|
"ranking_simple": 0.6000000238418579, |
|
"step": 65 |
|
}, |
|
{ |
|
"dpo_loss": 0.585650622844696, |
|
"epoch": 0.19839395370807747, |
|
"grad_norm": 17.70445379457848, |
|
"learning_rate": 3.30188679245283e-06, |
|
"logits": -1.6170697212219238, |
|
"logps": -86.24830627441406, |
|
"loss": 0.6426, |
|
"objective": 0.6370573043823242, |
|
"ranking_simple": 0.574999988079071, |
|
"step": 70 |
|
}, |
|
{ |
|
"dpo_loss": 0.61463463306427, |
|
"epoch": 0.21256495040151158, |
|
"grad_norm": 21.475097381950583, |
|
"learning_rate": 3.5377358490566038e-06, |
|
"logits": -1.5737711191177368, |
|
"logps": -88.68922424316406, |
|
"loss": 0.6607, |
|
"objective": 0.6711177229881287, |
|
"ranking_simple": 0.5833333134651184, |
|
"step": 75 |
|
}, |
|
{ |
|
"dpo_loss": 0.5694079995155334, |
|
"epoch": 0.22673594709494568, |
|
"grad_norm": 19.818901321925456, |
|
"learning_rate": 3.7735849056603777e-06, |
|
"logits": -1.6495484113693237, |
|
"logps": -86.88624572753906, |
|
"loss": 0.6294, |
|
"objective": 0.6182950139045715, |
|
"ranking_simple": 0.6291666626930237, |
|
"step": 80 |
|
}, |
|
{ |
|
"dpo_loss": 0.5784618854522705, |
|
"epoch": 0.2409069437883798, |
|
"grad_norm": 15.843708271810604, |
|
"learning_rate": 4.009433962264152e-06, |
|
"logits": -1.6493085622787476, |
|
"logps": -86.69010162353516, |
|
"loss": 0.637, |
|
"objective": 0.6347183585166931, |
|
"ranking_simple": 0.5958333611488342, |
|
"step": 85 |
|
}, |
|
{ |
|
"dpo_loss": 0.5171253085136414, |
|
"epoch": 0.25507794048181387, |
|
"grad_norm": 15.928618564308922, |
|
"learning_rate": 4.245283018867925e-06, |
|
"logits": -1.7159606218338013, |
|
"logps": -87.39143371582031, |
|
"loss": 0.624, |
|
"objective": 0.5702006816864014, |
|
"ranking_simple": 0.625, |
|
"step": 90 |
|
}, |
|
{ |
|
"dpo_loss": 0.5523179173469543, |
|
"epoch": 0.269248937175248, |
|
"grad_norm": 17.73357042390042, |
|
"learning_rate": 4.481132075471699e-06, |
|
"logits": -1.6336411237716675, |
|
"logps": -92.6693344116211, |
|
"loss": 0.6238, |
|
"objective": 0.6184098720550537, |
|
"ranking_simple": 0.5874999761581421, |
|
"step": 95 |
|
}, |
|
{ |
|
"dpo_loss": 0.5685542821884155, |
|
"epoch": 0.2834199338686821, |
|
"grad_norm": 19.615233623313884, |
|
"learning_rate": 4.716981132075472e-06, |
|
"logits": -1.5585684776306152, |
|
"logps": -92.34257507324219, |
|
"loss": 0.6518, |
|
"objective": 0.6242761015892029, |
|
"ranking_simple": 0.5916666388511658, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2834199338686821, |
|
"eval_dpo_loss": 0.6727089285850525, |
|
"eval_logits": -1.6149238348007202, |
|
"eval_logps": -99.43962097167969, |
|
"eval_loss": 0.7250885367393494, |
|
"eval_objective": 0.7208768725395203, |
|
"eval_ranking_simple": 0.5383023023605347, |
|
"eval_runtime": 370.4436, |
|
"eval_samples_per_second": 15.63, |
|
"eval_steps_per_second": 1.304, |
|
"step": 100 |
|
}, |
|
{ |
|
"dpo_loss": 0.5984646677970886, |
|
"epoch": 0.2975909305621162, |
|
"grad_norm": 17.951005909363573, |
|
"learning_rate": 4.952830188679246e-06, |
|
"logits": -1.680674433708191, |
|
"logps": -93.13371276855469, |
|
"loss": 0.6355, |
|
"objective": 0.6607668399810791, |
|
"ranking_simple": 0.625, |
|
"step": 105 |
|
}, |
|
{ |
|
"dpo_loss": 0.5449077486991882, |
|
"epoch": 0.3117619272555503, |
|
"grad_norm": 15.66083104197914, |
|
"learning_rate": 4.999781286194085e-06, |
|
"logits": -1.5491271018981934, |
|
"logps": -89.25350189208984, |
|
"loss": 0.618, |
|
"objective": 0.6060856580734253, |
|
"ranking_simple": 0.5916666388511658, |
|
"step": 110 |
|
}, |
|
{ |
|
"dpo_loss": 0.5834077596664429, |
|
"epoch": 0.32593292394898443, |
|
"grad_norm": 16.011885176044043, |
|
"learning_rate": 4.998892826944418e-06, |
|
"logits": -1.4953746795654297, |
|
"logps": -86.22123718261719, |
|
"loss": 0.6324, |
|
"objective": 0.6475391387939453, |
|
"ranking_simple": 0.5916666388511658, |
|
"step": 115 |
|
}, |
|
{ |
|
"dpo_loss": 0.5353394746780396, |
|
"epoch": 0.3401039206424185, |
|
"grad_norm": 15.412706668770728, |
|
"learning_rate": 4.997321195347154e-06, |
|
"logits": -1.4954169988632202, |
|
"logps": -86.4836654663086, |
|
"loss": 0.6279, |
|
"objective": 0.5993959903717041, |
|
"ranking_simple": 0.6208333373069763, |
|
"step": 120 |
|
}, |
|
{ |
|
"dpo_loss": 0.5647523999214172, |
|
"epoch": 0.35427491733585265, |
|
"grad_norm": 15.94277628603713, |
|
"learning_rate": 4.9950668210706795e-06, |
|
"logits": -1.5816553831100464, |
|
"logps": -87.99699401855469, |
|
"loss": 0.6255, |
|
"objective": 0.6341190934181213, |
|
"ranking_simple": 0.5874999761581421, |
|
"step": 125 |
|
}, |
|
{ |
|
"dpo_loss": 0.537562906742096, |
|
"epoch": 0.3684459140292867, |
|
"grad_norm": 13.361047111116015, |
|
"learning_rate": 4.992130320438411e-06, |
|
"logits": -1.486953854560852, |
|
"logps": -82.83094787597656, |
|
"loss": 0.626, |
|
"objective": 0.6004360318183899, |
|
"ranking_simple": 0.6291666626930237, |
|
"step": 130 |
|
}, |
|
{ |
|
"dpo_loss": 0.5634739398956299, |
|
"epoch": 0.3826169107227208, |
|
"grad_norm": 14.998158738648598, |
|
"learning_rate": 4.988512496260302e-06, |
|
"logits": -1.3754876852035522, |
|
"logps": -82.95647430419922, |
|
"loss": 0.617, |
|
"objective": 0.6264640092849731, |
|
"ranking_simple": 0.5958333611488342, |
|
"step": 135 |
|
}, |
|
{ |
|
"dpo_loss": 0.5888128876686096, |
|
"epoch": 0.39678790741615494, |
|
"grad_norm": 17.662529862358365, |
|
"learning_rate": 4.984214337613357e-06, |
|
"logits": -1.298575520515442, |
|
"logps": -82.97203826904297, |
|
"loss": 0.6332, |
|
"objective": 0.6609014868736267, |
|
"ranking_simple": 0.612500011920929, |
|
"step": 140 |
|
}, |
|
{ |
|
"dpo_loss": 0.5268335342407227, |
|
"epoch": 0.410958904109589, |
|
"grad_norm": 15.28899400178431, |
|
"learning_rate": 4.979237019571235e-06, |
|
"logits": -1.3319251537322998, |
|
"logps": -83.20392608642578, |
|
"loss": 0.6108, |
|
"objective": 0.5984794497489929, |
|
"ranking_simple": 0.699999988079071, |
|
"step": 145 |
|
}, |
|
{ |
|
"dpo_loss": 0.5154529213905334, |
|
"epoch": 0.42512990080302315, |
|
"grad_norm": 14.256755917631622, |
|
"learning_rate": 4.97358190288299e-06, |
|
"logits": -1.389101505279541, |
|
"logps": -84.82242584228516, |
|
"loss": 0.5964, |
|
"objective": 0.5867258310317993, |
|
"ranking_simple": 0.5874999761581421, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.42512990080302315, |
|
"eval_dpo_loss": 0.6714372634887695, |
|
"eval_logits": -1.4408866167068481, |
|
"eval_logps": -90.21656036376953, |
|
"eval_loss": 0.7329566478729248, |
|
"eval_objective": 0.7246823310852051, |
|
"eval_ranking_simple": 0.5408902764320374, |
|
"eval_runtime": 368.6547, |
|
"eval_samples_per_second": 15.706, |
|
"eval_steps_per_second": 1.31, |
|
"step": 150 |
|
}, |
|
{ |
|
"dpo_loss": 0.509684681892395, |
|
"epoch": 0.43930089749645723, |
|
"grad_norm": 15.222485168209062, |
|
"learning_rate": 4.967250533601059e-06, |
|
"logits": -1.5175641775131226, |
|
"logps": -85.33422088623047, |
|
"loss": 0.6038, |
|
"objective": 0.5891249179840088, |
|
"ranking_simple": 0.6208333373069763, |
|
"step": 155 |
|
}, |
|
{ |
|
"dpo_loss": 0.5627656579017639, |
|
"epoch": 0.45347189418989137, |
|
"grad_norm": 14.37066166651516, |
|
"learning_rate": 4.9602446426585845e-06, |
|
"logits": -1.444894552230835, |
|
"logps": -85.82301330566406, |
|
"loss": 0.6235, |
|
"objective": 0.6422544121742249, |
|
"ranking_simple": 0.6208333373069763, |
|
"step": 160 |
|
}, |
|
{ |
|
"dpo_loss": 0.5278021693229675, |
|
"epoch": 0.46764289088332545, |
|
"grad_norm": 13.248877681668944, |
|
"learning_rate": 4.952566145396197e-06, |
|
"logits": -1.513926386833191, |
|
"logps": -86.929443359375, |
|
"loss": 0.6091, |
|
"objective": 0.6052799820899963, |
|
"ranking_simple": 0.612500011920929, |
|
"step": 165 |
|
}, |
|
{ |
|
"dpo_loss": 0.5260218977928162, |
|
"epoch": 0.4818138875767596, |
|
"grad_norm": 11.286148523723739, |
|
"learning_rate": 4.944217141038379e-06, |
|
"logits": -1.5067484378814697, |
|
"logps": -87.19461059570312, |
|
"loss": 0.5865, |
|
"objective": 0.5946651697158813, |
|
"ranking_simple": 0.5791666507720947, |
|
"step": 170 |
|
}, |
|
{ |
|
"dpo_loss": 0.532828688621521, |
|
"epoch": 0.49598488427019366, |
|
"grad_norm": 13.880514840470576, |
|
"learning_rate": 4.935199912119558e-06, |
|
"logits": -1.3940012454986572, |
|
"logps": -87.2088851928711, |
|
"loss": 0.5986, |
|
"objective": 0.6150615811347961, |
|
"ranking_simple": 0.6416666507720947, |
|
"step": 175 |
|
}, |
|
{ |
|
"dpo_loss": 0.5224838852882385, |
|
"epoch": 0.5101558809636277, |
|
"grad_norm": 13.177690520129488, |
|
"learning_rate": 4.925516923860083e-06, |
|
"logits": -1.4137970209121704, |
|
"logps": -87.7735366821289, |
|
"loss": 0.572, |
|
"objective": 0.609702467918396, |
|
"ranking_simple": 0.6000000238418579, |
|
"step": 180 |
|
}, |
|
{ |
|
"dpo_loss": 0.485040545463562, |
|
"epoch": 0.5243268776570619, |
|
"grad_norm": 12.914253483146682, |
|
"learning_rate": 4.9151708234922605e-06, |
|
"logits": -1.4367740154266357, |
|
"logps": -85.89669036865234, |
|
"loss": 0.566, |
|
"objective": 0.5699875950813293, |
|
"ranking_simple": 0.6541666388511658, |
|
"step": 185 |
|
}, |
|
{ |
|
"dpo_loss": 0.5013444423675537, |
|
"epoch": 0.538497874350496, |
|
"grad_norm": 11.870478509302453, |
|
"learning_rate": 4.904164439536626e-06, |
|
"logits": -1.4781168699264526, |
|
"logps": -84.88081359863281, |
|
"loss": 0.5937, |
|
"objective": 0.5846849083900452, |
|
"ranking_simple": 0.675000011920929, |
|
"step": 190 |
|
}, |
|
{ |
|
"dpo_loss": 0.47530683875083923, |
|
"epoch": 0.5526688710439301, |
|
"grad_norm": 13.38945439253625, |
|
"learning_rate": 4.8925007810286555e-06, |
|
"logits": -1.5154672861099243, |
|
"logps": -84.33358001708984, |
|
"loss": 0.6047, |
|
"objective": 0.5678978562355042, |
|
"ranking_simple": 0.6583333611488342, |
|
"step": 195 |
|
}, |
|
{ |
|
"dpo_loss": 0.49561557173728943, |
|
"epoch": 0.5668398677373642, |
|
"grad_norm": 11.74052506226232, |
|
"learning_rate": 4.880183036696123e-06, |
|
"logits": -1.5464282035827637, |
|
"logps": -85.40785217285156, |
|
"loss": 0.5794, |
|
"objective": 0.5721699595451355, |
|
"ranking_simple": 0.6333333253860474, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.5668398677373642, |
|
"eval_dpo_loss": 0.6857984662055969, |
|
"eval_logits": -1.5459333658218384, |
|
"eval_logps": -90.88076782226562, |
|
"eval_loss": 0.7543493509292603, |
|
"eval_objective": 0.743674099445343, |
|
"eval_ranking_simple": 0.5377846956253052, |
|
"eval_runtime": 368.3702, |
|
"eval_samples_per_second": 15.718, |
|
"eval_steps_per_second": 1.311, |
|
"step": 200 |
|
}, |
|
{ |
|
"dpo_loss": 0.49015435576438904, |
|
"epoch": 0.5810108644307983, |
|
"grad_norm": 13.14364469960144, |
|
"learning_rate": 4.867214574087338e-06, |
|
"logits": -1.4027189016342163, |
|
"logps": -88.2087631225586, |
|
"loss": 0.5739, |
|
"objective": 0.5810206532478333, |
|
"ranking_simple": 0.625, |
|
"step": 205 |
|
}, |
|
{ |
|
"dpo_loss": 0.4410454034805298, |
|
"epoch": 0.5951818611242324, |
|
"grad_norm": 13.072326727623208, |
|
"learning_rate": 4.853598938650487e-06, |
|
"logits": -1.4029525518417358, |
|
"logps": -86.05158996582031, |
|
"loss": 0.5515, |
|
"objective": 0.52931809425354, |
|
"ranking_simple": 0.6708333492279053, |
|
"step": 210 |
|
}, |
|
{ |
|
"dpo_loss": 0.47003600001335144, |
|
"epoch": 0.6093528578176665, |
|
"grad_norm": 12.681896576326764, |
|
"learning_rate": 4.8393398527643495e-06, |
|
"logits": -1.4985297918319702, |
|
"logps": -84.5003433227539, |
|
"loss": 0.5691, |
|
"objective": 0.5656867623329163, |
|
"ranking_simple": 0.6416666507720947, |
|
"step": 215 |
|
}, |
|
{ |
|
"dpo_loss": 0.4842369854450226, |
|
"epoch": 0.6235238545111006, |
|
"grad_norm": 12.185860749706057, |
|
"learning_rate": 4.824441214720629e-06, |
|
"logits": -1.5714740753173828, |
|
"logps": -84.36730194091797, |
|
"loss": 0.5581, |
|
"objective": 0.5698367953300476, |
|
"ranking_simple": 0.6291666626930237, |
|
"step": 220 |
|
}, |
|
{ |
|
"dpo_loss": 0.499014288187027, |
|
"epoch": 0.6376948512045347, |
|
"grad_norm": 11.86860643002718, |
|
"learning_rate": 4.808907097658205e-06, |
|
"logits": -1.5924923419952393, |
|
"logps": -84.66715240478516, |
|
"loss": 0.5884, |
|
"objective": 0.5895112752914429, |
|
"ranking_simple": 0.6083333492279053, |
|
"step": 225 |
|
}, |
|
{ |
|
"dpo_loss": 0.45395275950431824, |
|
"epoch": 0.6518658478979689, |
|
"grad_norm": 11.069924512206331, |
|
"learning_rate": 4.7927417484495756e-06, |
|
"logits": -1.4241927862167358, |
|
"logps": -83.54296112060547, |
|
"loss": 0.5726, |
|
"objective": 0.5383394956588745, |
|
"ranking_simple": 0.6791666746139526, |
|
"step": 230 |
|
}, |
|
{ |
|
"dpo_loss": 0.491834819316864, |
|
"epoch": 0.6660368445914029, |
|
"grad_norm": 10.918136207544526, |
|
"learning_rate": 4.7759495865398035e-06, |
|
"logits": -1.3152087926864624, |
|
"logps": -84.92122650146484, |
|
"loss": 0.5635, |
|
"objective": 0.575198233127594, |
|
"ranking_simple": 0.6083333492279053, |
|
"step": 235 |
|
}, |
|
{ |
|
"dpo_loss": 0.4870225191116333, |
|
"epoch": 0.680207841284837, |
|
"grad_norm": 11.379260144258156, |
|
"learning_rate": 4.758535202738287e-06, |
|
"logits": -1.5095748901367188, |
|
"logps": -87.46720123291016, |
|
"loss": 0.5672, |
|
"objective": 0.5821055173873901, |
|
"ranking_simple": 0.6708333492279053, |
|
"step": 240 |
|
}, |
|
{ |
|
"dpo_loss": 0.541431725025177, |
|
"epoch": 0.6943788379782712, |
|
"grad_norm": 12.918426431601308, |
|
"learning_rate": 4.740503357963676e-06, |
|
"logits": -1.5333224534988403, |
|
"logps": -86.5069808959961, |
|
"loss": 0.5804, |
|
"objective": 0.6269174218177795, |
|
"ranking_simple": 0.6666666865348816, |
|
"step": 245 |
|
}, |
|
{ |
|
"dpo_loss": 0.46632593870162964, |
|
"epoch": 0.7085498346717053, |
|
"grad_norm": 11.479829992169247, |
|
"learning_rate": 4.721858981942284e-06, |
|
"logits": -1.311442494392395, |
|
"logps": -82.5728530883789, |
|
"loss": 0.5802, |
|
"objective": 0.559305727481842, |
|
"ranking_simple": 0.6791666746139526, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.7085498346717053, |
|
"eval_dpo_loss": 0.6874057650566101, |
|
"eval_logits": -1.5326449871063232, |
|
"eval_logps": -86.87519073486328, |
|
"eval_loss": 0.7558908462524414, |
|
"eval_objective": 0.7459491491317749, |
|
"eval_ranking_simple": 0.5403726696968079, |
|
"eval_runtime": 368.3582, |
|
"eval_samples_per_second": 15.718, |
|
"eval_steps_per_second": 1.311, |
|
"step": 250 |
|
}, |
|
{ |
|
"dpo_loss": 0.4774031341075897, |
|
"epoch": 0.7227208313651393, |
|
"grad_norm": 9.487597580618754, |
|
"learning_rate": 4.702607171860354e-06, |
|
"logits": -1.5363638401031494, |
|
"logps": -80.10908508300781, |
|
"loss": 0.5586, |
|
"objective": 0.5592390894889832, |
|
"ranking_simple": 0.6291666626930237, |
|
"step": 255 |
|
}, |
|
{ |
|
"dpo_loss": 0.4512685537338257, |
|
"epoch": 0.7368918280585735, |
|
"grad_norm": 10.042345370580236, |
|
"learning_rate": 4.682753190970533e-06, |
|
"logits": -1.5129272937774658, |
|
"logps": -81.99565887451172, |
|
"loss": 0.5574, |
|
"objective": 0.5405219793319702, |
|
"ranking_simple": 0.6333333253860474, |
|
"step": 260 |
|
}, |
|
{ |
|
"dpo_loss": 0.4361381232738495, |
|
"epoch": 0.7510628247520076, |
|
"grad_norm": 10.77799422518219, |
|
"learning_rate": 4.6623024671529555e-06, |
|
"logits": -1.470523715019226, |
|
"logps": -81.5511703491211, |
|
"loss": 0.5503, |
|
"objective": 0.5269137620925903, |
|
"ranking_simple": 0.7124999761581421, |
|
"step": 265 |
|
}, |
|
{ |
|
"dpo_loss": 0.48189878463745117, |
|
"epoch": 0.7652338214454416, |
|
"grad_norm": 12.181902084596548, |
|
"learning_rate": 4.641260591431315e-06, |
|
"logits": -1.5885664224624634, |
|
"logps": -82.85069274902344, |
|
"loss": 0.5554, |
|
"objective": 0.5691978931427002, |
|
"ranking_simple": 0.6416666507720947, |
|
"step": 270 |
|
}, |
|
{ |
|
"dpo_loss": 0.47639578580856323, |
|
"epoch": 0.7794048181388757, |
|
"grad_norm": 10.23476141708296, |
|
"learning_rate": 4.61963331644433e-06, |
|
"logits": -1.4604405164718628, |
|
"logps": -82.8023681640625, |
|
"loss": 0.5548, |
|
"objective": 0.55946284532547, |
|
"ranking_simple": 0.637499988079071, |
|
"step": 275 |
|
}, |
|
{ |
|
"dpo_loss": 0.43256130814552307, |
|
"epoch": 0.7935758148323099, |
|
"grad_norm": 10.705607992682587, |
|
"learning_rate": 4.597426554873037e-06, |
|
"logits": -1.5027648210525513, |
|
"logps": -83.16519927978516, |
|
"loss": 0.5354, |
|
"objective": 0.5194212794303894, |
|
"ranking_simple": 0.6708333492279053, |
|
"step": 280 |
|
}, |
|
{ |
|
"dpo_loss": 0.45488685369491577, |
|
"epoch": 0.807746811525744, |
|
"grad_norm": 11.614191556355545, |
|
"learning_rate": 4.574646377824316e-06, |
|
"logits": -1.4621371030807495, |
|
"logps": -84.6886215209961, |
|
"loss": 0.5375, |
|
"objective": 0.539017379283905, |
|
"ranking_simple": 0.6000000238418579, |
|
"step": 285 |
|
}, |
|
{ |
|
"dpo_loss": 0.4303934872150421, |
|
"epoch": 0.821917808219178, |
|
"grad_norm": 11.206240309054008, |
|
"learning_rate": 4.551299013171111e-06, |
|
"logits": -1.4472154378890991, |
|
"logps": -83.35670471191406, |
|
"loss": 0.5305, |
|
"objective": 0.5253880620002747, |
|
"ranking_simple": 0.6958333253860474, |
|
"step": 290 |
|
}, |
|
{ |
|
"dpo_loss": 0.43744438886642456, |
|
"epoch": 0.8360888049126122, |
|
"grad_norm": 9.074371483374543, |
|
"learning_rate": 4.5273908438498e-06, |
|
"logits": -1.506995677947998, |
|
"logps": -84.82511138916016, |
|
"loss": 0.5517, |
|
"objective": 0.5335044860839844, |
|
"ranking_simple": 0.637499988079071, |
|
"step": 295 |
|
}, |
|
{ |
|
"dpo_loss": 0.4651775360107422, |
|
"epoch": 0.8502598016060463, |
|
"grad_norm": 9.01878047466114, |
|
"learning_rate": 4.502928406115152e-06, |
|
"logits": -1.4889142513275146, |
|
"logps": -84.10907745361328, |
|
"loss": 0.5473, |
|
"objective": 0.5568612217903137, |
|
"ranking_simple": 0.6666666865348816, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.8502598016060463, |
|
"eval_dpo_loss": 0.6780434846878052, |
|
"eval_logits": -1.5369890928268433, |
|
"eval_logps": -92.34797668457031, |
|
"eval_loss": 0.7457044720649719, |
|
"eval_objective": 0.7388522028923035, |
|
"eval_ranking_simple": 0.5486542582511902, |
|
"eval_runtime": 369.8452, |
|
"eval_samples_per_second": 15.655, |
|
"eval_steps_per_second": 1.306, |
|
"step": 300 |
|
}, |
|
{ |
|
"dpo_loss": 0.47844478487968445, |
|
"epoch": 0.8644307982994804, |
|
"grad_norm": 10.005579661781612, |
|
"learning_rate": 4.477918387753388e-06, |
|
"logits": -1.4307849407196045, |
|
"logps": -86.45662689208984, |
|
"loss": 0.5252, |
|
"objective": 0.5761564373970032, |
|
"ranking_simple": 0.7083333134651184, |
|
"step": 305 |
|
}, |
|
{ |
|
"dpo_loss": 0.40813323855400085, |
|
"epoch": 0.8786017949929145, |
|
"grad_norm": 10.307837911346382, |
|
"learning_rate": 4.452367626253805e-06, |
|
"logits": -1.3825361728668213, |
|
"logps": -86.61471557617188, |
|
"loss": 0.5225, |
|
"objective": 0.5045412182807922, |
|
"ranking_simple": 0.6916666626930237, |
|
"step": 310 |
|
}, |
|
{ |
|
"dpo_loss": 0.42051681876182556, |
|
"epoch": 0.8927727916863486, |
|
"grad_norm": 10.459183690361446, |
|
"learning_rate": 4.426283106939474e-06, |
|
"logits": -1.3151206970214844, |
|
"logps": -85.77119445800781, |
|
"loss": 0.5426, |
|
"objective": 0.513097882270813, |
|
"ranking_simple": 0.637499988079071, |
|
"step": 315 |
|
}, |
|
{ |
|
"dpo_loss": 0.41688165068626404, |
|
"epoch": 0.9069437883797827, |
|
"grad_norm": 11.302451754279112, |
|
"learning_rate": 4.399671961057523e-06, |
|
"logits": -1.2146058082580566, |
|
"logps": -87.50178527832031, |
|
"loss": 0.5607, |
|
"objective": 0.5194131731987, |
|
"ranking_simple": 0.7166666388511658, |
|
"step": 320 |
|
}, |
|
{ |
|
"dpo_loss": 0.4247673749923706, |
|
"epoch": 0.9211147850732169, |
|
"grad_norm": 9.112687814178505, |
|
"learning_rate": 4.372541463829524e-06, |
|
"logits": -1.2642977237701416, |
|
"logps": -86.85433959960938, |
|
"loss": 0.5281, |
|
"objective": 0.5294176340103149, |
|
"ranking_simple": 0.6875, |
|
"step": 325 |
|
}, |
|
{ |
|
"dpo_loss": 0.3975774645805359, |
|
"epoch": 0.9352857817666509, |
|
"grad_norm": 10.380051826599395, |
|
"learning_rate": 4.3448990324625244e-06, |
|
"logits": -1.1297107934951782, |
|
"logps": -85.07998657226562, |
|
"loss": 0.5304, |
|
"objective": 0.5025947690010071, |
|
"ranking_simple": 0.7041666507720947, |
|
"step": 330 |
|
}, |
|
{ |
|
"dpo_loss": 0.4059672951698303, |
|
"epoch": 0.949456778460085, |
|
"grad_norm": 9.237583761740382, |
|
"learning_rate": 4.316752224121252e-06, |
|
"logits": -1.118418574333191, |
|
"logps": -84.82205200195312, |
|
"loss": 0.5208, |
|
"objective": 0.499520480632782, |
|
"ranking_simple": 0.7041666507720947, |
|
"step": 335 |
|
}, |
|
{ |
|
"dpo_loss": 0.4579353332519531, |
|
"epoch": 0.9636277751535192, |
|
"grad_norm": 9.19069599968856, |
|
"learning_rate": 4.288108733862064e-06, |
|
"logits": -1.200610876083374, |
|
"logps": -83.12140655517578, |
|
"loss": 0.5327, |
|
"objective": 0.5504066348075867, |
|
"ranking_simple": 0.6583333611488342, |
|
"step": 340 |
|
}, |
|
{ |
|
"dpo_loss": 0.4034007489681244, |
|
"epoch": 0.9777987718469532, |
|
"grad_norm": 9.024982300418905, |
|
"learning_rate": 4.2589763925291924e-06, |
|
"logits": -1.2796326875686646, |
|
"logps": -81.1929702758789, |
|
"loss": 0.5202, |
|
"objective": 0.4997580945491791, |
|
"ranking_simple": 0.699999988079071, |
|
"step": 345 |
|
}, |
|
{ |
|
"dpo_loss": 0.3821704089641571, |
|
"epoch": 0.9919697685403873, |
|
"grad_norm": 10.161439402885936, |
|
"learning_rate": 4.229363164613874e-06, |
|
"logits": -1.2219146490097046, |
|
"logps": -84.29117584228516, |
|
"loss": 0.5104, |
|
"objective": 0.4823973476886749, |
|
"ranking_simple": 0.7708333134651184, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.9919697685403873, |
|
"eval_dpo_loss": 0.6766201257705688, |
|
"eval_logits": -1.3364328145980835, |
|
"eval_logps": -88.19395446777344, |
|
"eval_loss": 0.7516013979911804, |
|
"eval_objective": 0.7371890544891357, |
|
"eval_ranking_simple": 0.5429606437683105, |
|
"eval_runtime": 368.1018, |
|
"eval_samples_per_second": 15.729, |
|
"eval_steps_per_second": 1.312, |
|
"step": 350 |
|
}, |
|
{ |
|
"dpo_loss": 0.34969767928123474, |
|
"epoch": 1.0061407652338215, |
|
"grad_norm": 6.289440966432375, |
|
"learning_rate": 4.199277146076933e-06, |
|
"logits": -1.3432971239089966, |
|
"logps": -82.10868072509766, |
|
"loss": 0.4782, |
|
"objective": 0.4609685242176056, |
|
"ranking_simple": 0.7458333373069763, |
|
"step": 355 |
|
}, |
|
{ |
|
"dpo_loss": 0.33886921405792236, |
|
"epoch": 1.0203117619272555, |
|
"grad_norm": 8.403207409711419, |
|
"learning_rate": 4.168726562135432e-06, |
|
"logits": -1.2731363773345947, |
|
"logps": -83.29503631591797, |
|
"loss": 0.4406, |
|
"objective": 0.4606216251850128, |
|
"ranking_simple": 0.7208333611488342, |
|
"step": 360 |
|
}, |
|
{ |
|
"dpo_loss": 0.2881692051887512, |
|
"epoch": 1.0344827586206897, |
|
"grad_norm": 6.468809468648868, |
|
"learning_rate": 4.137719765013974e-06, |
|
"logits": -1.298916220664978, |
|
"logps": -84.39533233642578, |
|
"loss": 0.4218, |
|
"objective": 0.4092879295349121, |
|
"ranking_simple": 0.7416666746139526, |
|
"step": 365 |
|
}, |
|
{ |
|
"dpo_loss": 0.31373023986816406, |
|
"epoch": 1.0486537553141237, |
|
"grad_norm": 10.120074264563044, |
|
"learning_rate": 4.106265231661292e-06, |
|
"logits": -1.257562518119812, |
|
"logps": -81.22982788085938, |
|
"loss": 0.4476, |
|
"objective": 0.4375106990337372, |
|
"ranking_simple": 0.7333333492279053, |
|
"step": 370 |
|
}, |
|
{ |
|
"dpo_loss": 0.3089008033275604, |
|
"epoch": 1.0628247520075578, |
|
"grad_norm": 7.557866547749679, |
|
"learning_rate": 4.074371561432731e-06, |
|
"logits": -1.3321465253829956, |
|
"logps": -83.06178283691406, |
|
"loss": 0.4494, |
|
"objective": 0.4439465403556824, |
|
"ranking_simple": 0.762499988079071, |
|
"step": 375 |
|
}, |
|
{ |
|
"dpo_loss": 0.3182891309261322, |
|
"epoch": 1.076995748700992, |
|
"grad_norm": 7.153131025534419, |
|
"learning_rate": 4.042047473739278e-06, |
|
"logits": -1.2969304323196411, |
|
"logps": -84.80577087402344, |
|
"loss": 0.4362, |
|
"objective": 0.4368627667427063, |
|
"ranking_simple": 0.7833333611488342, |
|
"step": 380 |
|
}, |
|
{ |
|
"dpo_loss": 0.33301401138305664, |
|
"epoch": 1.091166745394426, |
|
"grad_norm": 7.370055629692345, |
|
"learning_rate": 4.009301805663752e-06, |
|
"logits": -1.1952742338180542, |
|
"logps": -83.80043029785156, |
|
"loss": 0.4555, |
|
"objective": 0.4633476436138153, |
|
"ranking_simple": 0.7291666865348816, |
|
"step": 385 |
|
}, |
|
{ |
|
"dpo_loss": 0.3350224494934082, |
|
"epoch": 1.10533774208786, |
|
"grad_norm": 6.621937340714611, |
|
"learning_rate": 3.976143509544843e-06, |
|
"logits": -1.144229531288147, |
|
"logps": -85.45323944091797, |
|
"loss": 0.4405, |
|
"objective": 0.4582778811454773, |
|
"ranking_simple": 0.762499988079071, |
|
"step": 390 |
|
}, |
|
{ |
|
"dpo_loss": 0.2885909676551819, |
|
"epoch": 1.1195087387812943, |
|
"grad_norm": 7.285249021586949, |
|
"learning_rate": 3.9425816505296254e-06, |
|
"logits": -1.1913460493087769, |
|
"logps": -84.71748352050781, |
|
"loss": 0.4457, |
|
"objective": 0.40855225920677185, |
|
"ranking_simple": 0.7708333134651184, |
|
"step": 395 |
|
}, |
|
{ |
|
"dpo_loss": 0.3148750960826874, |
|
"epoch": 1.1336797354747283, |
|
"grad_norm": 7.611492036237173, |
|
"learning_rate": 3.908625404095242e-06, |
|
"logits": -1.2308555841445923, |
|
"logps": -82.81310272216797, |
|
"loss": 0.4425, |
|
"objective": 0.4325387179851532, |
|
"ranking_simple": 0.7583333253860474, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.1336797354747283, |
|
"eval_dpo_loss": 0.6866451501846313, |
|
"eval_logits": -1.2226030826568604, |
|
"eval_logps": -88.75952911376953, |
|
"eval_loss": 0.7568410634994507, |
|
"eval_objective": 0.7489107251167297, |
|
"eval_ranking_simple": 0.5439958572387695, |
|
"eval_runtime": 368.4645, |
|
"eval_samples_per_second": 15.714, |
|
"eval_steps_per_second": 1.311, |
|
"step": 400 |
|
}, |
|
{ |
|
"dpo_loss": 0.35161828994750977, |
|
"epoch": 1.1478507321681626, |
|
"grad_norm": 7.5393253270016265, |
|
"learning_rate": 3.8742840535404155e-06, |
|
"logits": -1.1856341361999512, |
|
"logps": -84.8878173828125, |
|
"loss": 0.4461, |
|
"objective": 0.47060316801071167, |
|
"ranking_simple": 0.7166666388511658, |
|
"step": 405 |
|
}, |
|
{ |
|
"dpo_loss": 0.338571161031723, |
|
"epoch": 1.1620217288615966, |
|
"grad_norm": 5.801736886678098, |
|
"learning_rate": 3.839566987447492e-06, |
|
"logits": -1.2132126092910767, |
|
"logps": -85.20359802246094, |
|
"loss": 0.4339, |
|
"objective": 0.46073082089424133, |
|
"ranking_simple": 0.7333333492279053, |
|
"step": 410 |
|
}, |
|
{ |
|
"dpo_loss": 0.3313528895378113, |
|
"epoch": 1.1761927255550306, |
|
"grad_norm": 7.544361991295727, |
|
"learning_rate": 3.8044836971156935e-06, |
|
"logits": -1.1481475830078125, |
|
"logps": -84.14738464355469, |
|
"loss": 0.4504, |
|
"objective": 0.45183154940605164, |
|
"ranking_simple": 0.6833333373069763, |
|
"step": 415 |
|
}, |
|
{ |
|
"dpo_loss": 0.31563931703567505, |
|
"epoch": 1.1903637222484649, |
|
"grad_norm": 6.99992341058847, |
|
"learning_rate": 3.7690437739662928e-06, |
|
"logits": -1.1214243173599243, |
|
"logps": -84.69954681396484, |
|
"loss": 0.4436, |
|
"objective": 0.43789881467819214, |
|
"ranking_simple": 0.7166666388511658, |
|
"step": 420 |
|
}, |
|
{ |
|
"dpo_loss": 0.3403506278991699, |
|
"epoch": 1.204534718941899, |
|
"grad_norm": 7.423010911616055, |
|
"learning_rate": 3.7332569069204127e-06, |
|
"logits": -1.179802417755127, |
|
"logps": -83.71639251708984, |
|
"loss": 0.4526, |
|
"objective": 0.46505507826805115, |
|
"ranking_simple": 0.7250000238418579, |
|
"step": 425 |
|
}, |
|
{ |
|
"dpo_loss": 0.3509117066860199, |
|
"epoch": 1.2187057156353331, |
|
"grad_norm": 7.343865460650952, |
|
"learning_rate": 3.697132879750174e-06, |
|
"logits": -1.190435528755188, |
|
"logps": -86.54014587402344, |
|
"loss": 0.4481, |
|
"objective": 0.4735199213027954, |
|
"ranking_simple": 0.6958333253860474, |
|
"step": 430 |
|
}, |
|
{ |
|
"dpo_loss": 0.3211102783679962, |
|
"epoch": 1.2328767123287672, |
|
"grad_norm": 8.246553327807998, |
|
"learning_rate": 3.6606815684039098e-06, |
|
"logits": -1.1106759309768677, |
|
"logps": -84.52306365966797, |
|
"loss": 0.4591, |
|
"objective": 0.4374840557575226, |
|
"ranking_simple": 0.7791666388511658, |
|
"step": 435 |
|
}, |
|
{ |
|
"dpo_loss": 0.3193610608577728, |
|
"epoch": 1.2470477090222012, |
|
"grad_norm": 8.720099662621916, |
|
"learning_rate": 3.6239129383061764e-06, |
|
"logits": -1.1405307054519653, |
|
"logps": -85.81301879882812, |
|
"loss": 0.4554, |
|
"objective": 0.44526031613349915, |
|
"ranking_simple": 0.7958333492279053, |
|
"step": 440 |
|
}, |
|
{ |
|
"dpo_loss": 0.3264698088169098, |
|
"epoch": 1.2612187057156352, |
|
"grad_norm": 7.381389964368484, |
|
"learning_rate": 3.586837041633312e-06, |
|
"logits": -1.1591824293136597, |
|
"logps": -86.23896026611328, |
|
"loss": 0.4648, |
|
"objective": 0.45790377259254456, |
|
"ranking_simple": 0.7875000238418579, |
|
"step": 445 |
|
}, |
|
{ |
|
"dpo_loss": 0.32597142457962036, |
|
"epoch": 1.2753897024090695, |
|
"grad_norm": 7.58156282251525, |
|
"learning_rate": 3.5494640145652647e-06, |
|
"logits": -1.3325276374816895, |
|
"logps": -85.49541473388672, |
|
"loss": 0.4544, |
|
"objective": 0.456112265586853, |
|
"ranking_simple": 0.7791666388511658, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.2753897024090695, |
|
"eval_dpo_loss": 0.6750319004058838, |
|
"eval_logits": -1.3088560104370117, |
|
"eval_logps": -90.05509185791016, |
|
"eval_loss": 0.7455039620399475, |
|
"eval_objective": 0.736483633518219, |
|
"eval_ranking_simple": 0.5481366515159607, |
|
"eval_runtime": 373.8032, |
|
"eval_samples_per_second": 15.489, |
|
"eval_steps_per_second": 1.292, |
|
"step": 450 |
|
}, |
|
{ |
|
"dpo_loss": 0.3323071599006653, |
|
"epoch": 1.2895606991025035, |
|
"grad_norm": 7.576847549255986, |
|
"learning_rate": 3.511804074514468e-06, |
|
"logits": -1.1815721988677979, |
|
"logps": -86.30281829833984, |
|
"loss": 0.4633, |
|
"objective": 0.45755326747894287, |
|
"ranking_simple": 0.7291666865348816, |
|
"step": 455 |
|
}, |
|
{ |
|
"dpo_loss": 0.32099449634552, |
|
"epoch": 1.3037316957959377, |
|
"grad_norm": 7.0611969685720934, |
|
"learning_rate": 3.4738675173325008e-06, |
|
"logits": -1.3054893016815186, |
|
"logps": -85.12818145751953, |
|
"loss": 0.449, |
|
"objective": 0.4495506286621094, |
|
"ranking_simple": 0.7166666388511658, |
|
"step": 460 |
|
}, |
|
{ |
|
"dpo_loss": 0.3063493072986603, |
|
"epoch": 1.3179026924893718, |
|
"grad_norm": 7.25813572886754, |
|
"learning_rate": 3.435664714495301e-06, |
|
"logits": -1.3341224193572998, |
|
"logps": -82.83889770507812, |
|
"loss": 0.4533, |
|
"objective": 0.4265134036540985, |
|
"ranking_simple": 0.737500011920929, |
|
"step": 465 |
|
}, |
|
{ |
|
"dpo_loss": 0.3599289655685425, |
|
"epoch": 1.3320736891828058, |
|
"grad_norm": 7.324136192857674, |
|
"learning_rate": 3.397206110267713e-06, |
|
"logits": -1.409017562866211, |
|
"logps": -81.51747131347656, |
|
"loss": 0.4597, |
|
"objective": 0.47407808899879456, |
|
"ranking_simple": 0.7458333373069763, |
|
"step": 470 |
|
}, |
|
{ |
|
"dpo_loss": 0.32313933968544006, |
|
"epoch": 1.34624468587624, |
|
"grad_norm": 7.632057920114339, |
|
"learning_rate": 3.3585022188481247e-06, |
|
"logits": -1.3106532096862793, |
|
"logps": -84.2204360961914, |
|
"loss": 0.4525, |
|
"objective": 0.44208064675331116, |
|
"ranking_simple": 0.7749999761581421, |
|
"step": 475 |
|
}, |
|
{ |
|
"dpo_loss": 0.3283863365650177, |
|
"epoch": 1.360415682569674, |
|
"grad_norm": 8.810666497005016, |
|
"learning_rate": 3.3195636214939943e-06, |
|
"logits": -1.350165843963623, |
|
"logps": -84.08671569824219, |
|
"loss": 0.4602, |
|
"objective": 0.45167192816734314, |
|
"ranking_simple": 0.7166666388511658, |
|
"step": 480 |
|
}, |
|
{ |
|
"dpo_loss": 0.3586553633213043, |
|
"epoch": 1.3745866792631083, |
|
"grad_norm": 9.1074396847746, |
|
"learning_rate": 3.2804009636290403e-06, |
|
"logits": -1.3092820644378662, |
|
"logps": -82.56153106689453, |
|
"loss": 0.4755, |
|
"objective": 0.4738306701183319, |
|
"ranking_simple": 0.737500011920929, |
|
"step": 485 |
|
}, |
|
{ |
|
"dpo_loss": 0.3484760820865631, |
|
"epoch": 1.3887576759565423, |
|
"grad_norm": 8.495980198743673, |
|
"learning_rate": 3.2410249519328848e-06, |
|
"logits": -1.2295089960098267, |
|
"logps": -85.73413848876953, |
|
"loss": 0.4478, |
|
"objective": 0.46845272183418274, |
|
"ranking_simple": 0.7458333373069763, |
|
"step": 490 |
|
}, |
|
{ |
|
"dpo_loss": 0.3095528483390808, |
|
"epoch": 1.4029286726499763, |
|
"grad_norm": 8.616754835251825, |
|
"learning_rate": 3.201446351413958e-06, |
|
"logits": -1.2363132238388062, |
|
"logps": -86.17890930175781, |
|
"loss": 0.4491, |
|
"objective": 0.4346270263195038, |
|
"ranking_simple": 0.7583333253860474, |
|
"step": 495 |
|
}, |
|
{ |
|
"dpo_loss": 0.33423519134521484, |
|
"epoch": 1.4170996693434104, |
|
"grad_norm": 9.483021181746007, |
|
"learning_rate": 3.1616759824664543e-06, |
|
"logits": -1.1377207040786743, |
|
"logps": -84.53422546386719, |
|
"loss": 0.4624, |
|
"objective": 0.46367743611335754, |
|
"ranking_simple": 0.7458333373069763, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.4170996693434104, |
|
"eval_dpo_loss": 0.67823725938797, |
|
"eval_logits": -1.2444933652877808, |
|
"eval_logps": -89.6255874633789, |
|
"eval_loss": 0.7469730973243713, |
|
"eval_objective": 0.7386783957481384, |
|
"eval_ranking_simple": 0.5533125996589661, |
|
"eval_runtime": 371.626, |
|
"eval_samples_per_second": 15.58, |
|
"eval_steps_per_second": 1.3, |
|
"step": 500 |
|
}, |
|
{ |
|
"dpo_loss": 0.3274858891963959, |
|
"epoch": 1.4312706660368446, |
|
"grad_norm": 7.65492855456284, |
|
"learning_rate": 3.121724717912138e-06, |
|
"logits": -1.1723324060440063, |
|
"logps": -85.37480163574219, |
|
"loss": 0.4519, |
|
"objective": 0.45082953572273254, |
|
"ranking_simple": 0.7666666507720947, |
|
"step": 505 |
|
}, |
|
{ |
|
"dpo_loss": 0.3435133397579193, |
|
"epoch": 1.4454416627302786, |
|
"grad_norm": 7.695387713966003, |
|
"learning_rate": 3.081603480027826e-06, |
|
"logits": -1.1867634057998657, |
|
"logps": -85.25582885742188, |
|
"loss": 0.4577, |
|
"objective": 0.4627363979816437, |
|
"ranking_simple": 0.7416666746139526, |
|
"step": 510 |
|
}, |
|
{ |
|
"dpo_loss": 0.33665701746940613, |
|
"epoch": 1.4596126594237129, |
|
"grad_norm": 6.852553951419641, |
|
"learning_rate": 3.04132323755935e-06, |
|
"logits": -1.2323737144470215, |
|
"logps": -85.88379669189453, |
|
"loss": 0.4456, |
|
"objective": 0.4514175057411194, |
|
"ranking_simple": 0.6875, |
|
"step": 515 |
|
}, |
|
{ |
|
"dpo_loss": 0.29371899366378784, |
|
"epoch": 1.473783656117147, |
|
"grad_norm": 6.810862817314035, |
|
"learning_rate": 3.0008950027228035e-06, |
|
"logits": -1.1436675786972046, |
|
"logps": -84.27217102050781, |
|
"loss": 0.4264, |
|
"objective": 0.42438414692878723, |
|
"ranking_simple": 0.7749999761581421, |
|
"step": 520 |
|
}, |
|
{ |
|
"dpo_loss": 0.3257172703742981, |
|
"epoch": 1.487954652810581, |
|
"grad_norm": 8.583092428586104, |
|
"learning_rate": 2.960329828193918e-06, |
|
"logits": -1.013688325881958, |
|
"logps": -83.8447265625, |
|
"loss": 0.462, |
|
"objective": 0.45160239934921265, |
|
"ranking_simple": 0.7416666746139526, |
|
"step": 525 |
|
}, |
|
{ |
|
"dpo_loss": 0.32752183079719543, |
|
"epoch": 1.5021256495040152, |
|
"grad_norm": 7.949241972971005, |
|
"learning_rate": 2.9196388040863695e-06, |
|
"logits": -1.2097468376159668, |
|
"logps": -87.19517517089844, |
|
"loss": 0.4528, |
|
"objective": 0.4474882483482361, |
|
"ranking_simple": 0.7333333492279053, |
|
"step": 530 |
|
}, |
|
{ |
|
"dpo_loss": 0.2954551577568054, |
|
"epoch": 1.5162966461974492, |
|
"grad_norm": 6.728845085484624, |
|
"learning_rate": 2.8788330549198512e-06, |
|
"logits": -1.2219938039779663, |
|
"logps": -86.1001205444336, |
|
"loss": 0.4194, |
|
"objective": 0.4230054020881653, |
|
"ranking_simple": 0.7541666626930237, |
|
"step": 535 |
|
}, |
|
{ |
|
"dpo_loss": 0.3660537302494049, |
|
"epoch": 1.5304676428908834, |
|
"grad_norm": 8.075282408782796, |
|
"learning_rate": 2.8379237365787426e-06, |
|
"logits": -1.1332299709320068, |
|
"logps": -86.49842834472656, |
|
"loss": 0.4601, |
|
"objective": 0.48352059721946716, |
|
"ranking_simple": 0.7666666507720947, |
|
"step": 540 |
|
}, |
|
{ |
|
"dpo_loss": 0.3429103195667267, |
|
"epoch": 1.5446386395843175, |
|
"grad_norm": 8.419322188891892, |
|
"learning_rate": 2.7969220332622004e-06, |
|
"logits": -1.2047460079193115, |
|
"logps": -87.4766616821289, |
|
"loss": 0.4602, |
|
"objective": 0.4581466615200043, |
|
"ranking_simple": 0.7749999761581421, |
|
"step": 545 |
|
}, |
|
{ |
|
"dpo_loss": 0.2875075042247772, |
|
"epoch": 1.5588096362777515, |
|
"grad_norm": 7.159416578046968, |
|
"learning_rate": 2.7558391544265127e-06, |
|
"logits": -1.184915542602539, |
|
"logps": -88.01736450195312, |
|
"loss": 0.4391, |
|
"objective": 0.41795456409454346, |
|
"ranking_simple": 0.762499988079071, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.5588096362777515, |
|
"eval_dpo_loss": 0.6695489287376404, |
|
"eval_logits": -1.1983243227005005, |
|
"eval_logps": -91.99543762207031, |
|
"eval_loss": 0.7384570837020874, |
|
"eval_objective": 0.7304210066795349, |
|
"eval_ranking_simple": 0.5486542582511902, |
|
"eval_runtime": 368.2566, |
|
"eval_samples_per_second": 15.723, |
|
"eval_steps_per_second": 1.312, |
|
"step": 550 |
|
}, |
|
{ |
|
"dpo_loss": 0.33032724261283875, |
|
"epoch": 1.5729806329711855, |
|
"grad_norm": 7.5647449089794305, |
|
"learning_rate": 2.714686331720543e-06, |
|
"logits": -0.9676509499549866, |
|
"logps": -88.15595245361328, |
|
"loss": 0.4441, |
|
"objective": 0.44403356313705444, |
|
"ranking_simple": 0.737500011920929, |
|
"step": 555 |
|
}, |
|
{ |
|
"dpo_loss": 0.3146314024925232, |
|
"epoch": 1.5871516296646198, |
|
"grad_norm": 7.8310551029619555, |
|
"learning_rate": 2.6734748159151104e-06, |
|
"logits": -1.0463515520095825, |
|
"logps": -85.61664581298828, |
|
"loss": 0.4414, |
|
"objective": 0.42937740683555603, |
|
"ranking_simple": 0.7458333373069763, |
|
"step": 560 |
|
}, |
|
{ |
|
"dpo_loss": 0.31714820861816406, |
|
"epoch": 1.601322626358054, |
|
"grad_norm": 7.188369880783801, |
|
"learning_rate": 2.632215873827142e-06, |
|
"logits": -1.049370527267456, |
|
"logps": -86.86510467529297, |
|
"loss": 0.4496, |
|
"objective": 0.4433501362800598, |
|
"ranking_simple": 0.7333333492279053, |
|
"step": 565 |
|
}, |
|
{ |
|
"dpo_loss": 0.31138813495635986, |
|
"epoch": 1.615493623051488, |
|
"grad_norm": 7.613684765151376, |
|
"learning_rate": 2.5909207852394363e-06, |
|
"logits": -1.1900863647460938, |
|
"logps": -88.4011001586914, |
|
"loss": 0.4325, |
|
"objective": 0.4364365339279175, |
|
"ranking_simple": 0.7583333253860474, |
|
"step": 570 |
|
}, |
|
{ |
|
"dpo_loss": 0.3405741751194, |
|
"epoch": 1.629664619744922, |
|
"grad_norm": 7.994470235278288, |
|
"learning_rate": 2.5496008398168844e-06, |
|
"logits": -1.1202160120010376, |
|
"logps": -85.93099975585938, |
|
"loss": 0.4543, |
|
"objective": 0.46565988659858704, |
|
"ranking_simple": 0.7666666507720947, |
|
"step": 575 |
|
}, |
|
{ |
|
"dpo_loss": 0.32562312483787537, |
|
"epoch": 1.643835616438356, |
|
"grad_norm": 7.561927544761709, |
|
"learning_rate": 2.508267334019988e-06, |
|
"logits": -1.1339008808135986, |
|
"logps": -85.58094787597656, |
|
"loss": 0.4384, |
|
"objective": 0.4460238814353943, |
|
"ranking_simple": 0.7666666507720947, |
|
"step": 580 |
|
}, |
|
{ |
|
"dpo_loss": 0.32099297642707825, |
|
"epoch": 1.6580066131317903, |
|
"grad_norm": 7.262395386385144, |
|
"learning_rate": 2.46693156801652e-06, |
|
"logits": -0.9802089333534241, |
|
"logps": -84.47058868408203, |
|
"loss": 0.4483, |
|
"objective": 0.43592384457588196, |
|
"ranking_simple": 0.7208333611488342, |
|
"step": 585 |
|
}, |
|
{ |
|
"dpo_loss": 0.31580641865730286, |
|
"epoch": 1.6721776098252243, |
|
"grad_norm": 6.800312627956946, |
|
"learning_rate": 2.4256048425921693e-06, |
|
"logits": -1.01039457321167, |
|
"logps": -86.4159164428711, |
|
"loss": 0.444, |
|
"objective": 0.44133689999580383, |
|
"ranking_simple": 0.7541666626930237, |
|
"step": 590 |
|
}, |
|
{ |
|
"dpo_loss": 0.32943814992904663, |
|
"epoch": 1.6863486065186586, |
|
"grad_norm": 6.937730325378872, |
|
"learning_rate": 2.384298456061023e-06, |
|
"logits": -0.9875913858413696, |
|
"logps": -86.71006774902344, |
|
"loss": 0.4342, |
|
"objective": 0.44936403632164, |
|
"ranking_simple": 0.75, |
|
"step": 595 |
|
}, |
|
{ |
|
"dpo_loss": 0.28611433506011963, |
|
"epoch": 1.7005196032120926, |
|
"grad_norm": 7.2292134497805245, |
|
"learning_rate": 2.3430237011767166e-06, |
|
"logits": -1.0400419235229492, |
|
"logps": -87.26065063476562, |
|
"loss": 0.4285, |
|
"objective": 0.4172123074531555, |
|
"ranking_simple": 0.75, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.7005196032120926, |
|
"eval_dpo_loss": 0.6726422905921936, |
|
"eval_logits": -1.1181021928787231, |
|
"eval_logps": -91.40369415283203, |
|
"eval_loss": 0.7407769560813904, |
|
"eval_objective": 0.7317408919334412, |
|
"eval_ranking_simple": 0.5502070188522339, |
|
"eval_runtime": 368.21, |
|
"eval_samples_per_second": 15.725, |
|
"eval_steps_per_second": 1.312, |
|
"step": 600 |
|
}, |
|
{ |
|
"dpo_loss": 0.31679973006248474, |
|
"epoch": 1.7146905999055266, |
|
"grad_norm": 8.641164554516212, |
|
"learning_rate": 2.30179186204511e-06, |
|
"logits": -0.9793424606323242, |
|
"logps": -87.53156280517578, |
|
"loss": 0.4514, |
|
"objective": 0.439665287733078, |
|
"ranking_simple": 0.7291666865348816, |
|
"step": 605 |
|
}, |
|
{ |
|
"dpo_loss": 0.31452521681785583, |
|
"epoch": 1.7288615965989607, |
|
"grad_norm": 8.523948659267386, |
|
"learning_rate": 2.2606142110393248e-06, |
|
"logits": -1.05043625831604, |
|
"logps": -86.67326354980469, |
|
"loss": 0.4321, |
|
"objective": 0.4331686198711395, |
|
"ranking_simple": 0.7833333611488342, |
|
"step": 610 |
|
}, |
|
{ |
|
"dpo_loss": 0.3183232843875885, |
|
"epoch": 1.743032593292395, |
|
"grad_norm": 7.54765334989791, |
|
"learning_rate": 2.2195020057179897e-06, |
|
"logits": -1.0829071998596191, |
|
"logps": -87.68238830566406, |
|
"loss": 0.4403, |
|
"objective": 0.4391016960144043, |
|
"ranking_simple": 0.7166666388511658, |
|
"step": 615 |
|
}, |
|
{ |
|
"dpo_loss": 0.31284984946250916, |
|
"epoch": 1.7572035899858292, |
|
"grad_norm": 8.306713660930198, |
|
"learning_rate": 2.1784664857475356e-06, |
|
"logits": -1.133055567741394, |
|
"logps": -88.2243881225586, |
|
"loss": 0.4481, |
|
"objective": 0.4363309144973755, |
|
"ranking_simple": 0.7583333253860474, |
|
"step": 620 |
|
}, |
|
{ |
|
"dpo_loss": 0.3389653265476227, |
|
"epoch": 1.7713745866792632, |
|
"grad_norm": 8.27642622610812, |
|
"learning_rate": 2.1375188698293855e-06, |
|
"logits": -1.0864481925964355, |
|
"logps": -87.53520202636719, |
|
"loss": 0.4464, |
|
"objective": 0.4615752100944519, |
|
"ranking_simple": 0.7458333373069763, |
|
"step": 625 |
|
}, |
|
{ |
|
"dpo_loss": 0.3449970781803131, |
|
"epoch": 1.7855455833726972, |
|
"grad_norm": 7.582885342390072, |
|
"learning_rate": 2.096670352632873e-06, |
|
"logits": -1.17921781539917, |
|
"logps": -88.3585205078125, |
|
"loss": 0.4521, |
|
"objective": 0.4663265645503998, |
|
"ranking_simple": 0.7124999761581421, |
|
"step": 630 |
|
}, |
|
{ |
|
"dpo_loss": 0.2966757118701935, |
|
"epoch": 1.7997165800661312, |
|
"grad_norm": 7.391390201817734, |
|
"learning_rate": 2.0559321017347286e-06, |
|
"logits": -1.3011022806167603, |
|
"logps": -87.97920989990234, |
|
"loss": 0.4423, |
|
"objective": 0.42836812138557434, |
|
"ranking_simple": 0.7416666746139526, |
|
"step": 635 |
|
}, |
|
{ |
|
"dpo_loss": 0.2997787892818451, |
|
"epoch": 1.8138875767595655, |
|
"grad_norm": 7.910229224489209, |
|
"learning_rate": 2.01531525456598e-06, |
|
"logits": -1.2456448078155518, |
|
"logps": -88.25955963134766, |
|
"loss": 0.4321, |
|
"objective": 0.43085435032844543, |
|
"ranking_simple": 0.7291666865348816, |
|
"step": 640 |
|
}, |
|
{ |
|
"dpo_loss": 0.3190990388393402, |
|
"epoch": 1.8280585734529995, |
|
"grad_norm": 7.336498748426566, |
|
"learning_rate": 1.974830915367086e-06, |
|
"logits": -1.2663050889968872, |
|
"logps": -88.02909851074219, |
|
"loss": 0.448, |
|
"objective": 0.44558364152908325, |
|
"ranking_simple": 0.7416666746139526, |
|
"step": 645 |
|
}, |
|
{ |
|
"dpo_loss": 0.31604960560798645, |
|
"epoch": 1.8422295701464337, |
|
"grad_norm": 8.01809767773599, |
|
"learning_rate": 1.93449015215215e-06, |
|
"logits": -1.3034510612487793, |
|
"logps": -87.49658966064453, |
|
"loss": 0.4553, |
|
"objective": 0.4388020634651184, |
|
"ranking_simple": 0.7583333253860474, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.8422295701464337, |
|
"eval_dpo_loss": 0.6739967465400696, |
|
"eval_logits": -1.272489070892334, |
|
"eval_logps": -90.41595458984375, |
|
"eval_loss": 0.7425940036773682, |
|
"eval_objective": 0.7334864139556885, |
|
"eval_ranking_simple": 0.5559006333351135, |
|
"eval_runtime": 368.2584, |
|
"eval_samples_per_second": 15.723, |
|
"eval_steps_per_second": 1.312, |
|
"step": 650 |
|
}, |
|
{ |
|
"dpo_loss": 0.3332468867301941, |
|
"epoch": 1.8564005668398678, |
|
"grad_norm": 6.515064472181348, |
|
"learning_rate": 1.8943039936830347e-06, |
|
"logits": -1.153478980064392, |
|
"logps": -83.99712371826172, |
|
"loss": 0.4429, |
|
"objective": 0.441272109746933, |
|
"ranking_simple": 0.7416666746139526, |
|
"step": 655 |
|
}, |
|
{ |
|
"dpo_loss": 0.3361124098300934, |
|
"epoch": 1.8705715635333018, |
|
"grad_norm": 7.186294756150463, |
|
"learning_rate": 1.8542834264542091e-06, |
|
"logits": -1.1785982847213745, |
|
"logps": -87.21165466308594, |
|
"loss": 0.4408, |
|
"objective": 0.4524908661842346, |
|
"ranking_simple": 0.737500011920929, |
|
"step": 660 |
|
}, |
|
{ |
|
"dpo_loss": 0.30988797545433044, |
|
"epoch": 1.8847425602267358, |
|
"grad_norm": 7.223583667691921, |
|
"learning_rate": 1.814439391689151e-06, |
|
"logits": -1.1908276081085205, |
|
"logps": -85.18921661376953, |
|
"loss": 0.4392, |
|
"objective": 0.4265805184841156, |
|
"ranking_simple": 0.7875000238418579, |
|
"step": 665 |
|
}, |
|
{ |
|
"dpo_loss": 0.3152609169483185, |
|
"epoch": 1.89891355692017, |
|
"grad_norm": 7.236738362974331, |
|
"learning_rate": 1.7747827823491253e-06, |
|
"logits": -1.1136568784713745, |
|
"logps": -85.27536010742188, |
|
"loss": 0.437, |
|
"objective": 0.43872639536857605, |
|
"ranking_simple": 0.7166666388511658, |
|
"step": 670 |
|
}, |
|
{ |
|
"dpo_loss": 0.3269253373146057, |
|
"epoch": 1.9130845536136043, |
|
"grad_norm": 7.561018148695705, |
|
"learning_rate": 1.7353244401551566e-06, |
|
"logits": -1.1110624074935913, |
|
"logps": -86.15958404541016, |
|
"loss": 0.4438, |
|
"objective": 0.45002037286758423, |
|
"ranking_simple": 0.7916666865348816, |
|
"step": 675 |
|
}, |
|
{ |
|
"dpo_loss": 0.32910048961639404, |
|
"epoch": 1.9272555503070383, |
|
"grad_norm": 7.152885290887085, |
|
"learning_rate": 1.6960751526240122e-06, |
|
"logits": -1.1628243923187256, |
|
"logps": -88.80421447753906, |
|
"loss": 0.4433, |
|
"objective": 0.4564170241355896, |
|
"ranking_simple": 0.8041666746139526, |
|
"step": 680 |
|
}, |
|
{ |
|
"dpo_loss": 0.2932046949863434, |
|
"epoch": 1.9414265470004723, |
|
"grad_norm": 6.233579110675445, |
|
"learning_rate": 1.6570456501189996e-06, |
|
"logits": -1.1053887605667114, |
|
"logps": -87.36367797851562, |
|
"loss": 0.4164, |
|
"objective": 0.424954891204834, |
|
"ranking_simple": 0.699999988079071, |
|
"step": 685 |
|
}, |
|
{ |
|
"dpo_loss": 0.3040808141231537, |
|
"epoch": 1.9555975436939064, |
|
"grad_norm": 7.4587976233319795, |
|
"learning_rate": 1.6182466029163974e-06, |
|
"logits": -1.1335686445236206, |
|
"logps": -87.29252624511719, |
|
"loss": 0.439, |
|
"objective": 0.4274420142173767, |
|
"ranking_simple": 0.75, |
|
"step": 690 |
|
}, |
|
{ |
|
"dpo_loss": 0.33969834446907043, |
|
"epoch": 1.9697685403873406, |
|
"grad_norm": 8.387180426204326, |
|
"learning_rate": 1.5796886182883053e-06, |
|
"logits": -1.048902988433838, |
|
"logps": -87.7245101928711, |
|
"loss": 0.4375, |
|
"objective": 0.44796222448349, |
|
"ranking_simple": 0.75, |
|
"step": 695 |
|
}, |
|
{ |
|
"dpo_loss": 0.3006362318992615, |
|
"epoch": 1.9839395370807746, |
|
"grad_norm": 7.023996943402399, |
|
"learning_rate": 1.541382237602721e-06, |
|
"logits": -1.0404043197631836, |
|
"logps": -88.86479187011719, |
|
"loss": 0.4307, |
|
"objective": 0.4227179288864136, |
|
"ranking_simple": 0.7416666746139526, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.9839395370807746, |
|
"eval_dpo_loss": 0.6734952926635742, |
|
"eval_logits": -1.2351235151290894, |
|
"eval_logps": -91.78550720214844, |
|
"eval_loss": 0.740387499332428, |
|
"eval_objective": 0.7341719269752502, |
|
"eval_ranking_simple": 0.5584886074066162, |
|
"eval_runtime": 367.9084, |
|
"eval_samples_per_second": 15.738, |
|
"eval_steps_per_second": 1.313, |
|
"step": 700 |
|
}, |
|
{ |
|
"dpo_loss": 0.3133445382118225, |
|
"epoch": 1.9981105337742089, |
|
"grad_norm": 7.983253403779177, |
|
"learning_rate": 1.5033379334416376e-06, |
|
"logits": -0.9815789461135864, |
|
"logps": -87.82261657714844, |
|
"loss": 0.448, |
|
"objective": 0.4431394636631012, |
|
"ranking_simple": 0.7208333611488342, |
|
"step": 705 |
|
}, |
|
{ |
|
"dpo_loss": 0.26884225010871887, |
|
"epoch": 2.012281530467643, |
|
"grad_norm": 4.711433926223709, |
|
"learning_rate": 1.465566106737942e-06, |
|
"logits": -1.132247805595398, |
|
"logps": -86.83318328857422, |
|
"loss": 0.3883, |
|
"objective": 0.4008704125881195, |
|
"ranking_simple": 0.800000011920929, |
|
"step": 710 |
|
}, |
|
{ |
|
"dpo_loss": 0.24094882607460022, |
|
"epoch": 2.026452527161077, |
|
"grad_norm": 4.45009467250422, |
|
"learning_rate": 1.4280770839319073e-06, |
|
"logits": -1.1357406377792358, |
|
"logps": -85.59795379638672, |
|
"loss": 0.3752, |
|
"objective": 0.37576305866241455, |
|
"ranking_simple": 0.8333333134651184, |
|
"step": 715 |
|
}, |
|
{ |
|
"dpo_loss": 0.22363178431987762, |
|
"epoch": 2.040623523854511, |
|
"grad_norm": 4.462329734353584, |
|
"learning_rate": 1.3908811141480408e-06, |
|
"logits": -1.1281845569610596, |
|
"logps": -86.89059448242188, |
|
"loss": 0.371, |
|
"objective": 0.3674115538597107, |
|
"ranking_simple": 0.8166666626930237, |
|
"step": 720 |
|
}, |
|
{ |
|
"dpo_loss": 0.22502318024635315, |
|
"epoch": 2.0547945205479454, |
|
"grad_norm": 4.243136918683414, |
|
"learning_rate": 1.353988366393083e-06, |
|
"logits": -1.1830660104751587, |
|
"logps": -89.62688446044922, |
|
"loss": 0.3719, |
|
"objective": 0.37353357672691345, |
|
"ranking_simple": 0.7958333492279053, |
|
"step": 725 |
|
}, |
|
{ |
|
"dpo_loss": 0.24978047609329224, |
|
"epoch": 2.0689655172413794, |
|
"grad_norm": 4.567188933527537, |
|
"learning_rate": 1.3174089267758983e-06, |
|
"logits": -1.2705327272415161, |
|
"logps": -87.48053741455078, |
|
"loss": 0.3752, |
|
"objective": 0.3849594295024872, |
|
"ranking_simple": 0.7833333611488342, |
|
"step": 730 |
|
}, |
|
{ |
|
"dpo_loss": 0.25259262323379517, |
|
"epoch": 2.0831365139348135, |
|
"grad_norm": 5.151937950415065, |
|
"learning_rate": 1.2811527957500344e-06, |
|
"logits": -1.1913000345230103, |
|
"logps": -87.4656753540039, |
|
"loss": 0.3812, |
|
"objective": 0.3858603835105896, |
|
"ranking_simple": 0.762499988079071, |
|
"step": 735 |
|
}, |
|
{ |
|
"dpo_loss": 0.2209930121898651, |
|
"epoch": 2.0973075106282475, |
|
"grad_norm": 4.254397078002712, |
|
"learning_rate": 1.245229885379699e-06, |
|
"logits": -1.2872763872146606, |
|
"logps": -86.62919616699219, |
|
"loss": 0.3727, |
|
"objective": 0.3619976043701172, |
|
"ranking_simple": 0.8458333611488342, |
|
"step": 740 |
|
}, |
|
{ |
|
"dpo_loss": 0.2264253944158554, |
|
"epoch": 2.1114785073216815, |
|
"grad_norm": 4.542965704767376, |
|
"learning_rate": 1.2096500166298992e-06, |
|
"logits": -1.2000243663787842, |
|
"logps": -88.8626937866211, |
|
"loss": 0.3762, |
|
"objective": 0.3712898790836334, |
|
"ranking_simple": 0.8125, |
|
"step": 745 |
|
}, |
|
{ |
|
"dpo_loss": 0.23141829669475555, |
|
"epoch": 2.1256495040151155, |
|
"grad_norm": 4.288133779032122, |
|
"learning_rate": 1.1744229166814889e-06, |
|
"logits": -1.2524789571762085, |
|
"logps": -87.17241668701172, |
|
"loss": 0.3755, |
|
"objective": 0.37335434556007385, |
|
"ranking_simple": 0.7791666388511658, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.1256495040151155, |
|
"eval_dpo_loss": 0.6762245893478394, |
|
"eval_logits": -1.3013161420822144, |
|
"eval_logps": -93.2393569946289, |
|
"eval_loss": 0.7430183291435242, |
|
"eval_objective": 0.736893355846405, |
|
"eval_ranking_simple": 0.5486542582511902, |
|
"eval_runtime": 368.204, |
|
"eval_samples_per_second": 15.725, |
|
"eval_steps_per_second": 1.312, |
|
"step": 750 |
|
}, |
|
{ |
|
"dpo_loss": 0.2273796647787094, |
|
"epoch": 2.13982050070855, |
|
"grad_norm": 4.5000688322821345, |
|
"learning_rate": 1.1395582162718524e-06, |
|
"logits": -1.2545628547668457, |
|
"logps": -90.6506576538086, |
|
"loss": 0.3758, |
|
"objective": 0.3768024444580078, |
|
"ranking_simple": 0.8083333373069763, |
|
"step": 755 |
|
}, |
|
{ |
|
"dpo_loss": 0.2525111138820648, |
|
"epoch": 2.153991497401984, |
|
"grad_norm": 5.011749929420246, |
|
"learning_rate": 1.1050654470619602e-06, |
|
"logits": -1.1907628774642944, |
|
"logps": -85.97274017333984, |
|
"loss": 0.3783, |
|
"objective": 0.39089202880859375, |
|
"ranking_simple": 0.7875000238418579, |
|
"step": 760 |
|
}, |
|
{ |
|
"dpo_loss": 0.23322957754135132, |
|
"epoch": 2.168162494095418, |
|
"grad_norm": 5.201612157262786, |
|
"learning_rate": 1.0709540390305061e-06, |
|
"logits": -1.2566605806350708, |
|
"logps": -87.6942367553711, |
|
"loss": 0.3849, |
|
"objective": 0.3768391013145447, |
|
"ranking_simple": 0.8125, |
|
"step": 765 |
|
}, |
|
{ |
|
"dpo_loss": 0.22707949578762054, |
|
"epoch": 2.182333490788852, |
|
"grad_norm": 4.70373130482108, |
|
"learning_rate": 1.0372333178958462e-06, |
|
"logits": -1.2721644639968872, |
|
"logps": -88.22640991210938, |
|
"loss": 0.377, |
|
"objective": 0.3728346526622772, |
|
"ranking_simple": 0.7875000238418579, |
|
"step": 770 |
|
}, |
|
{ |
|
"dpo_loss": 0.22152571380138397, |
|
"epoch": 2.196504487482286, |
|
"grad_norm": 5.587786749861536, |
|
"learning_rate": 1.0039125025664392e-06, |
|
"logits": -1.2534898519515991, |
|
"logps": -89.17953491210938, |
|
"loss": 0.3725, |
|
"objective": 0.3679354786872864, |
|
"ranking_simple": 0.8208333253860474, |
|
"step": 775 |
|
}, |
|
{ |
|
"dpo_loss": 0.250379741191864, |
|
"epoch": 2.21067548417572, |
|
"grad_norm": 5.002867020526872, |
|
"learning_rate": 9.710007026204896e-07, |
|
"logits": -1.1392863988876343, |
|
"logps": -89.51647186279297, |
|
"loss": 0.3774, |
|
"objective": 0.39055460691452026, |
|
"ranking_simple": 0.737500011920929, |
|
"step": 780 |
|
}, |
|
{ |
|
"dpo_loss": 0.22285352647304535, |
|
"epoch": 2.2248464808691546, |
|
"grad_norm": 5.843250372899321, |
|
"learning_rate": 9.385069158154805e-07, |
|
"logits": -1.1372294425964355, |
|
"logps": -87.82280731201172, |
|
"loss": 0.3756, |
|
"objective": 0.37054911255836487, |
|
"ranking_simple": 0.7916666865348816, |
|
"step": 785 |
|
}, |
|
{ |
|
"dpo_loss": 0.2125234156847, |
|
"epoch": 2.2390174775625886, |
|
"grad_norm": 4.914317874167112, |
|
"learning_rate": 9.064400256282757e-07, |
|
"logits": -1.1291053295135498, |
|
"logps": -88.7917709350586, |
|
"loss": 0.3784, |
|
"objective": 0.36886271834373474, |
|
"ranking_simple": 0.8500000238418579, |
|
"step": 790 |
|
}, |
|
{ |
|
"dpo_loss": 0.24073070287704468, |
|
"epoch": 2.2531884742560226, |
|
"grad_norm": 4.77090719779272, |
|
"learning_rate": 8.74808798826467e-07, |
|
"logits": -1.216113805770874, |
|
"logps": -89.64153289794922, |
|
"loss": 0.3768, |
|
"objective": 0.37714874744415283, |
|
"ranking_simple": 0.824999988079071, |
|
"step": 795 |
|
}, |
|
{ |
|
"dpo_loss": 0.23036065697669983, |
|
"epoch": 2.2673594709494567, |
|
"grad_norm": 4.57839809888007, |
|
"learning_rate": 8.436218830716259e-07, |
|
"logits": -1.2197948694229126, |
|
"logps": -90.15279388427734, |
|
"loss": 0.3794, |
|
"objective": 0.3776319622993469, |
|
"ranking_simple": 0.8333333134651184, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.2673594709494567, |
|
"eval_dpo_loss": 0.6726187467575073, |
|
"eval_logits": -1.2647498846054077, |
|
"eval_logps": -93.31334686279297, |
|
"eval_loss": 0.7400006651878357, |
|
"eval_objective": 0.7334582805633545, |
|
"eval_ranking_simple": 0.554347813129425, |
|
"eval_runtime": 370.3208, |
|
"eval_samples_per_second": 15.635, |
|
"eval_steps_per_second": 1.304, |
|
"step": 800 |
|
}, |
|
{ |
|
"dpo_loss": 0.24638353288173676, |
|
"epoch": 2.2815304676428907, |
|
"grad_norm": 4.775498293092394, |
|
"learning_rate": 8.1288780455512e-07, |
|
"logits": -1.2070590257644653, |
|
"logps": -88.29240417480469, |
|
"loss": 0.38, |
|
"objective": 0.3882957696914673, |
|
"ranking_simple": 0.7916666865348816, |
|
"step": 805 |
|
}, |
|
{ |
|
"dpo_loss": 0.26376059651374817, |
|
"epoch": 2.295701464336325, |
|
"grad_norm": 4.107962497004369, |
|
"learning_rate": 7.826149656671386e-07, |
|
"logits": -1.3075222969055176, |
|
"logps": -87.07465362548828, |
|
"loss": 0.3775, |
|
"objective": 0.38518133759498596, |
|
"ranking_simple": 0.8208333253860474, |
|
"step": 810 |
|
}, |
|
{ |
|
"dpo_loss": 0.23214052617549896, |
|
"epoch": 2.309872461029759, |
|
"grad_norm": 4.032286949010579, |
|
"learning_rate": 7.528116426995605e-07, |
|
"logits": -1.2393053770065308, |
|
"logps": -89.05374145507812, |
|
"loss": 0.3692, |
|
"objective": 0.3722856342792511, |
|
"ranking_simple": 0.7833333611488342, |
|
"step": 815 |
|
}, |
|
{ |
|
"dpo_loss": 0.24578505754470825, |
|
"epoch": 2.324043457723193, |
|
"grad_norm": 4.0007768416623755, |
|
"learning_rate": 7.234859835833022e-07, |
|
"logits": -1.1707955598831177, |
|
"logps": -88.1304702758789, |
|
"loss": 0.3733, |
|
"objective": 0.377387672662735, |
|
"ranking_simple": 0.762499988079071, |
|
"step": 820 |
|
}, |
|
{ |
|
"dpo_loss": 0.23438973724842072, |
|
"epoch": 2.3382144544166272, |
|
"grad_norm": 4.645717749878006, |
|
"learning_rate": 6.94646005660749e-07, |
|
"logits": -1.197273850440979, |
|
"logps": -87.03529357910156, |
|
"loss": 0.3747, |
|
"objective": 0.3757517635822296, |
|
"ranking_simple": 0.8083333373069763, |
|
"step": 825 |
|
}, |
|
{ |
|
"dpo_loss": 0.22819119691848755, |
|
"epoch": 2.3523854511100613, |
|
"grad_norm": 5.87351004355499, |
|
"learning_rate": 6.662995934939007e-07, |
|
"logits": -1.2045276165008545, |
|
"logps": -89.30978393554688, |
|
"loss": 0.3712, |
|
"objective": 0.3776562809944153, |
|
"ranking_simple": 0.8125, |
|
"step": 830 |
|
}, |
|
{ |
|
"dpo_loss": 0.24645249545574188, |
|
"epoch": 2.3665564478034957, |
|
"grad_norm": 5.364960343792073, |
|
"learning_rate": 6.384544967088063e-07, |
|
"logits": -1.177357792854309, |
|
"logps": -88.5252914428711, |
|
"loss": 0.3869, |
|
"objective": 0.38127484917640686, |
|
"ranking_simple": 0.8208333253860474, |
|
"step": 835 |
|
}, |
|
{ |
|
"dpo_loss": 0.24418006837368011, |
|
"epoch": 2.3807274444969297, |
|
"grad_norm": 5.224294646577439, |
|
"learning_rate": 6.111183278768956e-07, |
|
"logits": -1.1215898990631104, |
|
"logps": -88.96337890625, |
|
"loss": 0.3837, |
|
"objective": 0.38468509912490845, |
|
"ranking_simple": 0.7583333253860474, |
|
"step": 840 |
|
}, |
|
{ |
|
"dpo_loss": 0.2479609102010727, |
|
"epoch": 2.3948984411903638, |
|
"grad_norm": 5.589943019855736, |
|
"learning_rate": 5.842985604337769e-07, |
|
"logits": -1.1818034648895264, |
|
"logps": -90.55766296386719, |
|
"loss": 0.3851, |
|
"objective": 0.3813749849796295, |
|
"ranking_simple": 0.8041666746139526, |
|
"step": 845 |
|
}, |
|
{ |
|
"dpo_loss": 0.22911126911640167, |
|
"epoch": 2.409069437883798, |
|
"grad_norm": 4.122848222733345, |
|
"learning_rate": 5.580025266360764e-07, |
|
"logits": -1.2163876295089722, |
|
"logps": -88.67691040039062, |
|
"loss": 0.373, |
|
"objective": 0.37054920196533203, |
|
"ranking_simple": 0.7749999761581421, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.409069437883798, |
|
"eval_dpo_loss": 0.6747334599494934, |
|
"eval_logits": -1.2593425512313843, |
|
"eval_logps": -92.93883514404297, |
|
"eval_loss": 0.74095219373703, |
|
"eval_objective": 0.7354456186294556, |
|
"eval_ranking_simple": 0.5522774457931519, |
|
"eval_runtime": 376.5034, |
|
"eval_samples_per_second": 15.378, |
|
"eval_steps_per_second": 1.283, |
|
"step": 850 |
|
}, |
|
{ |
|
"dpo_loss": 0.24901343882083893, |
|
"epoch": 2.423240434577232, |
|
"grad_norm": 5.545917724699393, |
|
"learning_rate": 5.322374155568688e-07, |
|
"logits": -1.2266477346420288, |
|
"logps": -87.11955261230469, |
|
"loss": 0.3932, |
|
"objective": 0.3926084637641907, |
|
"ranking_simple": 0.800000011920929, |
|
"step": 855 |
|
}, |
|
{ |
|
"dpo_loss": 0.24267633259296417, |
|
"epoch": 2.4374114312706663, |
|
"grad_norm": 4.734435718269958, |
|
"learning_rate": 5.070102711202606e-07, |
|
"logits": -1.1731507778167725, |
|
"logps": -89.50730895996094, |
|
"loss": 0.3789, |
|
"objective": 0.38105666637420654, |
|
"ranking_simple": 0.7916666865348816, |
|
"step": 860 |
|
}, |
|
{ |
|
"dpo_loss": 0.23941467702388763, |
|
"epoch": 2.4515824279641003, |
|
"grad_norm": 5.519011513783408, |
|
"learning_rate": 4.823279901756498e-07, |
|
"logits": -1.204505443572998, |
|
"logps": -88.21556854248047, |
|
"loss": 0.3821, |
|
"objective": 0.38131964206695557, |
|
"ranking_simple": 0.762499988079071, |
|
"step": 865 |
|
}, |
|
{ |
|
"dpo_loss": 0.2390858381986618, |
|
"epoch": 2.4657534246575343, |
|
"grad_norm": 4.890994580870049, |
|
"learning_rate": 4.581973206121948e-07, |
|
"logits": -1.2685866355895996, |
|
"logps": -88.25513458251953, |
|
"loss": 0.381, |
|
"objective": 0.3830280900001526, |
|
"ranking_simple": 0.7791666388511658, |
|
"step": 870 |
|
}, |
|
{ |
|
"dpo_loss": 0.2303398698568344, |
|
"epoch": 2.4799244213509684, |
|
"grad_norm": 5.062132747484294, |
|
"learning_rate": 4.3462485951401126e-07, |
|
"logits": -1.204114556312561, |
|
"logps": -88.9038314819336, |
|
"loss": 0.3766, |
|
"objective": 0.38060086965560913, |
|
"ranking_simple": 0.8208333253860474, |
|
"step": 875 |
|
}, |
|
{ |
|
"dpo_loss": 0.2502264678478241, |
|
"epoch": 2.4940954180444024, |
|
"grad_norm": 5.573699785814543, |
|
"learning_rate": 4.116170513565942e-07, |
|
"logits": -1.1215044260025024, |
|
"logps": -87.70077514648438, |
|
"loss": 0.3832, |
|
"objective": 0.3906749486923218, |
|
"ranking_simple": 0.7458333373069763, |
|
"step": 880 |
|
}, |
|
{ |
|
"dpo_loss": 0.2577410042285919, |
|
"epoch": 2.5082664147378364, |
|
"grad_norm": 4.956979588584702, |
|
"learning_rate": 3.891801862449629e-07, |
|
"logits": -1.3180614709854126, |
|
"logps": -86.54364776611328, |
|
"loss": 0.3894, |
|
"objective": 0.39261943101882935, |
|
"ranking_simple": 0.800000011920929, |
|
"step": 885 |
|
}, |
|
{ |
|
"dpo_loss": 0.21435794234275818, |
|
"epoch": 2.5224374114312704, |
|
"grad_norm": 5.7378961390152385, |
|
"learning_rate": 3.6732039819400686e-07, |
|
"logits": -1.2147475481033325, |
|
"logps": -85.97087097167969, |
|
"loss": 0.3823, |
|
"objective": 0.3677336275577545, |
|
"ranking_simple": 0.8291666507720947, |
|
"step": 890 |
|
}, |
|
{ |
|
"dpo_loss": 0.24500012397766113, |
|
"epoch": 2.536608408124705, |
|
"grad_norm": 5.350118064676714, |
|
"learning_rate": 3.46043663451511e-07, |
|
"logits": -1.2147990465164185, |
|
"logps": -88.52668762207031, |
|
"loss": 0.3803, |
|
"objective": 0.3881225883960724, |
|
"ranking_simple": 0.762499988079071, |
|
"step": 895 |
|
}, |
|
{ |
|
"dpo_loss": 0.27485665678977966, |
|
"epoch": 2.550779404818139, |
|
"grad_norm": 5.3127285866105405, |
|
"learning_rate": 3.253557988643072e-07, |
|
"logits": -1.1600372791290283, |
|
"logps": -86.77176666259766, |
|
"loss": 0.388, |
|
"objective": 0.40204280614852905, |
|
"ranking_simple": 0.7875000238418579, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.550779404818139, |
|
"eval_dpo_loss": 0.6756879687309265, |
|
"eval_logits": -1.2939373254776, |
|
"eval_logps": -92.89238739013672, |
|
"eval_loss": 0.7418231964111328, |
|
"eval_objective": 0.7363364696502686, |
|
"eval_ranking_simple": 0.5502070188522339, |
|
"eval_runtime": 369.0318, |
|
"eval_samples_per_second": 15.69, |
|
"eval_steps_per_second": 1.309, |
|
"step": 900 |
|
}, |
|
{ |
|
"dpo_loss": 0.23769782483577728, |
|
"epoch": 2.564950401511573, |
|
"grad_norm": 4.493878506991506, |
|
"learning_rate": 3.052624602880064e-07, |
|
"logits": -1.2237998247146606, |
|
"logps": -85.79032135009766, |
|
"loss": 0.3781, |
|
"objective": 0.3822983205318451, |
|
"ranking_simple": 0.7875000238418579, |
|
"step": 905 |
|
}, |
|
{ |
|
"dpo_loss": 0.23715509474277496, |
|
"epoch": 2.579121398205007, |
|
"grad_norm": 5.776134875432629, |
|
"learning_rate": 2.8576914104074425e-07, |
|
"logits": -1.2153693437576294, |
|
"logps": -89.9877700805664, |
|
"loss": 0.38, |
|
"objective": 0.3822348713874817, |
|
"ranking_simple": 0.7833333611488342, |
|
"step": 910 |
|
}, |
|
{ |
|
"dpo_loss": 0.241657093167305, |
|
"epoch": 2.593292394898441, |
|
"grad_norm": 3.828702735136657, |
|
"learning_rate": 2.6688117040136463e-07, |
|
"logits": -1.3503960371017456, |
|
"logps": -88.94493865966797, |
|
"loss": 0.3732, |
|
"objective": 0.3829546570777893, |
|
"ranking_simple": 0.8083333373069763, |
|
"step": 915 |
|
}, |
|
{ |
|
"dpo_loss": 0.2292027622461319, |
|
"epoch": 2.6074633915918755, |
|
"grad_norm": 4.721702099688327, |
|
"learning_rate": 2.486037121524448e-07, |
|
"logits": -1.1647106409072876, |
|
"logps": -88.75472259521484, |
|
"loss": 0.3765, |
|
"objective": 0.3745788335800171, |
|
"ranking_simple": 0.8125, |
|
"step": 920 |
|
}, |
|
{ |
|
"dpo_loss": 0.22531941533088684, |
|
"epoch": 2.6216343882853095, |
|
"grad_norm": 4.89488363414913, |
|
"learning_rate": 2.3094176316856982e-07, |
|
"logits": -1.1882636547088623, |
|
"logps": -87.45687866210938, |
|
"loss": 0.3799, |
|
"objective": 0.373484343290329, |
|
"ranking_simple": 0.800000011920929, |
|
"step": 925 |
|
}, |
|
{ |
|
"dpo_loss": 0.2518150508403778, |
|
"epoch": 2.6358053849787435, |
|
"grad_norm": 5.096542868734766, |
|
"learning_rate": 2.13900152050239e-07, |
|
"logits": -1.1838154792785645, |
|
"logps": -87.14102935791016, |
|
"loss": 0.3831, |
|
"objective": 0.39408108592033386, |
|
"ranking_simple": 0.7416666746139526, |
|
"step": 930 |
|
}, |
|
{ |
|
"dpo_loss": 0.2513907849788666, |
|
"epoch": 2.6499763816721775, |
|
"grad_norm": 5.6389448500299935, |
|
"learning_rate": 1.9748353780377234e-07, |
|
"logits": -1.1840038299560547, |
|
"logps": -86.4458999633789, |
|
"loss": 0.3954, |
|
"objective": 0.3891184628009796, |
|
"ranking_simple": 0.8291666507720947, |
|
"step": 935 |
|
}, |
|
{ |
|
"dpo_loss": 0.22740763425827026, |
|
"epoch": 2.6641473783656116, |
|
"grad_norm": 4.892959475477896, |
|
"learning_rate": 1.8169640856758652e-07, |
|
"logits": -1.2362346649169922, |
|
"logps": -89.93408203125, |
|
"loss": 0.3789, |
|
"objective": 0.3699011206626892, |
|
"ranking_simple": 0.8041666746139526, |
|
"step": 940 |
|
}, |
|
{ |
|
"dpo_loss": 0.2212921679019928, |
|
"epoch": 2.678318375059046, |
|
"grad_norm": 4.26351950100694, |
|
"learning_rate": 1.6654308038518057e-07, |
|
"logits": -1.2807658910751343, |
|
"logps": -88.23102569580078, |
|
"loss": 0.3726, |
|
"objective": 0.36465439200401306, |
|
"ranking_simple": 0.800000011920929, |
|
"step": 945 |
|
}, |
|
{ |
|
"dpo_loss": 0.24647028744220734, |
|
"epoch": 2.69248937175248, |
|
"grad_norm": 5.222304477193562, |
|
"learning_rate": 1.5202769602517514e-07, |
|
"logits": -1.1635686159133911, |
|
"logps": -86.58324432373047, |
|
"loss": 0.3866, |
|
"objective": 0.3892979621887207, |
|
"ranking_simple": 0.7958333492279053, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.69248937175248, |
|
"eval_dpo_loss": 0.6751933097839355, |
|
"eval_logits": -1.2937095165252686, |
|
"eval_logps": -92.32901763916016, |
|
"eval_loss": 0.7418193221092224, |
|
"eval_objective": 0.7358114719390869, |
|
"eval_ranking_simple": 0.5507246255874634, |
|
"eval_runtime": 369.9174, |
|
"eval_samples_per_second": 15.652, |
|
"eval_steps_per_second": 1.306, |
|
"step": 950 |
|
}, |
|
{ |
|
"dpo_loss": 0.2517485022544861, |
|
"epoch": 2.706660368445914, |
|
"grad_norm": 4.388748582328918, |
|
"learning_rate": 1.381542238487188e-07, |
|
"logits": -1.243085265159607, |
|
"logps": -88.0900650024414, |
|
"loss": 0.3837, |
|
"objective": 0.38995641469955444, |
|
"ranking_simple": 0.8041666746139526, |
|
"step": 955 |
|
}, |
|
{ |
|
"dpo_loss": 0.25272881984710693, |
|
"epoch": 2.720831365139348, |
|
"grad_norm": 5.315737788533154, |
|
"learning_rate": 1.2492645672457838e-07, |
|
"logits": -1.2224252223968506, |
|
"logps": -87.57622528076172, |
|
"loss": 0.3841, |
|
"objective": 0.39599618315696716, |
|
"ranking_simple": 0.7958333492279053, |
|
"step": 960 |
|
}, |
|
{ |
|
"dpo_loss": 0.22980408370494843, |
|
"epoch": 2.735002361832782, |
|
"grad_norm": 5.693843860804482, |
|
"learning_rate": 1.1234801099220787e-07, |
|
"logits": -1.2591173648834229, |
|
"logps": -87.08290100097656, |
|
"loss": 0.3861, |
|
"objective": 0.3821989893913269, |
|
"ranking_simple": 0.8500000238418579, |
|
"step": 965 |
|
}, |
|
{ |
|
"dpo_loss": 0.26472774147987366, |
|
"epoch": 2.7491733585262166, |
|
"grad_norm": 4.654019056163642, |
|
"learning_rate": 1.004223254730749e-07, |
|
"logits": -1.2617594003677368, |
|
"logps": -88.49696350097656, |
|
"loss": 0.3797, |
|
"objective": 0.39959490299224854, |
|
"ranking_simple": 0.8458333611488342, |
|
"step": 970 |
|
}, |
|
{ |
|
"dpo_loss": 0.22332945466041565, |
|
"epoch": 2.7633443552196506, |
|
"grad_norm": 5.199760236696295, |
|
"learning_rate": 8.915266053052374e-08, |
|
"logits": -1.1656602621078491, |
|
"logps": -86.72117614746094, |
|
"loss": 0.3766, |
|
"objective": 0.36580052971839905, |
|
"ranking_simple": 0.8083333373069763, |
|
"step": 975 |
|
}, |
|
{ |
|
"dpo_loss": 0.22700704634189606, |
|
"epoch": 2.7775153519130846, |
|
"grad_norm": 4.6165629384250435, |
|
"learning_rate": 7.854209717842231e-08, |
|
"logits": -1.3093292713165283, |
|
"logps": -87.9115982055664, |
|
"loss": 0.3728, |
|
"objective": 0.37029343843460083, |
|
"ranking_simple": 0.8291666507720947, |
|
"step": 980 |
|
}, |
|
{ |
|
"dpo_loss": 0.23392988741397858, |
|
"epoch": 2.7916863486065187, |
|
"grad_norm": 4.595783043627286, |
|
"learning_rate": 6.859353623884569e-08, |
|
"logits": -1.1338623762130737, |
|
"logps": -84.75395202636719, |
|
"loss": 0.3729, |
|
"objective": 0.3690672218799591, |
|
"ranking_simple": 0.8083333373069763, |
|
"step": 985 |
|
}, |
|
{ |
|
"dpo_loss": 0.24494783580303192, |
|
"epoch": 2.8058573452999527, |
|
"grad_norm": 4.754488601809905, |
|
"learning_rate": 5.930969754901844e-08, |
|
"logits": -1.1965256929397583, |
|
"logps": -88.29127502441406, |
|
"loss": 0.3801, |
|
"objective": 0.38465526700019836, |
|
"ranking_simple": 0.7749999761581421, |
|
"step": 990 |
|
}, |
|
{ |
|
"dpo_loss": 0.24409013986587524, |
|
"epoch": 2.820028341993387, |
|
"grad_norm": 5.4353357294596245, |
|
"learning_rate": 5.069311921774039e-08, |
|
"logits": -1.2917786836624146, |
|
"logps": -88.09101867675781, |
|
"loss": 0.3817, |
|
"objective": 0.3790687918663025, |
|
"ranking_simple": 0.7916666865348816, |
|
"step": 995 |
|
}, |
|
{ |
|
"dpo_loss": 0.24059538543224335, |
|
"epoch": 2.8341993386868207, |
|
"grad_norm": 4.725884973016709, |
|
"learning_rate": 4.2746156931490756e-08, |
|
"logits": -1.2047442197799683, |
|
"logps": -85.76768493652344, |
|
"loss": 0.3828, |
|
"objective": 0.3797774612903595, |
|
"ranking_simple": 0.8291666507720947, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.8341993386868207, |
|
"eval_dpo_loss": 0.6749303340911865, |
|
"eval_logits": -1.2945847511291504, |
|
"eval_logps": -92.32601928710938, |
|
"eval_loss": 0.7417021989822388, |
|
"eval_objective": 0.7355546355247498, |
|
"eval_ranking_simple": 0.5502070188522339, |
|
"eval_runtime": 369.447, |
|
"eval_samples_per_second": 15.672, |
|
"eval_steps_per_second": 1.307, |
|
"step": 1000 |
|
}, |
|
{ |
|
"dpo_loss": 0.23444737493991852, |
|
"epoch": 2.848370335380255, |
|
"grad_norm": 5.091471401481779, |
|
"learning_rate": 3.547098331040916e-08, |
|
"logits": -1.2024807929992676, |
|
"logps": -86.74796295166016, |
|
"loss": 0.382, |
|
"objective": 0.3817732632160187, |
|
"ranking_simple": 0.8041666746139526, |
|
"step": 1005 |
|
}, |
|
{ |
|
"dpo_loss": 0.2292069047689438, |
|
"epoch": 2.862541332073689, |
|
"grad_norm": 4.7403703778744815, |
|
"learning_rate": 2.8869587314321324e-08, |
|
"logits": -1.2810461521148682, |
|
"logps": -87.0550308227539, |
|
"loss": 0.379, |
|
"objective": 0.37782254815101624, |
|
"ranking_simple": 0.7791666388511658, |
|
"step": 1010 |
|
}, |
|
{ |
|
"dpo_loss": 0.2399337738752365, |
|
"epoch": 2.8767123287671232, |
|
"grad_norm": 5.329975170501087, |
|
"learning_rate": 2.2943773698977935e-08, |
|
"logits": -1.197946548461914, |
|
"logps": -85.78963470458984, |
|
"loss": 0.3813, |
|
"objective": 0.3823747932910919, |
|
"ranking_simple": 0.7916666865348816, |
|
"step": 1015 |
|
}, |
|
{ |
|
"dpo_loss": 0.23225615918636322, |
|
"epoch": 2.8908833254605573, |
|
"grad_norm": 4.141247666962683, |
|
"learning_rate": 1.7695162522652352e-08, |
|
"logits": -1.1521192789077759, |
|
"logps": -87.80615234375, |
|
"loss": 0.383, |
|
"objective": 0.3741118311882019, |
|
"ranking_simple": 0.75, |
|
"step": 1020 |
|
}, |
|
{ |
|
"dpo_loss": 0.24901318550109863, |
|
"epoch": 2.9050543221539913, |
|
"grad_norm": 4.809794853364304, |
|
"learning_rate": 1.3125188703233815e-08, |
|
"logits": -1.2677100896835327, |
|
"logps": -87.76679229736328, |
|
"loss": 0.3815, |
|
"objective": 0.3834156394004822, |
|
"ranking_simple": 0.8083333373069763, |
|
"step": 1025 |
|
}, |
|
{ |
|
"dpo_loss": 0.2290908843278885, |
|
"epoch": 2.9192253188474258, |
|
"grad_norm": 3.635733503543428, |
|
"learning_rate": 9.235101625932885e-09, |
|
"logits": -1.3011952638626099, |
|
"logps": -86.35026550292969, |
|
"loss": 0.3798, |
|
"objective": 0.3711520731449127, |
|
"ranking_simple": 0.762499988079071, |
|
"step": 1030 |
|
}, |
|
{ |
|
"dpo_loss": 0.23674419522285461, |
|
"epoch": 2.9333963155408598, |
|
"grad_norm": 4.9577484155223, |
|
"learning_rate": 6.025964801714412e-09, |
|
"logits": -1.2497870922088623, |
|
"logps": -88.07068634033203, |
|
"loss": 0.3866, |
|
"objective": 0.3858194053173065, |
|
"ranking_simple": 0.8083333373069763, |
|
"step": 1035 |
|
}, |
|
{ |
|
"dpo_loss": 0.23205497860908508, |
|
"epoch": 2.947567312234294, |
|
"grad_norm": 4.605543722963177, |
|
"learning_rate": 3.4986555765434415e-09, |
|
"logits": -1.2593365907669067, |
|
"logps": -88.1611099243164, |
|
"loss": 0.3798, |
|
"objective": 0.3837084472179413, |
|
"ranking_simple": 0.7875000238418579, |
|
"step": 1040 |
|
}, |
|
{ |
|
"dpo_loss": 0.24727579951286316, |
|
"epoch": 2.961738308927728, |
|
"grad_norm": 4.997984528026836, |
|
"learning_rate": 1.6538648915270794e-09, |
|
"logits": -1.2010886669158936, |
|
"logps": -90.2540512084961, |
|
"loss": 0.3835, |
|
"objective": 0.3843297064304352, |
|
"ranking_simple": 0.7833333611488342, |
|
"step": 1045 |
|
}, |
|
{ |
|
"dpo_loss": 0.23686116933822632, |
|
"epoch": 2.975909305621162, |
|
"grad_norm": 4.330333752032827, |
|
"learning_rate": 4.920970940180958e-10, |
|
"logits": -1.2512930631637573, |
|
"logps": -85.0908203125, |
|
"loss": 0.3743, |
|
"objective": 0.3781766891479492, |
|
"ranking_simple": 0.8166666626930237, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 2.975909305621162, |
|
"eval_dpo_loss": 0.6748924255371094, |
|
"eval_logits": -1.2948576211929321, |
|
"eval_logps": -92.3352279663086, |
|
"eval_loss": 0.7416455745697021, |
|
"eval_objective": 0.7355117201805115, |
|
"eval_ranking_simple": 0.5502070188522339, |
|
"eval_runtime": 370.411, |
|
"eval_samples_per_second": 15.631, |
|
"eval_steps_per_second": 1.304, |
|
"step": 1050 |
|
}, |
|
{ |
|
"dpo_loss": 0.25150495767593384, |
|
"epoch": 2.9900803023145963, |
|
"grad_norm": 5.061308546886394, |
|
"learning_rate": 1.3669799732163314e-11, |
|
"logits": -1.2227600812911987, |
|
"logps": -85.82769012451172, |
|
"loss": 0.3854, |
|
"objective": 0.38525307178497314, |
|
"ranking_simple": 0.7833333611488342, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 2.992914501653283, |
|
"step": 1056, |
|
"total_flos": 0.0, |
|
"train_loss": 0.4763158837220434, |
|
"train_runtime": 34658.3644, |
|
"train_samples_per_second": 4.397, |
|
"train_steps_per_second": 0.03 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 1056, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|