{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.992914501653283, "eval_steps": 50, "global_step": 1056, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "dpo_loss": 0.6931471824645996, "epoch": 0.002834199338686821, "grad_norm": 22.07642892977258, "learning_rate": 4.716981132075472e-08, "logits": -1.2867579460144043, "logps": -84.34933471679688, "loss": 0.7323, "objective": 0.7293278574943542, "ranking_simple": 0.5833333134651184, "step": 1 }, { "dpo_loss": 0.6926142573356628, "epoch": 0.014170996693434105, "grad_norm": 21.921066098011842, "learning_rate": 2.358490566037736e-07, "logits": -1.4302740097045898, "logps": -83.70521545410156, "loss": 0.7347, "objective": 0.7329134345054626, "ranking_simple": 0.4895833432674408, "step": 5 }, { "dpo_loss": 0.6933749318122864, "epoch": 0.02834199338686821, "grad_norm": 23.783506033511284, "learning_rate": 4.716981132075472e-07, "logits": -1.4030728340148926, "logps": -83.88999938964844, "loss": 0.7338, "objective": 0.733830451965332, "ranking_simple": 0.5833333134651184, "step": 10 }, { "dpo_loss": 0.687871515750885, "epoch": 0.042512990080302314, "grad_norm": 22.035924243469346, "learning_rate": 7.075471698113208e-07, "logits": -1.5375704765319824, "logps": -82.89444732666016, "loss": 0.7315, "objective": 0.7324588894844055, "ranking_simple": 0.574999988079071, "step": 15 }, { "dpo_loss": 0.6827042102813721, "epoch": 0.05668398677373642, "grad_norm": 20.603492069244336, "learning_rate": 9.433962264150944e-07, "logits": -1.377241611480713, "logps": -81.56272888183594, "loss": 0.726, "objective": 0.7265715003013611, "ranking_simple": 0.48750001192092896, "step": 20 }, { "dpo_loss": 0.6807191967964172, "epoch": 0.07085498346717052, "grad_norm": 19.555842775301652, "learning_rate": 1.179245283018868e-06, "logits": -1.4146537780761719, "logps": -81.54867553710938, "loss": 0.7175, "objective": 0.7202857732772827, "ranking_simple": 0.5708333253860474, "step": 25 }, { "dpo_loss": 0.6734613180160522, "epoch": 0.08502598016060463, "grad_norm": 19.538210305976264, "learning_rate": 1.4150943396226415e-06, "logits": -1.5176372528076172, "logps": -83.47260284423828, "loss": 0.7164, "objective": 0.7164552807807922, "ranking_simple": 0.48750001192092896, "step": 30 }, { "dpo_loss": 0.669752836227417, "epoch": 0.09919697685403873, "grad_norm": 21.78747337027822, "learning_rate": 1.650943396226415e-06, "logits": -1.5162482261657715, "logps": -83.25272369384766, "loss": 0.7048, "objective": 0.70932936668396, "ranking_simple": 0.5375000238418579, "step": 35 }, { "dpo_loss": 0.6691684722900391, "epoch": 0.11336797354747284, "grad_norm": 22.101328341871838, "learning_rate": 1.8867924528301889e-06, "logits": -1.522802710533142, "logps": -81.73042297363281, "loss": 0.6976, "objective": 0.7093203663825989, "ranking_simple": 0.5041666626930237, "step": 40 }, { "dpo_loss": 0.6488091945648193, "epoch": 0.12753897024090693, "grad_norm": 18.71576103393132, "learning_rate": 2.1226415094339624e-06, "logits": -1.530007004737854, "logps": -80.75597381591797, "loss": 0.6962, "objective": 0.6907859444618225, "ranking_simple": 0.5208333134651184, "step": 45 }, { "dpo_loss": 0.6503397822380066, "epoch": 0.14170996693434104, "grad_norm": 19.644583345415946, "learning_rate": 2.358490566037736e-06, "logits": -1.5319806337356567, "logps": -83.5707015991211, "loss": 0.6826, "objective": 0.6971887946128845, "ranking_simple": 0.5166666507720947, "step": 50 }, { "epoch": 0.14170996693434104, "eval_dpo_loss": 0.6827681660652161, "eval_logits": -1.5367929935455322, "eval_logps": -92.3091812133789, "eval_loss": 0.7254260182380676, "eval_objective": 0.7272183299064636, "eval_ranking_simple": 0.5279502868652344, "eval_runtime": 368.8841, "eval_samples_per_second": 15.696, "eval_steps_per_second": 1.309, "step": 50 }, { "dpo_loss": 0.6309434771537781, "epoch": 0.15588096362777515, "grad_norm": 20.382972919722654, "learning_rate": 2.5943396226415095e-06, "logits": -1.5233420133590698, "logps": -86.92516326904297, "loss": 0.6735, "objective": 0.6735023260116577, "ranking_simple": 0.5291666388511658, "step": 55 }, { "dpo_loss": 0.6313819885253906, "epoch": 0.17005196032120926, "grad_norm": 18.13044483387993, "learning_rate": 2.830188679245283e-06, "logits": -1.4574053287506104, "logps": -87.16276550292969, "loss": 0.6756, "objective": 0.680309534072876, "ranking_simple": 0.5666666626930237, "step": 60 }, { "dpo_loss": 0.5997079610824585, "epoch": 0.18422295701464336, "grad_norm": 18.96425221102233, "learning_rate": 3.0660377358490567e-06, "logits": -1.554844617843628, "logps": -83.63428497314453, "loss": 0.6591, "objective": 0.6451537609100342, "ranking_simple": 0.6000000238418579, "step": 65 }, { "dpo_loss": 0.585650622844696, "epoch": 0.19839395370807747, "grad_norm": 17.70445379457848, "learning_rate": 3.30188679245283e-06, "logits": -1.6170697212219238, "logps": -86.24830627441406, "loss": 0.6426, "objective": 0.6370573043823242, "ranking_simple": 0.574999988079071, "step": 70 }, { "dpo_loss": 0.61463463306427, "epoch": 0.21256495040151158, "grad_norm": 21.475097381950583, "learning_rate": 3.5377358490566038e-06, "logits": -1.5737711191177368, "logps": -88.68922424316406, "loss": 0.6607, "objective": 0.6711177229881287, "ranking_simple": 0.5833333134651184, "step": 75 }, { "dpo_loss": 0.5694079995155334, "epoch": 0.22673594709494568, "grad_norm": 19.818901321925456, "learning_rate": 3.7735849056603777e-06, "logits": -1.6495484113693237, "logps": -86.88624572753906, "loss": 0.6294, "objective": 0.6182950139045715, "ranking_simple": 0.6291666626930237, "step": 80 }, { "dpo_loss": 0.5784618854522705, "epoch": 0.2409069437883798, "grad_norm": 15.843708271810604, "learning_rate": 4.009433962264152e-06, "logits": -1.6493085622787476, "logps": -86.69010162353516, "loss": 0.637, "objective": 0.6347183585166931, "ranking_simple": 0.5958333611488342, "step": 85 }, { "dpo_loss": 0.5171253085136414, "epoch": 0.25507794048181387, "grad_norm": 15.928618564308922, "learning_rate": 4.245283018867925e-06, "logits": -1.7159606218338013, "logps": -87.39143371582031, "loss": 0.624, "objective": 0.5702006816864014, "ranking_simple": 0.625, "step": 90 }, { "dpo_loss": 0.5523179173469543, "epoch": 0.269248937175248, "grad_norm": 17.73357042390042, "learning_rate": 4.481132075471699e-06, "logits": -1.6336411237716675, "logps": -92.6693344116211, "loss": 0.6238, "objective": 0.6184098720550537, "ranking_simple": 0.5874999761581421, "step": 95 }, { "dpo_loss": 0.5685542821884155, "epoch": 0.2834199338686821, "grad_norm": 19.615233623313884, "learning_rate": 4.716981132075472e-06, "logits": -1.5585684776306152, "logps": -92.34257507324219, "loss": 0.6518, "objective": 0.6242761015892029, "ranking_simple": 0.5916666388511658, "step": 100 }, { "epoch": 0.2834199338686821, "eval_dpo_loss": 0.6727089285850525, "eval_logits": -1.6149238348007202, "eval_logps": -99.43962097167969, "eval_loss": 0.7250885367393494, "eval_objective": 0.7208768725395203, "eval_ranking_simple": 0.5383023023605347, "eval_runtime": 370.4436, "eval_samples_per_second": 15.63, "eval_steps_per_second": 1.304, "step": 100 }, { "dpo_loss": 0.5984646677970886, "epoch": 0.2975909305621162, "grad_norm": 17.951005909363573, "learning_rate": 4.952830188679246e-06, "logits": -1.680674433708191, "logps": -93.13371276855469, "loss": 0.6355, "objective": 0.6607668399810791, "ranking_simple": 0.625, "step": 105 }, { "dpo_loss": 0.5449077486991882, "epoch": 0.3117619272555503, "grad_norm": 15.66083104197914, "learning_rate": 4.999781286194085e-06, "logits": -1.5491271018981934, "logps": -89.25350189208984, "loss": 0.618, "objective": 0.6060856580734253, "ranking_simple": 0.5916666388511658, "step": 110 }, { "dpo_loss": 0.5834077596664429, "epoch": 0.32593292394898443, "grad_norm": 16.011885176044043, "learning_rate": 4.998892826944418e-06, "logits": -1.4953746795654297, "logps": -86.22123718261719, "loss": 0.6324, "objective": 0.6475391387939453, "ranking_simple": 0.5916666388511658, "step": 115 }, { "dpo_loss": 0.5353394746780396, "epoch": 0.3401039206424185, "grad_norm": 15.412706668770728, "learning_rate": 4.997321195347154e-06, "logits": -1.4954169988632202, "logps": -86.4836654663086, "loss": 0.6279, "objective": 0.5993959903717041, "ranking_simple": 0.6208333373069763, "step": 120 }, { "dpo_loss": 0.5647523999214172, "epoch": 0.35427491733585265, "grad_norm": 15.94277628603713, "learning_rate": 4.9950668210706795e-06, "logits": -1.5816553831100464, "logps": -87.99699401855469, "loss": 0.6255, "objective": 0.6341190934181213, "ranking_simple": 0.5874999761581421, "step": 125 }, { "dpo_loss": 0.537562906742096, "epoch": 0.3684459140292867, "grad_norm": 13.361047111116015, "learning_rate": 4.992130320438411e-06, "logits": -1.486953854560852, "logps": -82.83094787597656, "loss": 0.626, "objective": 0.6004360318183899, "ranking_simple": 0.6291666626930237, "step": 130 }, { "dpo_loss": 0.5634739398956299, "epoch": 0.3826169107227208, "grad_norm": 14.998158738648598, "learning_rate": 4.988512496260302e-06, "logits": -1.3754876852035522, "logps": -82.95647430419922, "loss": 0.617, "objective": 0.6264640092849731, "ranking_simple": 0.5958333611488342, "step": 135 }, { "dpo_loss": 0.5888128876686096, "epoch": 0.39678790741615494, "grad_norm": 17.662529862358365, "learning_rate": 4.984214337613357e-06, "logits": -1.298575520515442, "logps": -82.97203826904297, "loss": 0.6332, "objective": 0.6609014868736267, "ranking_simple": 0.612500011920929, "step": 140 }, { "dpo_loss": 0.5268335342407227, "epoch": 0.410958904109589, "grad_norm": 15.28899400178431, "learning_rate": 4.979237019571235e-06, "logits": -1.3319251537322998, "logps": -83.20392608642578, "loss": 0.6108, "objective": 0.5984794497489929, "ranking_simple": 0.699999988079071, "step": 145 }, { "dpo_loss": 0.5154529213905334, "epoch": 0.42512990080302315, "grad_norm": 14.256755917631622, "learning_rate": 4.97358190288299e-06, "logits": -1.389101505279541, "logps": -84.82242584228516, "loss": 0.5964, "objective": 0.5867258310317993, "ranking_simple": 0.5874999761581421, "step": 150 }, { "epoch": 0.42512990080302315, "eval_dpo_loss": 0.6714372634887695, "eval_logits": -1.4408866167068481, "eval_logps": -90.21656036376953, "eval_loss": 0.7329566478729248, "eval_objective": 0.7246823310852051, "eval_ranking_simple": 0.5408902764320374, "eval_runtime": 368.6547, "eval_samples_per_second": 15.706, "eval_steps_per_second": 1.31, "step": 150 }, { "dpo_loss": 0.509684681892395, "epoch": 0.43930089749645723, "grad_norm": 15.222485168209062, "learning_rate": 4.967250533601059e-06, "logits": -1.5175641775131226, "logps": -85.33422088623047, "loss": 0.6038, "objective": 0.5891249179840088, "ranking_simple": 0.6208333373069763, "step": 155 }, { "dpo_loss": 0.5627656579017639, "epoch": 0.45347189418989137, "grad_norm": 14.37066166651516, "learning_rate": 4.9602446426585845e-06, "logits": -1.444894552230835, "logps": -85.82301330566406, "loss": 0.6235, "objective": 0.6422544121742249, "ranking_simple": 0.6208333373069763, "step": 160 }, { "dpo_loss": 0.5278021693229675, "epoch": 0.46764289088332545, "grad_norm": 13.248877681668944, "learning_rate": 4.952566145396197e-06, "logits": -1.513926386833191, "logps": -86.929443359375, "loss": 0.6091, "objective": 0.6052799820899963, "ranking_simple": 0.612500011920929, "step": 165 }, { "dpo_loss": 0.5260218977928162, "epoch": 0.4818138875767596, "grad_norm": 11.286148523723739, "learning_rate": 4.944217141038379e-06, "logits": -1.5067484378814697, "logps": -87.19461059570312, "loss": 0.5865, "objective": 0.5946651697158813, "ranking_simple": 0.5791666507720947, "step": 170 }, { "dpo_loss": 0.532828688621521, "epoch": 0.49598488427019366, "grad_norm": 13.880514840470576, "learning_rate": 4.935199912119558e-06, "logits": -1.3940012454986572, "logps": -87.2088851928711, "loss": 0.5986, "objective": 0.6150615811347961, "ranking_simple": 0.6416666507720947, "step": 175 }, { "dpo_loss": 0.5224838852882385, "epoch": 0.5101558809636277, "grad_norm": 13.177690520129488, "learning_rate": 4.925516923860083e-06, "logits": -1.4137970209121704, "logps": -87.7735366821289, "loss": 0.572, "objective": 0.609702467918396, "ranking_simple": 0.6000000238418579, "step": 180 }, { "dpo_loss": 0.485040545463562, "epoch": 0.5243268776570619, "grad_norm": 12.914253483146682, "learning_rate": 4.9151708234922605e-06, "logits": -1.4367740154266357, "logps": -85.89669036865234, "loss": 0.566, "objective": 0.5699875950813293, "ranking_simple": 0.6541666388511658, "step": 185 }, { "dpo_loss": 0.5013444423675537, "epoch": 0.538497874350496, "grad_norm": 11.870478509302453, "learning_rate": 4.904164439536626e-06, "logits": -1.4781168699264526, "logps": -84.88081359863281, "loss": 0.5937, "objective": 0.5846849083900452, "ranking_simple": 0.675000011920929, "step": 190 }, { "dpo_loss": 0.47530683875083923, "epoch": 0.5526688710439301, "grad_norm": 13.38945439253625, "learning_rate": 4.8925007810286555e-06, "logits": -1.5154672861099243, "logps": -84.33358001708984, "loss": 0.6047, "objective": 0.5678978562355042, "ranking_simple": 0.6583333611488342, "step": 195 }, { "dpo_loss": 0.49561557173728943, "epoch": 0.5668398677373642, "grad_norm": 11.74052506226232, "learning_rate": 4.880183036696123e-06, "logits": -1.5464282035827637, "logps": -85.40785217285156, "loss": 0.5794, "objective": 0.5721699595451355, "ranking_simple": 0.6333333253860474, "step": 200 }, { "epoch": 0.5668398677373642, "eval_dpo_loss": 0.6857984662055969, "eval_logits": -1.5459333658218384, "eval_logps": -90.88076782226562, "eval_loss": 0.7543493509292603, "eval_objective": 0.743674099445343, "eval_ranking_simple": 0.5377846956253052, "eval_runtime": 368.3702, "eval_samples_per_second": 15.718, "eval_steps_per_second": 1.311, "step": 200 }, { "dpo_loss": 0.49015435576438904, "epoch": 0.5810108644307983, "grad_norm": 13.14364469960144, "learning_rate": 4.867214574087338e-06, "logits": -1.4027189016342163, "logps": -88.2087631225586, "loss": 0.5739, "objective": 0.5810206532478333, "ranking_simple": 0.625, "step": 205 }, { "dpo_loss": 0.4410454034805298, "epoch": 0.5951818611242324, "grad_norm": 13.072326727623208, "learning_rate": 4.853598938650487e-06, "logits": -1.4029525518417358, "logps": -86.05158996582031, "loss": 0.5515, "objective": 0.52931809425354, "ranking_simple": 0.6708333492279053, "step": 210 }, { "dpo_loss": 0.47003600001335144, "epoch": 0.6093528578176665, "grad_norm": 12.681896576326764, "learning_rate": 4.8393398527643495e-06, "logits": -1.4985297918319702, "logps": -84.5003433227539, "loss": 0.5691, "objective": 0.5656867623329163, "ranking_simple": 0.6416666507720947, "step": 215 }, { "dpo_loss": 0.4842369854450226, "epoch": 0.6235238545111006, "grad_norm": 12.185860749706057, "learning_rate": 4.824441214720629e-06, "logits": -1.5714740753173828, "logps": -84.36730194091797, "loss": 0.5581, "objective": 0.5698367953300476, "ranking_simple": 0.6291666626930237, "step": 220 }, { "dpo_loss": 0.499014288187027, "epoch": 0.6376948512045347, "grad_norm": 11.86860643002718, "learning_rate": 4.808907097658205e-06, "logits": -1.5924923419952393, "logps": -84.66715240478516, "loss": 0.5884, "objective": 0.5895112752914429, "ranking_simple": 0.6083333492279053, "step": 225 }, { "dpo_loss": 0.45395275950431824, "epoch": 0.6518658478979689, "grad_norm": 11.069924512206331, "learning_rate": 4.7927417484495756e-06, "logits": -1.4241927862167358, "logps": -83.54296112060547, "loss": 0.5726, "objective": 0.5383394956588745, "ranking_simple": 0.6791666746139526, "step": 230 }, { "dpo_loss": 0.491834819316864, "epoch": 0.6660368445914029, "grad_norm": 10.918136207544526, "learning_rate": 4.7759495865398035e-06, "logits": -1.3152087926864624, "logps": -84.92122650146484, "loss": 0.5635, "objective": 0.575198233127594, "ranking_simple": 0.6083333492279053, "step": 235 }, { "dpo_loss": 0.4870225191116333, "epoch": 0.680207841284837, "grad_norm": 11.379260144258156, "learning_rate": 4.758535202738287e-06, "logits": -1.5095748901367188, "logps": -87.46720123291016, "loss": 0.5672, "objective": 0.5821055173873901, "ranking_simple": 0.6708333492279053, "step": 240 }, { "dpo_loss": 0.541431725025177, "epoch": 0.6943788379782712, "grad_norm": 12.918426431601308, "learning_rate": 4.740503357963676e-06, "logits": -1.5333224534988403, "logps": -86.5069808959961, "loss": 0.5804, "objective": 0.6269174218177795, "ranking_simple": 0.6666666865348816, "step": 245 }, { "dpo_loss": 0.46632593870162964, "epoch": 0.7085498346717053, "grad_norm": 11.479829992169247, "learning_rate": 4.721858981942284e-06, "logits": -1.311442494392395, "logps": -82.5728530883789, "loss": 0.5802, "objective": 0.559305727481842, "ranking_simple": 0.6791666746139526, "step": 250 }, { "epoch": 0.7085498346717053, "eval_dpo_loss": 0.6874057650566101, "eval_logits": -1.5326449871063232, "eval_logps": -86.87519073486328, "eval_loss": 0.7558908462524414, "eval_objective": 0.7459491491317749, "eval_ranking_simple": 0.5403726696968079, "eval_runtime": 368.3582, "eval_samples_per_second": 15.718, "eval_steps_per_second": 1.311, "step": 250 }, { "dpo_loss": 0.4774031341075897, "epoch": 0.7227208313651393, "grad_norm": 9.487597580618754, "learning_rate": 4.702607171860354e-06, "logits": -1.5363638401031494, "logps": -80.10908508300781, "loss": 0.5586, "objective": 0.5592390894889832, "ranking_simple": 0.6291666626930237, "step": 255 }, { "dpo_loss": 0.4512685537338257, "epoch": 0.7368918280585735, "grad_norm": 10.042345370580236, "learning_rate": 4.682753190970533e-06, "logits": -1.5129272937774658, "logps": -81.99565887451172, "loss": 0.5574, "objective": 0.5405219793319702, "ranking_simple": 0.6333333253860474, "step": 260 }, { "dpo_loss": 0.4361381232738495, "epoch": 0.7510628247520076, "grad_norm": 10.77799422518219, "learning_rate": 4.6623024671529555e-06, "logits": -1.470523715019226, "logps": -81.5511703491211, "loss": 0.5503, "objective": 0.5269137620925903, "ranking_simple": 0.7124999761581421, "step": 265 }, { "dpo_loss": 0.48189878463745117, "epoch": 0.7652338214454416, "grad_norm": 12.181902084596548, "learning_rate": 4.641260591431315e-06, "logits": -1.5885664224624634, "logps": -82.85069274902344, "loss": 0.5554, "objective": 0.5691978931427002, "ranking_simple": 0.6416666507720947, "step": 270 }, { "dpo_loss": 0.47639578580856323, "epoch": 0.7794048181388757, "grad_norm": 10.23476141708296, "learning_rate": 4.61963331644433e-06, "logits": -1.4604405164718628, "logps": -82.8023681640625, "loss": 0.5548, "objective": 0.55946284532547, "ranking_simple": 0.637499988079071, "step": 275 }, { "dpo_loss": 0.43256130814552307, "epoch": 0.7935758148323099, "grad_norm": 10.705607992682587, "learning_rate": 4.597426554873037e-06, "logits": -1.5027648210525513, "logps": -83.16519927978516, "loss": 0.5354, "objective": 0.5194212794303894, "ranking_simple": 0.6708333492279053, "step": 280 }, { "dpo_loss": 0.45488685369491577, "epoch": 0.807746811525744, "grad_norm": 11.614191556355545, "learning_rate": 4.574646377824316e-06, "logits": -1.4621371030807495, "logps": -84.6886215209961, "loss": 0.5375, "objective": 0.539017379283905, "ranking_simple": 0.6000000238418579, "step": 285 }, { "dpo_loss": 0.4303934872150421, "epoch": 0.821917808219178, "grad_norm": 11.206240309054008, "learning_rate": 4.551299013171111e-06, "logits": -1.4472154378890991, "logps": -83.35670471191406, "loss": 0.5305, "objective": 0.5253880620002747, "ranking_simple": 0.6958333253860474, "step": 290 }, { "dpo_loss": 0.43744438886642456, "epoch": 0.8360888049126122, "grad_norm": 9.074371483374543, "learning_rate": 4.5273908438498e-06, "logits": -1.506995677947998, "logps": -84.82511138916016, "loss": 0.5517, "objective": 0.5335044860839844, "ranking_simple": 0.637499988079071, "step": 295 }, { "dpo_loss": 0.4651775360107422, "epoch": 0.8502598016060463, "grad_norm": 9.01878047466114, "learning_rate": 4.502928406115152e-06, "logits": -1.4889142513275146, "logps": -84.10907745361328, "loss": 0.5473, "objective": 0.5568612217903137, "ranking_simple": 0.6666666865348816, "step": 300 }, { "epoch": 0.8502598016060463, "eval_dpo_loss": 0.6780434846878052, "eval_logits": -1.5369890928268433, "eval_logps": -92.34797668457031, "eval_loss": 0.7457044720649719, "eval_objective": 0.7388522028923035, "eval_ranking_simple": 0.5486542582511902, "eval_runtime": 369.8452, "eval_samples_per_second": 15.655, "eval_steps_per_second": 1.306, "step": 300 }, { "dpo_loss": 0.47844478487968445, "epoch": 0.8644307982994804, "grad_norm": 10.005579661781612, "learning_rate": 4.477918387753388e-06, "logits": -1.4307849407196045, "logps": -86.45662689208984, "loss": 0.5252, "objective": 0.5761564373970032, "ranking_simple": 0.7083333134651184, "step": 305 }, { "dpo_loss": 0.40813323855400085, "epoch": 0.8786017949929145, "grad_norm": 10.307837911346382, "learning_rate": 4.452367626253805e-06, "logits": -1.3825361728668213, "logps": -86.61471557617188, "loss": 0.5225, "objective": 0.5045412182807922, "ranking_simple": 0.6916666626930237, "step": 310 }, { "dpo_loss": 0.42051681876182556, "epoch": 0.8927727916863486, "grad_norm": 10.459183690361446, "learning_rate": 4.426283106939474e-06, "logits": -1.3151206970214844, "logps": -85.77119445800781, "loss": 0.5426, "objective": 0.513097882270813, "ranking_simple": 0.637499988079071, "step": 315 }, { "dpo_loss": 0.41688165068626404, "epoch": 0.9069437883797827, "grad_norm": 11.302451754279112, "learning_rate": 4.399671961057523e-06, "logits": -1.2146058082580566, "logps": -87.50178527832031, "loss": 0.5607, "objective": 0.5194131731987, "ranking_simple": 0.7166666388511658, "step": 320 }, { "dpo_loss": 0.4247673749923706, "epoch": 0.9211147850732169, "grad_norm": 9.112687814178505, "learning_rate": 4.372541463829524e-06, "logits": -1.2642977237701416, "logps": -86.85433959960938, "loss": 0.5281, "objective": 0.5294176340103149, "ranking_simple": 0.6875, "step": 325 }, { "dpo_loss": 0.3975774645805359, "epoch": 0.9352857817666509, "grad_norm": 10.380051826599395, "learning_rate": 4.3448990324625244e-06, "logits": -1.1297107934951782, "logps": -85.07998657226562, "loss": 0.5304, "objective": 0.5025947690010071, "ranking_simple": 0.7041666507720947, "step": 330 }, { "dpo_loss": 0.4059672951698303, "epoch": 0.949456778460085, "grad_norm": 9.237583761740382, "learning_rate": 4.316752224121252e-06, "logits": -1.118418574333191, "logps": -84.82205200195312, "loss": 0.5208, "objective": 0.499520480632782, "ranking_simple": 0.7041666507720947, "step": 335 }, { "dpo_loss": 0.4579353332519531, "epoch": 0.9636277751535192, "grad_norm": 9.19069599968856, "learning_rate": 4.288108733862064e-06, "logits": -1.200610876083374, "logps": -83.12140655517578, "loss": 0.5327, "objective": 0.5504066348075867, "ranking_simple": 0.6583333611488342, "step": 340 }, { "dpo_loss": 0.4034007489681244, "epoch": 0.9777987718469532, "grad_norm": 9.024982300418905, "learning_rate": 4.2589763925291924e-06, "logits": -1.2796326875686646, "logps": -81.1929702758789, "loss": 0.5202, "objective": 0.4997580945491791, "ranking_simple": 0.699999988079071, "step": 345 }, { "dpo_loss": 0.3821704089641571, "epoch": 0.9919697685403873, "grad_norm": 10.161439402885936, "learning_rate": 4.229363164613874e-06, "logits": -1.2219146490097046, "logps": -84.29117584228516, "loss": 0.5104, "objective": 0.4823973476886749, "ranking_simple": 0.7708333134651184, "step": 350 }, { "epoch": 0.9919697685403873, "eval_dpo_loss": 0.6766201257705688, "eval_logits": -1.3364328145980835, "eval_logps": -88.19395446777344, "eval_loss": 0.7516013979911804, "eval_objective": 0.7371890544891357, "eval_ranking_simple": 0.5429606437683105, "eval_runtime": 368.1018, "eval_samples_per_second": 15.729, "eval_steps_per_second": 1.312, "step": 350 }, { "dpo_loss": 0.34969767928123474, "epoch": 1.0061407652338215, "grad_norm": 6.289440966432375, "learning_rate": 4.199277146076933e-06, "logits": -1.3432971239089966, "logps": -82.10868072509766, "loss": 0.4782, "objective": 0.4609685242176056, "ranking_simple": 0.7458333373069763, "step": 355 }, { "dpo_loss": 0.33886921405792236, "epoch": 1.0203117619272555, "grad_norm": 8.403207409711419, "learning_rate": 4.168726562135432e-06, "logits": -1.2731363773345947, "logps": -83.29503631591797, "loss": 0.4406, "objective": 0.4606216251850128, "ranking_simple": 0.7208333611488342, "step": 360 }, { "dpo_loss": 0.2881692051887512, "epoch": 1.0344827586206897, "grad_norm": 6.468809468648868, "learning_rate": 4.137719765013974e-06, "logits": -1.298916220664978, "logps": -84.39533233642578, "loss": 0.4218, "objective": 0.4092879295349121, "ranking_simple": 0.7416666746139526, "step": 365 }, { "dpo_loss": 0.31373023986816406, "epoch": 1.0486537553141237, "grad_norm": 10.120074264563044, "learning_rate": 4.106265231661292e-06, "logits": -1.257562518119812, "logps": -81.22982788085938, "loss": 0.4476, "objective": 0.4375106990337372, "ranking_simple": 0.7333333492279053, "step": 370 }, { "dpo_loss": 0.3089008033275604, "epoch": 1.0628247520075578, "grad_norm": 7.557866547749679, "learning_rate": 4.074371561432731e-06, "logits": -1.3321465253829956, "logps": -83.06178283691406, "loss": 0.4494, "objective": 0.4439465403556824, "ranking_simple": 0.762499988079071, "step": 375 }, { "dpo_loss": 0.3182891309261322, "epoch": 1.076995748700992, "grad_norm": 7.153131025534419, "learning_rate": 4.042047473739278e-06, "logits": -1.2969304323196411, "logps": -84.80577087402344, "loss": 0.4362, "objective": 0.4368627667427063, "ranking_simple": 0.7833333611488342, "step": 380 }, { "dpo_loss": 0.33301401138305664, "epoch": 1.091166745394426, "grad_norm": 7.370055629692345, "learning_rate": 4.009301805663752e-06, "logits": -1.1952742338180542, "logps": -83.80043029785156, "loss": 0.4555, "objective": 0.4633476436138153, "ranking_simple": 0.7291666865348816, "step": 385 }, { "dpo_loss": 0.3350224494934082, "epoch": 1.10533774208786, "grad_norm": 6.621937340714611, "learning_rate": 3.976143509544843e-06, "logits": -1.144229531288147, "logps": -85.45323944091797, "loss": 0.4405, "objective": 0.4582778811454773, "ranking_simple": 0.762499988079071, "step": 390 }, { "dpo_loss": 0.2885909676551819, "epoch": 1.1195087387812943, "grad_norm": 7.285249021586949, "learning_rate": 3.9425816505296254e-06, "logits": -1.1913460493087769, "logps": -84.71748352050781, "loss": 0.4457, "objective": 0.40855225920677185, "ranking_simple": 0.7708333134651184, "step": 395 }, { "dpo_loss": 0.3148750960826874, "epoch": 1.1336797354747283, "grad_norm": 7.611492036237173, "learning_rate": 3.908625404095242e-06, "logits": -1.2308555841445923, "logps": -82.81310272216797, "loss": 0.4425, "objective": 0.4325387179851532, "ranking_simple": 0.7583333253860474, "step": 400 }, { "epoch": 1.1336797354747283, "eval_dpo_loss": 0.6866451501846313, "eval_logits": -1.2226030826568604, "eval_logps": -88.75952911376953, "eval_loss": 0.7568410634994507, "eval_objective": 0.7489107251167297, "eval_ranking_simple": 0.5439958572387695, "eval_runtime": 368.4645, "eval_samples_per_second": 15.714, "eval_steps_per_second": 1.311, "step": 400 }, { "dpo_loss": 0.35161828994750977, "epoch": 1.1478507321681626, "grad_norm": 7.5393253270016265, "learning_rate": 3.8742840535404155e-06, "logits": -1.1856341361999512, "logps": -84.8878173828125, "loss": 0.4461, "objective": 0.47060316801071167, "ranking_simple": 0.7166666388511658, "step": 405 }, { "dpo_loss": 0.338571161031723, "epoch": 1.1620217288615966, "grad_norm": 5.801736886678098, "learning_rate": 3.839566987447492e-06, "logits": -1.2132126092910767, "logps": -85.20359802246094, "loss": 0.4339, "objective": 0.46073082089424133, "ranking_simple": 0.7333333492279053, "step": 410 }, { "dpo_loss": 0.3313528895378113, "epoch": 1.1761927255550306, "grad_norm": 7.544361991295727, "learning_rate": 3.8044836971156935e-06, "logits": -1.1481475830078125, "logps": -84.14738464355469, "loss": 0.4504, "objective": 0.45183154940605164, "ranking_simple": 0.6833333373069763, "step": 415 }, { "dpo_loss": 0.31563931703567505, "epoch": 1.1903637222484649, "grad_norm": 6.99992341058847, "learning_rate": 3.7690437739662928e-06, "logits": -1.1214243173599243, "logps": -84.69954681396484, "loss": 0.4436, "objective": 0.43789881467819214, "ranking_simple": 0.7166666388511658, "step": 420 }, { "dpo_loss": 0.3403506278991699, "epoch": 1.204534718941899, "grad_norm": 7.423010911616055, "learning_rate": 3.7332569069204127e-06, "logits": -1.179802417755127, "logps": -83.71639251708984, "loss": 0.4526, "objective": 0.46505507826805115, "ranking_simple": 0.7250000238418579, "step": 425 }, { "dpo_loss": 0.3509117066860199, "epoch": 1.2187057156353331, "grad_norm": 7.343865460650952, "learning_rate": 3.697132879750174e-06, "logits": -1.190435528755188, "logps": -86.54014587402344, "loss": 0.4481, "objective": 0.4735199213027954, "ranking_simple": 0.6958333253860474, "step": 430 }, { "dpo_loss": 0.3211102783679962, "epoch": 1.2328767123287672, "grad_norm": 8.246553327807998, "learning_rate": 3.6606815684039098e-06, "logits": -1.1106759309768677, "logps": -84.52306365966797, "loss": 0.4591, "objective": 0.4374840557575226, "ranking_simple": 0.7791666388511658, "step": 435 }, { "dpo_loss": 0.3193610608577728, "epoch": 1.2470477090222012, "grad_norm": 8.720099662621916, "learning_rate": 3.6239129383061764e-06, "logits": -1.1405307054519653, "logps": -85.81301879882812, "loss": 0.4554, "objective": 0.44526031613349915, "ranking_simple": 0.7958333492279053, "step": 440 }, { "dpo_loss": 0.3264698088169098, "epoch": 1.2612187057156352, "grad_norm": 7.381389964368484, "learning_rate": 3.586837041633312e-06, "logits": -1.1591824293136597, "logps": -86.23896026611328, "loss": 0.4648, "objective": 0.45790377259254456, "ranking_simple": 0.7875000238418579, "step": 445 }, { "dpo_loss": 0.32597142457962036, "epoch": 1.2753897024090695, "grad_norm": 7.58156282251525, "learning_rate": 3.5494640145652647e-06, "logits": -1.3325276374816895, "logps": -85.49541473388672, "loss": 0.4544, "objective": 0.456112265586853, "ranking_simple": 0.7791666388511658, "step": 450 }, { "epoch": 1.2753897024090695, "eval_dpo_loss": 0.6750319004058838, "eval_logits": -1.3088560104370117, "eval_logps": -90.05509185791016, "eval_loss": 0.7455039620399475, "eval_objective": 0.736483633518219, "eval_ranking_simple": 0.5481366515159607, "eval_runtime": 373.8032, "eval_samples_per_second": 15.489, "eval_steps_per_second": 1.292, "step": 450 }, { "dpo_loss": 0.3323071599006653, "epoch": 1.2895606991025035, "grad_norm": 7.576847549255986, "learning_rate": 3.511804074514468e-06, "logits": -1.1815721988677979, "logps": -86.30281829833984, "loss": 0.4633, "objective": 0.45755326747894287, "ranking_simple": 0.7291666865348816, "step": 455 }, { "dpo_loss": 0.32099449634552, "epoch": 1.3037316957959377, "grad_norm": 7.0611969685720934, "learning_rate": 3.4738675173325008e-06, "logits": -1.3054893016815186, "logps": -85.12818145751953, "loss": 0.449, "objective": 0.4495506286621094, "ranking_simple": 0.7166666388511658, "step": 460 }, { "dpo_loss": 0.3063493072986603, "epoch": 1.3179026924893718, "grad_norm": 7.25813572886754, "learning_rate": 3.435664714495301e-06, "logits": -1.3341224193572998, "logps": -82.83889770507812, "loss": 0.4533, "objective": 0.4265134036540985, "ranking_simple": 0.737500011920929, "step": 465 }, { "dpo_loss": 0.3599289655685425, "epoch": 1.3320736891828058, "grad_norm": 7.324136192857674, "learning_rate": 3.397206110267713e-06, "logits": -1.409017562866211, "logps": -81.51747131347656, "loss": 0.4597, "objective": 0.47407808899879456, "ranking_simple": 0.7458333373069763, "step": 470 }, { "dpo_loss": 0.32313933968544006, "epoch": 1.34624468587624, "grad_norm": 7.632057920114339, "learning_rate": 3.3585022188481247e-06, "logits": -1.3106532096862793, "logps": -84.2204360961914, "loss": 0.4525, "objective": 0.44208064675331116, "ranking_simple": 0.7749999761581421, "step": 475 }, { "dpo_loss": 0.3283863365650177, "epoch": 1.360415682569674, "grad_norm": 8.810666497005016, "learning_rate": 3.3195636214939943e-06, "logits": -1.350165843963623, "logps": -84.08671569824219, "loss": 0.4602, "objective": 0.45167192816734314, "ranking_simple": 0.7166666388511658, "step": 480 }, { "dpo_loss": 0.3586553633213043, "epoch": 1.3745866792631083, "grad_norm": 9.1074396847746, "learning_rate": 3.2804009636290403e-06, "logits": -1.3092820644378662, "logps": -82.56153106689453, "loss": 0.4755, "objective": 0.4738306701183319, "ranking_simple": 0.737500011920929, "step": 485 }, { "dpo_loss": 0.3484760820865631, "epoch": 1.3887576759565423, "grad_norm": 8.495980198743673, "learning_rate": 3.2410249519328848e-06, "logits": -1.2295089960098267, "logps": -85.73413848876953, "loss": 0.4478, "objective": 0.46845272183418274, "ranking_simple": 0.7458333373069763, "step": 490 }, { "dpo_loss": 0.3095528483390808, "epoch": 1.4029286726499763, "grad_norm": 8.616754835251825, "learning_rate": 3.201446351413958e-06, "logits": -1.2363132238388062, "logps": -86.17890930175781, "loss": 0.4491, "objective": 0.4346270263195038, "ranking_simple": 0.7583333253860474, "step": 495 }, { "dpo_loss": 0.33423519134521484, "epoch": 1.4170996693434104, "grad_norm": 9.483021181746007, "learning_rate": 3.1616759824664543e-06, "logits": -1.1377207040786743, "logps": -84.53422546386719, "loss": 0.4624, "objective": 0.46367743611335754, "ranking_simple": 0.7458333373069763, "step": 500 }, { "epoch": 1.4170996693434104, "eval_dpo_loss": 0.67823725938797, "eval_logits": -1.2444933652877808, "eval_logps": -89.6255874633789, "eval_loss": 0.7469730973243713, "eval_objective": 0.7386783957481384, "eval_ranking_simple": 0.5533125996589661, "eval_runtime": 371.626, "eval_samples_per_second": 15.58, "eval_steps_per_second": 1.3, "step": 500 }, { "dpo_loss": 0.3274858891963959, "epoch": 1.4312706660368446, "grad_norm": 7.65492855456284, "learning_rate": 3.121724717912138e-06, "logits": -1.1723324060440063, "logps": -85.37480163574219, "loss": 0.4519, "objective": 0.45082953572273254, "ranking_simple": 0.7666666507720947, "step": 505 }, { "dpo_loss": 0.3435133397579193, "epoch": 1.4454416627302786, "grad_norm": 7.695387713966003, "learning_rate": 3.081603480027826e-06, "logits": -1.1867634057998657, "logps": -85.25582885742188, "loss": 0.4577, "objective": 0.4627363979816437, "ranking_simple": 0.7416666746139526, "step": 510 }, { "dpo_loss": 0.33665701746940613, "epoch": 1.4596126594237129, "grad_norm": 6.852553951419641, "learning_rate": 3.04132323755935e-06, "logits": -1.2323737144470215, "logps": -85.88379669189453, "loss": 0.4456, "objective": 0.4514175057411194, "ranking_simple": 0.6875, "step": 515 }, { "dpo_loss": 0.29371899366378784, "epoch": 1.473783656117147, "grad_norm": 6.810862817314035, "learning_rate": 3.0008950027228035e-06, "logits": -1.1436675786972046, "logps": -84.27217102050781, "loss": 0.4264, "objective": 0.42438414692878723, "ranking_simple": 0.7749999761581421, "step": 520 }, { "dpo_loss": 0.3257172703742981, "epoch": 1.487954652810581, "grad_norm": 8.583092428586104, "learning_rate": 2.960329828193918e-06, "logits": -1.013688325881958, "logps": -83.8447265625, "loss": 0.462, "objective": 0.45160239934921265, "ranking_simple": 0.7416666746139526, "step": 525 }, { "dpo_loss": 0.32752183079719543, "epoch": 1.5021256495040152, "grad_norm": 7.949241972971005, "learning_rate": 2.9196388040863695e-06, "logits": -1.2097468376159668, "logps": -87.19517517089844, "loss": 0.4528, "objective": 0.4474882483482361, "ranking_simple": 0.7333333492279053, "step": 530 }, { "dpo_loss": 0.2954551577568054, "epoch": 1.5162966461974492, "grad_norm": 6.728845085484624, "learning_rate": 2.8788330549198512e-06, "logits": -1.2219938039779663, "logps": -86.1001205444336, "loss": 0.4194, "objective": 0.4230054020881653, "ranking_simple": 0.7541666626930237, "step": 535 }, { "dpo_loss": 0.3660537302494049, "epoch": 1.5304676428908834, "grad_norm": 8.075282408782796, "learning_rate": 2.8379237365787426e-06, "logits": -1.1332299709320068, "logps": -86.49842834472656, "loss": 0.4601, "objective": 0.48352059721946716, "ranking_simple": 0.7666666507720947, "step": 540 }, { "dpo_loss": 0.3429103195667267, "epoch": 1.5446386395843175, "grad_norm": 8.419322188891892, "learning_rate": 2.7969220332622004e-06, "logits": -1.2047460079193115, "logps": -87.4766616821289, "loss": 0.4602, "objective": 0.4581466615200043, "ranking_simple": 0.7749999761581421, "step": 545 }, { "dpo_loss": 0.2875075042247772, "epoch": 1.5588096362777515, "grad_norm": 7.159416578046968, "learning_rate": 2.7558391544265127e-06, "logits": -1.184915542602539, "logps": -88.01736450195312, "loss": 0.4391, "objective": 0.41795456409454346, "ranking_simple": 0.762499988079071, "step": 550 }, { "epoch": 1.5588096362777515, "eval_dpo_loss": 0.6695489287376404, "eval_logits": -1.1983243227005005, "eval_logps": -91.99543762207031, "eval_loss": 0.7384570837020874, "eval_objective": 0.7304210066795349, "eval_ranking_simple": 0.5486542582511902, "eval_runtime": 368.2566, "eval_samples_per_second": 15.723, "eval_steps_per_second": 1.312, "step": 550 }, { "dpo_loss": 0.33032724261283875, "epoch": 1.5729806329711855, "grad_norm": 7.5647449089794305, "learning_rate": 2.714686331720543e-06, "logits": -0.9676509499549866, "logps": -88.15595245361328, "loss": 0.4441, "objective": 0.44403356313705444, "ranking_simple": 0.737500011920929, "step": 555 }, { "dpo_loss": 0.3146314024925232, "epoch": 1.5871516296646198, "grad_norm": 7.8310551029619555, "learning_rate": 2.6734748159151104e-06, "logits": -1.0463515520095825, "logps": -85.61664581298828, "loss": 0.4414, "objective": 0.42937740683555603, "ranking_simple": 0.7458333373069763, "step": 560 }, { "dpo_loss": 0.31714820861816406, "epoch": 1.601322626358054, "grad_norm": 7.188369880783801, "learning_rate": 2.632215873827142e-06, "logits": -1.049370527267456, "logps": -86.86510467529297, "loss": 0.4496, "objective": 0.4433501362800598, "ranking_simple": 0.7333333492279053, "step": 565 }, { "dpo_loss": 0.31138813495635986, "epoch": 1.615493623051488, "grad_norm": 7.613684765151376, "learning_rate": 2.5909207852394363e-06, "logits": -1.1900863647460938, "logps": -88.4011001586914, "loss": 0.4325, "objective": 0.4364365339279175, "ranking_simple": 0.7583333253860474, "step": 570 }, { "dpo_loss": 0.3405741751194, "epoch": 1.629664619744922, "grad_norm": 7.994470235278288, "learning_rate": 2.5496008398168844e-06, "logits": -1.1202160120010376, "logps": -85.93099975585938, "loss": 0.4543, "objective": 0.46565988659858704, "ranking_simple": 0.7666666507720947, "step": 575 }, { "dpo_loss": 0.32562312483787537, "epoch": 1.643835616438356, "grad_norm": 7.561927544761709, "learning_rate": 2.508267334019988e-06, "logits": -1.1339008808135986, "logps": -85.58094787597656, "loss": 0.4384, "objective": 0.4460238814353943, "ranking_simple": 0.7666666507720947, "step": 580 }, { "dpo_loss": 0.32099297642707825, "epoch": 1.6580066131317903, "grad_norm": 7.262395386385144, "learning_rate": 2.46693156801652e-06, "logits": -0.9802089333534241, "logps": -84.47058868408203, "loss": 0.4483, "objective": 0.43592384457588196, "ranking_simple": 0.7208333611488342, "step": 585 }, { "dpo_loss": 0.31580641865730286, "epoch": 1.6721776098252243, "grad_norm": 6.800312627956946, "learning_rate": 2.4256048425921693e-06, "logits": -1.01039457321167, "logps": -86.4159164428711, "loss": 0.444, "objective": 0.44133689999580383, "ranking_simple": 0.7541666626930237, "step": 590 }, { "dpo_loss": 0.32943814992904663, "epoch": 1.6863486065186586, "grad_norm": 6.937730325378872, "learning_rate": 2.384298456061023e-06, "logits": -0.9875913858413696, "logps": -86.71006774902344, "loss": 0.4342, "objective": 0.44936403632164, "ranking_simple": 0.75, "step": 595 }, { "dpo_loss": 0.28611433506011963, "epoch": 1.7005196032120926, "grad_norm": 7.2292134497805245, "learning_rate": 2.3430237011767166e-06, "logits": -1.0400419235229492, "logps": -87.26065063476562, "loss": 0.4285, "objective": 0.4172123074531555, "ranking_simple": 0.75, "step": 600 }, { "epoch": 1.7005196032120926, "eval_dpo_loss": 0.6726422905921936, "eval_logits": -1.1181021928787231, "eval_logps": -91.40369415283203, "eval_loss": 0.7407769560813904, "eval_objective": 0.7317408919334412, "eval_ranking_simple": 0.5502070188522339, "eval_runtime": 368.21, "eval_samples_per_second": 15.725, "eval_steps_per_second": 1.312, "step": 600 }, { "dpo_loss": 0.31679973006248474, "epoch": 1.7146905999055266, "grad_norm": 8.641164554516212, "learning_rate": 2.30179186204511e-06, "logits": -0.9793424606323242, "logps": -87.53156280517578, "loss": 0.4514, "objective": 0.439665287733078, "ranking_simple": 0.7291666865348816, "step": 605 }, { "dpo_loss": 0.31452521681785583, "epoch": 1.7288615965989607, "grad_norm": 8.523948659267386, "learning_rate": 2.2606142110393248e-06, "logits": -1.05043625831604, "logps": -86.67326354980469, "loss": 0.4321, "objective": 0.4331686198711395, "ranking_simple": 0.7833333611488342, "step": 610 }, { "dpo_loss": 0.3183232843875885, "epoch": 1.743032593292395, "grad_norm": 7.54765334989791, "learning_rate": 2.2195020057179897e-06, "logits": -1.0829071998596191, "logps": -87.68238830566406, "loss": 0.4403, "objective": 0.4391016960144043, "ranking_simple": 0.7166666388511658, "step": 615 }, { "dpo_loss": 0.31284984946250916, "epoch": 1.7572035899858292, "grad_norm": 8.306713660930198, "learning_rate": 2.1784664857475356e-06, "logits": -1.133055567741394, "logps": -88.2243881225586, "loss": 0.4481, "objective": 0.4363309144973755, "ranking_simple": 0.7583333253860474, "step": 620 }, { "dpo_loss": 0.3389653265476227, "epoch": 1.7713745866792632, "grad_norm": 8.27642622610812, "learning_rate": 2.1375188698293855e-06, "logits": -1.0864481925964355, "logps": -87.53520202636719, "loss": 0.4464, "objective": 0.4615752100944519, "ranking_simple": 0.7458333373069763, "step": 625 }, { "dpo_loss": 0.3449970781803131, "epoch": 1.7855455833726972, "grad_norm": 7.582885342390072, "learning_rate": 2.096670352632873e-06, "logits": -1.17921781539917, "logps": -88.3585205078125, "loss": 0.4521, "objective": 0.4663265645503998, "ranking_simple": 0.7124999761581421, "step": 630 }, { "dpo_loss": 0.2966757118701935, "epoch": 1.7997165800661312, "grad_norm": 7.391390201817734, "learning_rate": 2.0559321017347286e-06, "logits": -1.3011022806167603, "logps": -87.97920989990234, "loss": 0.4423, "objective": 0.42836812138557434, "ranking_simple": 0.7416666746139526, "step": 635 }, { "dpo_loss": 0.2997787892818451, "epoch": 1.8138875767595655, "grad_norm": 7.910229224489209, "learning_rate": 2.01531525456598e-06, "logits": -1.2456448078155518, "logps": -88.25955963134766, "loss": 0.4321, "objective": 0.43085435032844543, "ranking_simple": 0.7291666865348816, "step": 640 }, { "dpo_loss": 0.3190990388393402, "epoch": 1.8280585734529995, "grad_norm": 7.336498748426566, "learning_rate": 1.974830915367086e-06, "logits": -1.2663050889968872, "logps": -88.02909851074219, "loss": 0.448, "objective": 0.44558364152908325, "ranking_simple": 0.7416666746139526, "step": 645 }, { "dpo_loss": 0.31604960560798645, "epoch": 1.8422295701464337, "grad_norm": 8.01809767773599, "learning_rate": 1.93449015215215e-06, "logits": -1.3034510612487793, "logps": -87.49658966064453, "loss": 0.4553, "objective": 0.4388020634651184, "ranking_simple": 0.7583333253860474, "step": 650 }, { "epoch": 1.8422295701464337, "eval_dpo_loss": 0.6739967465400696, "eval_logits": -1.272489070892334, "eval_logps": -90.41595458984375, "eval_loss": 0.7425940036773682, "eval_objective": 0.7334864139556885, "eval_ranking_simple": 0.5559006333351135, "eval_runtime": 368.2584, "eval_samples_per_second": 15.723, "eval_steps_per_second": 1.312, "step": 650 }, { "dpo_loss": 0.3332468867301941, "epoch": 1.8564005668398678, "grad_norm": 6.515064472181348, "learning_rate": 1.8943039936830347e-06, "logits": -1.153478980064392, "logps": -83.99712371826172, "loss": 0.4429, "objective": 0.441272109746933, "ranking_simple": 0.7416666746139526, "step": 655 }, { "dpo_loss": 0.3361124098300934, "epoch": 1.8705715635333018, "grad_norm": 7.186294756150463, "learning_rate": 1.8542834264542091e-06, "logits": -1.1785982847213745, "logps": -87.21165466308594, "loss": 0.4408, "objective": 0.4524908661842346, "ranking_simple": 0.737500011920929, "step": 660 }, { "dpo_loss": 0.30988797545433044, "epoch": 1.8847425602267358, "grad_norm": 7.223583667691921, "learning_rate": 1.814439391689151e-06, "logits": -1.1908276081085205, "logps": -85.18921661376953, "loss": 0.4392, "objective": 0.4265805184841156, "ranking_simple": 0.7875000238418579, "step": 665 }, { "dpo_loss": 0.3152609169483185, "epoch": 1.89891355692017, "grad_norm": 7.236738362974331, "learning_rate": 1.7747827823491253e-06, "logits": -1.1136568784713745, "logps": -85.27536010742188, "loss": 0.437, "objective": 0.43872639536857605, "ranking_simple": 0.7166666388511658, "step": 670 }, { "dpo_loss": 0.3269253373146057, "epoch": 1.9130845536136043, "grad_norm": 7.561018148695705, "learning_rate": 1.7353244401551566e-06, "logits": -1.1110624074935913, "logps": -86.15958404541016, "loss": 0.4438, "objective": 0.45002037286758423, "ranking_simple": 0.7916666865348816, "step": 675 }, { "dpo_loss": 0.32910048961639404, "epoch": 1.9272555503070383, "grad_norm": 7.152885290887085, "learning_rate": 1.6960751526240122e-06, "logits": -1.1628243923187256, "logps": -88.80421447753906, "loss": 0.4433, "objective": 0.4564170241355896, "ranking_simple": 0.8041666746139526, "step": 680 }, { "dpo_loss": 0.2932046949863434, "epoch": 1.9414265470004723, "grad_norm": 6.233579110675445, "learning_rate": 1.6570456501189996e-06, "logits": -1.1053887605667114, "logps": -87.36367797851562, "loss": 0.4164, "objective": 0.424954891204834, "ranking_simple": 0.699999988079071, "step": 685 }, { "dpo_loss": 0.3040808141231537, "epoch": 1.9555975436939064, "grad_norm": 7.4587976233319795, "learning_rate": 1.6182466029163974e-06, "logits": -1.1335686445236206, "logps": -87.29252624511719, "loss": 0.439, "objective": 0.4274420142173767, "ranking_simple": 0.75, "step": 690 }, { "dpo_loss": 0.33969834446907043, "epoch": 1.9697685403873406, "grad_norm": 8.387180426204326, "learning_rate": 1.5796886182883053e-06, "logits": -1.048902988433838, "logps": -87.7245101928711, "loss": 0.4375, "objective": 0.44796222448349, "ranking_simple": 0.75, "step": 695 }, { "dpo_loss": 0.3006362318992615, "epoch": 1.9839395370807746, "grad_norm": 7.023996943402399, "learning_rate": 1.541382237602721e-06, "logits": -1.0404043197631836, "logps": -88.86479187011719, "loss": 0.4307, "objective": 0.4227179288864136, "ranking_simple": 0.7416666746139526, "step": 700 }, { "epoch": 1.9839395370807746, "eval_dpo_loss": 0.6734952926635742, "eval_logits": -1.2351235151290894, "eval_logps": -91.78550720214844, "eval_loss": 0.740387499332428, "eval_objective": 0.7341719269752502, "eval_ranking_simple": 0.5584886074066162, "eval_runtime": 367.9084, "eval_samples_per_second": 15.738, "eval_steps_per_second": 1.313, "step": 700 }, { "dpo_loss": 0.3133445382118225, "epoch": 1.9981105337742089, "grad_norm": 7.983253403779177, "learning_rate": 1.5033379334416376e-06, "logits": -0.9815789461135864, "logps": -87.82261657714844, "loss": 0.448, "objective": 0.4431394636631012, "ranking_simple": 0.7208333611488342, "step": 705 }, { "dpo_loss": 0.26884225010871887, "epoch": 2.012281530467643, "grad_norm": 4.711433926223709, "learning_rate": 1.465566106737942e-06, "logits": -1.132247805595398, "logps": -86.83318328857422, "loss": 0.3883, "objective": 0.4008704125881195, "ranking_simple": 0.800000011920929, "step": 710 }, { "dpo_loss": 0.24094882607460022, "epoch": 2.026452527161077, "grad_norm": 4.45009467250422, "learning_rate": 1.4280770839319073e-06, "logits": -1.1357406377792358, "logps": -85.59795379638672, "loss": 0.3752, "objective": 0.37576305866241455, "ranking_simple": 0.8333333134651184, "step": 715 }, { "dpo_loss": 0.22363178431987762, "epoch": 2.040623523854511, "grad_norm": 4.462329734353584, "learning_rate": 1.3908811141480408e-06, "logits": -1.1281845569610596, "logps": -86.89059448242188, "loss": 0.371, "objective": 0.3674115538597107, "ranking_simple": 0.8166666626930237, "step": 720 }, { "dpo_loss": 0.22502318024635315, "epoch": 2.0547945205479454, "grad_norm": 4.243136918683414, "learning_rate": 1.353988366393083e-06, "logits": -1.1830660104751587, "logps": -89.62688446044922, "loss": 0.3719, "objective": 0.37353357672691345, "ranking_simple": 0.7958333492279053, "step": 725 }, { "dpo_loss": 0.24978047609329224, "epoch": 2.0689655172413794, "grad_norm": 4.567188933527537, "learning_rate": 1.3174089267758983e-06, "logits": -1.2705327272415161, "logps": -87.48053741455078, "loss": 0.3752, "objective": 0.3849594295024872, "ranking_simple": 0.7833333611488342, "step": 730 }, { "dpo_loss": 0.25259262323379517, "epoch": 2.0831365139348135, "grad_norm": 5.151937950415065, "learning_rate": 1.2811527957500344e-06, "logits": -1.1913000345230103, "logps": -87.4656753540039, "loss": 0.3812, "objective": 0.3858603835105896, "ranking_simple": 0.762499988079071, "step": 735 }, { "dpo_loss": 0.2209930121898651, "epoch": 2.0973075106282475, "grad_norm": 4.254397078002712, "learning_rate": 1.245229885379699e-06, "logits": -1.2872763872146606, "logps": -86.62919616699219, "loss": 0.3727, "objective": 0.3619976043701172, "ranking_simple": 0.8458333611488342, "step": 740 }, { "dpo_loss": 0.2264253944158554, "epoch": 2.1114785073216815, "grad_norm": 4.542965704767376, "learning_rate": 1.2096500166298992e-06, "logits": -1.2000243663787842, "logps": -88.8626937866211, "loss": 0.3762, "objective": 0.3712898790836334, "ranking_simple": 0.8125, "step": 745 }, { "dpo_loss": 0.23141829669475555, "epoch": 2.1256495040151155, "grad_norm": 4.288133779032122, "learning_rate": 1.1744229166814889e-06, "logits": -1.2524789571762085, "logps": -87.17241668701172, "loss": 0.3755, "objective": 0.37335434556007385, "ranking_simple": 0.7791666388511658, "step": 750 }, { "epoch": 2.1256495040151155, "eval_dpo_loss": 0.6762245893478394, "eval_logits": -1.3013161420822144, "eval_logps": -93.2393569946289, "eval_loss": 0.7430183291435242, "eval_objective": 0.736893355846405, "eval_ranking_simple": 0.5486542582511902, "eval_runtime": 368.204, "eval_samples_per_second": 15.725, "eval_steps_per_second": 1.312, "step": 750 }, { "dpo_loss": 0.2273796647787094, "epoch": 2.13982050070855, "grad_norm": 4.5000688322821345, "learning_rate": 1.1395582162718524e-06, "logits": -1.2545628547668457, "logps": -90.6506576538086, "loss": 0.3758, "objective": 0.3768024444580078, "ranking_simple": 0.8083333373069763, "step": 755 }, { "dpo_loss": 0.2525111138820648, "epoch": 2.153991497401984, "grad_norm": 5.011749929420246, "learning_rate": 1.1050654470619602e-06, "logits": -1.1907628774642944, "logps": -85.97274017333984, "loss": 0.3783, "objective": 0.39089202880859375, "ranking_simple": 0.7875000238418579, "step": 760 }, { "dpo_loss": 0.23322957754135132, "epoch": 2.168162494095418, "grad_norm": 5.201612157262786, "learning_rate": 1.0709540390305061e-06, "logits": -1.2566605806350708, "logps": -87.6942367553711, "loss": 0.3849, "objective": 0.3768391013145447, "ranking_simple": 0.8125, "step": 765 }, { "dpo_loss": 0.22707949578762054, "epoch": 2.182333490788852, "grad_norm": 4.70373130482108, "learning_rate": 1.0372333178958462e-06, "logits": -1.2721644639968872, "logps": -88.22640991210938, "loss": 0.377, "objective": 0.3728346526622772, "ranking_simple": 0.7875000238418579, "step": 770 }, { "dpo_loss": 0.22152571380138397, "epoch": 2.196504487482286, "grad_norm": 5.587786749861536, "learning_rate": 1.0039125025664392e-06, "logits": -1.2534898519515991, "logps": -89.17953491210938, "loss": 0.3725, "objective": 0.3679354786872864, "ranking_simple": 0.8208333253860474, "step": 775 }, { "dpo_loss": 0.250379741191864, "epoch": 2.21067548417572, "grad_norm": 5.002867020526872, "learning_rate": 9.710007026204896e-07, "logits": -1.1392863988876343, "logps": -89.51647186279297, "loss": 0.3774, "objective": 0.39055460691452026, "ranking_simple": 0.737500011920929, "step": 780 }, { "dpo_loss": 0.22285352647304535, "epoch": 2.2248464808691546, "grad_norm": 5.843250372899321, "learning_rate": 9.385069158154805e-07, "logits": -1.1372294425964355, "logps": -87.82280731201172, "loss": 0.3756, "objective": 0.37054911255836487, "ranking_simple": 0.7916666865348816, "step": 785 }, { "dpo_loss": 0.2125234156847, "epoch": 2.2390174775625886, "grad_norm": 4.914317874167112, "learning_rate": 9.064400256282757e-07, "logits": -1.1291053295135498, "logps": -88.7917709350586, "loss": 0.3784, "objective": 0.36886271834373474, "ranking_simple": 0.8500000238418579, "step": 790 }, { "dpo_loss": 0.24073070287704468, "epoch": 2.2531884742560226, "grad_norm": 4.77090719779272, "learning_rate": 8.74808798826467e-07, "logits": -1.216113805770874, "logps": -89.64153289794922, "loss": 0.3768, "objective": 0.37714874744415283, "ranking_simple": 0.824999988079071, "step": 795 }, { "dpo_loss": 0.23036065697669983, "epoch": 2.2673594709494567, "grad_norm": 4.57839809888007, "learning_rate": 8.436218830716259e-07, "logits": -1.2197948694229126, "logps": -90.15279388427734, "loss": 0.3794, "objective": 0.3776319622993469, "ranking_simple": 0.8333333134651184, "step": 800 }, { "epoch": 2.2673594709494567, "eval_dpo_loss": 0.6726187467575073, "eval_logits": -1.2647498846054077, "eval_logps": -93.31334686279297, "eval_loss": 0.7400006651878357, "eval_objective": 0.7334582805633545, "eval_ranking_simple": 0.554347813129425, "eval_runtime": 370.3208, "eval_samples_per_second": 15.635, "eval_steps_per_second": 1.304, "step": 800 }, { "dpo_loss": 0.24638353288173676, "epoch": 2.2815304676428907, "grad_norm": 4.775498293092394, "learning_rate": 8.1288780455512e-07, "logits": -1.2070590257644653, "logps": -88.29240417480469, "loss": 0.38, "objective": 0.3882957696914673, "ranking_simple": 0.7916666865348816, "step": 805 }, { "dpo_loss": 0.26376059651374817, "epoch": 2.295701464336325, "grad_norm": 4.107962497004369, "learning_rate": 7.826149656671386e-07, "logits": -1.3075222969055176, "logps": -87.07465362548828, "loss": 0.3775, "objective": 0.38518133759498596, "ranking_simple": 0.8208333253860474, "step": 810 }, { "dpo_loss": 0.23214052617549896, "epoch": 2.309872461029759, "grad_norm": 4.032286949010579, "learning_rate": 7.528116426995605e-07, "logits": -1.2393053770065308, "logps": -89.05374145507812, "loss": 0.3692, "objective": 0.3722856342792511, "ranking_simple": 0.7833333611488342, "step": 815 }, { "dpo_loss": 0.24578505754470825, "epoch": 2.324043457723193, "grad_norm": 4.0007768416623755, "learning_rate": 7.234859835833022e-07, "logits": -1.1707955598831177, "logps": -88.1304702758789, "loss": 0.3733, "objective": 0.377387672662735, "ranking_simple": 0.762499988079071, "step": 820 }, { "dpo_loss": 0.23438973724842072, "epoch": 2.3382144544166272, "grad_norm": 4.645717749878006, "learning_rate": 6.94646005660749e-07, "logits": -1.197273850440979, "logps": -87.03529357910156, "loss": 0.3747, "objective": 0.3757517635822296, "ranking_simple": 0.8083333373069763, "step": 825 }, { "dpo_loss": 0.22819119691848755, "epoch": 2.3523854511100613, "grad_norm": 5.87351004355499, "learning_rate": 6.662995934939007e-07, "logits": -1.2045276165008545, "logps": -89.30978393554688, "loss": 0.3712, "objective": 0.3776562809944153, "ranking_simple": 0.8125, "step": 830 }, { "dpo_loss": 0.24645249545574188, "epoch": 2.3665564478034957, "grad_norm": 5.364960343792073, "learning_rate": 6.384544967088063e-07, "logits": -1.177357792854309, "logps": -88.5252914428711, "loss": 0.3869, "objective": 0.38127484917640686, "ranking_simple": 0.8208333253860474, "step": 835 }, { "dpo_loss": 0.24418006837368011, "epoch": 2.3807274444969297, "grad_norm": 5.224294646577439, "learning_rate": 6.111183278768956e-07, "logits": -1.1215898990631104, "logps": -88.96337890625, "loss": 0.3837, "objective": 0.38468509912490845, "ranking_simple": 0.7583333253860474, "step": 840 }, { "dpo_loss": 0.2479609102010727, "epoch": 2.3948984411903638, "grad_norm": 5.589943019855736, "learning_rate": 5.842985604337769e-07, "logits": -1.1818034648895264, "logps": -90.55766296386719, "loss": 0.3851, "objective": 0.3813749849796295, "ranking_simple": 0.8041666746139526, "step": 845 }, { "dpo_loss": 0.22911126911640167, "epoch": 2.409069437883798, "grad_norm": 4.122848222733345, "learning_rate": 5.580025266360764e-07, "logits": -1.2163876295089722, "logps": -88.67691040039062, "loss": 0.373, "objective": 0.37054920196533203, "ranking_simple": 0.7749999761581421, "step": 850 }, { "epoch": 2.409069437883798, "eval_dpo_loss": 0.6747334599494934, "eval_logits": -1.2593425512313843, "eval_logps": -92.93883514404297, "eval_loss": 0.74095219373703, "eval_objective": 0.7354456186294556, "eval_ranking_simple": 0.5522774457931519, "eval_runtime": 376.5034, "eval_samples_per_second": 15.378, "eval_steps_per_second": 1.283, "step": 850 }, { "dpo_loss": 0.24901343882083893, "epoch": 2.423240434577232, "grad_norm": 5.545917724699393, "learning_rate": 5.322374155568688e-07, "logits": -1.2266477346420288, "logps": -87.11955261230469, "loss": 0.3932, "objective": 0.3926084637641907, "ranking_simple": 0.800000011920929, "step": 855 }, { "dpo_loss": 0.24267633259296417, "epoch": 2.4374114312706663, "grad_norm": 4.734435718269958, "learning_rate": 5.070102711202606e-07, "logits": -1.1731507778167725, "logps": -89.50730895996094, "loss": 0.3789, "objective": 0.38105666637420654, "ranking_simple": 0.7916666865348816, "step": 860 }, { "dpo_loss": 0.23941467702388763, "epoch": 2.4515824279641003, "grad_norm": 5.519011513783408, "learning_rate": 4.823279901756498e-07, "logits": -1.204505443572998, "logps": -88.21556854248047, "loss": 0.3821, "objective": 0.38131964206695557, "ranking_simple": 0.762499988079071, "step": 865 }, { "dpo_loss": 0.2390858381986618, "epoch": 2.4657534246575343, "grad_norm": 4.890994580870049, "learning_rate": 4.581973206121948e-07, "logits": -1.2685866355895996, "logps": -88.25513458251953, "loss": 0.381, "objective": 0.3830280900001526, "ranking_simple": 0.7791666388511658, "step": 870 }, { "dpo_loss": 0.2303398698568344, "epoch": 2.4799244213509684, "grad_norm": 5.062132747484294, "learning_rate": 4.3462485951401126e-07, "logits": -1.204114556312561, "logps": -88.9038314819336, "loss": 0.3766, "objective": 0.38060086965560913, "ranking_simple": 0.8208333253860474, "step": 875 }, { "dpo_loss": 0.2502264678478241, "epoch": 2.4940954180444024, "grad_norm": 5.573699785814543, "learning_rate": 4.116170513565942e-07, "logits": -1.1215044260025024, "logps": -87.70077514648438, "loss": 0.3832, "objective": 0.3906749486923218, "ranking_simple": 0.7458333373069763, "step": 880 }, { "dpo_loss": 0.2577410042285919, "epoch": 2.5082664147378364, "grad_norm": 4.956979588584702, "learning_rate": 3.891801862449629e-07, "logits": -1.3180614709854126, "logps": -86.54364776611328, "loss": 0.3894, "objective": 0.39261943101882935, "ranking_simple": 0.800000011920929, "step": 885 }, { "dpo_loss": 0.21435794234275818, "epoch": 2.5224374114312704, "grad_norm": 5.7378961390152385, "learning_rate": 3.6732039819400686e-07, "logits": -1.2147475481033325, "logps": -85.97087097167969, "loss": 0.3823, "objective": 0.3677336275577545, "ranking_simple": 0.8291666507720947, "step": 890 }, { "dpo_loss": 0.24500012397766113, "epoch": 2.536608408124705, "grad_norm": 5.350118064676714, "learning_rate": 3.46043663451511e-07, "logits": -1.2147990465164185, "logps": -88.52668762207031, "loss": 0.3803, "objective": 0.3881225883960724, "ranking_simple": 0.762499988079071, "step": 895 }, { "dpo_loss": 0.27485665678977966, "epoch": 2.550779404818139, "grad_norm": 5.3127285866105405, "learning_rate": 3.253557988643072e-07, "logits": -1.1600372791290283, "logps": -86.77176666259766, "loss": 0.388, "objective": 0.40204280614852905, "ranking_simple": 0.7875000238418579, "step": 900 }, { "epoch": 2.550779404818139, "eval_dpo_loss": 0.6756879687309265, "eval_logits": -1.2939373254776, "eval_logps": -92.89238739013672, "eval_loss": 0.7418231964111328, "eval_objective": 0.7363364696502686, "eval_ranking_simple": 0.5502070188522339, "eval_runtime": 369.0318, "eval_samples_per_second": 15.69, "eval_steps_per_second": 1.309, "step": 900 }, { "dpo_loss": 0.23769782483577728, "epoch": 2.564950401511573, "grad_norm": 4.493878506991506, "learning_rate": 3.052624602880064e-07, "logits": -1.2237998247146606, "logps": -85.79032135009766, "loss": 0.3781, "objective": 0.3822983205318451, "ranking_simple": 0.7875000238418579, "step": 905 }, { "dpo_loss": 0.23715509474277496, "epoch": 2.579121398205007, "grad_norm": 5.776134875432629, "learning_rate": 2.8576914104074425e-07, "logits": -1.2153693437576294, "logps": -89.9877700805664, "loss": 0.38, "objective": 0.3822348713874817, "ranking_simple": 0.7833333611488342, "step": 910 }, { "dpo_loss": 0.241657093167305, "epoch": 2.593292394898441, "grad_norm": 3.828702735136657, "learning_rate": 2.6688117040136463e-07, "logits": -1.3503960371017456, "logps": -88.94493865966797, "loss": 0.3732, "objective": 0.3829546570777893, "ranking_simple": 0.8083333373069763, "step": 915 }, { "dpo_loss": 0.2292027622461319, "epoch": 2.6074633915918755, "grad_norm": 4.721702099688327, "learning_rate": 2.486037121524448e-07, "logits": -1.1647106409072876, "logps": -88.75472259521484, "loss": 0.3765, "objective": 0.3745788335800171, "ranking_simple": 0.8125, "step": 920 }, { "dpo_loss": 0.22531941533088684, "epoch": 2.6216343882853095, "grad_norm": 4.89488363414913, "learning_rate": 2.3094176316856982e-07, "logits": -1.1882636547088623, "logps": -87.45687866210938, "loss": 0.3799, "objective": 0.373484343290329, "ranking_simple": 0.800000011920929, "step": 925 }, { "dpo_loss": 0.2518150508403778, "epoch": 2.6358053849787435, "grad_norm": 5.096542868734766, "learning_rate": 2.13900152050239e-07, "logits": -1.1838154792785645, "logps": -87.14102935791016, "loss": 0.3831, "objective": 0.39408108592033386, "ranking_simple": 0.7416666746139526, "step": 930 }, { "dpo_loss": 0.2513907849788666, "epoch": 2.6499763816721775, "grad_norm": 5.6389448500299935, "learning_rate": 1.9748353780377234e-07, "logits": -1.1840038299560547, "logps": -86.4458999633789, "loss": 0.3954, "objective": 0.3891184628009796, "ranking_simple": 0.8291666507720947, "step": 935 }, { "dpo_loss": 0.22740763425827026, "epoch": 2.6641473783656116, "grad_norm": 4.892959475477896, "learning_rate": 1.8169640856758652e-07, "logits": -1.2362346649169922, "logps": -89.93408203125, "loss": 0.3789, "objective": 0.3699011206626892, "ranking_simple": 0.8041666746139526, "step": 940 }, { "dpo_loss": 0.2212921679019928, "epoch": 2.678318375059046, "grad_norm": 4.26351950100694, "learning_rate": 1.6654308038518057e-07, "logits": -1.2807658910751343, "logps": -88.23102569580078, "loss": 0.3726, "objective": 0.36465439200401306, "ranking_simple": 0.800000011920929, "step": 945 }, { "dpo_loss": 0.24647028744220734, "epoch": 2.69248937175248, "grad_norm": 5.222304477193562, "learning_rate": 1.5202769602517514e-07, "logits": -1.1635686159133911, "logps": -86.58324432373047, "loss": 0.3866, "objective": 0.3892979621887207, "ranking_simple": 0.7958333492279053, "step": 950 }, { "epoch": 2.69248937175248, "eval_dpo_loss": 0.6751933097839355, "eval_logits": -1.2937095165252686, "eval_logps": -92.32901763916016, "eval_loss": 0.7418193221092224, "eval_objective": 0.7358114719390869, "eval_ranking_simple": 0.5507246255874634, "eval_runtime": 369.9174, "eval_samples_per_second": 15.652, "eval_steps_per_second": 1.306, "step": 950 }, { "dpo_loss": 0.2517485022544861, "epoch": 2.706660368445914, "grad_norm": 4.388748582328918, "learning_rate": 1.381542238487188e-07, "logits": -1.243085265159607, "logps": -88.0900650024414, "loss": 0.3837, "objective": 0.38995641469955444, "ranking_simple": 0.8041666746139526, "step": 955 }, { "dpo_loss": 0.25272881984710693, "epoch": 2.720831365139348, "grad_norm": 5.315737788533154, "learning_rate": 1.2492645672457838e-07, "logits": -1.2224252223968506, "logps": -87.57622528076172, "loss": 0.3841, "objective": 0.39599618315696716, "ranking_simple": 0.7958333492279053, "step": 960 }, { "dpo_loss": 0.22980408370494843, "epoch": 2.735002361832782, "grad_norm": 5.693843860804482, "learning_rate": 1.1234801099220787e-07, "logits": -1.2591173648834229, "logps": -87.08290100097656, "loss": 0.3861, "objective": 0.3821989893913269, "ranking_simple": 0.8500000238418579, "step": 965 }, { "dpo_loss": 0.26472774147987366, "epoch": 2.7491733585262166, "grad_norm": 4.654019056163642, "learning_rate": 1.004223254730749e-07, "logits": -1.2617594003677368, "logps": -88.49696350097656, "loss": 0.3797, "objective": 0.39959490299224854, "ranking_simple": 0.8458333611488342, "step": 970 }, { "dpo_loss": 0.22332945466041565, "epoch": 2.7633443552196506, "grad_norm": 5.199760236696295, "learning_rate": 8.915266053052374e-08, "logits": -1.1656602621078491, "logps": -86.72117614746094, "loss": 0.3766, "objective": 0.36580052971839905, "ranking_simple": 0.8083333373069763, "step": 975 }, { "dpo_loss": 0.22700704634189606, "epoch": 2.7775153519130846, "grad_norm": 4.6165629384250435, "learning_rate": 7.854209717842231e-08, "logits": -1.3093292713165283, "logps": -87.9115982055664, "loss": 0.3728, "objective": 0.37029343843460083, "ranking_simple": 0.8291666507720947, "step": 980 }, { "dpo_loss": 0.23392988741397858, "epoch": 2.7916863486065187, "grad_norm": 4.595783043627286, "learning_rate": 6.859353623884569e-08, "logits": -1.1338623762130737, "logps": -84.75395202636719, "loss": 0.3729, "objective": 0.3690672218799591, "ranking_simple": 0.8083333373069763, "step": 985 }, { "dpo_loss": 0.24494783580303192, "epoch": 2.8058573452999527, "grad_norm": 4.754488601809905, "learning_rate": 5.930969754901844e-08, "logits": -1.1965256929397583, "logps": -88.29127502441406, "loss": 0.3801, "objective": 0.38465526700019836, "ranking_simple": 0.7749999761581421, "step": 990 }, { "dpo_loss": 0.24409013986587524, "epoch": 2.820028341993387, "grad_norm": 5.4353357294596245, "learning_rate": 5.069311921774039e-08, "logits": -1.2917786836624146, "logps": -88.09101867675781, "loss": 0.3817, "objective": 0.3790687918663025, "ranking_simple": 0.7916666865348816, "step": 995 }, { "dpo_loss": 0.24059538543224335, "epoch": 2.8341993386868207, "grad_norm": 4.725884973016709, "learning_rate": 4.2746156931490756e-08, "logits": -1.2047442197799683, "logps": -85.76768493652344, "loss": 0.3828, "objective": 0.3797774612903595, "ranking_simple": 0.8291666507720947, "step": 1000 }, { "epoch": 2.8341993386868207, "eval_dpo_loss": 0.6749303340911865, "eval_logits": -1.2945847511291504, "eval_logps": -92.32601928710938, "eval_loss": 0.7417021989822388, "eval_objective": 0.7355546355247498, "eval_ranking_simple": 0.5502070188522339, "eval_runtime": 369.447, "eval_samples_per_second": 15.672, "eval_steps_per_second": 1.307, "step": 1000 }, { "dpo_loss": 0.23444737493991852, "epoch": 2.848370335380255, "grad_norm": 5.091471401481779, "learning_rate": 3.547098331040916e-08, "logits": -1.2024807929992676, "logps": -86.74796295166016, "loss": 0.382, "objective": 0.3817732632160187, "ranking_simple": 0.8041666746139526, "step": 1005 }, { "dpo_loss": 0.2292069047689438, "epoch": 2.862541332073689, "grad_norm": 4.7403703778744815, "learning_rate": 2.8869587314321324e-08, "logits": -1.2810461521148682, "logps": -87.0550308227539, "loss": 0.379, "objective": 0.37782254815101624, "ranking_simple": 0.7791666388511658, "step": 1010 }, { "dpo_loss": 0.2399337738752365, "epoch": 2.8767123287671232, "grad_norm": 5.329975170501087, "learning_rate": 2.2943773698977935e-08, "logits": -1.197946548461914, "logps": -85.78963470458984, "loss": 0.3813, "objective": 0.3823747932910919, "ranking_simple": 0.7916666865348816, "step": 1015 }, { "dpo_loss": 0.23225615918636322, "epoch": 2.8908833254605573, "grad_norm": 4.141247666962683, "learning_rate": 1.7695162522652352e-08, "logits": -1.1521192789077759, "logps": -87.80615234375, "loss": 0.383, "objective": 0.3741118311882019, "ranking_simple": 0.75, "step": 1020 }, { "dpo_loss": 0.24901318550109863, "epoch": 2.9050543221539913, "grad_norm": 4.809794853364304, "learning_rate": 1.3125188703233815e-08, "logits": -1.2677100896835327, "logps": -87.76679229736328, "loss": 0.3815, "objective": 0.3834156394004822, "ranking_simple": 0.8083333373069763, "step": 1025 }, { "dpo_loss": 0.2290908843278885, "epoch": 2.9192253188474258, "grad_norm": 3.635733503543428, "learning_rate": 9.235101625932885e-09, "logits": -1.3011952638626099, "logps": -86.35026550292969, "loss": 0.3798, "objective": 0.3711520731449127, "ranking_simple": 0.762499988079071, "step": 1030 }, { "dpo_loss": 0.23674419522285461, "epoch": 2.9333963155408598, "grad_norm": 4.9577484155223, "learning_rate": 6.025964801714412e-09, "logits": -1.2497870922088623, "logps": -88.07068634033203, "loss": 0.3866, "objective": 0.3858194053173065, "ranking_simple": 0.8083333373069763, "step": 1035 }, { "dpo_loss": 0.23205497860908508, "epoch": 2.947567312234294, "grad_norm": 4.605543722963177, "learning_rate": 3.4986555765434415e-09, "logits": -1.2593365907669067, "logps": -88.1611099243164, "loss": 0.3798, "objective": 0.3837084472179413, "ranking_simple": 0.7875000238418579, "step": 1040 }, { "dpo_loss": 0.24727579951286316, "epoch": 2.961738308927728, "grad_norm": 4.997984528026836, "learning_rate": 1.6538648915270794e-09, "logits": -1.2010886669158936, "logps": -90.2540512084961, "loss": 0.3835, "objective": 0.3843297064304352, "ranking_simple": 0.7833333611488342, "step": 1045 }, { "dpo_loss": 0.23686116933822632, "epoch": 2.975909305621162, "grad_norm": 4.330333752032827, "learning_rate": 4.920970940180958e-10, "logits": -1.2512930631637573, "logps": -85.0908203125, "loss": 0.3743, "objective": 0.3781766891479492, "ranking_simple": 0.8166666626930237, "step": 1050 }, { "epoch": 2.975909305621162, "eval_dpo_loss": 0.6748924255371094, "eval_logits": -1.2948576211929321, "eval_logps": -92.3352279663086, "eval_loss": 0.7416455745697021, "eval_objective": 0.7355117201805115, "eval_ranking_simple": 0.5502070188522339, "eval_runtime": 370.411, "eval_samples_per_second": 15.631, "eval_steps_per_second": 1.304, "step": 1050 }, { "dpo_loss": 0.25150495767593384, "epoch": 2.9900803023145963, "grad_norm": 5.061308546886394, "learning_rate": 1.3669799732163314e-11, "logits": -1.2227600812911987, "logps": -85.82769012451172, "loss": 0.3854, "objective": 0.38525307178497314, "ranking_simple": 0.7833333611488342, "step": 1055 }, { "epoch": 2.992914501653283, "step": 1056, "total_flos": 0.0, "train_loss": 0.4763158837220434, "train_runtime": 34658.3644, "train_samples_per_second": 4.397, "train_steps_per_second": 0.03 } ], "logging_steps": 5, "max_steps": 1056, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }