diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,2969 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 4.988190836088805, + "eval_steps": 50, + "global_step": 880, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "dpo_loss": 0.6931471824645996, + "epoch": 0.005668398677373642, + "grad_norm": 26.827203675535984, + "learning_rate": 1.1363636363636363e-08, + "logits": -1.3147305250167847, + "logps": -88.0877456665039, + "loss": 0.4113, + "objective": 0.41588976979255676, + "ranking_idealized": 0.625, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.5208333134651184, + "regularize": 0.41588976979255676, + "step": 1 + }, + { + "dpo_loss": 0.6931466460227966, + "epoch": 0.02834199338686821, + "grad_norm": 26.655974166157932, + "learning_rate": 5.6818181818181815e-08, + "logits": -1.3678570985794067, + "logps": -84.42396545410156, + "loss": 0.413, + "objective": 0.3755497932434082, + "ranking_idealized": 0.6145833134651184, + "ranking_idealized_expo": 0.546875, + "ranking_simple": 0.546875, + "regularize": 0.3755497932434082, + "step": 5 + }, + { + "dpo_loss": 0.6916109323501587, + "epoch": 0.05668398677373642, + "grad_norm": 25.202984552553435, + "learning_rate": 1.1363636363636363e-07, + "logits": -1.446576714515686, + "logps": -83.28290557861328, + "loss": 0.4165, + "objective": 0.4402167499065399, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.512499988079071, + "regularize": 0.4402167499065399, + "step": 10 + }, + { + "dpo_loss": 0.6918571591377258, + "epoch": 0.08502598016060463, + "grad_norm": 24.8928017897937, + "learning_rate": 1.7045454545454543e-07, + "logits": -1.4129120111465454, + "logps": -83.23918151855469, + "loss": 0.423, + "objective": 0.40991583466529846, + "ranking_idealized": 0.5833333134651184, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.5083333253860474, + "regularize": 0.40991583466529846, + "step": 15 + }, + { + "dpo_loss": 0.6913864612579346, + "epoch": 0.11336797354747284, + "grad_norm": 26.1438361746268, + "learning_rate": 2.2727272727272726e-07, + "logits": -1.405305027961731, + "logps": -83.78267669677734, + "loss": 0.4098, + "objective": 0.4017895758152008, + "ranking_idealized": 0.625, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5375000238418579, + "regularize": 0.4017895758152008, + "step": 20 + }, + { + "dpo_loss": 0.6848570108413696, + "epoch": 0.14170996693434104, + "grad_norm": 26.79124275787855, + "learning_rate": 2.840909090909091e-07, + "logits": -1.4560821056365967, + "logps": -83.52696990966797, + "loss": 0.4034, + "objective": 0.41992515325546265, + "ranking_idealized": 0.5958333611488342, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.5, + "regularize": 0.41992515325546265, + "step": 25 + }, + { + "dpo_loss": 0.6844711303710938, + "epoch": 0.17005196032120926, + "grad_norm": 26.78495469951858, + "learning_rate": 3.4090909090909085e-07, + "logits": -1.4348876476287842, + "logps": -84.22993469238281, + "loss": 0.4013, + "objective": 0.40435200929641724, + "ranking_idealized": 0.6041666865348816, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.5416666865348816, + "regularize": 0.40435200929641724, + "step": 30 + }, + { + "dpo_loss": 0.674633264541626, + "epoch": 0.19839395370807747, + "grad_norm": 27.550998188131874, + "learning_rate": 3.977272727272727e-07, + "logits": -1.4130500555038452, + "logps": -82.98973846435547, + "loss": 0.3925, + "objective": 0.37177178263664246, + "ranking_idealized": 0.6166666746139526, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.512499988079071, + "regularize": 0.37177178263664246, + "step": 35 + }, + { + "dpo_loss": 0.6748062372207642, + "epoch": 0.22673594709494568, + "grad_norm": 30.08966136803542, + "learning_rate": 4.545454545454545e-07, + "logits": -1.4084281921386719, + "logps": -83.05668640136719, + "loss": 0.4041, + "objective": 0.4255501925945282, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.512499988079071, + "regularize": 0.4255501925945282, + "step": 40 + }, + { + "dpo_loss": 0.6630504727363586, + "epoch": 0.25507794048181387, + "grad_norm": 25.26840087998978, + "learning_rate": 5.113636363636363e-07, + "logits": -1.5426502227783203, + "logps": -84.47521209716797, + "loss": 0.3947, + "objective": 0.4412144422531128, + "ranking_idealized": 0.6208333373069763, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5458333492279053, + "regularize": 0.4412144422531128, + "step": 45 + }, + { + "dpo_loss": 0.659989058971405, + "epoch": 0.2834199338686821, + "grad_norm": 24.465381128270387, + "learning_rate": 5.681818181818182e-07, + "logits": -1.4524168968200684, + "logps": -82.95875549316406, + "loss": 0.3854, + "objective": 0.364622563123703, + "ranking_idealized": 0.5416666865348816, + "ranking_idealized_expo": 0.42500001192092896, + "ranking_simple": 0.4583333432674408, + "regularize": 0.364622563123703, + "step": 50 + }, + { + "epoch": 0.2834199338686821, + "eval_dpo_loss": 0.6886485815048218, + "eval_logits": -1.4800517559051514, + "eval_logps": -91.4064712524414, + "eval_loss": 0.4056198000907898, + "eval_objective": 0.4075882136821747, + "eval_ranking_idealized": 0.5888429880142212, + "eval_ranking_idealized_expo": 0.5103305578231812, + "eval_ranking_simple": 0.5123966932296753, + "eval_regularize": 0.4075882136821747, + "eval_runtime": 265.1514, + "eval_samples_per_second": 21.837, + "eval_steps_per_second": 0.913, + "step": 50 + }, + { + "dpo_loss": 0.6636093258857727, + "epoch": 0.3117619272555503, + "grad_norm": 27.096857998186312, + "learning_rate": 6.249999999999999e-07, + "logits": -1.4970166683197021, + "logps": -85.03699493408203, + "loss": 0.3728, + "objective": 0.3725493848323822, + "ranking_idealized": 0.625, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.5458333492279053, + "regularize": 0.3725493848323822, + "step": 55 + }, + { + "dpo_loss": 0.6567211151123047, + "epoch": 0.3401039206424185, + "grad_norm": 25.695749312088278, + "learning_rate": 6.818181818181817e-07, + "logits": -1.4813398122787476, + "logps": -84.4722671508789, + "loss": 0.3599, + "objective": 0.3475739657878876, + "ranking_idealized": 0.637499988079071, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5333333611488342, + "regularize": 0.3475739657878876, + "step": 60 + }, + { + "dpo_loss": 0.6518040895462036, + "epoch": 0.3684459140292867, + "grad_norm": 29.49986445883662, + "learning_rate": 7.386363636363636e-07, + "logits": -1.430372714996338, + "logps": -84.72962188720703, + "loss": 0.3497, + "objective": 0.345612108707428, + "ranking_idealized": 0.5708333253860474, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.5083333253860474, + "regularize": 0.345612108707428, + "step": 65 + }, + { + "dpo_loss": 0.6528828740119934, + "epoch": 0.39678790741615494, + "grad_norm": 29.563000130373773, + "learning_rate": 7.954545454545454e-07, + "logits": -1.5054484605789185, + "logps": -86.26591491699219, + "loss": 0.35, + "objective": 0.3871075510978699, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.5416666865348816, + "regularize": 0.3871075510978699, + "step": 70 + }, + { + "dpo_loss": 0.6483267545700073, + "epoch": 0.42512990080302315, + "grad_norm": 27.602858223257197, + "learning_rate": 8.522727272727273e-07, + "logits": -1.516791582107544, + "logps": -86.8262710571289, + "loss": 0.3468, + "objective": 0.3712550401687622, + "ranking_idealized": 0.6291666626930237, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.5833333134651184, + "regularize": 0.3712550401687622, + "step": 75 + }, + { + "dpo_loss": 0.6363473534584045, + "epoch": 0.45347189418989137, + "grad_norm": 25.853451932249023, + "learning_rate": 9.09090909090909e-07, + "logits": -1.5554119348526, + "logps": -85.4685287475586, + "loss": 0.3352, + "objective": 0.3362359404563904, + "ranking_idealized": 0.5833333134651184, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.5416666865348816, + "regularize": 0.336235910654068, + "step": 80 + }, + { + "dpo_loss": 0.6442821025848389, + "epoch": 0.4818138875767596, + "grad_norm": 25.41070923572626, + "learning_rate": 9.65909090909091e-07, + "logits": -1.5026181936264038, + "logps": -84.45774841308594, + "loss": 0.3304, + "objective": 0.3429431617259979, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.550000011920929, + "regularize": 0.3429431617259979, + "step": 85 + }, + { + "dpo_loss": 0.6335326433181763, + "epoch": 0.5101558809636277, + "grad_norm": 25.187750521174056, + "learning_rate": 9.999842657116664e-07, + "logits": -1.2913075685501099, + "logps": -86.8448257446289, + "loss": 0.3243, + "objective": 0.32520177960395813, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.4791666567325592, + "ranking_simple": 0.5083333253860474, + "regularize": 0.32520177960395813, + "step": 90 + }, + { + "dpo_loss": 0.6084260940551758, + "epoch": 0.538497874350496, + "grad_norm": 23.93476735734447, + "learning_rate": 9.998072663403656e-07, + "logits": -1.3773174285888672, + "logps": -85.11380767822266, + "loss": 0.3036, + "objective": 0.3108121454715729, + "ranking_idealized": 0.625, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.5833333134651184, + "regularize": 0.3108121454715729, + "step": 95 + }, + { + "dpo_loss": 0.6009453535079956, + "epoch": 0.5668398677373642, + "grad_norm": 25.488579442690856, + "learning_rate": 9.99433669591504e-07, + "logits": -1.4631216526031494, + "logps": -85.5998764038086, + "loss": 0.3126, + "objective": 0.3375842273235321, + "ranking_idealized": 0.612500011920929, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.5625, + "regularize": 0.3375842273235321, + "step": 100 + }, + { + "epoch": 0.5668398677373642, + "eval_dpo_loss": 0.6816912293434143, + "eval_logits": -1.45261812210083, + "eval_logps": -91.31664276123047, + "eval_loss": 0.40215975046157837, + "eval_objective": 0.400903582572937, + "eval_ranking_idealized": 0.5888429880142212, + "eval_ranking_idealized_expo": 0.5103305578231812, + "eval_ranking_simple": 0.5206611752510071, + "eval_regularize": 0.400903582572937, + "eval_runtime": 259.1884, + "eval_samples_per_second": 22.339, + "eval_steps_per_second": 0.934, + "step": 100 + }, + { + "dpo_loss": 0.5999605059623718, + "epoch": 0.5951818611242324, + "grad_norm": 25.38952651860073, + "learning_rate": 9.988636224180095e-07, + "logits": -1.352739930152893, + "logps": -85.40930938720703, + "loss": 0.3097, + "objective": 0.32598960399627686, + "ranking_idealized": 0.6208333373069763, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.5791666507720947, + "regularize": 0.32598960399627686, + "step": 105 + }, + { + "dpo_loss": 0.6067489981651306, + "epoch": 0.6235238545111006, + "grad_norm": 31.045039069385457, + "learning_rate": 9.980973490458728e-07, + "logits": -1.5531387329101562, + "logps": -84.0550537109375, + "loss": 0.3104, + "objective": 0.3359374403953552, + "ranking_idealized": 0.5708333253860474, + "ranking_idealized_expo": 0.4541666805744171, + "ranking_simple": 0.4958333373069763, + "regularize": 0.33593741059303284, + "step": 110 + }, + { + "dpo_loss": 0.6095985770225525, + "epoch": 0.6518658478979689, + "grad_norm": 26.435670420498003, + "learning_rate": 9.971351508859486e-07, + "logits": -1.5276844501495361, + "logps": -84.30924987792969, + "loss": 0.291, + "objective": 0.28773021697998047, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.5041666626930237, + "regularize": 0.28773021697998047, + "step": 115 + }, + { + "dpo_loss": 0.6103960871696472, + "epoch": 0.680207841284837, + "grad_norm": 26.942509852249753, + "learning_rate": 9.959774064153975e-07, + "logits": -1.4677897691726685, + "logps": -84.61531066894531, + "loss": 0.2837, + "objective": 0.2627010643482208, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.5541666746139526, + "regularize": 0.2627010643482208, + "step": 120 + }, + { + "dpo_loss": 0.5971355438232422, + "epoch": 0.7085498346717053, + "grad_norm": 25.495357006548982, + "learning_rate": 9.94624571028813e-07, + "logits": -1.4407005310058594, + "logps": -84.40795135498047, + "loss": 0.288, + "objective": 0.29481950402259827, + "ranking_idealized": 0.6291666626930237, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.5874999761581421, + "regularize": 0.29481950402259827, + "step": 125 + }, + { + "dpo_loss": 0.5917614102363586, + "epoch": 0.7368918280585735, + "grad_norm": 27.139835865074275, + "learning_rate": 9.930771768590933e-07, + "logits": -1.5837173461914062, + "logps": -83.2771987915039, + "loss": 0.2887, + "objective": 0.2870228886604309, + "ranking_idealized": 0.5416666865348816, + "ranking_idealized_expo": 0.44999998807907104, + "ranking_simple": 0.5041666626930237, + "regularize": 0.2870228886604309, + "step": 130 + }, + { + "dpo_loss": 0.6036564111709595, + "epoch": 0.7652338214454416, + "grad_norm": 24.259859808790555, + "learning_rate": 9.91335832568129e-07, + "logits": -1.528158187866211, + "logps": -85.43966674804688, + "loss": 0.2694, + "objective": 0.270797461271286, + "ranking_idealized": 0.5833333134651184, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.5541666746139526, + "regularize": 0.270797461271286, + "step": 135 + }, + { + "dpo_loss": 0.596954345703125, + "epoch": 0.7935758148323099, + "grad_norm": 26.42799993318966, + "learning_rate": 9.894012231073895e-07, + "logits": -1.4152525663375854, + "logps": -86.42430114746094, + "loss": 0.2606, + "objective": 0.2631489038467407, + "ranking_idealized": 0.5791666507720947, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.550000011920929, + "regularize": 0.2631489038467407, + "step": 140 + }, + { + "dpo_loss": 0.58833909034729, + "epoch": 0.821917808219178, + "grad_norm": 26.472189025522844, + "learning_rate": 9.872741094484964e-07, + "logits": -1.5059914588928223, + "logps": -85.94861602783203, + "loss": 0.2555, + "objective": 0.2643609344959259, + "ranking_idealized": 0.5791666507720947, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.5333333611488342, + "regularize": 0.2643609344959259, + "step": 145 + }, + { + "dpo_loss": 0.5924276113510132, + "epoch": 0.8502598016060463, + "grad_norm": 25.826528962819687, + "learning_rate": 9.849553282839024e-07, + "logits": -1.4773136377334595, + "logps": -84.33631134033203, + "loss": 0.2481, + "objective": 0.24327746033668518, + "ranking_idealized": 0.6166666746139526, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.5874999761581421, + "regularize": 0.243277445435524, + "step": 150 + }, + { + "epoch": 0.8502598016060463, + "eval_dpo_loss": 0.6853220462799072, + "eval_logits": -1.478104829788208, + "eval_logps": -93.32852935791016, + "eval_loss": 0.4118410348892212, + "eval_objective": 0.41562050580978394, + "eval_ranking_idealized": 0.5888429880142212, + "eval_ranking_idealized_expo": 0.5103305578231812, + "eval_ranking_simple": 0.5185950398445129, + "eval_regularize": 0.41562050580978394, + "eval_runtime": 260.1091, + "eval_samples_per_second": 22.26, + "eval_steps_per_second": 0.93, + "step": 150 + }, + { + "dpo_loss": 0.5857201814651489, + "epoch": 0.8786017949929145, + "grad_norm": 24.421694763767686, + "learning_rate": 9.824457916977784e-07, + "logits": -1.4784348011016846, + "logps": -84.23937225341797, + "loss": 0.25, + "objective": 0.24794721603393555, + "ranking_idealized": 0.6166666746139526, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5916666388511658, + "regularize": 0.24794721603393555, + "step": 155 + }, + { + "dpo_loss": 0.5842316746711731, + "epoch": 0.9069437883797827, + "grad_norm": 24.297754190889687, + "learning_rate": 9.797464868072486e-07, + "logits": -1.379388689994812, + "logps": -84.26329803466797, + "loss": 0.2417, + "objective": 0.23959442973136902, + "ranking_idealized": 0.5708333253860474, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.5458333492279053, + "regularize": 0.23959442973136902, + "step": 160 + }, + { + "dpo_loss": 0.5881075263023376, + "epoch": 0.9352857817666509, + "grad_norm": 25.046440958455594, + "learning_rate": 9.768584753741134e-07, + "logits": -1.3925925493240356, + "logps": -85.05484771728516, + "loss": 0.2445, + "objective": 0.24838505685329437, + "ranking_idealized": 0.6041666865348816, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.5583333373069763, + "regularize": 0.24838504195213318, + "step": 165 + }, + { + "dpo_loss": 0.5687467455863953, + "epoch": 0.9636277751535192, + "grad_norm": 24.80826032024146, + "learning_rate": 9.737828933872073e-07, + "logits": -1.440019130706787, + "logps": -85.22455596923828, + "loss": 0.2525, + "objective": 0.24621081352233887, + "ranking_idealized": 0.5791666507720947, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5625, + "regularize": 0.24621081352233887, + "step": 170 + }, + { + "dpo_loss": 0.5792465209960938, + "epoch": 0.9919697685403873, + "grad_norm": 25.657531696623572, + "learning_rate": 9.705209506155634e-07, + "logits": -1.3882230520248413, + "logps": -85.2247085571289, + "loss": 0.2408, + "objective": 0.2368970364332199, + "ranking_idealized": 0.6541666388511658, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.6041666865348816, + "regularize": 0.2368970364332199, + "step": 175 + }, + { + "dpo_loss": 0.5573223233222961, + "epoch": 1.0203117619272555, + "grad_norm": 24.441555112350308, + "learning_rate": 9.670739301325534e-07, + "logits": -1.5630497932434082, + "logps": -84.3948745727539, + "loss": 0.2102, + "objective": 0.20754273235797882, + "ranking_idealized": 0.6166666746139526, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.5958333611488342, + "regularize": 0.20754273235797882, + "step": 180 + }, + { + "dpo_loss": 0.5467338562011719, + "epoch": 1.0486537553141237, + "grad_norm": 26.114706754447813, + "learning_rate": 9.63443187811197e-07, + "logits": -1.4042932987213135, + "logps": -84.7653579711914, + "loss": 0.214, + "objective": 0.21694259345531464, + "ranking_idealized": 0.5791666507720947, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.5458333492279053, + "regularize": 0.21694259345531464, + "step": 185 + }, + { + "dpo_loss": 0.5574190020561218, + "epoch": 1.076995748700992, + "grad_norm": 25.20524724848005, + "learning_rate": 9.596301517908328e-07, + "logits": -1.4538909196853638, + "logps": -85.65680694580078, + "loss": 0.2007, + "objective": 0.21142269670963287, + "ranking_idealized": 0.625, + "ranking_idealized_expo": 0.5541666746139526, + "ranking_simple": 0.625, + "regularize": 0.21142269670963287, + "step": 190 + }, + { + "dpo_loss": 0.561899721622467, + "epoch": 1.10533774208786, + "grad_norm": 28.03205694511378, + "learning_rate": 9.556363219153662e-07, + "logits": -1.435767650604248, + "logps": -84.88529968261719, + "loss": 0.2057, + "objective": 0.19679027795791626, + "ranking_idealized": 0.6333333253860474, + "ranking_idealized_expo": 0.5666666626930237, + "ranking_simple": 0.6166666746139526, + "regularize": 0.19679027795791626, + "step": 195 + }, + { + "dpo_loss": 0.5534842014312744, + "epoch": 1.1336797354747283, + "grad_norm": 23.06275024905121, + "learning_rate": 9.514632691433106e-07, + "logits": -1.517577052116394, + "logps": -83.62954711914062, + "loss": 0.1986, + "objective": 0.19466033577919006, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.574999988079071, + "regularize": 0.19466033577919006, + "step": 200 + }, + { + "epoch": 1.1336797354747283, + "eval_dpo_loss": 0.6827520132064819, + "eval_logits": -1.46909761428833, + "eval_logps": -90.6331558227539, + "eval_loss": 0.40533673763275146, + "eval_objective": 0.40887078642845154, + "eval_ranking_idealized": 0.5888429880142212, + "eval_ranking_idealized_expo": 0.5103305578231812, + "eval_ranking_simple": 0.5206611752510071, + "eval_regularize": 0.40887078642845154, + "eval_runtime": 260.5987, + "eval_samples_per_second": 22.218, + "eval_steps_per_second": 0.929, + "step": 200 + }, + { + "dpo_loss": 0.5494053363800049, + "epoch": 1.1620217288615966, + "grad_norm": 22.941534169012083, + "learning_rate": 9.471126349298556e-07, + "logits": -1.5020116567611694, + "logps": -83.8444595336914, + "loss": 0.1994, + "objective": 0.19596201181411743, + "ranking_idealized": 0.5708333253860474, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.5583333373069763, + "regularize": 0.19596201181411743, + "step": 205 + }, + { + "dpo_loss": 0.5515065789222717, + "epoch": 1.1903637222484649, + "grad_norm": 26.741821520067802, + "learning_rate": 9.425861305812081e-07, + "logits": -1.4875836372375488, + "logps": -83.98831176757812, + "loss": 0.1895, + "objective": 0.20510397851467133, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5541666746139526, + "regularize": 0.20510397851467133, + "step": 210 + }, + { + "dpo_loss": 0.55607670545578, + "epoch": 1.2187057156353331, + "grad_norm": 23.43637893497653, + "learning_rate": 9.378855365814557e-07, + "logits": -1.4646224975585938, + "logps": -83.52363586425781, + "loss": 0.1889, + "objective": 0.19153118133544922, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.5708333253860474, + "regularize": 0.19153118133544922, + "step": 215 + }, + { + "dpo_loss": 0.556377112865448, + "epoch": 1.2470477090222012, + "grad_norm": 26.789286245107157, + "learning_rate": 9.330127018922193e-07, + "logits": -1.4145793914794922, + "logps": -82.84550476074219, + "loss": 0.1925, + "objective": 0.17143851518630981, + "ranking_idealized": 0.6541666388511658, + "ranking_idealized_expo": 0.6041666865348816, + "ranking_simple": 0.6499999761581421, + "regularize": 0.17143851518630981, + "step": 220 + }, + { + "dpo_loss": 0.5455420613288879, + "epoch": 1.2753897024090695, + "grad_norm": 25.237511413060258, + "learning_rate": 9.279695432253708e-07, + "logits": -1.4910824298858643, + "logps": -84.51390075683594, + "loss": 0.1898, + "objective": 0.1823263168334961, + "ranking_idealized": 0.612500011920929, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.6083333492279053, + "regularize": 0.1823263168334961, + "step": 225 + }, + { + "dpo_loss": 0.5552546381950378, + "epoch": 1.3037316957959377, + "grad_norm": 23.65942718982369, + "learning_rate": 9.227580442891021e-07, + "logits": -1.4593993425369263, + "logps": -84.47645568847656, + "loss": 0.1809, + "objective": 0.17018872499465942, + "ranking_idealized": 0.5833333134651184, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.550000011920929, + "regularize": 0.17018872499465942, + "step": 230 + }, + { + "dpo_loss": 0.5385202169418335, + "epoch": 1.3320736891828058, + "grad_norm": 25.266299893397434, + "learning_rate": 9.173802550076401e-07, + "logits": -1.5345088243484497, + "logps": -82.98789978027344, + "loss": 0.1789, + "objective": 0.1734149307012558, + "ranking_idealized": 0.6041666865348816, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.5833333134651184, + "regularize": 0.1734149307012558, + "step": 235 + }, + { + "dpo_loss": 0.5434895157814026, + "epoch": 1.360415682569674, + "grad_norm": 25.750551600333242, + "learning_rate": 9.118382907149163e-07, + "logits": -1.4756948947906494, + "logps": -84.32857513427734, + "loss": 0.1742, + "objective": 0.1837477833032608, + "ranking_idealized": 0.5541666746139526, + "ranking_idealized_expo": 0.4791666567325592, + "ranking_simple": 0.5583333373069763, + "regularize": 0.1837477684020996, + "step": 240 + }, + { + "dpo_loss": 0.5604755878448486, + "epoch": 1.3887576759565423, + "grad_norm": 24.129154340629153, + "learning_rate": 9.061343313225087e-07, + "logits": -1.4909014701843262, + "logps": -83.4426498413086, + "loss": 0.1789, + "objective": 0.1817345917224884, + "ranking_idealized": 0.5916666388511658, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.5666666626930237, + "regularize": 0.1817345917224884, + "step": 245 + }, + { + "dpo_loss": 0.5357322692871094, + "epoch": 1.4170996693434104, + "grad_norm": 24.16224594925354, + "learning_rate": 9.002706204621802e-07, + "logits": -1.4255733489990234, + "logps": -82.65512084960938, + "loss": 0.1805, + "objective": 0.17317816615104675, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.47083333134651184, + "ranking_simple": 0.5541666746139526, + "regularize": 0.17317816615104675, + "step": 250 + }, + { + "epoch": 1.4170996693434104, + "eval_dpo_loss": 0.6830819249153137, + "eval_logits": -1.464825987815857, + "eval_logps": -90.24966430664062, + "eval_loss": 0.4085530936717987, + "eval_objective": 0.4083588719367981, + "eval_ranking_idealized": 0.5888429880142212, + "eval_ranking_idealized_expo": 0.5103305578231812, + "eval_ranking_simple": 0.5247933864593506, + "eval_regularize": 0.4083588719367981, + "eval_runtime": 262.2655, + "eval_samples_per_second": 22.077, + "eval_steps_per_second": 0.923, + "step": 250 + }, + { + "dpo_loss": 0.5522000193595886, + "epoch": 1.4454416627302786, + "grad_norm": 23.544028131135565, + "learning_rate": 8.942494646033554e-07, + "logits": -1.428904414176941, + "logps": -83.82772827148438, + "loss": 0.1816, + "objective": 0.16092044115066528, + "ranking_idealized": 0.6166666746139526, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.612500011920929, + "regularize": 0.16092044115066528, + "step": 255 + }, + { + "dpo_loss": 0.5535964369773865, + "epoch": 1.473783656117147, + "grad_norm": 24.007017906906484, + "learning_rate": 8.880732321458784e-07, + "logits": -1.4904005527496338, + "logps": -83.97267150878906, + "loss": 0.1703, + "objective": 0.16837134957313538, + "ranking_idealized": 0.5666666626930237, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.550000011920929, + "regularize": 0.16837134957313538, + "step": 260 + }, + { + "dpo_loss": 0.5446482300758362, + "epoch": 1.5021256495040152, + "grad_norm": 24.30764382402002, + "learning_rate": 8.817443524884117e-07, + "logits": -1.4601694345474243, + "logps": -82.12098693847656, + "loss": 0.1781, + "objective": 0.17031626403331757, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.5708333253860474, + "regularize": 0.17031626403331757, + "step": 265 + }, + { + "dpo_loss": 0.5536972284317017, + "epoch": 1.5304676428908834, + "grad_norm": 24.675134737686058, + "learning_rate": 8.752653150728411e-07, + "logits": -1.471502661705017, + "logps": -84.13450622558594, + "loss": 0.1758, + "objective": 0.18668265640735626, + "ranking_idealized": 0.5791666507720947, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.5708333253860474, + "regularize": 0.18668265640735626, + "step": 270 + }, + { + "dpo_loss": 0.5547968745231628, + "epoch": 1.5588096362777515, + "grad_norm": 22.77808390233293, + "learning_rate": 8.68638668405062e-07, + "logits": -1.4670997858047485, + "logps": -85.27931213378906, + "loss": 0.171, + "objective": 0.16611038148403168, + "ranking_idealized": 0.6208333373069763, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.6000000238418579, + "regularize": 0.16611038148403168, + "step": 275 + }, + { + "dpo_loss": 0.5309798717498779, + "epoch": 1.5871516296646198, + "grad_norm": 22.23255904480611, + "learning_rate": 8.61867019052535e-07, + "logits": -1.387014389038086, + "logps": -83.47966766357422, + "loss": 0.1731, + "objective": 0.18033398687839508, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.5874999761581421, + "regularize": 0.18033398687839508, + "step": 280 + }, + { + "dpo_loss": 0.5369495749473572, + "epoch": 1.615493623051488, + "grad_norm": 24.7467519907843, + "learning_rate": 8.549530306190014e-07, + "logits": -1.4981027841567993, + "logps": -85.08309936523438, + "loss": 0.1613, + "objective": 0.15606491267681122, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.5958333611488342, + "regularize": 0.15606491267681122, + "step": 285 + }, + { + "dpo_loss": 0.5465491414070129, + "epoch": 1.643835616438356, + "grad_norm": 22.280063793784098, + "learning_rate": 8.478994226967638e-07, + "logits": -1.5392872095108032, + "logps": -82.96480560302734, + "loss": 0.1639, + "objective": 0.1686221808195114, + "ranking_idealized": 0.612500011920929, + "ranking_idealized_expo": 0.5625, + "ranking_simple": 0.6041666865348816, + "regularize": 0.1686221808195114, + "step": 290 + }, + { + "dpo_loss": 0.5326969623565674, + "epoch": 1.6721776098252243, + "grad_norm": 22.516708106368693, + "learning_rate": 8.407089697969456e-07, + "logits": -1.430370569229126, + "logps": -81.40605926513672, + "loss": 0.1651, + "objective": 0.16238288581371307, + "ranking_idealized": 0.5791666507720947, + "ranking_idealized_expo": 0.4791666567325592, + "ranking_simple": 0.5666666626930237, + "regularize": 0.16238288581371307, + "step": 295 + }, + { + "dpo_loss": 0.5438053011894226, + "epoch": 1.7005196032120926, + "grad_norm": 22.982971147438153, + "learning_rate": 8.333845002581458e-07, + "logits": -1.5061898231506348, + "logps": -82.67247009277344, + "loss": 0.1668, + "objective": 0.19721931219100952, + "ranking_idealized": 0.6583333611488342, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.6208333373069763, + "regularize": 0.19721931219100952, + "step": 300 + }, + { + "epoch": 1.7005196032120926, + "eval_dpo_loss": 0.6841849088668823, + "eval_logits": -1.476090669631958, + "eval_logps": -89.86566162109375, + "eval_loss": 0.4079909026622772, + "eval_objective": 0.4113588035106659, + "eval_ranking_idealized": 0.5888429880142212, + "eval_ranking_idealized_expo": 0.5103305578231812, + "eval_ranking_simple": 0.5206611752510071, + "eval_regularize": 0.4113588035106659, + "eval_runtime": 259.3673, + "eval_samples_per_second": 22.324, + "eval_steps_per_second": 0.933, + "step": 300 + }, + { + "dpo_loss": 0.5529495477676392, + "epoch": 1.7288615965989607, + "grad_norm": 23.962805989899444, + "learning_rate": 8.259288951339232e-07, + "logits": -1.4737364053726196, + "logps": -83.48453521728516, + "loss": 0.1635, + "objective": 0.17988164722919464, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.5583333373069763, + "regularize": 0.17988164722919464, + "step": 305 + }, + { + "dpo_loss": 0.5436158776283264, + "epoch": 1.7572035899858292, + "grad_norm": 26.010266526035746, + "learning_rate": 8.183450870595441e-07, + "logits": -1.5402640104293823, + "logps": -81.41146087646484, + "loss": 0.1725, + "objective": 0.16945843398571014, + "ranking_idealized": 0.6083333492279053, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.6000000238418579, + "regularize": 0.16945843398571014, + "step": 310 + }, + { + "dpo_loss": 0.5490608811378479, + "epoch": 1.7855455833726972, + "grad_norm": 23.214852755265355, + "learning_rate": 8.106360590984404e-07, + "logits": -1.4412391185760498, + "logps": -82.86125946044922, + "loss": 0.1609, + "objective": 0.15798324346542358, + "ranking_idealized": 0.6041666865348816, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.5666666626930237, + "regularize": 0.15798324346542358, + "step": 315 + }, + { + "dpo_loss": 0.5580403208732605, + "epoch": 1.8138875767595655, + "grad_norm": 25.270172487230024, + "learning_rate": 8.028048435688333e-07, + "logits": -1.489629864692688, + "logps": -84.82173156738281, + "loss": 0.1562, + "objective": 0.15719416737556458, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.5958333611488342, + "regularize": 0.15719416737556458, + "step": 320 + }, + { + "dpo_loss": 0.5307654142379761, + "epoch": 1.8422295701464337, + "grad_norm": 24.866617020536584, + "learning_rate": 7.948545208509811e-07, + "logits": -1.5223475694656372, + "logps": -85.49372100830078, + "loss": 0.1605, + "objective": 0.15138211846351624, + "ranking_idealized": 0.6333333253860474, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.6291666626930237, + "regularize": 0.15138211846351624, + "step": 325 + }, + { + "dpo_loss": 0.5346109867095947, + "epoch": 1.8705715635333018, + "grad_norm": 27.77712533482603, + "learning_rate": 7.86788218175523e-07, + "logits": -1.282273769378662, + "logps": -83.1356201171875, + "loss": 0.1554, + "objective": 0.14494642615318298, + "ranking_idealized": 0.5708333253860474, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.5583333373069763, + "regularize": 0.14494642615318298, + "step": 330 + }, + { + "dpo_loss": 0.5577983260154724, + "epoch": 1.89891355692017, + "grad_norm": 23.806319516884738, + "learning_rate": 7.786091083933949e-07, + "logits": -1.4557408094406128, + "logps": -83.1150131225586, + "loss": 0.1472, + "objective": 0.14962820708751678, + "ranking_idealized": 0.5916666388511658, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.5666666626930237, + "regularize": 0.14962820708751678, + "step": 335 + }, + { + "dpo_loss": 0.548663318157196, + "epoch": 1.9272555503070383, + "grad_norm": 25.2807889158847, + "learning_rate": 7.703204087277988e-07, + "logits": -1.463193416595459, + "logps": -85.10281372070312, + "loss": 0.1416, + "objective": 0.14199069142341614, + "ranking_idealized": 0.6416666507720947, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.6166666746139526, + "regularize": 0.14199069142341614, + "step": 340 + }, + { + "dpo_loss": 0.5481914281845093, + "epoch": 1.9555975436939064, + "grad_norm": 23.034113253398804, + "learning_rate": 7.619253795087208e-07, + "logits": -1.4545904397964478, + "logps": -83.42992401123047, + "loss": 0.1457, + "objective": 0.13813456892967224, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.5833333134651184, + "regularize": 0.13813456892967224, + "step": 345 + }, + { + "dpo_loss": 0.5435228943824768, + "epoch": 1.9839395370807746, + "grad_norm": 25.493404234037047, + "learning_rate": 7.534273228904915e-07, + "logits": -1.3632704019546509, + "logps": -84.23902893066406, + "loss": 0.1476, + "objective": 0.13394585251808167, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.5625, + "regularize": 0.13394585251808167, + "step": 350 + }, + { + "epoch": 1.9839395370807746, + "eval_dpo_loss": 0.6835209131240845, + "eval_logits": -1.4348496198654175, + "eval_logps": -89.60076904296875, + "eval_loss": 0.4086475670337677, + "eval_objective": 0.4084475636482239, + "eval_ranking_idealized": 0.5888429880142212, + "eval_ranking_idealized_expo": 0.5103305578231812, + "eval_ranking_simple": 0.5216942429542542, + "eval_regularize": 0.4084475636482239, + "eval_runtime": 259.621, + "eval_samples_per_second": 22.302, + "eval_steps_per_second": 0.932, + "step": 350 + }, + { + "dpo_loss": 0.5331315994262695, + "epoch": 2.012281530467643, + "grad_norm": 22.16231721451118, + "learning_rate": 7.448295815528956e-07, + "logits": -1.3494775295257568, + "logps": -82.90995788574219, + "loss": 0.1455, + "objective": 0.1512984335422516, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.6000000238418579, + "regularize": 0.1512984186410904, + "step": 355 + }, + { + "dpo_loss": 0.5351840853691101, + "epoch": 2.040623523854511, + "grad_norm": 25.1718748641759, + "learning_rate": 7.361355373863413e-07, + "logits": -1.393783688545227, + "logps": -81.44464874267578, + "loss": 0.1343, + "objective": 0.1370130479335785, + "ranking_idealized": 0.6583333611488342, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.637499988079071, + "regularize": 0.1370130479335785, + "step": 360 + }, + { + "dpo_loss": 0.5345187783241272, + "epoch": 2.0689655172413794, + "grad_norm": 24.179993370065525, + "learning_rate": 7.273486101616056e-07, + "logits": -1.474308729171753, + "logps": -83.76331329345703, + "loss": 0.1347, + "objective": 0.1313287615776062, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.6041666865348816, + "regularize": 0.1313287615776062, + "step": 365 + }, + { + "dpo_loss": 0.5465765595436096, + "epoch": 2.0973075106282475, + "grad_norm": 23.72652550591992, + "learning_rate": 7.184722561846797e-07, + "logits": -1.4518685340881348, + "logps": -81.55240631103516, + "loss": 0.124, + "objective": 0.1166418269276619, + "ranking_idealized": 0.5708333253860474, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.5541666746139526, + "regularize": 0.1166418269276619, + "step": 370 + }, + { + "dpo_loss": 0.5262054204940796, + "epoch": 2.1256495040151155, + "grad_norm": 24.679557221698076, + "learning_rate": 7.095099669372443e-07, + "logits": -1.4321234226226807, + "logps": -83.55628967285156, + "loss": 0.1283, + "objective": 0.12942390143871307, + "ranking_idealized": 0.6708333492279053, + "ranking_idealized_expo": 0.5916666388511658, + "ranking_simple": 0.6625000238418579, + "regularize": 0.12942390143871307, + "step": 375 + }, + { + "dpo_loss": 0.5403919219970703, + "epoch": 2.153991497401984, + "grad_norm": 23.122876869258256, + "learning_rate": 7.004652677033068e-07, + "logits": -1.338428020477295, + "logps": -82.6377182006836, + "loss": 0.1281, + "objective": 0.10954796522855759, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.5916666388511658, + "regularize": 0.10954796522855759, + "step": 380 + }, + { + "dpo_loss": 0.5505331754684448, + "epoch": 2.182333490788852, + "grad_norm": 22.25736511993951, + "learning_rate": 6.913417161825449e-07, + "logits": -1.4360421895980835, + "logps": -84.50902557373047, + "loss": 0.1236, + "objective": 0.11411557346582413, + "ranking_idealized": 0.5833333134651184, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.574999988079071, + "regularize": 0.11411556601524353, + "step": 385 + }, + { + "dpo_loss": 0.5398189425468445, + "epoch": 2.21067548417572, + "grad_norm": 23.82479611784211, + "learning_rate": 6.821429010908971e-07, + "logits": -1.336391806602478, + "logps": -83.15116882324219, + "loss": 0.1245, + "objective": 0.1218627318739891, + "ranking_idealized": 0.5291666388511658, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.5375000238418579, + "regularize": 0.1218627318739891, + "step": 390 + }, + { + "dpo_loss": 0.5215297341346741, + "epoch": 2.2390174775625886, + "grad_norm": 22.591578381119685, + "learning_rate": 6.728724407489553e-07, + "logits": -1.3484855890274048, + "logps": -83.57234954833984, + "loss": 0.1263, + "objective": 0.1272638440132141, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.5916666388511658, + "regularize": 0.12726382911205292, + "step": 395 + }, + { + "dpo_loss": 0.5246094465255737, + "epoch": 2.2673594709494567, + "grad_norm": 22.99471999109431, + "learning_rate": 6.635339816587108e-07, + "logits": -1.4181877374649048, + "logps": -84.8980712890625, + "loss": 0.1232, + "objective": 0.1278635859489441, + "ranking_idealized": 0.6666666865348816, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.675000011920929, + "regularize": 0.1278635859489441, + "step": 400 + }, + { + "epoch": 2.2673594709494567, + "eval_dpo_loss": 0.6825190186500549, + "eval_logits": -1.4141640663146973, + "eval_logps": -89.93671417236328, + "eval_loss": 0.40635946393013, + "eval_objective": 0.4059920310974121, + "eval_ranking_idealized": 0.5888429880142212, + "eval_ranking_idealized_expo": 0.5103305578231812, + "eval_ranking_simple": 0.5237603187561035, + "eval_regularize": 0.4059920310974121, + "eval_runtime": 259.3604, + "eval_samples_per_second": 22.324, + "eval_steps_per_second": 0.933, + "step": 400 + }, + { + "dpo_loss": 0.5345380902290344, + "epoch": 2.295701464336325, + "grad_norm": 22.452414561821904, + "learning_rate": 6.541311970692162e-07, + "logits": -1.484344720840454, + "logps": -82.7432861328125, + "loss": 0.1237, + "objective": 0.1316702663898468, + "ranking_idealized": 0.6458333134651184, + "ranking_idealized_expo": 0.5583333373069763, + "ranking_simple": 0.6541666388511658, + "regularize": 0.1316702663898468, + "step": 405 + }, + { + "dpo_loss": 0.5351517200469971, + "epoch": 2.324043457723193, + "grad_norm": 24.318684153528356, + "learning_rate": 6.446677855317264e-07, + "logits": -1.3660470247268677, + "logps": -82.44485473632812, + "loss": 0.1164, + "objective": 0.11186593025922775, + "ranking_idealized": 0.5708333253860474, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.5583333373069763, + "regularize": 0.11186593025922775, + "step": 410 + }, + { + "dpo_loss": 0.5329793691635132, + "epoch": 2.3523854511100613, + "grad_norm": 22.50760313963993, + "learning_rate": 6.351474694448864e-07, + "logits": -1.437878131866455, + "logps": -83.41373443603516, + "loss": 0.1186, + "objective": 0.12183640152215958, + "ranking_idealized": 0.5958333611488342, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.5708333253860474, + "regularize": 0.12183640152215958, + "step": 415 + }, + { + "dpo_loss": 0.5409477949142456, + "epoch": 2.3807274444969297, + "grad_norm": 23.39263075574448, + "learning_rate": 6.255739935905395e-07, + "logits": -1.349250078201294, + "logps": -85.22098541259766, + "loss": 0.1175, + "objective": 0.10631230473518372, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.47083333134651184, + "ranking_simple": 0.550000011920929, + "regularize": 0.10631229728460312, + "step": 420 + }, + { + "dpo_loss": 0.5271558165550232, + "epoch": 2.409069437883798, + "grad_norm": 23.840070879325513, + "learning_rate": 6.159511236607315e-07, + "logits": -1.4124720096588135, + "logps": -84.24110412597656, + "loss": 0.1153, + "objective": 0.11380250006914139, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.5958333611488342, + "regularize": 0.11380250006914139, + "step": 425 + }, + { + "dpo_loss": 0.5327500700950623, + "epoch": 2.4374114312706663, + "grad_norm": 22.9996288815754, + "learning_rate": 6.062826447764883e-07, + "logits": -1.4347702264785767, + "logps": -84.58445739746094, + "loss": 0.1076, + "objective": 0.10175766050815582, + "ranking_idealized": 0.5958333611488342, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.5916666388511658, + "regularize": 0.10175765305757523, + "step": 430 + }, + { + "dpo_loss": 0.5315712690353394, + "epoch": 2.4657534246575343, + "grad_norm": 22.21161853218669, + "learning_rate": 5.965723599989528e-07, + "logits": -1.4599779844284058, + "logps": -84.16157531738281, + "loss": 0.1148, + "objective": 0.11776481568813324, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.5916666388511658, + "regularize": 0.11776480078697205, + "step": 435 + }, + { + "dpo_loss": 0.5355103611946106, + "epoch": 2.4940954180444024, + "grad_norm": 23.031781845673333, + "learning_rate": 5.868240888334652e-07, + "logits": -1.385536789894104, + "logps": -83.61788940429688, + "loss": 0.1125, + "objective": 0.11075066775083542, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5708333253860474, + "regularize": 0.11075066775083542, + "step": 440 + }, + { + "dpo_loss": 0.5411112904548645, + "epoch": 2.5224374114312704, + "grad_norm": 25.203231448824464, + "learning_rate": 5.770416657271728e-07, + "logits": -1.4106037616729736, + "logps": -81.53707885742188, + "loss": 0.1119, + "objective": 0.11902200430631638, + "ranking_idealized": 0.5916666388511658, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.6041666865348816, + "regularize": 0.11902199685573578, + "step": 445 + }, + { + "dpo_loss": 0.55417400598526, + "epoch": 2.550779404818139, + "grad_norm": 24.455868446022734, + "learning_rate": 5.67228938560766e-07, + "logits": -1.4431836605072021, + "logps": -83.54483795166016, + "loss": 0.1085, + "objective": 0.10727948695421219, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5333333611488342, + "regularize": 0.10727948695421219, + "step": 450 + }, + { + "epoch": 2.550779404818139, + "eval_dpo_loss": 0.6829083561897278, + "eval_logits": -1.4380848407745361, + "eval_logps": -90.61122131347656, + "eval_loss": 0.40571001172065735, + "eval_objective": 0.406777560710907, + "eval_ranking_idealized": 0.5888429880142212, + "eval_ranking_idealized_expo": 0.5103305578231812, + "eval_ranking_simple": 0.5237603187561035, + "eval_regularize": 0.406777560710907, + "eval_runtime": 259.0425, + "eval_samples_per_second": 22.352, + "eval_steps_per_second": 0.934, + "step": 450 + }, + { + "dpo_loss": 0.5320044159889221, + "epoch": 2.579121398205007, + "grad_norm": 22.906053050143626, + "learning_rate": 5.573897671349268e-07, + "logits": -1.4764381647109985, + "logps": -84.27240753173828, + "loss": 0.1117, + "objective": 0.11940006166696548, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.574999988079071, + "regularize": 0.11940006166696548, + "step": 455 + }, + { + "dpo_loss": 0.5233482122421265, + "epoch": 2.6074633915918755, + "grad_norm": 22.258361780067798, + "learning_rate": 5.475280216520912e-07, + "logits": -1.5429632663726807, + "logps": -84.30569458007812, + "loss": 0.1103, + "objective": 0.10580132901668549, + "ranking_idealized": 0.6208333373069763, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5958333611488342, + "regularize": 0.10580132901668549, + "step": 460 + }, + { + "dpo_loss": 0.5289517641067505, + "epoch": 2.6358053849787435, + "grad_norm": 23.240912033270092, + "learning_rate": 5.376475811941191e-07, + "logits": -1.428727388381958, + "logps": -83.95030212402344, + "loss": 0.1071, + "objective": 0.10987317562103271, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.5541666746139526, + "regularize": 0.10987316071987152, + "step": 465 + }, + { + "dpo_loss": 0.5318377614021301, + "epoch": 2.6641473783656116, + "grad_norm": 24.64272982925985, + "learning_rate": 5.277523321964701e-07, + "logits": -1.4431354999542236, + "logps": -83.10697937011719, + "loss": 0.105, + "objective": 0.1006205826997757, + "ranking_idealized": 0.6166666746139526, + "ranking_idealized_expo": 0.5541666746139526, + "ranking_simple": 0.6166666746139526, + "regularize": 0.1006205826997757, + "step": 470 + }, + { + "dpo_loss": 0.5384759306907654, + "epoch": 2.69248937175248, + "grad_norm": 21.994194573090148, + "learning_rate": 5.178461669194903e-07, + "logits": -1.4019439220428467, + "logps": -82.92670440673828, + "loss": 0.101, + "objective": 0.0988389179110527, + "ranking_idealized": 0.6083333492279053, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.5916666388511658, + "regularize": 0.0988389179110527, + "step": 475 + }, + { + "dpo_loss": 0.539698600769043, + "epoch": 2.720831365139348, + "grad_norm": 24.874583032447394, + "learning_rate": 5.07932981917404e-07, + "logits": -1.5038942098617554, + "logps": -82.17936706542969, + "loss": 0.1017, + "objective": 0.10505501180887222, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5791666507720947, + "regularize": 0.10505500435829163, + "step": 480 + }, + { + "dpo_loss": 0.5295734405517578, + "epoch": 2.7491733585262166, + "grad_norm": 24.131350896743502, + "learning_rate": 4.980166765056193e-07, + "logits": -1.4220199584960938, + "logps": -84.46988677978516, + "loss": 0.1033, + "objective": 0.10565243661403656, + "ranking_idealized": 0.6041666865348816, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5958333611488342, + "regularize": 0.10565243661403656, + "step": 485 + }, + { + "dpo_loss": 0.5229516625404358, + "epoch": 2.7775153519130846, + "grad_norm": 23.380731245805677, + "learning_rate": 4.881011512269463e-07, + "logits": -1.4164656400680542, + "logps": -82.1783676147461, + "loss": 0.1056, + "objective": 0.10975264012813568, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.5958333611488342, + "regularize": 0.10975264012813568, + "step": 490 + }, + { + "dpo_loss": 0.536858856678009, + "epoch": 2.8058573452999527, + "grad_norm": 25.010956720921584, + "learning_rate": 4.78190306317332e-07, + "logits": -1.4320250749588013, + "logps": -81.11976623535156, + "loss": 0.0977, + "objective": 0.09322524815797806, + "ranking_idealized": 0.5708333253860474, + "ranking_idealized_expo": 0.4791666567325592, + "ranking_simple": 0.5666666626930237, + "regularize": 0.09322523325681686, + "step": 495 + }, + { + "dpo_loss": 0.5231731534004211, + "epoch": 2.8341993386868207, + "grad_norm": 24.981319167329183, + "learning_rate": 4.682880401717177e-07, + "logits": -1.479564905166626, + "logps": -80.21460723876953, + "loss": 0.099, + "objective": 0.09580207616090775, + "ranking_idealized": 0.612500011920929, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.6083333492279053, + "regularize": 0.09580207616090775, + "step": 500 + }, + { + "epoch": 2.8341993386868207, + "eval_dpo_loss": 0.6836758255958557, + "eval_logits": -1.4538483619689941, + "eval_logps": -89.78665924072266, + "eval_loss": 0.4075116813182831, + "eval_objective": 0.40899595618247986, + "eval_ranking_idealized": 0.5888429880142212, + "eval_ranking_idealized_expo": 0.5103305578231812, + "eval_ranking_simple": 0.5247933864593506, + "eval_regularize": 0.40899595618247986, + "eval_runtime": 259.2881, + "eval_samples_per_second": 22.33, + "eval_steps_per_second": 0.933, + "step": 500 + }, + { + "dpo_loss": 0.5333107113838196, + "epoch": 2.862541332073689, + "grad_norm": 22.440897537859303, + "learning_rate": 4.5839824781061886e-07, + "logits": -1.4319252967834473, + "logps": -82.19851684570312, + "loss": 0.0974, + "objective": 0.09931109100580215, + "ranking_idealized": 0.6041666865348816, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.612500011920929, + "regularize": 0.09931109100580215, + "step": 505 + }, + { + "dpo_loss": 0.5374515056610107, + "epoch": 2.8908833254605573, + "grad_norm": 22.71050128727261, + "learning_rate": 4.4852481934803277e-07, + "logits": -1.3620020151138306, + "logps": -82.26110076904297, + "loss": 0.0964, + "objective": 0.10236553847789764, + "ranking_idealized": 0.612500011920929, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5916666388511658, + "regularize": 0.10236553847789764, + "step": 510 + }, + { + "dpo_loss": 0.5393768548965454, + "epoch": 2.9192253188474258, + "grad_norm": 26.294279777028603, + "learning_rate": 4.3867163846127674e-07, + "logits": -1.5220664739608765, + "logps": -82.21379852294922, + "loss": 0.0962, + "objective": 0.09978827089071274, + "ranking_idealized": 0.5833333134651184, + "ranking_idealized_expo": 0.4791666567325592, + "ranking_simple": 0.5833333134651184, + "regularize": 0.09978827089071274, + "step": 515 + }, + { + "dpo_loss": 0.5407862067222595, + "epoch": 2.947567312234294, + "grad_norm": 22.719373903401866, + "learning_rate": 4.2884258086335745e-07, + "logits": -1.4105883836746216, + "logps": -84.40800476074219, + "loss": 0.0917, + "objective": 0.08780403435230255, + "ranking_idealized": 0.6166666746139526, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.6000000238418579, + "regularize": 0.08780403435230255, + "step": 520 + }, + { + "dpo_loss": 0.5382903814315796, + "epoch": 2.975909305621162, + "grad_norm": 22.439739653406917, + "learning_rate": 4.1904151277847305e-07, + "logits": -1.3989008665084839, + "logps": -83.13529205322266, + "loss": 0.0909, + "objective": 0.10328014940023422, + "ranking_idealized": 0.6499999761581421, + "ranking_idealized_expo": 0.5666666626930237, + "ranking_simple": 0.637499988079071, + "regularize": 0.10328014940023422, + "step": 525 + }, + { + "dpo_loss": 0.5224732756614685, + "epoch": 3.0042512990080303, + "grad_norm": 23.2445043242505, + "learning_rate": 4.092722894212487e-07, + "logits": -1.4099732637405396, + "logps": -82.2646484375, + "loss": 0.0906, + "objective": 0.08990009129047394, + "ranking_idealized": 0.5916666388511658, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.6000000238418579, + "regularize": 0.08990008383989334, + "step": 530 + }, + { + "dpo_loss": 0.5297616720199585, + "epoch": 3.0325932923948984, + "grad_norm": 24.595241433656245, + "learning_rate": 3.995387534803005e-07, + "logits": -1.4481351375579834, + "logps": -84.04501342773438, + "loss": 0.0863, + "objective": 0.09028714150190353, + "ranking_idealized": 0.612500011920929, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.6208333373069763, + "regularize": 0.09028714150190353, + "step": 535 + }, + { + "dpo_loss": 0.5298644304275513, + "epoch": 3.0609352857817664, + "grad_norm": 22.819470538427282, + "learning_rate": 3.8984473360672967e-07, + "logits": -1.5335410833358765, + "logps": -82.01764678955078, + "loss": 0.0786, + "objective": 0.07253900170326233, + "ranking_idealized": 0.6166666746139526, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.6291666626930237, + "regularize": 0.07253900170326233, + "step": 540 + }, + { + "dpo_loss": 0.5357497930526733, + "epoch": 3.089277279168635, + "grad_norm": 23.587959979388312, + "learning_rate": 3.801940429081345e-07, + "logits": -1.475661039352417, + "logps": -83.04609680175781, + "loss": 0.0786, + "objective": 0.08452685922384262, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.49166667461395264, + "ranking_simple": 0.5666666626930237, + "regularize": 0.08452685922384262, + "step": 545 + }, + { + "dpo_loss": 0.5293916463851929, + "epoch": 3.117619272555503, + "grad_norm": 23.742387802519247, + "learning_rate": 3.7059047744873955e-07, + "logits": -1.3145067691802979, + "logps": -83.14439392089844, + "loss": 0.0841, + "objective": 0.07637524604797363, + "ranking_idealized": 0.6208333373069763, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.612500011920929, + "regularize": 0.07637524604797363, + "step": 550 + }, + { + "epoch": 3.117619272555503, + "eval_dpo_loss": 0.6836426854133606, + "eval_logits": -1.4287773370742798, + "eval_logps": -89.19234466552734, + "eval_loss": 0.4074074625968933, + "eval_objective": 0.4091208279132843, + "eval_ranking_idealized": 0.5888429880142212, + "eval_ranking_idealized_expo": 0.5103305578231812, + "eval_ranking_simple": 0.5268595218658447, + "eval_regularize": 0.4091208279132843, + "eval_runtime": 259.888, + "eval_samples_per_second": 22.279, + "eval_steps_per_second": 0.931, + "step": 550 + }, + { + "dpo_loss": 0.5199671983718872, + "epoch": 3.1459612659423715, + "grad_norm": 23.606389156724106, + "learning_rate": 3.6103781475622786e-07, + "logits": -1.4020836353302002, + "logps": -83.6429214477539, + "loss": 0.0826, + "objective": 0.08424239605665207, + "ranking_idealized": 0.637499988079071, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.637499988079071, + "regularize": 0.08424239605665207, + "step": 555 + }, + { + "dpo_loss": 0.5297064185142517, + "epoch": 3.1743032593292395, + "grad_norm": 21.283296032324174, + "learning_rate": 3.5153981233586274e-07, + "logits": -1.375638484954834, + "logps": -80.67549896240234, + "loss": 0.0764, + "objective": 0.0754186362028122, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.5625, + "regularize": 0.0754186362028122, + "step": 560 + }, + { + "dpo_loss": 0.5281550884246826, + "epoch": 3.2026452527161076, + "grad_norm": 25.383548239078706, + "learning_rate": 3.421002061924876e-07, + "logits": -1.4403051137924194, + "logps": -82.08113098144531, + "loss": 0.0745, + "objective": 0.0825800895690918, + "ranking_idealized": 0.6416666507720947, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.6333333253860474, + "regularize": 0.0825800821185112, + "step": 565 + }, + { + "dpo_loss": 0.5239064693450928, + "epoch": 3.230987246102976, + "grad_norm": 25.606035120731306, + "learning_rate": 3.327227093609824e-07, + "logits": -1.3596783876419067, + "logps": -82.14395141601562, + "loss": 0.0748, + "objective": 0.07690493017435074, + "ranking_idealized": 0.5708333253860474, + "ranking_idealized_expo": 0.4791666567325592, + "ranking_simple": 0.5708333253860474, + "regularize": 0.07690493017435074, + "step": 570 + }, + { + "dpo_loss": 0.5288205146789551, + "epoch": 3.259329239489844, + "grad_norm": 21.36265788871065, + "learning_rate": 3.234110104457536e-07, + "logits": -1.4363545179367065, + "logps": -82.7227554321289, + "loss": 0.0765, + "objective": 0.08387748897075653, + "ranking_idealized": 0.6041666865348816, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5833333134651184, + "regularize": 0.08387748897075653, + "step": 575 + }, + { + "dpo_loss": 0.5455772876739502, + "epoch": 3.287671232876712, + "grad_norm": 22.23742629967835, + "learning_rate": 3.141687721698363e-07, + "logits": -1.4502298831939697, + "logps": -83.70122528076172, + "loss": 0.074, + "objective": 0.0667726993560791, + "ranking_idealized": 0.6166666746139526, + "ranking_idealized_expo": 0.5541666746139526, + "ranking_simple": 0.612500011920929, + "regularize": 0.0667726919054985, + "step": 580 + }, + { + "dpo_loss": 0.5377206206321716, + "epoch": 3.3160132262635806, + "grad_norm": 21.976427115545793, + "learning_rate": 3.049996299341742e-07, + "logits": -1.478832483291626, + "logps": -84.10258483886719, + "loss": 0.074, + "objective": 0.07396882027387619, + "ranking_idealized": 0.625, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.6291666626930237, + "regularize": 0.07396882027387619, + "step": 585 + }, + { + "dpo_loss": 0.540601372718811, + "epoch": 3.3443552196504487, + "grad_norm": 24.248150339564425, + "learning_rate": 2.959071903876486e-07, + "logits": -1.490022897720337, + "logps": -84.40371704101562, + "loss": 0.0726, + "objective": 0.06912810355424881, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.49166667461395264, + "ranking_simple": 0.5833333134651184, + "regularize": 0.06912810355424881, + "step": 590 + }, + { + "dpo_loss": 0.530450165271759, + "epoch": 3.372697213037317, + "grad_norm": 23.904834128431904, + "learning_rate": 2.86895030008416e-07, + "logits": -1.4088099002838135, + "logps": -83.5683822631836, + "loss": 0.0716, + "objective": 0.06921317428350449, + "ranking_idealized": 0.5958333611488342, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.6000000238418579, + "regularize": 0.06921316683292389, + "step": 595 + }, + { + "dpo_loss": 0.510923445224762, + "epoch": 3.4010392064241852, + "grad_norm": 21.999466319441446, + "learning_rate": 2.779666936971129e-07, + "logits": -1.4195644855499268, + "logps": -83.0455551147461, + "loss": 0.0673, + "objective": 0.06648312509059906, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.5874999761581421, + "regularize": 0.06648311764001846, + "step": 600 + }, + { + "epoch": 3.4010392064241852, + "eval_dpo_loss": 0.6824304461479187, + "eval_logits": -1.4325991868972778, + "eval_logps": -89.83067321777344, + "eval_loss": 0.40557217597961426, + "eval_objective": 0.40685591101646423, + "eval_ranking_idealized": 0.5888429880142212, + "eval_ranking_idealized_expo": 0.5103305578231812, + "eval_ranking_simple": 0.5237603187561035, + "eval_regularize": 0.40685591101646423, + "eval_runtime": 259.0599, + "eval_samples_per_second": 22.35, + "eval_steps_per_second": 0.934, + "step": 600 + }, + { + "dpo_loss": 0.5408468246459961, + "epoch": 3.4293811998110533, + "grad_norm": 21.826287125403734, + "learning_rate": 2.6912569338248315e-07, + "logits": -1.4806511402130127, + "logps": -85.08236694335938, + "loss": 0.0687, + "objective": 0.0708792433142662, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.5874999761581421, + "regularize": 0.0708792433142662, + "step": 605 + }, + { + "dpo_loss": 0.5326560139656067, + "epoch": 3.4577231931979218, + "grad_norm": 23.721876415078565, + "learning_rate": 2.603755066399718e-07, + "logits": -1.4362066984176636, + "logps": -83.59281158447266, + "loss": 0.0693, + "objective": 0.06495842337608337, + "ranking_idealized": 0.5791666507720947, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.574999988079071, + "regularize": 0.06495841592550278, + "step": 610 + }, + { + "dpo_loss": 0.5220057964324951, + "epoch": 3.48606518658479, + "grad_norm": 23.867397255620617, + "learning_rate": 2.517195753238345e-07, + "logits": -1.459093451499939, + "logps": -83.89041137695312, + "loss": 0.0677, + "objective": 0.06726350635290146, + "ranking_idealized": 0.5416666865348816, + "ranking_idealized_expo": 0.47083333134651184, + "ranking_simple": 0.5458333492279053, + "regularize": 0.06726350635290146, + "step": 615 + }, + { + "dpo_loss": 0.5138709545135498, + "epoch": 3.514407179971658, + "grad_norm": 22.48517117265223, + "learning_rate": 2.4316130421329696e-07, + "logits": -1.3361726999282837, + "logps": -83.23828887939453, + "loss": 0.0661, + "objective": 0.05854518711566925, + "ranking_idealized": 0.5666666626930237, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.5666666626930237, + "regularize": 0.058545153588056564, + "step": 620 + }, + { + "dpo_loss": 0.5306848287582397, + "epoch": 3.5427491733585263, + "grad_norm": 22.374230054745578, + "learning_rate": 2.3470405967329604e-07, + "logits": -1.406466007232666, + "logps": -82.32576751708984, + "loss": 0.0639, + "objective": 0.06265277415513992, + "ranking_idealized": 0.6083333492279053, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.6041666865348816, + "regularize": 0.06265277415513992, + "step": 625 + }, + { + "dpo_loss": 0.5401536226272583, + "epoch": 3.5710911667453944, + "grad_norm": 22.323503974192004, + "learning_rate": 2.2635116833033392e-07, + "logits": -1.4880479574203491, + "logps": -82.74535369873047, + "loss": 0.0671, + "objective": 0.06858905404806137, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5874999761581421, + "regularize": 0.06858905404806137, + "step": 630 + }, + { + "dpo_loss": 0.5335288643836975, + "epoch": 3.5994331601322624, + "grad_norm": 22.950166480099814, + "learning_rate": 2.181059157639598e-07, + "logits": -1.426721215248108, + "logps": -82.85971069335938, + "loss": 0.06, + "objective": 0.0622558668255806, + "ranking_idealized": 0.5666666626930237, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.5708333253860474, + "regularize": 0.062255859375, + "step": 635 + }, + { + "dpo_loss": 0.5119226574897766, + "epoch": 3.627775153519131, + "grad_norm": 25.079864254767315, + "learning_rate": 2.0997154521440097e-07, + "logits": -1.3697155714035034, + "logps": -83.90760803222656, + "loss": 0.0613, + "objective": 0.0635208860039711, + "ranking_idealized": 0.5541666746139526, + "ranking_idealized_expo": 0.4625000059604645, + "ranking_simple": 0.5458333492279053, + "regularize": 0.0635208785533905, + "step": 640 + }, + { + "dpo_loss": 0.522363007068634, + "epoch": 3.656117146905999, + "grad_norm": 22.441342121743332, + "learning_rate": 2.0195125630684428e-07, + "logits": -1.3928742408752441, + "logps": -81.88297271728516, + "loss": 0.0634, + "objective": 0.05965565890073776, + "ranking_idealized": 0.6208333373069763, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.612500011920929, + "regularize": 0.05965564027428627, + "step": 645 + }, + { + "dpo_loss": 0.5373592376708984, + "epoch": 3.6844591402928675, + "grad_norm": 22.133762729051785, + "learning_rate": 1.9404820379287672e-07, + "logits": -1.3841991424560547, + "logps": -83.1523208618164, + "loss": 0.0589, + "objective": 0.055038776248693466, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.5666666626930237, + "regularize": 0.055038776248693466, + "step": 650 + }, + { + "epoch": 3.6844591402928675, + "eval_dpo_loss": 0.6828624606132507, + "eval_logits": -1.4302468299865723, + "eval_logps": -89.47576904296875, + "eval_loss": 0.40598276257514954, + "eval_objective": 0.4077259600162506, + "eval_ranking_idealized": 0.5888429880142212, + "eval_ranking_idealized_expo": 0.5103305578231812, + "eval_ranking_simple": 0.5247933864593506, + "eval_regularize": 0.4077259600162506, + "eval_runtime": 258.9725, + "eval_samples_per_second": 22.358, + "eval_steps_per_second": 0.934, + "step": 650 + }, + { + "dpo_loss": 0.5351348519325256, + "epoch": 3.7128011336797355, + "grad_norm": 23.905512006208795, + "learning_rate": 1.8626549630957395e-07, + "logits": -1.429569125175476, + "logps": -82.42403411865234, + "loss": 0.0624, + "objective": 0.05734870210289955, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.5958333611488342, + "regularize": 0.05734868720173836, + "step": 655 + }, + { + "dpo_loss": 0.5322324633598328, + "epoch": 3.7411431270666036, + "grad_norm": 24.42468424510045, + "learning_rate": 1.7860619515673032e-07, + "logits": -1.5189285278320312, + "logps": -83.2733383178711, + "loss": 0.0612, + "objective": 0.06605425477027893, + "ranking_idealized": 0.625, + "ranking_idealized_expo": 0.5541666746139526, + "ranking_simple": 0.625, + "regularize": 0.06605424731969833, + "step": 660 + }, + { + "dpo_loss": 0.5305153131484985, + "epoch": 3.769485120453472, + "grad_norm": 21.98557345680479, + "learning_rate": 1.7107331309270684e-07, + "logits": -1.4122134447097778, + "logps": -83.17848205566406, + "loss": 0.0579, + "objective": 0.05437133088707924, + "ranking_idealized": 0.6041666865348816, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.5916666388511658, + "regularize": 0.05437132343649864, + "step": 665 + }, + { + "dpo_loss": 0.5314101576805115, + "epoch": 3.79782711384034, + "grad_norm": 22.57049790061395, + "learning_rate": 1.6366981314937372e-07, + "logits": -1.5129222869873047, + "logps": -83.30918884277344, + "loss": 0.0549, + "objective": 0.06075560674071312, + "ranking_idealized": 0.6791666746139526, + "ranking_idealized_expo": 0.5958333611488342, + "ranking_simple": 0.675000011920929, + "regularize": 0.06075560301542282, + "step": 670 + }, + { + "dpo_loss": 0.5331992506980896, + "epoch": 3.826169107227208, + "grad_norm": 21.51450391411621, + "learning_rate": 1.5639860746661338e-07, + "logits": -1.464658498764038, + "logps": -82.55012512207031, + "loss": 0.0562, + "objective": 0.05308786779642105, + "ranking_idealized": 0.6166666746139526, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.6083333492279053, + "regularize": 0.05308786407113075, + "step": 675 + }, + { + "dpo_loss": 0.544487714767456, + "epoch": 3.8545111006140766, + "grad_norm": 21.91828532034966, + "learning_rate": 1.492625561468393e-07, + "logits": -1.401973009109497, + "logps": -83.26588439941406, + "loss": 0.0543, + "objective": 0.055845096707344055, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.5833333134651184, + "regularize": 0.05584508553147316, + "step": 680 + }, + { + "dpo_loss": 0.5221087336540222, + "epoch": 3.8828530940009447, + "grad_norm": 23.338800601233537, + "learning_rate": 1.4226446612998671e-07, + "logits": -1.483197569847107, + "logps": -82.65924835205078, + "loss": 0.0543, + "objective": 0.04644104465842247, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.5958333611488342, + "regularize": 0.04644103720784187, + "step": 685 + }, + { + "dpo_loss": 0.5242043137550354, + "epoch": 3.9111950873878127, + "grad_norm": 22.026766940460053, + "learning_rate": 1.3540709008941147e-07, + "logits": -1.449702501296997, + "logps": -81.98009490966797, + "loss": 0.0547, + "objective": 0.055739615112543106, + "ranking_idealized": 0.6291666626930237, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.6416666507720947, + "regularize": 0.05573960393667221, + "step": 690 + }, + { + "dpo_loss": 0.5308277606964111, + "epoch": 3.9395370807746812, + "grad_norm": 22.736825591526987, + "learning_rate": 1.2869312534913685e-07, + "logits": -1.3683240413665771, + "logps": -83.3951187133789, + "loss": 0.056, + "objective": 0.05744828283786774, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.46666666865348816, + "ranking_simple": 0.5458333492279053, + "regularize": 0.05744827911257744, + "step": 695 + }, + { + "dpo_loss": 0.5327464938163757, + "epoch": 3.9678790741615493, + "grad_norm": 24.974758066705547, + "learning_rate": 1.2212521282287093e-07, + "logits": -1.416201114654541, + "logps": -83.47090148925781, + "loss": 0.0551, + "objective": 0.05039297044277191, + "ranking_idealized": 0.5708333253860474, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.574999988079071, + "regularize": 0.05039296671748161, + "step": 700 + }, + { + "epoch": 3.9678790741615493, + "eval_dpo_loss": 0.683082640171051, + "eval_logits": -1.4301180839538574, + "eval_logps": -90.06600952148438, + "eval_loss": 0.40649789571762085, + "eval_objective": 0.4080060124397278, + "eval_ranking_idealized": 0.5888429880142212, + "eval_ranking_idealized_expo": 0.5103305578231812, + "eval_ranking_simple": 0.5237603187561035, + "eval_regularize": 0.4080060124397278, + "eval_runtime": 258.866, + "eval_samples_per_second": 22.367, + "eval_steps_per_second": 0.935, + "step": 700 + }, + { + "dpo_loss": 0.5257295966148376, + "epoch": 3.9962210675484178, + "grad_norm": 21.66945207844546, + "learning_rate": 1.15705935975212e-07, + "logits": -1.3355560302734375, + "logps": -81.95101928710938, + "loss": 0.0536, + "objective": 0.04855410382151604, + "ranking_idealized": 0.6166666746139526, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.6166666746139526, + "regularize": 0.04855410382151604, + "step": 705 + }, + { + "dpo_loss": 0.5204980373382568, + "epoch": 4.024563060935286, + "grad_norm": 21.87585318414452, + "learning_rate": 1.094378198054533e-07, + "logits": -1.4359726905822754, + "logps": -83.67707061767578, + "loss": 0.0474, + "objective": 0.05088849365711212, + "ranking_idealized": 0.5666666626930237, + "ranking_idealized_expo": 0.4791666567325592, + "ranking_simple": 0.5708333253860474, + "regularize": 0.050888482481241226, + "step": 710 + }, + { + "dpo_loss": 0.5301558375358582, + "epoch": 4.052905054322154, + "grad_norm": 22.01280193333486, + "learning_rate": 1.0332332985438247e-07, + "logits": -1.3890125751495361, + "logps": -83.36654663085938, + "loss": 0.0434, + "objective": 0.040184516459703445, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.6041666865348816, + "regularize": 0.04018450155854225, + "step": 715 + }, + { + "dpo_loss": 0.5191416144371033, + "epoch": 4.081247047709022, + "grad_norm": 21.943342871470353, + "learning_rate": 9.736487123447068e-08, + "logits": -1.3216856718063354, + "logps": -85.42113494873047, + "loss": 0.0441, + "objective": 0.03967616334557533, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.5833333134651184, + "regularize": 0.039676155894994736, + "step": 720 + }, + { + "dpo_loss": 0.5419493913650513, + "epoch": 4.109589041095891, + "grad_norm": 22.065151941072486, + "learning_rate": 9.156478768383058e-08, + "logits": -1.4097427129745483, + "logps": -83.27389526367188, + "loss": 0.0477, + "objective": 0.04659968614578247, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5874999761581421, + "regularize": 0.046599678695201874, + "step": 725 + }, + { + "dpo_loss": 0.5275304317474365, + "epoch": 4.137931034482759, + "grad_norm": 22.997003588267155, + "learning_rate": 8.592536064431466e-08, + "logits": -1.4810242652893066, + "logps": -83.33085632324219, + "loss": 0.0479, + "objective": 0.05003201588988304, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5916666388511658, + "regularize": 0.05003199726343155, + "step": 730 + }, + { + "dpo_loss": 0.5354489684104919, + "epoch": 4.166273027869627, + "grad_norm": 22.750124706779673, + "learning_rate": 8.044880836411888e-08, + "logits": -1.3749909400939941, + "logps": -84.28314971923828, + "loss": 0.042, + "objective": 0.04194118455052376, + "ranking_idealized": 0.6041666865348816, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.6041666865348816, + "regularize": 0.04194117337465286, + "step": 735 + }, + { + "dpo_loss": 0.5109390616416931, + "epoch": 4.194615021256495, + "grad_norm": 23.35643629791226, + "learning_rate": 7.513728502524286e-08, + "logits": -1.3980611562728882, + "logps": -83.87706756591797, + "loss": 0.0437, + "objective": 0.042474415153265, + "ranking_idealized": 0.6083333492279053, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.6041666865348816, + "regularize": 0.0424744077026844, + "step": 740 + }, + { + "dpo_loss": 0.5253542065620422, + "epoch": 4.222957014643363, + "grad_norm": 22.418675908813192, + "learning_rate": 6.999287989614971e-08, + "logits": -1.4651761054992676, + "logps": -81.21513366699219, + "loss": 0.0406, + "objective": 0.04062732681632042, + "ranking_idealized": 0.5791666507720947, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.574999988079071, + "regularize": 0.040627315640449524, + "step": 745 + }, + { + "dpo_loss": 0.5217363834381104, + "epoch": 4.251299008030231, + "grad_norm": 22.888185894990265, + "learning_rate": 6.501761650996052e-08, + "logits": -1.5698094367980957, + "logps": -83.2958984375, + "loss": 0.042, + "objective": 0.045288145542144775, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.6000000238418579, + "regularize": 0.04528813809156418, + "step": 750 + }, + { + "epoch": 4.251299008030231, + "eval_dpo_loss": 0.6830218434333801, + "eval_logits": -1.4307194948196411, + "eval_logps": -90.04474639892578, + "eval_loss": 0.4063892364501953, + "eval_objective": 0.4078083634376526, + "eval_ranking_idealized": 0.5888429880142212, + "eval_ranking_idealized_expo": 0.5103305578231812, + "eval_ranking_simple": 0.5247933864593506, + "eval_regularize": 0.4078083634376526, + "eval_runtime": 258.9989, + "eval_samples_per_second": 22.355, + "eval_steps_per_second": 0.934, + "step": 750 + }, + { + "dpo_loss": 0.5249465107917786, + "epoch": 4.2796410014171, + "grad_norm": 22.190575430128455, + "learning_rate": 6.021345186850418e-08, + "logits": -1.4760249853134155, + "logps": -83.12273406982422, + "loss": 0.0418, + "objective": 0.04030155390501022, + "ranking_idealized": 0.612500011920929, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.6166666746139526, + "regularize": 0.04030154272913933, + "step": 755 + }, + { + "dpo_loss": 0.5280516147613525, + "epoch": 4.307982994803968, + "grad_norm": 22.195011354775016, + "learning_rate": 5.5582275672538316e-08, + "logits": -1.460343837738037, + "logps": -83.6526870727539, + "loss": 0.0395, + "objective": 0.040188662707805634, + "ranking_idealized": 0.6541666388511658, + "ranking_idealized_expo": 0.5625, + "ranking_simple": 0.6458333134651184, + "regularize": 0.04018864780664444, + "step": 760 + }, + { + "dpo_loss": 0.508765459060669, + "epoch": 4.336324988190836, + "grad_norm": 21.99198419312676, + "learning_rate": 5.112590957844232e-08, + "logits": -1.4831253290176392, + "logps": -83.9940414428711, + "loss": 0.0416, + "objective": 0.03937076777219772, + "ranking_idealized": 0.6291666626930237, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.6291666626930237, + "regularize": 0.03937075287103653, + "step": 765 + }, + { + "dpo_loss": 0.5139289498329163, + "epoch": 4.364666981577704, + "grad_norm": 22.21570497564684, + "learning_rate": 4.684610648167503e-08, + "logits": -1.355908751487732, + "logps": -82.18904113769531, + "loss": 0.0418, + "objective": 0.041529521346092224, + "ranking_idealized": 0.5916666388511658, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.6000000238418579, + "regularize": 0.04152949899435043, + "step": 770 + }, + { + "dpo_loss": 0.5221685171127319, + "epoch": 4.393008974964572, + "grad_norm": 21.306801693131447, + "learning_rate": 4.274454982728032e-08, + "logits": -1.4285643100738525, + "logps": -83.1854476928711, + "loss": 0.0394, + "objective": 0.04110860824584961, + "ranking_idealized": 0.5958333611488342, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.5958333611488342, + "regularize": 0.041108593344688416, + "step": 775 + }, + { + "dpo_loss": 0.5304800868034363, + "epoch": 4.42135096835144, + "grad_norm": 21.938217857408958, + "learning_rate": 3.882285294770937e-08, + "logits": -1.4632736444473267, + "logps": -81.85124969482422, + "loss": 0.0379, + "objective": 0.03418119251728058, + "ranking_idealized": 0.5708333253860474, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5666666626930237, + "regularize": 0.03418118134140968, + "step": 780 + }, + { + "dpo_loss": 0.5404612421989441, + "epoch": 4.449692961738309, + "grad_norm": 21.77705913902379, + "learning_rate": 3.508255842822255e-08, + "logits": -1.4751582145690918, + "logps": -81.96646118164062, + "loss": 0.0448, + "objective": 0.04277818650007248, + "ranking_idealized": 0.6166666746139526, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.6041666865348816, + "regularize": 0.04277818277478218, + "step": 785 + }, + { + "dpo_loss": 0.5209127068519592, + "epoch": 4.478034955125177, + "grad_norm": 21.724227546519376, + "learning_rate": 3.15251375001192e-08, + "logits": -1.4253805875778198, + "logps": -84.63212585449219, + "loss": 0.0402, + "objective": 0.050088923424482346, + "ranking_idealized": 0.6416666507720947, + "ranking_idealized_expo": 0.5583333373069763, + "ranking_simple": 0.625, + "regularize": 0.05008890852332115, + "step": 790 + }, + { + "dpo_loss": 0.5272155404090881, + "epoch": 4.506376948512045, + "grad_norm": 21.960441297110094, + "learning_rate": 2.8151989462033787e-08, + "logits": -1.3359031677246094, + "logps": -84.30043029785156, + "loss": 0.0412, + "objective": 0.03479573875665665, + "ranking_idealized": 0.5958333611488342, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.5916666388511658, + "regularize": 0.034795720130205154, + "step": 795 + }, + { + "dpo_loss": 0.5258675813674927, + "epoch": 4.534718941898913, + "grad_norm": 22.834668811719133, + "learning_rate": 2.4964441129527335e-08, + "logits": -1.3358808755874634, + "logps": -83.53750610351562, + "loss": 0.0411, + "objective": 0.04309748858213425, + "ranking_idealized": 0.6083333492279053, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.6083333492279053, + "regularize": 0.04309746250510216, + "step": 800 + }, + { + "epoch": 4.534718941898913, + "eval_dpo_loss": 0.6830146908760071, + "eval_logits": -1.431044578552246, + "eval_logps": -90.11402893066406, + "eval_loss": 0.406222939491272, + "eval_objective": 0.4077996015548706, + "eval_ranking_idealized": 0.5888429880142212, + "eval_ranking_idealized_expo": 0.5103305578231812, + "eval_ranking_simple": 0.5237603187561035, + "eval_regularize": 0.4077996015548706, + "eval_runtime": 258.8062, + "eval_samples_per_second": 22.372, + "eval_steps_per_second": 0.935, + "step": 800 + }, + { + "dpo_loss": 0.5273416042327881, + "epoch": 4.563060935285781, + "grad_norm": 21.794535718115338, + "learning_rate": 2.1963746313188757e-08, + "logits": -1.4133697748184204, + "logps": -82.60270690917969, + "loss": 0.0414, + "objective": 0.046149447560310364, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.46666666865348816, + "ranking_simple": 0.550000011920929, + "regularize": 0.04614944010972977, + "step": 805 + }, + { + "dpo_loss": 0.5305873155593872, + "epoch": 4.59140292867265, + "grad_norm": 21.298734472415376, + "learning_rate": 1.915108532545351e-08, + "logits": -1.481737494468689, + "logps": -82.04961395263672, + "loss": 0.0395, + "objective": 0.03058464638888836, + "ranking_idealized": 0.5291666388511658, + "ranking_idealized_expo": 0.4583333432674408, + "ranking_simple": 0.5333333611488342, + "regularize": 0.030584635213017464, + "step": 810 + }, + { + "dpo_loss": 0.5338551998138428, + "epoch": 4.619744922059518, + "grad_norm": 21.722779837853974, + "learning_rate": 1.6527564516331638e-08, + "logits": -1.3470157384872437, + "logps": -83.43151092529297, + "loss": 0.0369, + "objective": 0.030139055103063583, + "ranking_idealized": 0.675000011920929, + "ranking_idealized_expo": 0.5916666388511658, + "ranking_simple": 0.6708333492279053, + "regularize": 0.030139045789837837, + "step": 815 + }, + { + "dpo_loss": 0.5377717614173889, + "epoch": 4.648086915446386, + "grad_norm": 23.027732641639304, + "learning_rate": 1.4094215838229172e-08, + "logits": -1.439835786819458, + "logps": -83.44994354248047, + "loss": 0.0373, + "objective": 0.03681868314743042, + "ranking_idealized": 0.6166666746139526, + "ranking_idealized_expo": 0.5625, + "ranking_simple": 0.612500011920929, + "regularize": 0.036818671971559525, + "step": 820 + }, + { + "dpo_loss": 0.5387639999389648, + "epoch": 4.6764289088332545, + "grad_norm": 22.893892489361072, + "learning_rate": 1.1851996440033318e-08, + "logits": -1.3366633653640747, + "logps": -81.3759765625, + "loss": 0.0369, + "objective": 0.03668622300028801, + "ranking_idealized": 0.5541666746139526, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5375000238418579, + "regularize": 0.03668620437383652, + "step": 825 + }, + { + "dpo_loss": 0.5243638753890991, + "epoch": 4.7047709022201225, + "grad_norm": 21.58395292653118, + "learning_rate": 9.801788290621505e-09, + "logits": -1.506198525428772, + "logps": -83.259033203125, + "loss": 0.0407, + "objective": 0.041429486125707626, + "ranking_idealized": 0.5916666388511658, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5916666388511658, + "regularize": 0.04142947867512703, + "step": 830 + }, + { + "dpo_loss": 0.5125473737716675, + "epoch": 4.733112895606991, + "grad_norm": 21.98641530853052, + "learning_rate": 7.944397831941951e-09, + "logits": -1.4062670469284058, + "logps": -83.29720306396484, + "loss": 0.0372, + "objective": 0.03951678425073624, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.5583333373069763, + "regularize": 0.03951676934957504, + "step": 835 + }, + { + "dpo_loss": 0.5017682909965515, + "epoch": 4.7614548889938595, + "grad_norm": 21.972117419289066, + "learning_rate": 6.280555661802856e-09, + "logits": -1.423843264579773, + "logps": -83.54265594482422, + "loss": 0.0372, + "objective": 0.03352176770567894, + "ranking_idealized": 0.6208333373069763, + "ranking_idealized_expo": 0.5541666746139526, + "ranking_simple": 0.612500011920929, + "regularize": 0.033521756529808044, + "step": 840 + }, + { + "dpo_loss": 0.5365482568740845, + "epoch": 4.7897968823807275, + "grad_norm": 21.356793654139537, + "learning_rate": 4.810916246494157e-09, + "logits": -1.45553719997406, + "logps": -83.4180679321289, + "loss": 0.0383, + "objective": 0.040656425058841705, + "ranking_idealized": 0.5833333134651184, + "ranking_idealized_expo": 0.4541666805744171, + "ranking_simple": 0.5625, + "regularize": 0.04065641388297081, + "step": 845 + }, + { + "dpo_loss": 0.5246464014053345, + "epoch": 4.818138875767596, + "grad_norm": 22.81185797664159, + "learning_rate": 3.5360576633558513e-09, + "logits": -1.4138314723968506, + "logps": -82.19649505615234, + "loss": 0.0355, + "objective": 0.03642057999968529, + "ranking_idealized": 0.6166666746139526, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.6208333373069763, + "regularize": 0.03642057254910469, + "step": 850 + }, + { + "epoch": 4.818138875767596, + "eval_dpo_loss": 0.6829268932342529, + "eval_logits": -1.4302399158477783, + "eval_logps": -90.043212890625, + "eval_loss": 0.40620195865631104, + "eval_objective": 0.40770116448402405, + "eval_ranking_idealized": 0.5888429880142212, + "eval_ranking_idealized_expo": 0.5103305578231812, + "eval_ranking_simple": 0.5247933864593506, + "eval_regularize": 0.40770116448402405, + "eval_runtime": 259.1263, + "eval_samples_per_second": 22.344, + "eval_steps_per_second": 0.934, + "step": 850 + }, + { + "dpo_loss": 0.5365470051765442, + "epoch": 4.846480869154464, + "grad_norm": 22.602716102552016, + "learning_rate": 2.4564813733932155e-09, + "logits": -1.3940719366073608, + "logps": -82.6231460571289, + "loss": 0.0347, + "objective": 0.03581225126981735, + "ranking_idealized": 0.6458333134651184, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.6458333134651184, + "regularize": 0.03581221401691437, + "step": 855 + }, + { + "dpo_loss": 0.5150249004364014, + "epoch": 4.874822862541333, + "grad_norm": 23.704671287447177, + "learning_rate": 1.5726120240288631e-09, + "logits": -1.3679381608963013, + "logps": -82.33541870117188, + "loss": 0.0348, + "objective": 0.031035231426358223, + "ranking_idealized": 0.6541666388511658, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.6458333134651184, + "regularize": 0.031035220250487328, + "step": 860 + }, + { + "dpo_loss": 0.5223459005355835, + "epoch": 4.903164855928201, + "grad_norm": 21.42329131044869, + "learning_rate": 8.847972820693051e-10, + "logits": -1.4437813758850098, + "logps": -81.53370666503906, + "loss": 0.0355, + "objective": 0.04200226441025734, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.5958333611488342, + "regularize": 0.042002253234386444, + "step": 865 + }, + { + "dpo_loss": 0.5215969681739807, + "epoch": 4.931506849315069, + "grad_norm": 21.701501283901965, + "learning_rate": 3.933076969516724e-10, + "logits": -1.4914921522140503, + "logps": -83.26063537597656, + "loss": 0.0393, + "objective": 0.04051649197936058, + "ranking_idealized": 0.6041666865348816, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.6083333492279053, + "regularize": 0.0405164435505867, + "step": 870 + }, + { + "dpo_loss": 0.5250566005706787, + "epoch": 4.959848842701937, + "grad_norm": 21.86259624413417, + "learning_rate": 9.833659432367803e-11, + "logits": -1.4107563495635986, + "logps": -83.20445251464844, + "loss": 0.0346, + "objective": 0.027810534462332726, + "ranking_idealized": 0.6166666746139526, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.6166666746139526, + "regularize": 0.027810489758849144, + "step": 875 + }, + { + "dpo_loss": 0.520707905292511, + "epoch": 4.988190836088805, + "grad_norm": 23.229102177877856, + "learning_rate": 0.0, + "logits": -1.4621251821517944, + "logps": -83.79481506347656, + "loss": 0.035, + "objective": 0.029516249895095825, + "ranking_idealized": 0.625, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.625, + "regularize": 0.02951624244451523, + "step": 880 + }, + { + "epoch": 4.988190836088805, + "step": 880, + "total_flos": 0.0, + "train_loss": 0.1442635908045552, + "train_runtime": 35242.7125, + "train_samples_per_second": 7.207, + "train_steps_per_second": 0.025 + } + ], + "logging_steps": 5, + "max_steps": 880, + "num_input_tokens_seen": 0, + "num_train_epochs": 5, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +}