diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,2969 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 4.988190836088805, + "eval_steps": 50, + "global_step": 880, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "dpo_loss": 0.6931471824645996, + "epoch": 0.005668398677373642, + "grad_norm": 13413.608867135807, + "learning_rate": 5.681818181818182e-08, + "logits": -1.3147305250167847, + "logps": -88.0877456665039, + "loss": 0.4113, + "objective": 0.41588976979255676, + "ranking_idealized": 0.5208333134651184, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.5208333134651184, + "regularize": 0.41588976979255676, + "step": 1 + }, + { + "dpo_loss": 1.6281694173812866, + "epoch": 0.02834199338686821, + "grad_norm": 23830.011481895843, + "learning_rate": 2.840909090909091e-07, + "logits": -1.3680096864700317, + "logps": -84.41747283935547, + "loss": 2.7794, + "objective": 2.858431100845337, + "ranking_idealized": 0.546875, + "ranking_idealized_expo": 0.546875, + "ranking_simple": 0.546875, + "regularize": 2.858431100845337, + "step": 5 + }, + { + "dpo_loss": 3.2700231075286865, + "epoch": 0.05668398677373642, + "grad_norm": 29280.306552066217, + "learning_rate": 5.681818181818182e-07, + "logits": -1.4477864503860474, + "logps": -83.50318908691406, + "loss": 7.354, + "objective": 6.85181999206543, + "ranking_idealized": 0.5083333253860474, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.512499988079071, + "regularize": 6.85181999206543, + "step": 10 + }, + { + "dpo_loss": 6.557200908660889, + "epoch": 0.08502598016060463, + "grad_norm": 20109.513515072522, + "learning_rate": 8.522727272727273e-07, + "logits": -1.4104349613189697, + "logps": -83.76626586914062, + "loss": 13.7747, + "objective": 13.58159351348877, + "ranking_idealized": 0.5041666626930237, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.5083333253860474, + "regularize": 13.58159351348877, + "step": 15 + }, + { + "dpo_loss": 9.023734092712402, + "epoch": 0.11336797354747284, + "grad_norm": 13139.311441037944, + "learning_rate": 1.1363636363636364e-06, + "logits": -1.3969768285751343, + "logps": -84.27888488769531, + "loss": 19.2285, + "objective": 18.731477737426758, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5375000238418579, + "regularize": 18.731477737426758, + "step": 20 + }, + { + "dpo_loss": 14.174921035766602, + "epoch": 0.14170996693434104, + "grad_norm": 16805.179390769823, + "learning_rate": 1.4204545454545458e-06, + "logits": -1.432785153388977, + "logps": -84.65080261230469, + "loss": 28.6241, + "objective": 28.14487648010254, + "ranking_idealized": 0.4958333373069763, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.5, + "regularize": 28.14487648010254, + "step": 25 + }, + { + "dpo_loss": 16.938854217529297, + "epoch": 0.17005196032120926, + "grad_norm": 15979.803455377341, + "learning_rate": 1.7045454545454546e-06, + "logits": -1.3974343538284302, + "logps": -85.62297058105469, + "loss": 36.5075, + "objective": 37.031803131103516, + "ranking_idealized": 0.5416666865348816, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.550000011920929, + "regularize": 37.031803131103516, + "step": 30 + }, + { + "dpo_loss": 23.828941345214844, + "epoch": 0.19839395370807747, + "grad_norm": 17293.601243499223, + "learning_rate": 1.9886363636363638e-06, + "logits": -1.373544692993164, + "logps": -83.43585968017578, + "loss": 46.3083, + "objective": 45.453346252441406, + "ranking_idealized": 0.5041666626930237, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.5041666626930237, + "regularize": 45.453346252441406, + "step": 35 + }, + { + "dpo_loss": 32.33571243286133, + "epoch": 0.22673594709494568, + "grad_norm": 12280.78311184525, + "learning_rate": 2.2727272727272728e-06, + "logits": -1.3559505939483643, + "logps": -83.9026870727539, + "loss": 52.9242, + "objective": 55.41267776489258, + "ranking_idealized": 0.5208333134651184, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.5249999761581421, + "regularize": 55.41267776489258, + "step": 40 + }, + { + "dpo_loss": 34.25632858276367, + "epoch": 0.25507794048181387, + "grad_norm": 13382.907615852768, + "learning_rate": 2.556818181818182e-06, + "logits": -1.4366874694824219, + "logps": -85.12437438964844, + "loss": 65.7855, + "objective": 74.01615142822266, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5291666388511658, + "regularize": 74.01615142822266, + "step": 45 + }, + { + "dpo_loss": 40.2269287109375, + "epoch": 0.2834199338686821, + "grad_norm": 11751.345269013133, + "learning_rate": 2.8409090909090916e-06, + "logits": -1.2930634021759033, + "logps": -83.90473175048828, + "loss": 72.7249, + "objective": 73.77684783935547, + "ranking_idealized": 0.42500001192092896, + "ranking_idealized_expo": 0.42500001192092896, + "ranking_simple": 0.4166666567325592, + "regularize": 73.77684783935547, + "step": 50 + }, + { + "epoch": 0.2834199338686821, + "eval_dpo_loss": 25.884489059448242, + "eval_logits": -1.3016308546066284, + "eval_logps": -92.81625366210938, + "eval_loss": 49.766300201416016, + "eval_objective": 48.72661209106445, + "eval_ranking_idealized": 0.5092975497245789, + "eval_ranking_idealized_expo": 0.5092975497245789, + "eval_ranking_simple": 0.5082644820213318, + "eval_regularize": 48.72661209106445, + "eval_runtime": 261.2926, + "eval_samples_per_second": 22.159, + "eval_steps_per_second": 0.926, + "step": 50 + }, + { + "dpo_loss": 46.39546585083008, + "epoch": 0.3117619272555503, + "grad_norm": 11714.053294301802, + "learning_rate": 3.125e-06, + "logits": -1.2961366176605225, + "logps": -86.04666137695312, + "loss": 88.1724, + "objective": 85.7418212890625, + "ranking_idealized": 0.5416666865348816, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.5541666746139526, + "regularize": 85.7418212890625, + "step": 55 + }, + { + "dpo_loss": 51.08903503417969, + "epoch": 0.3401039206424185, + "grad_norm": 11581.047450443415, + "learning_rate": 3.409090909090909e-06, + "logits": -1.233353853225708, + "logps": -83.57112121582031, + "loss": 97.4116, + "objective": 92.88329315185547, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5, + "regularize": 92.88329315185547, + "step": 60 + }, + { + "dpo_loss": 47.421348571777344, + "epoch": 0.3684459140292867, + "grad_norm": 10219.552488411706, + "learning_rate": 3.6931818181818186e-06, + "logits": -1.2004388570785522, + "logps": -81.25647735595703, + "loss": 104.1737, + "objective": 105.26258850097656, + "ranking_idealized": 0.5083333253860474, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.48750001192092896, + "regularize": 105.26258850097656, + "step": 65 + }, + { + "dpo_loss": 62.97288131713867, + "epoch": 0.39678790741615494, + "grad_norm": 10005.80876054057, + "learning_rate": 3.9772727272727275e-06, + "logits": -1.299712061882019, + "logps": -82.07231140136719, + "loss": 114.6226, + "objective": 102.42678833007812, + "ranking_idealized": 0.5166666507720947, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.5, + "regularize": 102.42678833007812, + "step": 70 + }, + { + "dpo_loss": 64.9041976928711, + "epoch": 0.42512990080302315, + "grad_norm": 8729.102244678843, + "learning_rate": 4.2613636363636365e-06, + "logits": -1.2741096019744873, + "logps": -82.21930694580078, + "loss": 120.1568, + "objective": 109.94430541992188, + "ranking_idealized": 0.5458333492279053, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.5583333373069763, + "regularize": 109.94430541992188, + "step": 75 + }, + { + "dpo_loss": 68.410400390625, + "epoch": 0.45347189418989137, + "grad_norm": 10066.382938514493, + "learning_rate": 4.5454545454545455e-06, + "logits": -1.3132286071777344, + "logps": -81.40987396240234, + "loss": 131.1049, + "objective": 139.6371612548828, + "ranking_idealized": 0.5166666507720947, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.5166666507720947, + "regularize": 139.6371612548828, + "step": 80 + }, + { + "dpo_loss": 61.07379913330078, + "epoch": 0.4818138875767596, + "grad_norm": 8906.640582071686, + "learning_rate": 4.829545454545455e-06, + "logits": -1.2284363508224487, + "logps": -78.84890747070312, + "loss": 139.4462, + "objective": 142.55609130859375, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5041666626930237, + "regularize": 142.55609130859375, + "step": 85 + }, + { + "dpo_loss": 76.96965789794922, + "epoch": 0.5101558809636277, + "grad_norm": 8487.217481710324, + "learning_rate": 4.999921328558333e-06, + "logits": -1.012691855430603, + "logps": -76.95401000976562, + "loss": 144.8047, + "objective": 142.29689025878906, + "ranking_idealized": 0.4791666567325592, + "ranking_idealized_expo": 0.4791666567325592, + "ranking_simple": 0.46666666865348816, + "regularize": 142.29689025878906, + "step": 90 + }, + { + "dpo_loss": 78.01219177246094, + "epoch": 0.538497874350496, + "grad_norm": 8772.977046324564, + "learning_rate": 4.999036331701828e-06, + "logits": -1.063112735748291, + "logps": -74.44496154785156, + "loss": 151.2955, + "objective": 154.6220703125, + "ranking_idealized": 0.5416666865348816, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.5416666865348816, + "regularize": 154.6220703125, + "step": 95 + }, + { + "dpo_loss": 78.0488510131836, + "epoch": 0.5668398677373642, + "grad_norm": 8669.676787481652, + "learning_rate": 4.997168347957521e-06, + "logits": -1.1887397766113281, + "logps": -75.71530151367188, + "loss": 152.2211, + "objective": 154.32736206054688, + "ranking_idealized": 0.5416666865348816, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.5291666388511658, + "regularize": 154.32736206054688, + "step": 100 + }, + { + "epoch": 0.5668398677373642, + "eval_dpo_loss": 74.54129791259766, + "eval_logits": -1.245821237564087, + "eval_logps": -80.67256164550781, + "eval_loss": 146.651123046875, + "eval_objective": 149.05430603027344, + "eval_ranking_idealized": 0.5092975497245789, + "eval_ranking_idealized_expo": 0.5092975497245789, + "eval_ranking_simple": 0.5123966932296753, + "eval_regularize": 149.05430603027344, + "eval_runtime": 260.1184, + "eval_samples_per_second": 22.259, + "eval_steps_per_second": 0.93, + "step": 100 + }, + { + "dpo_loss": 97.11483001708984, + "epoch": 0.5951818611242324, + "grad_norm": 8886.474594959353, + "learning_rate": 4.994318112090048e-06, + "logits": -1.109060525894165, + "logps": -77.18095397949219, + "loss": 162.3808, + "objective": 165.7858428955078, + "ranking_idealized": 0.5333333611488342, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.5, + "regularize": 165.7858428955078, + "step": 105 + }, + { + "dpo_loss": 84.30198669433594, + "epoch": 0.6235238545111006, + "grad_norm": 8328.506430860918, + "learning_rate": 4.990486745229364e-06, + "logits": -1.1675399541854858, + "logps": -76.15277862548828, + "loss": 160.4772, + "objective": 181.19313049316406, + "ranking_idealized": 0.44999998807907104, + "ranking_idealized_expo": 0.44999998807907104, + "ranking_simple": 0.4416666626930237, + "regularize": 181.19313049316406, + "step": 110 + }, + { + "dpo_loss": 75.68659210205078, + "epoch": 0.6518658478979689, + "grad_norm": 9699.16568235547, + "learning_rate": 4.985675754429744e-06, + "logits": -1.1235295534133911, + "logps": -77.19483947753906, + "loss": 156.7024, + "objective": 153.15138244628906, + "ranking_idealized": 0.4833333194255829, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.4833333194255829, + "regularize": 153.15138244628906, + "step": 115 + }, + { + "dpo_loss": 85.8583984375, + "epoch": 0.680207841284837, + "grad_norm": 7593.864278067022, + "learning_rate": 4.9798870320769884e-06, + "logits": -1.034916639328003, + "logps": -78.47518157958984, + "loss": 158.3137, + "objective": 163.73861694335938, + "ranking_idealized": 0.5333333611488342, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.5041666626930237, + "regularize": 163.73861694335938, + "step": 120 + }, + { + "dpo_loss": 87.04534149169922, + "epoch": 0.7085498346717053, + "grad_norm": 7441.764004781261, + "learning_rate": 4.973122855144066e-06, + "logits": -0.9983721971511841, + "logps": -76.62566375732422, + "loss": 152.8004, + "objective": 162.25894165039062, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.5291666388511658, + "regularize": 162.25894165039062, + "step": 125 + }, + { + "dpo_loss": 78.63727569580078, + "epoch": 0.7368918280585735, + "grad_norm": 7450.826686003035, + "learning_rate": 4.965385884295467e-06, + "logits": -1.1029576063156128, + "logps": -75.08853912353516, + "loss": 155.3986, + "objective": 160.40919494628906, + "ranking_idealized": 0.44999998807907104, + "ranking_idealized_expo": 0.44999998807907104, + "ranking_simple": 0.44583332538604736, + "regularize": 160.40919494628906, + "step": 130 + }, + { + "dpo_loss": 77.3191909790039, + "epoch": 0.7652338214454416, + "grad_norm": 7545.4833393494555, + "learning_rate": 4.956679162840646e-06, + "logits": -1.0437390804290771, + "logps": -77.15032196044922, + "loss": 163.0839, + "objective": 150.17971801757812, + "ranking_idealized": 0.5291666388511658, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.4791666567325592, + "regularize": 150.17971801757812, + "step": 135 + }, + { + "dpo_loss": 77.59061431884766, + "epoch": 0.7935758148323099, + "grad_norm": 7289.370251601989, + "learning_rate": 4.947006115536947e-06, + "logits": -0.9542478322982788, + "logps": -77.21605682373047, + "loss": 157.3087, + "objective": 153.8356475830078, + "ranking_idealized": 0.5041666626930237, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.512499988079071, + "regularize": 153.8356475830078, + "step": 140 + }, + { + "dpo_loss": 80.91732788085938, + "epoch": 0.821917808219178, + "grad_norm": 7226.1243002610445, + "learning_rate": 4.9363705472424825e-06, + "logits": -0.975356936454773, + "logps": -76.82910919189453, + "loss": 146.9377, + "objective": 150.40052795410156, + "ranking_idealized": 0.4833333194255829, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.47083333134651184, + "regularize": 150.40052795410156, + "step": 145 + }, + { + "dpo_loss": 60.50048828125, + "epoch": 0.8502598016060463, + "grad_norm": 6968.43374501734, + "learning_rate": 4.924776641419513e-06, + "logits": -0.9192944765090942, + "logps": -75.97390747070312, + "loss": 149.0411, + "objective": 144.81809997558594, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5541666746139526, + "regularize": 144.81809997558594, + "step": 150 + }, + { + "epoch": 0.8502598016060463, + "eval_dpo_loss": 89.5256576538086, + "eval_logits": -0.9510709643363953, + "eval_logps": -81.42581176757812, + "eval_loss": 179.02293395996094, + "eval_objective": 179.47549438476562, + "eval_ranking_idealized": 0.5092975497245789, + "eval_ranking_idealized_expo": 0.5092975497245789, + "eval_ranking_simple": 0.5123966932296753, + "eval_regularize": 179.47549438476562, + "eval_runtime": 259.8812, + "eval_samples_per_second": 22.279, + "eval_steps_per_second": 0.931, + "step": 150 + }, + { + "dpo_loss": 70.23117065429688, + "epoch": 0.8786017949929145, + "grad_norm": 6700.90677088136, + "learning_rate": 4.9122289584888926e-06, + "logits": -0.9173446297645569, + "logps": -75.11918640136719, + "loss": 149.5902, + "objective": 151.32916259765625, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5291666388511658, + "regularize": 151.32916259765625, + "step": 155 + }, + { + "dpo_loss": 77.6051025390625, + "epoch": 0.9069437883797827, + "grad_norm": 6795.6302941338045, + "learning_rate": 4.8987324340362445e-06, + "logits": -0.8373637795448303, + "logps": -76.12178802490234, + "loss": 143.2267, + "objective": 138.21070861816406, + "ranking_idealized": 0.4958333373069763, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.4833333194255829, + "regularize": 138.21070861816406, + "step": 160 + }, + { + "dpo_loss": 82.77120971679688, + "epoch": 0.9352857817666509, + "grad_norm": 6989.765984737861, + "learning_rate": 4.884292376870567e-06, + "logits": -0.7745934724807739, + "logps": -77.40280151367188, + "loss": 149.6674, + "objective": 159.49266052246094, + "ranking_idealized": 0.5208333134651184, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.5375000238418579, + "regularize": 159.49266052246094, + "step": 165 + }, + { + "dpo_loss": 82.72408294677734, + "epoch": 0.9636277751535192, + "grad_norm": 6801.285873992836, + "learning_rate": 4.868914466936038e-06, + "logits": -0.7280222773551941, + "logps": -78.39759063720703, + "loss": 147.4437, + "objective": 154.9546661376953, + "ranking_idealized": 0.5083333253860474, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.5291666388511658, + "regularize": 154.9546661376953, + "step": 170 + }, + { + "dpo_loss": 62.412452697753906, + "epoch": 0.9919697685403873, + "grad_norm": 6950.067011326567, + "learning_rate": 4.8526047530778175e-06, + "logits": -0.6979461908340454, + "logps": -78.83072662353516, + "loss": 148.1536, + "objective": 130.68894958496094, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.550000011920929, + "regularize": 130.68894958496094, + "step": 175 + }, + { + "dpo_loss": 91.09583282470703, + "epoch": 1.0203117619272555, + "grad_norm": 6720.433078386762, + "learning_rate": 4.835369650662767e-06, + "logits": -0.7362512946128845, + "logps": -77.91545867919922, + "loss": 150.91, + "objective": 157.76901245117188, + "ranking_idealized": 0.5416666865348816, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.5583333373069763, + "regularize": 157.76901245117188, + "step": 180 + }, + { + "dpo_loss": 92.66343688964844, + "epoch": 1.0486537553141237, + "grad_norm": 6715.629478521071, + "learning_rate": 4.817215939055984e-06, + "logits": -0.6558343172073364, + "logps": -77.34681701660156, + "loss": 138.7646, + "objective": 152.0157012939453, + "ranking_idealized": 0.4833333194255829, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.47083333134651184, + "regularize": 152.0157012939453, + "step": 185 + }, + { + "dpo_loss": 90.67267608642578, + "epoch": 1.076995748700992, + "grad_norm": 6675.106211711281, + "learning_rate": 4.798150758954164e-06, + "logits": -0.7170895934104919, + "logps": -78.08677673339844, + "loss": 135.6227, + "objective": 155.2058563232422, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.5458333492279053, + "regularize": 155.2058563232422, + "step": 190 + }, + { + "dpo_loss": 70.37294006347656, + "epoch": 1.10533774208786, + "grad_norm": 6877.835091759628, + "learning_rate": 4.778181609576832e-06, + "logits": -0.7233827114105225, + "logps": -78.22598266601562, + "loss": 141.4543, + "objective": 141.26718139648438, + "ranking_idealized": 0.5666666626930237, + "ranking_idealized_expo": 0.5666666626930237, + "ranking_simple": 0.5625, + "regularize": 141.26718139648438, + "step": 195 + }, + { + "dpo_loss": 68.18767547607422, + "epoch": 1.1336797354747283, + "grad_norm": 6695.865097498279, + "learning_rate": 4.757316345716554e-06, + "logits": -0.811337411403656, + "logps": -77.92884063720703, + "loss": 135.6758, + "objective": 136.39527893066406, + "ranking_idealized": 0.5083333253860474, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.512499988079071, + "regularize": 136.39527893066406, + "step": 200 + }, + { + "epoch": 1.1336797354747283, + "eval_dpo_loss": 98.72966003417969, + "eval_logits": -0.8759555220603943, + "eval_logps": -83.1371078491211, + "eval_loss": 190.77737426757812, + "eval_objective": 195.49459838867188, + "eval_ranking_idealized": 0.5092975497245789, + "eval_ranking_idealized_expo": 0.5092975497245789, + "eval_ranking_simple": 0.5082644820213318, + "eval_regularize": 195.49459838867188, + "eval_runtime": 260.3291, + "eval_samples_per_second": 22.241, + "eval_steps_per_second": 0.93, + "step": 200 + }, + { + "dpo_loss": 61.18910217285156, + "epoch": 1.1620217288615966, + "grad_norm": 6574.968638595831, + "learning_rate": 4.735563174649278e-06, + "logits": -0.83098304271698, + "logps": -78.29829406738281, + "loss": 137.2297, + "objective": 146.6360626220703, + "ranking_idealized": 0.48750001192092896, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.5, + "regularize": 146.6360626220703, + "step": 205 + }, + { + "dpo_loss": 84.49109649658203, + "epoch": 1.1903637222484649, + "grad_norm": 6368.0146943258505, + "learning_rate": 4.7129306529060415e-06, + "logits": -0.828973114490509, + "logps": -78.18091583251953, + "loss": 136.1199, + "objective": 162.718994140625, + "ranking_idealized": 0.5, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.512499988079071, + "regularize": 162.718994140625, + "step": 210 + }, + { + "dpo_loss": 75.69349670410156, + "epoch": 1.2187057156353331, + "grad_norm": 6721.076267224635, + "learning_rate": 4.68942768290728e-06, + "logits": -0.7940189242362976, + "logps": -77.39470672607422, + "loss": 132.4868, + "objective": 145.17942810058594, + "ranking_idealized": 0.4833333194255829, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.4791666567325592, + "regularize": 145.17942810058594, + "step": 215 + }, + { + "dpo_loss": 70.87561798095703, + "epoch": 1.2470477090222012, + "grad_norm": 6782.903860178787, + "learning_rate": 4.665063509461098e-06, + "logits": -0.63108229637146, + "logps": -77.25511932373047, + "loss": 135.8738, + "objective": 132.0025177001953, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.6000000238418579, + "ranking_simple": 0.574999988079071, + "regularize": 132.0025177001953, + "step": 220 + }, + { + "dpo_loss": 60.317649841308594, + "epoch": 1.2753897024090695, + "grad_norm": 6714.729050270533, + "learning_rate": 4.639847716126855e-06, + "logits": -0.7443896532058716, + "logps": -78.88975524902344, + "loss": 129.2623, + "objective": 123.79737854003906, + "ranking_idealized": 0.5291666388511658, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.5249999761581421, + "regularize": 123.79737854003906, + "step": 225 + }, + { + "dpo_loss": 61.865577697753906, + "epoch": 1.3037316957959377, + "grad_norm": 6667.869735511306, + "learning_rate": 4.613790221445511e-06, + "logits": -0.6796783208847046, + "logps": -78.33611297607422, + "loss": 120.3527, + "objective": 119.14628601074219, + "ranking_idealized": 0.5083333253860474, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.5291666388511658, + "regularize": 119.14628601074219, + "step": 230 + }, + { + "dpo_loss": 59.05632400512695, + "epoch": 1.3320736891828058, + "grad_norm": 6713.042134498338, + "learning_rate": 4.586901275038201e-06, + "logits": -0.7024025321006775, + "logps": -76.42981719970703, + "loss": 129.2294, + "objective": 135.5782928466797, + "ranking_idealized": 0.4833333194255829, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.5249999761581421, + "regularize": 135.5782928466797, + "step": 235 + }, + { + "dpo_loss": 63.171669006347656, + "epoch": 1.360415682569674, + "grad_norm": 6288.49610479985, + "learning_rate": 4.559191453574582e-06, + "logits": -0.678625226020813, + "logps": -77.7660903930664, + "loss": 125.9373, + "objective": 122.7352066040039, + "ranking_idealized": 0.4791666567325592, + "ranking_idealized_expo": 0.4791666567325592, + "ranking_simple": 0.4791666567325592, + "regularize": 122.7352066040039, + "step": 240 + }, + { + "dpo_loss": 62.384334564208984, + "epoch": 1.3887576759565423, + "grad_norm": 6361.509384946799, + "learning_rate": 4.530671656612544e-06, + "logits": -0.755474865436554, + "logps": -77.20565032958984, + "loss": 123.4547, + "objective": 123.81192779541016, + "ranking_idealized": 0.5166666507720947, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.5, + "regularize": 123.81192779541016, + "step": 245 + }, + { + "dpo_loss": 64.1493911743164, + "epoch": 1.4170996693434104, + "grad_norm": 6492.257111773256, + "learning_rate": 4.501353102310901e-06, + "logits": -0.761756956577301, + "logps": -76.3807144165039, + "loss": 122.9397, + "objective": 115.62742614746094, + "ranking_idealized": 0.47083333134651184, + "ranking_idealized_expo": 0.47083333134651184, + "ranking_simple": 0.4541666805744171, + "regularize": 115.62742614746094, + "step": 250 + }, + { + "epoch": 1.4170996693434104, + "eval_dpo_loss": 104.7900161743164, + "eval_logits": -0.8410003781318665, + "eval_logps": -81.18800354003906, + "eval_loss": 204.81558227539062, + "eval_objective": 206.54141235351562, + "eval_ranking_idealized": 0.5092975497245789, + "eval_ranking_idealized_expo": 0.5092975497245789, + "eval_ranking_simple": 0.49896693229675293, + "eval_regularize": 206.54141235351562, + "eval_runtime": 260.0773, + "eval_samples_per_second": 22.263, + "eval_steps_per_second": 0.93, + "step": 250 + }, + { + "dpo_loss": 58.41355895996094, + "epoch": 1.4454416627302786, + "grad_norm": 6090.22781261977, + "learning_rate": 4.4712473230167775e-06, + "logits": -0.7302901148796082, + "logps": -77.44480895996094, + "loss": 121.9243, + "objective": 119.19187927246094, + "ranking_idealized": 0.5458333492279053, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.5541666746139526, + "regularize": 119.19187927246094, + "step": 255 + }, + { + "dpo_loss": 62.344058990478516, + "epoch": 1.473783656117147, + "grad_norm": 6061.78193398447, + "learning_rate": 4.440366160729393e-06, + "logits": -0.7257960438728333, + "logps": -78.19904327392578, + "loss": 116.9529, + "objective": 126.45060729980469, + "ranking_idealized": 0.48750001192092896, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.4958333373069763, + "regularize": 126.45060729980469, + "step": 260 + }, + { + "dpo_loss": 63.532493591308594, + "epoch": 1.5021256495040152, + "grad_norm": 6481.304182744527, + "learning_rate": 4.4087217624420595e-06, + "logits": -0.7170858979225159, + "logps": -76.29792022705078, + "loss": 117.9619, + "objective": 119.87791442871094, + "ranking_idealized": 0.5041666626930237, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.5166666507720947, + "regularize": 119.87791442871094, + "step": 265 + }, + { + "dpo_loss": 62.21635055541992, + "epoch": 1.5304676428908834, + "grad_norm": 6389.335954603194, + "learning_rate": 4.376326575364206e-06, + "logits": -0.6881006360054016, + "logps": -76.92182922363281, + "loss": 112.4777, + "objective": 120.40229034423828, + "ranking_idealized": 0.5041666626930237, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.5166666507720947, + "regularize": 120.40229034423828, + "step": 270 + }, + { + "dpo_loss": 63.73827362060547, + "epoch": 1.5588096362777515, + "grad_norm": 6484.028715201892, + "learning_rate": 4.34319334202531e-06, + "logits": -0.7395693063735962, + "logps": -76.80595397949219, + "loss": 110.8653, + "objective": 114.3071517944336, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5333333611488342, + "regularize": 114.3071517944336, + "step": 275 + }, + { + "dpo_loss": 57.1733512878418, + "epoch": 1.5871516296646198, + "grad_norm": 6228.430728506919, + "learning_rate": 4.309335095262675e-06, + "logits": -0.7416642904281616, + "logps": -74.88420104980469, + "loss": 114.3982, + "objective": 110.38121795654297, + "ranking_idealized": 0.5041666626930237, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.512499988079071, + "regularize": 110.38121795654297, + "step": 280 + }, + { + "dpo_loss": 54.96518325805664, + "epoch": 1.615493623051488, + "grad_norm": 6537.148002785846, + "learning_rate": 4.274765153095008e-06, + "logits": -0.8007023930549622, + "logps": -76.65270233154297, + "loss": 116.8492, + "objective": 124.57038879394531, + "ranking_idealized": 0.5166666507720947, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.5291666388511658, + "regularize": 124.57038879394531, + "step": 285 + }, + { + "dpo_loss": 56.35616683959961, + "epoch": 1.643835616438356, + "grad_norm": 5801.834071606869, + "learning_rate": 4.239497113483819e-06, + "logits": -0.7784015536308289, + "logps": -75.22969055175781, + "loss": 108.7635, + "objective": 101.91283416748047, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.5625, + "ranking_simple": 0.5541666746139526, + "regularize": 101.91283416748047, + "step": 290 + }, + { + "dpo_loss": 59.50160598754883, + "epoch": 1.6721776098252243, + "grad_norm": 5633.558615804516, + "learning_rate": 4.203544848984729e-06, + "logits": -0.6548821926116943, + "logps": -74.7501449584961, + "loss": 110.3539, + "objective": 111.20478820800781, + "ranking_idealized": 0.4791666567325592, + "ranking_idealized_expo": 0.4791666567325592, + "ranking_simple": 0.4958333373069763, + "regularize": 111.20478820800781, + "step": 295 + }, + { + "dpo_loss": 65.51039123535156, + "epoch": 1.7005196032120926, + "grad_norm": 6070.898597461427, + "learning_rate": 4.16692250129073e-06, + "logits": -0.615551233291626, + "logps": -76.81422424316406, + "loss": 109.8686, + "objective": 122.05563354492188, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.5625, + "regularize": 122.05563354492188, + "step": 300 + }, + { + "epoch": 1.7005196032120926, + "eval_dpo_loss": 109.18816375732422, + "eval_logits": -0.665767252445221, + "eval_logps": -82.2343978881836, + "eval_loss": 216.43338012695312, + "eval_objective": 216.9470672607422, + "eval_ranking_idealized": 0.5092975497245789, + "eval_ranking_idealized_expo": 0.5092975497245789, + "eval_ranking_simple": 0.5082644820213318, + "eval_regularize": 216.9470672607422, + "eval_runtime": 260.4112, + "eval_samples_per_second": 22.234, + "eval_steps_per_second": 0.929, + "step": 300 + }, + { + "dpo_loss": 55.654296875, + "epoch": 1.7288615965989607, + "grad_norm": 5873.367897520137, + "learning_rate": 4.129644475669617e-06, + "logits": -0.5988628268241882, + "logps": -78.57816314697266, + "loss": 107.9981, + "objective": 113.90287780761719, + "ranking_idealized": 0.5041666626930237, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.4833333194255829, + "regularize": 113.90287780761719, + "step": 305 + }, + { + "dpo_loss": 53.0214729309082, + "epoch": 1.7572035899858292, + "grad_norm": 6172.163624470604, + "learning_rate": 4.091725435297721e-06, + "logits": -0.6676580905914307, + "logps": -76.13163757324219, + "loss": 116.5937, + "objective": 113.61856842041016, + "ranking_idealized": 0.5208333134651184, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.5333333611488342, + "regularize": 113.61856842041016, + "step": 310 + }, + { + "dpo_loss": 57.42729568481445, + "epoch": 1.7855455833726972, + "grad_norm": 6120.23161933891, + "learning_rate": 4.053180295492203e-06, + "logits": -0.6111046075820923, + "logps": -77.60387420654297, + "loss": 109.9605, + "objective": 105.1899642944336, + "ranking_idealized": 0.5083333253860474, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.5166666507720947, + "regularize": 105.1899642944336, + "step": 315 + }, + { + "dpo_loss": 48.950584411621094, + "epoch": 1.8138875767595655, + "grad_norm": 5988.504398453124, + "learning_rate": 4.014024217844167e-06, + "logits": -0.671535313129425, + "logps": -77.83592224121094, + "loss": 102.7624, + "objective": 104.49610137939453, + "ranking_idealized": 0.5041666626930237, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.5291666388511658, + "regularize": 104.49610137939453, + "step": 320 + }, + { + "dpo_loss": 57.7384033203125, + "epoch": 1.8422295701464337, + "grad_norm": 6423.999380292735, + "learning_rate": 3.974272604254906e-06, + "logits": -0.6641644239425659, + "logps": -78.81893920898438, + "loss": 106.3894, + "objective": 106.4183120727539, + "ranking_idealized": 0.5166666507720947, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.5416666865348816, + "regularize": 106.4183120727539, + "step": 325 + }, + { + "dpo_loss": 47.59239959716797, + "epoch": 1.8705715635333018, + "grad_norm": 5787.176377487831, + "learning_rate": 3.933941090877615e-06, + "logits": -0.5340750217437744, + "logps": -75.45977020263672, + "loss": 104.8829, + "objective": 98.918701171875, + "ranking_idealized": 0.47083333134651184, + "ranking_idealized_expo": 0.47083333134651184, + "ranking_simple": 0.4625000059604645, + "regularize": 98.918701171875, + "step": 330 + }, + { + "dpo_loss": 53.71455001831055, + "epoch": 1.89891355692017, + "grad_norm": 5809.111403677512, + "learning_rate": 3.893045541966975e-06, + "logits": -0.6170223355293274, + "logps": -75.74073028564453, + "loss": 102.0785, + "objective": 106.43668365478516, + "ranking_idealized": 0.4958333373069763, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.4833333194255829, + "regularize": 106.43668365478516, + "step": 335 + }, + { + "dpo_loss": 48.59009552001953, + "epoch": 1.9272555503070383, + "grad_norm": 6046.947822829327, + "learning_rate": 3.8516020436389945e-06, + "logits": -0.5560640692710876, + "logps": -78.0693130493164, + "loss": 99.5837, + "objective": 97.92900085449219, + "ranking_idealized": 0.5416666865348816, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.5416666865348816, + "regularize": 97.92900085449219, + "step": 340 + }, + { + "dpo_loss": 39.00655746459961, + "epoch": 1.9555975436939064, + "grad_norm": 6294.646295716598, + "learning_rate": 3.8096268975436045e-06, + "logits": -0.581243097782135, + "logps": -76.83429718017578, + "loss": 98.8101, + "objective": 92.9996566772461, + "ranking_idealized": 0.5333333611488342, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.5333333611488342, + "regularize": 92.9996566772461, + "step": 345 + }, + { + "dpo_loss": 45.82107162475586, + "epoch": 1.9839395370807746, + "grad_norm": 6285.693550619337, + "learning_rate": 3.767136614452458e-06, + "logits": -0.5315877199172974, + "logps": -77.77294158935547, + "loss": 97.6956, + "objective": 92.99220275878906, + "ranking_idealized": 0.48750001192092896, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.5041666626930237, + "regularize": 92.99220275878906, + "step": 350 + }, + { + "epoch": 1.9839395370807746, + "eval_dpo_loss": 109.80838012695312, + "eval_logits": -0.6322916746139526, + "eval_logps": -81.08039093017578, + "eval_loss": 218.28872680664062, + "eval_objective": 217.42910766601562, + "eval_ranking_idealized": 0.5092975497245789, + "eval_ranking_idealized_expo": 0.5092975497245789, + "eval_ranking_simple": 0.5072314143180847, + "eval_regularize": 217.42910766601562, + "eval_runtime": 260.5078, + "eval_samples_per_second": 22.226, + "eval_steps_per_second": 0.929, + "step": 350 + }, + { + "dpo_loss": 45.73307800292969, + "epoch": 2.012281530467643, + "grad_norm": 5901.724049272657, + "learning_rate": 3.724147907764478e-06, + "logits": -0.49950748682022095, + "logps": -77.2893295288086, + "loss": 95.374, + "objective": 97.87924194335938, + "ranking_idealized": 0.5083333253860474, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.5041666626930237, + "regularize": 97.87924194335938, + "step": 355 + }, + { + "dpo_loss": 46.2380256652832, + "epoch": 2.040623523854511, + "grad_norm": 5907.7941999877185, + "learning_rate": 3.6806776869317074e-06, + "logits": -0.571615993976593, + "logps": -75.41194152832031, + "loss": 89.9848, + "objective": 88.52743530273438, + "ranking_idealized": 0.5291666388511658, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.5333333611488342, + "regularize": 88.52743530273438, + "step": 360 + }, + { + "dpo_loss": 50.726741790771484, + "epoch": 2.0689655172413794, + "grad_norm": 6275.9056471544, + "learning_rate": 3.6367430508080283e-06, + "logits": -0.6418294310569763, + "logps": -77.9503173828125, + "loss": 93.574, + "objective": 92.23693084716797, + "ranking_idealized": 0.5041666626930237, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.5249999761581421, + "regularize": 92.23693084716797, + "step": 365 + }, + { + "dpo_loss": 44.6575927734375, + "epoch": 2.0973075106282475, + "grad_norm": 5856.945542345439, + "learning_rate": 3.5923612809233987e-06, + "logits": -0.552503228187561, + "logps": -75.52668762207031, + "loss": 89.2864, + "objective": 88.46630096435547, + "ranking_idealized": 0.4833333194255829, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.5083333253860474, + "regularize": 88.46630096435547, + "step": 370 + }, + { + "dpo_loss": 44.330020904541016, + "epoch": 2.1256495040151155, + "grad_norm": 5715.458499171565, + "learning_rate": 3.547549834686222e-06, + "logits": -0.5592830777168274, + "logps": -77.21271514892578, + "loss": 87.4304, + "objective": 95.40113830566406, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5874999761581421, + "ranking_simple": 0.574999988079071, + "regularize": 95.40113830566406, + "step": 375 + }, + { + "dpo_loss": 42.92914581298828, + "epoch": 2.153991497401984, + "grad_norm": 5740.849860043716, + "learning_rate": 3.5023263385165346e-06, + "logits": -0.5162667632102966, + "logps": -77.4334945678711, + "loss": 88.8215, + "objective": 89.14612579345703, + "ranking_idealized": 0.5291666388511658, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.512499988079071, + "regularize": 89.14612579345703, + "step": 380 + }, + { + "dpo_loss": 49.994590759277344, + "epoch": 2.182333490788852, + "grad_norm": 5839.263703953865, + "learning_rate": 3.4567085809127247e-06, + "logits": -0.5519043207168579, + "logps": -80.0301284790039, + "loss": 87.9243, + "objective": 99.98570251464844, + "ranking_idealized": 0.5, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5166666507720947, + "regularize": 99.98570251464844, + "step": 385 + }, + { + "dpo_loss": 46.78067398071289, + "epoch": 2.21067548417572, + "grad_norm": 5926.180751245502, + "learning_rate": 3.410714505454486e-06, + "logits": -0.4752744138240814, + "logps": -78.86099243164062, + "loss": 88.7389, + "objective": 83.77468872070312, + "ranking_idealized": 0.4833333194255829, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.46666666865348816, + "regularize": 83.77468872070312, + "step": 390 + }, + { + "dpo_loss": 48.72767639160156, + "epoch": 2.2390174775625886, + "grad_norm": 5736.684602068411, + "learning_rate": 3.364362203744777e-06, + "logits": -0.488779217004776, + "logps": -79.17351531982422, + "loss": 85.6137, + "objective": 89.27422332763672, + "ranking_idealized": 0.5166666507720947, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.5, + "regularize": 89.27422332763672, + "step": 395 + }, + { + "dpo_loss": 46.81766128540039, + "epoch": 2.2673594709494567, + "grad_norm": 6351.520354901001, + "learning_rate": 3.3176699082935546e-06, + "logits": -0.5262924432754517, + "logps": -80.44742584228516, + "loss": 86.0309, + "objective": 90.6736831665039, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.5708333253860474, + "regularize": 90.6736831665039, + "step": 400 + }, + { + "epoch": 2.2673594709494567, + "eval_dpo_loss": 115.8748779296875, + "eval_logits": -0.5904337763786316, + "eval_logps": -83.60823822021484, + "eval_loss": 221.7113494873047, + "eval_objective": 225.3389129638672, + "eval_ranking_idealized": 0.5092975497245789, + "eval_ranking_idealized_expo": 0.5092975497245789, + "eval_ranking_simple": 0.5051652789115906, + "eval_regularize": 225.3389129638672, + "eval_runtime": 260.7013, + "eval_samples_per_second": 22.209, + "eval_steps_per_second": 0.928, + "step": 400 + }, + { + "dpo_loss": 39.29842758178711, + "epoch": 2.295701464336325, + "grad_norm": 6158.234438206871, + "learning_rate": 3.2706559853460818e-06, + "logits": -0.583505392074585, + "logps": -78.83747100830078, + "loss": 84.181, + "objective": 83.5100326538086, + "ranking_idealized": 0.5583333373069763, + "ranking_idealized_expo": 0.5583333373069763, + "ranking_simple": 0.5708333253860474, + "regularize": 83.5100326538086, + "step": 405 + }, + { + "dpo_loss": 41.60795974731445, + "epoch": 2.324043457723193, + "grad_norm": 5505.6808601173025, + "learning_rate": 3.2233389276586325e-06, + "logits": -0.5247575640678406, + "logps": -77.18423461914062, + "loss": 81.7469, + "objective": 78.1520004272461, + "ranking_idealized": 0.4749999940395355, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.4749999940395355, + "regularize": 78.1520004272461, + "step": 410 + }, + { + "dpo_loss": 40.77152633666992, + "epoch": 2.3523854511100613, + "grad_norm": 7217.9049268653225, + "learning_rate": 3.1757373472244324e-06, + "logits": -0.5214927196502686, + "logps": -78.42851257324219, + "loss": 82.4994, + "objective": 76.92170715332031, + "ranking_idealized": 0.5333333611488342, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.5166666507720947, + "regularize": 76.92170715332031, + "step": 415 + }, + { + "dpo_loss": 39.665164947509766, + "epoch": 2.3807274444969297, + "grad_norm": 6286.310490781002, + "learning_rate": 3.127869967952698e-06, + "logits": -0.5284460783004761, + "logps": -78.95869445800781, + "loss": 81.5421, + "objective": 80.19254302978516, + "ranking_idealized": 0.47083333134651184, + "ranking_idealized_expo": 0.47083333134651184, + "ranking_simple": 0.4749999940395355, + "regularize": 80.19254302978516, + "step": 420 + }, + { + "dpo_loss": 35.95686340332031, + "epoch": 2.409069437883798, + "grad_norm": 6025.61452707402, + "learning_rate": 3.0797556183036582e-06, + "logits": -0.5814462304115295, + "logps": -77.8717269897461, + "loss": 78.4787, + "objective": 80.24657440185547, + "ranking_idealized": 0.4958333373069763, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.5041666626930237, + "regularize": 80.24657440185547, + "step": 425 + }, + { + "dpo_loss": 42.524593353271484, + "epoch": 2.4374114312706663, + "grad_norm": 5711.500524222879, + "learning_rate": 3.0314132238824416e-06, + "logits": -0.6198355555534363, + "logps": -78.72447204589844, + "loss": 79.9018, + "objective": 80.83815002441406, + "ranking_idealized": 0.5291666388511658, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.5333333611488342, + "regularize": 80.83815002441406, + "step": 430 + }, + { + "dpo_loss": 41.12130355834961, + "epoch": 2.4657534246575343, + "grad_norm": 5723.1684727847205, + "learning_rate": 2.9828617999947647e-06, + "logits": -0.6906354427337646, + "logps": -76.77250671386719, + "loss": 78.7991, + "objective": 82.95913696289062, + "ranking_idealized": 0.5083333253860474, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.5166666507720947, + "regularize": 82.95913696289062, + "step": 435 + }, + { + "dpo_loss": 41.736507415771484, + "epoch": 2.4940954180444024, + "grad_norm": 6162.556759828204, + "learning_rate": 2.9341204441673267e-06, + "logits": -0.5858648419380188, + "logps": -77.20186614990234, + "loss": 79.0761, + "objective": 77.8648910522461, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5291666388511658, + "regularize": 77.8648910522461, + "step": 440 + }, + { + "dpo_loss": 38.95100784301758, + "epoch": 2.5224374114312704, + "grad_norm": 5599.536160663401, + "learning_rate": 2.8852083286358647e-06, + "logits": -0.5441535711288452, + "logps": -75.47164154052734, + "loss": 75.4247, + "objective": 74.47901916503906, + "ranking_idealized": 0.5166666507720947, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.5333333611488342, + "regularize": 74.47901916503906, + "step": 445 + }, + { + "dpo_loss": 42.42906188964844, + "epoch": 2.550779404818139, + "grad_norm": 5927.201776955767, + "learning_rate": 2.8361446928038298e-06, + "logits": -0.5917781591415405, + "logps": -77.22566986083984, + "loss": 78.4362, + "objective": 75.78580474853516, + "ranking_idealized": 0.5, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.49166667461395264, + "regularize": 75.78580474853516, + "step": 450 + }, + { + "epoch": 2.550779404818139, + "eval_dpo_loss": 116.21166229248047, + "eval_logits": -0.6173169016838074, + "eval_logps": -82.07426452636719, + "eval_loss": 221.37322998046875, + "eval_objective": 224.48391723632812, + "eval_ranking_idealized": 0.5092975497245789, + "eval_ranking_idealized_expo": 0.5092975497245789, + "eval_ranking_simple": 0.5113636255264282, + "eval_regularize": 224.48391723632812, + "eval_runtime": 259.6525, + "eval_samples_per_second": 22.299, + "eval_steps_per_second": 0.932, + "step": 450 + }, + { + "dpo_loss": 42.16978073120117, + "epoch": 2.579121398205007, + "grad_norm": 5821.790965127779, + "learning_rate": 2.7869488356746344e-06, + "logits": -0.5925208926200867, + "logps": -78.49901580810547, + "loss": 76.2254, + "objective": 73.7408447265625, + "ranking_idealized": 0.5, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.48750001192092896, + "regularize": 73.7408447265625, + "step": 455 + }, + { + "dpo_loss": 35.18735885620117, + "epoch": 2.6074633915918755, + "grad_norm": 5721.627507359908, + "learning_rate": 2.7376401082604563e-06, + "logits": -0.648517370223999, + "logps": -77.49411010742188, + "loss": 72.6796, + "objective": 75.32291412353516, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5083333253860474, + "regularize": 75.32291412353516, + "step": 460 + }, + { + "dpo_loss": 39.74345779418945, + "epoch": 2.6358053849787435, + "grad_norm": 5692.925017991515, + "learning_rate": 2.6882379059705953e-06, + "logits": -0.5508330464363098, + "logps": -77.89730834960938, + "loss": 71.4232, + "objective": 72.29166412353516, + "ranking_idealized": 0.4958333373069763, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.49166667461395264, + "regularize": 72.29166412353516, + "step": 465 + }, + { + "dpo_loss": 38.005611419677734, + "epoch": 2.6641473783656116, + "grad_norm": 5846.922593898297, + "learning_rate": 2.6387616609823506e-06, + "logits": -0.5707463622093201, + "logps": -77.01284790039062, + "loss": 70.6383, + "objective": 67.4544906616211, + "ranking_idealized": 0.5541666746139526, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.5291666388511658, + "regularize": 67.4544906616211, + "step": 470 + }, + { + "dpo_loss": 28.328413009643555, + "epoch": 2.69248937175248, + "grad_norm": 5857.504945194897, + "learning_rate": 2.5892308345974517e-06, + "logits": -0.6008989810943604, + "logps": -76.99757385253906, + "loss": 68.5943, + "objective": 67.81741333007812, + "ranking_idealized": 0.4958333373069763, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.49166667461395264, + "regularize": 67.81741333007812, + "step": 475 + }, + { + "dpo_loss": 32.999122619628906, + "epoch": 2.720831365139348, + "grad_norm": 6073.696561883967, + "learning_rate": 2.53966490958702e-06, + "logits": -0.6349701285362244, + "logps": -76.78397369384766, + "loss": 67.1153, + "objective": 68.42402648925781, + "ranking_idealized": 0.5, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.512499988079071, + "regularize": 68.42402648925781, + "step": 480 + }, + { + "dpo_loss": 34.57282257080078, + "epoch": 2.7491733585262166, + "grad_norm": 5933.827803950185, + "learning_rate": 2.490083382528097e-06, + "logits": -0.5782140493392944, + "logps": -79.55359649658203, + "loss": 69.0218, + "objective": 67.90946960449219, + "ranking_idealized": 0.5208333134651184, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.5166666507720947, + "regularize": 67.90946960449219, + "step": 485 + }, + { + "dpo_loss": 37.92434310913086, + "epoch": 2.7775153519130846, + "grad_norm": 6156.113425613259, + "learning_rate": 2.440505756134732e-06, + "logits": -0.5393855571746826, + "logps": -77.88603210449219, + "loss": 67.9056, + "objective": 68.96742248535156, + "ranking_idealized": 0.47083333134651184, + "ranking_idealized_expo": 0.47083333134651184, + "ranking_simple": 0.46666666865348816, + "regularize": 68.96742248535156, + "step": 490 + }, + { + "dpo_loss": 43.407958984375, + "epoch": 2.8058573452999527, + "grad_norm": 5739.304263530335, + "learning_rate": 2.3909515315866606e-06, + "logits": -0.5840901136398315, + "logps": -77.01483917236328, + "loss": 65.3189, + "objective": 70.57030487060547, + "ranking_idealized": 0.4791666567325592, + "ranking_idealized_expo": 0.4791666567325592, + "ranking_simple": 0.4375, + "regularize": 70.57030487060547, + "step": 495 + }, + { + "dpo_loss": 34.67405700683594, + "epoch": 2.8341993386868207, + "grad_norm": 5760.189667227515, + "learning_rate": 2.341440200858589e-06, + "logits": -0.6246147155761719, + "logps": -76.3143310546875, + "loss": 65.179, + "objective": 62.59866714477539, + "ranking_idealized": 0.5208333134651184, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.5, + "regularize": 62.59866714477539, + "step": 500 + }, + { + "epoch": 2.8341993386868207, + "eval_dpo_loss": 114.98714447021484, + "eval_logits": -0.6892295479774475, + "eval_logps": -82.34246826171875, + "eval_loss": 223.8012237548828, + "eval_objective": 227.17547607421875, + "eval_ranking_idealized": 0.5092975497245789, + "eval_ranking_idealized_expo": 0.5092975497245789, + "eval_ranking_simple": 0.5082644820213318, + "eval_regularize": 227.17547607421875, + "eval_runtime": 259.8207, + "eval_samples_per_second": 22.285, + "eval_steps_per_second": 0.931, + "step": 500 + }, + { + "dpo_loss": 29.432533264160156, + "epoch": 2.862541332073689, + "grad_norm": 5869.553889606324, + "learning_rate": 2.2919912390530945e-06, + "logits": -0.6314287185668945, + "logps": -77.34867095947266, + "loss": 62.3763, + "objective": 57.663639068603516, + "ranking_idealized": 0.5083333253860474, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.5249999761581421, + "regularize": 57.663639068603516, + "step": 505 + }, + { + "dpo_loss": 32.43122100830078, + "epoch": 2.8908833254605573, + "grad_norm": 5536.552957965224, + "learning_rate": 2.242624096740164e-06, + "logits": -0.6147390604019165, + "logps": -78.36116027832031, + "loss": 64.5354, + "objective": 59.26633834838867, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5416666865348816, + "regularize": 59.26633834838867, + "step": 510 + }, + { + "dpo_loss": 28.44099235534668, + "epoch": 2.9192253188474258, + "grad_norm": 5901.455292936029, + "learning_rate": 2.193358192306384e-06, + "logits": -0.6631244421005249, + "logps": -77.92379760742188, + "loss": 61.9083, + "objective": 54.49189758300781, + "ranking_idealized": 0.4791666567325592, + "ranking_idealized_expo": 0.4791666567325592, + "ranking_simple": 0.48750001192092896, + "regularize": 54.49189758300781, + "step": 515 + }, + { + "dpo_loss": 30.10361671447754, + "epoch": 2.947567312234294, + "grad_norm": 6161.871560570766, + "learning_rate": 2.1442129043167877e-06, + "logits": -0.6410778760910034, + "logps": -79.62260437011719, + "loss": 63.1757, + "objective": 61.97486877441406, + "ranking_idealized": 0.5083333253860474, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.5208333134651184, + "regularize": 61.97486877441406, + "step": 520 + }, + { + "dpo_loss": 30.742000579833984, + "epoch": 2.975909305621162, + "grad_norm": 6354.821007890038, + "learning_rate": 2.0952075638923656e-06, + "logits": -0.6085123419761658, + "logps": -78.72110748291016, + "loss": 61.2353, + "objective": 57.58578109741211, + "ranking_idealized": 0.5666666626930237, + "ranking_idealized_expo": 0.5666666626930237, + "ranking_simple": 0.5416666865348816, + "regularize": 57.58578109741211, + "step": 525 + }, + { + "dpo_loss": 35.809959411621094, + "epoch": 3.0042512990080303, + "grad_norm": 5662.677867628538, + "learning_rate": 2.046361447106244e-06, + "logits": -0.6243312358856201, + "logps": -77.60399627685547, + "loss": 61.3834, + "objective": 55.699283599853516, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.512499988079071, + "regularize": 55.699283599853516, + "step": 530 + }, + { + "dpo_loss": 28.718589782714844, + "epoch": 3.0325932923948984, + "grad_norm": 5725.286339309564, + "learning_rate": 1.997693767401503e-06, + "logits": -0.6556482315063477, + "logps": -78.42686462402344, + "loss": 56.5612, + "objective": 56.667293548583984, + "ranking_idealized": 0.5416666865348816, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.5416666865348816, + "regularize": 56.667293548583984, + "step": 535 + }, + { + "dpo_loss": 29.884973526000977, + "epoch": 3.0609352857817664, + "grad_norm": 6203.03849936551, + "learning_rate": 1.9492236680336486e-06, + "logits": -0.6453068256378174, + "logps": -77.5386734008789, + "loss": 55.8428, + "objective": 58.181007385253906, + "ranking_idealized": 0.5333333611488342, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.550000011920929, + "regularize": 58.181007385253906, + "step": 540 + }, + { + "dpo_loss": 25.884765625, + "epoch": 3.089277279168635, + "grad_norm": 5980.787430652505, + "learning_rate": 1.9009702145406728e-06, + "logits": -0.6111847758293152, + "logps": -78.12004852294922, + "loss": 53.4125, + "objective": 54.29548263549805, + "ranking_idealized": 0.49166667461395264, + "ranking_idealized_expo": 0.49166667461395264, + "ranking_simple": 0.4958333373069763, + "regularize": 54.29548263549805, + "step": 545 + }, + { + "dpo_loss": 27.113332748413086, + "epoch": 3.117619272555503, + "grad_norm": 5749.494417530868, + "learning_rate": 1.852952387243698e-06, + "logits": -0.5344541668891907, + "logps": -78.76824188232422, + "loss": 52.3116, + "objective": 55.897151947021484, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5249999761581421, + "regularize": 55.897151947021484, + "step": 550 + }, + { + "epoch": 3.117619272555503, + "eval_dpo_loss": 114.9251937866211, + "eval_logits": -0.6290253400802612, + "eval_logps": -81.84333038330078, + "eval_loss": 223.6770477294922, + "eval_objective": 226.75909423828125, + "eval_ranking_idealized": 0.5092975497245789, + "eval_ranking_idealized_expo": 0.5092975497245789, + "eval_ranking_simple": 0.5103305578231812, + "eval_regularize": 226.75909423828125, + "eval_runtime": 260.203, + "eval_samples_per_second": 22.252, + "eval_steps_per_second": 0.93, + "step": 550 + }, + { + "dpo_loss": 27.520069122314453, + "epoch": 3.1459612659423715, + "grad_norm": 5785.031733960155, + "learning_rate": 1.8051890737811395e-06, + "logits": -0.49687737226486206, + "logps": -78.77640533447266, + "loss": 51.2223, + "objective": 51.64868927001953, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.5541666746139526, + "regularize": 51.64868927001953, + "step": 555 + }, + { + "dpo_loss": 24.614133834838867, + "epoch": 3.1743032593292395, + "grad_norm": 5927.440129812864, + "learning_rate": 1.7576990616793139e-06, + "logits": -0.5366522073745728, + "logps": -75.75458526611328, + "loss": 52.4275, + "objective": 50.823753356933594, + "ranking_idealized": 0.5041666626930237, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.5083333253860474, + "regularize": 50.823753356933594, + "step": 560 + }, + { + "dpo_loss": 26.020414352416992, + "epoch": 3.2026452527161076, + "grad_norm": 6153.770849149074, + "learning_rate": 1.7105010309624381e-06, + "logits": -0.5409637689590454, + "logps": -77.39885711669922, + "loss": 51.519, + "objective": 49.933555603027344, + "ranking_idealized": 0.5416666865348816, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.5541666746139526, + "regularize": 49.933555603027344, + "step": 565 + }, + { + "dpo_loss": 26.2557373046875, + "epoch": 3.230987246102976, + "grad_norm": 5301.823299812263, + "learning_rate": 1.6636135468049122e-06, + "logits": -0.5224726796150208, + "logps": -76.94538116455078, + "loss": 49.5781, + "objective": 46.85703659057617, + "ranking_idealized": 0.4791666567325592, + "ranking_idealized_expo": 0.4791666567325592, + "ranking_simple": 0.46666666865348816, + "regularize": 46.85703659057617, + "step": 570 + }, + { + "dpo_loss": 30.09845733642578, + "epoch": 3.259329239489844, + "grad_norm": 5768.637134430283, + "learning_rate": 1.617055052228768e-06, + "logits": -0.5873778462409973, + "logps": -77.48201751708984, + "loss": 48.8665, + "objective": 49.90784454345703, + "ranking_idealized": 0.5, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.4749999940395355, + "regularize": 49.90784454345703, + "step": 575 + }, + { + "dpo_loss": 25.3740234375, + "epoch": 3.287671232876712, + "grad_norm": 5535.872847536237, + "learning_rate": 1.5708438608491816e-06, + "logits": -0.6106985211372375, + "logps": -78.49124145507812, + "loss": 47.4222, + "objective": 46.31576156616211, + "ranking_idealized": 0.5541666746139526, + "ranking_idealized_expo": 0.5541666746139526, + "ranking_simple": 0.5583333373069763, + "regularize": 46.31576156616211, + "step": 580 + }, + { + "dpo_loss": 26.91870880126953, + "epoch": 3.3160132262635806, + "grad_norm": 6071.893217545125, + "learning_rate": 1.524998149670871e-06, + "logits": -0.611818253993988, + "logps": -78.98985290527344, + "loss": 49.9675, + "objective": 49.87598419189453, + "ranking_idealized": 0.5291666388511658, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.5208333134651184, + "regularize": 49.87598419189453, + "step": 585 + }, + { + "dpo_loss": 25.645116806030273, + "epoch": 3.3443552196504487, + "grad_norm": 5940.058676849357, + "learning_rate": 1.479535951938243e-06, + "logits": -0.6204649209976196, + "logps": -79.12604522705078, + "loss": 47.5629, + "objective": 50.466861724853516, + "ranking_idealized": 0.48750001192092896, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.49166667461395264, + "regularize": 50.466861724853516, + "step": 590 + }, + { + "dpo_loss": 23.5716552734375, + "epoch": 3.372697213037317, + "grad_norm": 5635.743149288216, + "learning_rate": 1.43447515004208e-06, + "logits": -0.5404379367828369, + "logps": -78.58365631103516, + "loss": 45.2162, + "objective": 44.304752349853516, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5375000238418579, + "regularize": 44.304752349853516, + "step": 595 + }, + { + "dpo_loss": 21.746015548706055, + "epoch": 3.4010392064241852, + "grad_norm": 5566.716122386338, + "learning_rate": 1.3898334684855647e-06, + "logits": -0.5577505826950073, + "logps": -77.53295135498047, + "loss": 45.9426, + "objective": 44.46009826660156, + "ranking_idealized": 0.5041666626930237, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.4958333373069763, + "regularize": 44.46009826660156, + "step": 600 + }, + { + "epoch": 3.4010392064241852, + "eval_dpo_loss": 113.63313293457031, + "eval_logits": -0.6182904839515686, + "eval_logps": -81.3167724609375, + "eval_loss": 222.4720458984375, + "eval_objective": 223.18727111816406, + "eval_ranking_idealized": 0.5092975497245789, + "eval_ranking_idealized_expo": 0.5092975497245789, + "eval_ranking_simple": 0.5072314143180847, + "eval_regularize": 223.18727111816406, + "eval_runtime": 260.7691, + "eval_samples_per_second": 22.204, + "eval_steps_per_second": 0.928, + "step": 600 + }, + { + "dpo_loss": 20.865568161010742, + "epoch": 3.4293811998110533, + "grad_norm": 6078.906522590126, + "learning_rate": 1.3456284669124159e-06, + "logits": -0.5672589540481567, + "logps": -79.59375762939453, + "loss": 44.9405, + "objective": 43.26637268066406, + "ranking_idealized": 0.5333333611488342, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.5333333611488342, + "regularize": 43.26637268066406, + "step": 605 + }, + { + "dpo_loss": 23.76979637145996, + "epoch": 3.4577231931979218, + "grad_norm": 6106.495672204209, + "learning_rate": 1.301877533199859e-06, + "logits": -0.6027007699012756, + "logps": -77.29557800292969, + "loss": 43.4191, + "objective": 46.19691848754883, + "ranking_idealized": 0.5166666507720947, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.512499988079071, + "regularize": 46.19691848754883, + "step": 610 + }, + { + "dpo_loss": 20.621265411376953, + "epoch": 3.48606518658479, + "grad_norm": 5469.445552736854, + "learning_rate": 1.2585978766191726e-06, + "logits": -0.6229711771011353, + "logps": -77.44082641601562, + "loss": 41.5198, + "objective": 40.210323333740234, + "ranking_idealized": 0.46666666865348816, + "ranking_idealized_expo": 0.46666666865348816, + "ranking_simple": 0.47083333134651184, + "regularize": 40.210323333740234, + "step": 615 + }, + { + "dpo_loss": 20.698007583618164, + "epoch": 3.514407179971658, + "grad_norm": 5640.751427601147, + "learning_rate": 1.2158065210664848e-06, + "logits": -0.5536880493164062, + "logps": -76.66329193115234, + "loss": 42.3006, + "objective": 39.67619323730469, + "ranking_idealized": 0.4958333373069763, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.4833333194255829, + "regularize": 39.67619323730469, + "step": 620 + }, + { + "dpo_loss": 19.59212875366211, + "epoch": 3.5427491733585263, + "grad_norm": 5586.621015909176, + "learning_rate": 1.1735202983664803e-06, + "logits": -0.5632150769233704, + "logps": -75.98930358886719, + "loss": 42.7824, + "objective": 40.66218948364258, + "ranking_idealized": 0.5208333134651184, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.5291666388511658, + "regularize": 40.66218948364258, + "step": 625 + }, + { + "dpo_loss": 20.836034774780273, + "epoch": 3.5710911667453944, + "grad_norm": 5970.049888723431, + "learning_rate": 1.1317558416516696e-06, + "logits": -0.6033153533935547, + "logps": -75.92536163330078, + "loss": 41.207, + "objective": 38.50748062133789, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.512499988079071, + "regularize": 38.50748062133789, + "step": 630 + }, + { + "dpo_loss": 18.94291877746582, + "epoch": 3.5994331601322624, + "grad_norm": 5671.302619310192, + "learning_rate": 1.0905295788197993e-06, + "logits": -0.5977668762207031, + "logps": -76.6431655883789, + "loss": 38.9416, + "objective": 40.45100402832031, + "ranking_idealized": 0.48750001192092896, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.48750001192092896, + "regularize": 40.45100402832031, + "step": 635 + }, + { + "dpo_loss": 21.708051681518555, + "epoch": 3.627775153519131, + "grad_norm": 5604.363744970696, + "learning_rate": 1.049857726072005e-06, + "logits": -0.5701695680618286, + "logps": -77.40218353271484, + "loss": 38.2866, + "objective": 42.04179382324219, + "ranking_idealized": 0.4625000059604645, + "ranking_idealized_expo": 0.4625000059604645, + "ranking_simple": 0.4749999940395355, + "regularize": 42.04179382324219, + "step": 640 + }, + { + "dpo_loss": 20.66307830810547, + "epoch": 3.656117146905999, + "grad_norm": 5446.269996729428, + "learning_rate": 1.0097562815342215e-06, + "logits": -0.5814236998558044, + "logps": -76.69013214111328, + "loss": 38.1396, + "objective": 35.1287841796875, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5249999761581421, + "regularize": 35.1287841796875, + "step": 645 + }, + { + "dpo_loss": 17.27460479736328, + "epoch": 3.6844591402928675, + "grad_norm": 5568.286359889519, + "learning_rate": 9.702410189643838e-07, + "logits": -0.5777478814125061, + "logps": -77.85608673095703, + "loss": 37.3789, + "objective": 35.844581604003906, + "ranking_idealized": 0.5041666626930237, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.5083333253860474, + "regularize": 35.844581604003906, + "step": 650 + }, + { + "epoch": 3.6844591402928675, + "eval_dpo_loss": 114.61026000976562, + "eval_logits": -0.6355183720588684, + "eval_logps": -81.70125579833984, + "eval_loss": 223.4119110107422, + "eval_objective": 225.21566772460938, + "eval_ranking_idealized": 0.5092975497245789, + "eval_ranking_idealized_expo": 0.5092975497245789, + "eval_ranking_simple": 0.5072314143180847, + "eval_regularize": 225.21566772460938, + "eval_runtime": 259.96, + "eval_samples_per_second": 22.273, + "eval_steps_per_second": 0.931, + "step": 650 + }, + { + "dpo_loss": 15.926929473876953, + "epoch": 3.7128011336797355, + "grad_norm": 5550.664905496162, + "learning_rate": 9.313274815478698e-07, + "logits": -0.5537225008010864, + "logps": -76.9358901977539, + "loss": 36.4745, + "objective": 33.71458053588867, + "ranking_idealized": 0.5208333134651184, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.5208333134651184, + "regularize": 33.71458053588867, + "step": 655 + }, + { + "dpo_loss": 19.080156326293945, + "epoch": 3.7411431270666036, + "grad_norm": 5480.303347312512, + "learning_rate": 8.930309757836517e-07, + "logits": -0.5778761506080627, + "logps": -78.07957458496094, + "loss": 36.4648, + "objective": 39.15421676635742, + "ranking_idealized": 0.5541666746139526, + "ranking_idealized_expo": 0.5541666746139526, + "ranking_simple": 0.5583333373069763, + "regularize": 39.15421676635742, + "step": 660 + }, + { + "dpo_loss": 16.046911239624023, + "epoch": 3.769485120453472, + "grad_norm": 5637.549638771613, + "learning_rate": 8.553665654635343e-07, + "logits": -0.5516543984413147, + "logps": -78.4363784790039, + "loss": 35.2475, + "objective": 35.23891830444336, + "ranking_idealized": 0.5166666507720947, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.5083333253860474, + "regularize": 35.23891830444336, + "step": 665 + }, + { + "dpo_loss": 14.368963241577148, + "epoch": 3.79782711384034, + "grad_norm": 5529.55552176345, + "learning_rate": 8.183490657468687e-07, + "logits": -0.5940511226654053, + "logps": -78.2576675415039, + "loss": 33.6192, + "objective": 31.81795883178711, + "ranking_idealized": 0.5916666388511658, + "ranking_idealized_expo": 0.5916666388511658, + "ranking_simple": 0.5958333611488342, + "regularize": 31.81795883178711, + "step": 670 + }, + { + "dpo_loss": 14.198540687561035, + "epoch": 3.826169107227208, + "grad_norm": 6007.275340904618, + "learning_rate": 7.819930373330669e-07, + "logits": -0.5869444608688354, + "logps": -77.28413391113281, + "loss": 34.2996, + "objective": 32.41952133178711, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5458333492279053, + "regularize": 32.41952133178711, + "step": 675 + }, + { + "dpo_loss": 17.151575088500977, + "epoch": 3.8545111006140766, + "grad_norm": 5553.071492491405, + "learning_rate": 7.463127807341966e-07, + "logits": -0.5520313382148743, + "logps": -77.69143676757812, + "loss": 32.8529, + "objective": 37.0202522277832, + "ranking_idealized": 0.5083333253860474, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.5083333253860474, + "regularize": 37.0202522277832, + "step": 680 + }, + { + "dpo_loss": 17.791221618652344, + "epoch": 3.8828530940009447, + "grad_norm": 5648.54220107531, + "learning_rate": 7.113223306499336e-07, + "logits": -0.5711807608604431, + "logps": -77.1575698852539, + "loss": 32.524, + "objective": 31.409448623657227, + "ranking_idealized": 0.5166666507720947, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.5291666388511658, + "regularize": 31.409448623657227, + "step": 685 + }, + { + "dpo_loss": 13.461447715759277, + "epoch": 3.9111950873878127, + "grad_norm": 5401.846772194243, + "learning_rate": 6.770354504470575e-07, + "logits": -0.6161063313484192, + "logps": -77.0300064086914, + "loss": 30.9577, + "objective": 27.045875549316406, + "ranking_idealized": 0.5458333492279053, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.5541666746139526, + "regularize": 27.045875549316406, + "step": 690 + }, + { + "dpo_loss": 14.924878120422363, + "epoch": 3.9395370807746812, + "grad_norm": 5461.186181284553, + "learning_rate": 6.434656267456843e-07, + "logits": -0.5763762593269348, + "logps": -78.13215637207031, + "loss": 30.6986, + "objective": 32.01544189453125, + "ranking_idealized": 0.46666666865348816, + "ranking_idealized_expo": 0.46666666865348816, + "ranking_simple": 0.4749999940395355, + "regularize": 32.01544189453125, + "step": 695 + }, + { + "dpo_loss": 18.97331428527832, + "epoch": 3.9678790741615493, + "grad_norm": 5818.6857506377755, + "learning_rate": 6.106260641143547e-07, + "logits": -0.5926896333694458, + "logps": -77.89541625976562, + "loss": 32.7043, + "objective": 33.83491516113281, + "ranking_idealized": 0.4833333194255829, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.4791666567325592, + "regularize": 33.83491516113281, + "step": 700 + }, + { + "epoch": 3.9678790741615493, + "eval_dpo_loss": 114.2601547241211, + "eval_logits": -0.6585275530815125, + "eval_logps": -81.83432006835938, + "eval_loss": 223.54994201660156, + "eval_objective": 224.45416259765625, + "eval_ranking_idealized": 0.5092975497245789, + "eval_ranking_idealized_expo": 0.5092975497245789, + "eval_ranking_simple": 0.5061983466148376, + "eval_regularize": 224.45416259765625, + "eval_runtime": 260.3584, + "eval_samples_per_second": 22.239, + "eval_steps_per_second": 0.929, + "step": 700 + }, + { + "dpo_loss": 13.038025856018066, + "epoch": 3.9962210675484178, + "grad_norm": 5581.426984911725, + "learning_rate": 5.785296798760601e-07, + "logits": -0.5679181218147278, + "logps": -76.54711151123047, + "loss": 29.9148, + "objective": 30.134431838989258, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5333333611488342, + "regularize": 30.134431838989258, + "step": 705 + }, + { + "dpo_loss": 12.296858787536621, + "epoch": 4.024563060935286, + "grad_norm": 5718.960380456749, + "learning_rate": 5.471890990272666e-07, + "logits": -0.6013753414154053, + "logps": -78.08429718017578, + "loss": 26.5384, + "objective": 26.26165771484375, + "ranking_idealized": 0.4791666567325592, + "ranking_idealized_expo": 0.4791666567325592, + "ranking_simple": 0.4791666567325592, + "regularize": 26.26165771484375, + "step": 710 + }, + { + "dpo_loss": 10.493599891662598, + "epoch": 4.052905054322154, + "grad_norm": 5570.51042110099, + "learning_rate": 5.166166492719124e-07, + "logits": -0.585443377494812, + "logps": -77.34422302246094, + "loss": 25.5029, + "objective": 24.800411224365234, + "ranking_idealized": 0.5333333611488342, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.5375000238418579, + "regularize": 24.800411224365234, + "step": 715 + }, + { + "dpo_loss": 11.174727439880371, + "epoch": 4.081247047709022, + "grad_norm": 5676.732061923856, + "learning_rate": 4.868243561723535e-07, + "logits": -0.5442956686019897, + "logps": -79.65494537353516, + "loss": 25.0424, + "objective": 26.295820236206055, + "ranking_idealized": 0.5208333134651184, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.5249999761581421, + "regularize": 26.295820236206055, + "step": 720 + }, + { + "dpo_loss": 10.66411304473877, + "epoch": 4.109589041095891, + "grad_norm": 6033.326375340949, + "learning_rate": 4.57823938419153e-07, + "logits": -0.5671114325523376, + "logps": -77.33932495117188, + "loss": 24.0471, + "objective": 21.140954971313477, + "ranking_idealized": 0.5291666388511658, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.5291666388511658, + "regularize": 21.140954971313477, + "step": 725 + }, + { + "dpo_loss": 14.05311107635498, + "epoch": 4.137931034482759, + "grad_norm": 5855.037678063073, + "learning_rate": 4.2962680322157335e-07, + "logits": -0.6375981569290161, + "logps": -77.6299057006836, + "loss": 24.9993, + "objective": 25.476118087768555, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5416666865348816, + "regularize": 25.476118087768555, + "step": 730 + }, + { + "dpo_loss": 9.336868286132812, + "epoch": 4.166273027869627, + "grad_norm": 5559.618972518497, + "learning_rate": 4.0224404182059443e-07, + "logits": -0.5607864856719971, + "logps": -78.882568359375, + "loss": 22.8143, + "objective": 21.191997528076172, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5166666507720947, + "regularize": 21.191997528076172, + "step": 735 + }, + { + "dpo_loss": 10.657824516296387, + "epoch": 4.194615021256495, + "grad_norm": 5999.26275088513, + "learning_rate": 3.756864251262143e-07, + "logits": -0.6011705994606018, + "logps": -78.2365951538086, + "loss": 23.5575, + "objective": 21.66937255859375, + "ranking_idealized": 0.5208333134651184, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.5249999761581421, + "regularize": 21.66937255859375, + "step": 740 + }, + { + "dpo_loss": 13.124934196472168, + "epoch": 4.222957014643363, + "grad_norm": 5474.613445401946, + "learning_rate": 3.499643994807486e-07, + "logits": -0.6319831013679504, + "logps": -75.88219451904297, + "loss": 22.84, + "objective": 24.67308807373047, + "ranking_idealized": 0.48750001192092896, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.49166667461395264, + "regularize": 24.67308807373047, + "step": 745 + }, + { + "dpo_loss": 8.683289527893066, + "epoch": 4.251299008030231, + "grad_norm": 5367.021912836146, + "learning_rate": 3.250880825498026e-07, + "logits": -0.6556726098060608, + "logps": -77.68559265136719, + "loss": 22.8627, + "objective": 24.22634506225586, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5291666388511658, + "regularize": 24.22634506225586, + "step": 750 + }, + { + "epoch": 4.251299008030231, + "eval_dpo_loss": 114.44987487792969, + "eval_logits": -0.656375527381897, + "eval_logps": -81.75474548339844, + "eval_loss": 223.7742462158203, + "eval_objective": 224.67481994628906, + "eval_ranking_idealized": 0.5092975497245789, + "eval_ranking_idealized_expo": 0.5092975497245789, + "eval_ranking_simple": 0.5072314143180847, + "eval_regularize": 224.67481994628906, + "eval_runtime": 260.8308, + "eval_samples_per_second": 22.198, + "eval_steps_per_second": 0.928, + "step": 750 + }, + { + "dpo_loss": 10.33426284790039, + "epoch": 4.2796410014171, + "grad_norm": 5558.083809420797, + "learning_rate": 3.0106725934252095e-07, + "logits": -0.620187520980835, + "logps": -77.5008544921875, + "loss": 23.8011, + "objective": 21.08031463623047, + "ranking_idealized": 0.5333333611488342, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.5333333611488342, + "regularize": 21.08031463623047, + "step": 755 + }, + { + "dpo_loss": 10.207574844360352, + "epoch": 4.307982994803968, + "grad_norm": 5471.852981397453, + "learning_rate": 2.779113783626916e-07, + "logits": -0.6027387976646423, + "logps": -77.6553726196289, + "loss": 20.5752, + "objective": 20.16022300720215, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.5625, + "ranking_simple": 0.5583333373069763, + "regularize": 20.16022300720215, + "step": 760 + }, + { + "dpo_loss": 11.417025566101074, + "epoch": 4.336324988190836, + "grad_norm": 5508.4710027728315, + "learning_rate": 2.5562954789221164e-07, + "logits": -0.6078615784645081, + "logps": -78.18927764892578, + "loss": 23.2097, + "objective": 23.003475189208984, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5208333134651184, + "regularize": 23.003475189208984, + "step": 765 + }, + { + "dpo_loss": 12.027268409729004, + "epoch": 4.364666981577704, + "grad_norm": 5910.519266120715, + "learning_rate": 2.3423053240837518e-07, + "logits": -0.5967121720314026, + "logps": -76.82184600830078, + "loss": 22.881, + "objective": 21.95987892150879, + "ranking_idealized": 0.5041666626930237, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.5083333253860474, + "regularize": 21.95987892150879, + "step": 770 + }, + { + "dpo_loss": 8.871806144714355, + "epoch": 4.393008974964572, + "grad_norm": 5570.02355891466, + "learning_rate": 2.137227491364016e-07, + "logits": -0.5998777747154236, + "logps": -77.76221466064453, + "loss": 21.4575, + "objective": 21.222782135009766, + "ranking_idealized": 0.5083333253860474, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.512499988079071, + "regularize": 21.222782135009766, + "step": 775 + }, + { + "dpo_loss": 9.251056671142578, + "epoch": 4.42135096835144, + "grad_norm": 5696.323750788966, + "learning_rate": 1.941142647385469e-07, + "logits": -0.6099433898925781, + "logps": -76.77149963378906, + "loss": 21.1917, + "objective": 19.60472869873047, + "ranking_idealized": 0.5, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5083333253860474, + "regularize": 19.60472869873047, + "step": 780 + }, + { + "dpo_loss": 13.425146102905273, + "epoch": 4.449692961738309, + "grad_norm": 5399.943749207063, + "learning_rate": 1.7541279214111277e-07, + "logits": -0.6504854559898376, + "logps": -76.7619400024414, + "loss": 20.4983, + "objective": 22.567527770996094, + "ranking_idealized": 0.5291666388511658, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.5249999761581421, + "regularize": 22.567527770996094, + "step": 785 + }, + { + "dpo_loss": 8.888577461242676, + "epoch": 4.478034955125177, + "grad_norm": 5693.14721540096, + "learning_rate": 1.5762568750059604e-07, + "logits": -0.631601095199585, + "logps": -79.10377502441406, + "loss": 20.8686, + "objective": 19.034799575805664, + "ranking_idealized": 0.5541666746139526, + "ranking_idealized_expo": 0.5541666746139526, + "ranking_simple": 0.5583333373069763, + "regularize": 19.034799575805664, + "step": 790 + }, + { + "dpo_loss": 9.370686531066895, + "epoch": 4.506376948512045, + "grad_norm": 5419.310940845333, + "learning_rate": 1.4075994731016895e-07, + "logits": -0.5613911747932434, + "logps": -78.7647705078125, + "loss": 19.2852, + "objective": 19.674354553222656, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.550000011920929, + "regularize": 19.674354553222656, + "step": 795 + }, + { + "dpo_loss": 9.124998092651367, + "epoch": 4.534718941898913, + "grad_norm": 5936.415800302871, + "learning_rate": 1.2482220564763669e-07, + "logits": -0.5293439626693726, + "logps": -77.9144287109375, + "loss": 19.3618, + "objective": 20.374727249145508, + "ranking_idealized": 0.5083333253860474, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.5166666507720947, + "regularize": 20.374727249145508, + "step": 800 + }, + { + "epoch": 4.534718941898913, + "eval_dpo_loss": 114.34851837158203, + "eval_logits": -0.6540291905403137, + "eval_logps": -81.88977813720703, + "eval_loss": 223.28860473632812, + "eval_objective": 224.43710327148438, + "eval_ranking_idealized": 0.5092975497245789, + "eval_ranking_idealized_expo": 0.5092975497245789, + "eval_ranking_simple": 0.5082644820213318, + "eval_regularize": 224.43710327148438, + "eval_runtime": 261.2589, + "eval_samples_per_second": 22.162, + "eval_steps_per_second": 0.926, + "step": 800 + }, + { + "dpo_loss": 8.316792488098145, + "epoch": 4.563060935285781, + "grad_norm": 6027.940037587719, + "learning_rate": 1.0981873156594381e-07, + "logits": -0.5915284752845764, + "logps": -77.64116668701172, + "loss": 18.7882, + "objective": 17.984209060668945, + "ranking_idealized": 0.4583333432674408, + "ranking_idealized_expo": 0.4583333432674408, + "ranking_simple": 0.4541666805744171, + "regularize": 17.984209060668945, + "step": 805 + }, + { + "dpo_loss": 10.590494155883789, + "epoch": 4.59140292867265, + "grad_norm": 5522.739351644112, + "learning_rate": 9.575542662726756e-08, + "logits": -0.615004301071167, + "logps": -76.7275161743164, + "loss": 18.881, + "objective": 19.299503326416016, + "ranking_idealized": 0.4583333432674408, + "ranking_idealized_expo": 0.4583333432674408, + "ranking_simple": 0.4625000059604645, + "regularize": 19.299503326416016, + "step": 810 + }, + { + "dpo_loss": 10.131912231445312, + "epoch": 4.619744922059518, + "grad_norm": 5480.52996485649, + "learning_rate": 8.26378225816582e-08, + "logits": -0.5336829423904419, + "logps": -78.38713836669922, + "loss": 20.3217, + "objective": 20.68436050415039, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5874999761581421, + "ranking_simple": 0.5874999761581421, + "regularize": 20.68436050415039, + "step": 815 + }, + { + "dpo_loss": 9.877272605895996, + "epoch": 4.648086915446386, + "grad_norm": 5772.91774545367, + "learning_rate": 7.047107919114588e-08, + "logits": -0.5947645306587219, + "logps": -77.6579360961914, + "loss": 18.9691, + "objective": 20.117984771728516, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.5625, + "ranking_simple": 0.5541666746139526, + "regularize": 20.117984771728516, + "step": 820 + }, + { + "dpo_loss": 9.382747650146484, + "epoch": 4.6764289088332545, + "grad_norm": 5315.332110164652, + "learning_rate": 5.92599822001666e-08, + "logits": -0.5714208483695984, + "logps": -76.10657501220703, + "loss": 18.8755, + "objective": 17.507164001464844, + "ranking_idealized": 0.5, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.4958333373069763, + "regularize": 17.507164001464844, + "step": 825 + }, + { + "dpo_loss": 12.066271781921387, + "epoch": 4.7047709022201225, + "grad_norm": 5546.435431499101, + "learning_rate": 4.9008941453107527e-08, + "logits": -0.6190983653068542, + "logps": -78.2457046508789, + "loss": 19.5002, + "objective": 20.513113021850586, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5291666388511658, + "regularize": 20.513113021850586, + "step": 830 + }, + { + "dpo_loss": 9.90359115600586, + "epoch": 4.733112895606991, + "grad_norm": 5332.26085581772, + "learning_rate": 3.972198915970976e-08, + "logits": -0.5777944922447205, + "logps": -77.54198455810547, + "loss": 20.0274, + "objective": 21.012075424194336, + "ranking_idealized": 0.48750001192092896, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.4958333373069763, + "regularize": 21.012075424194336, + "step": 835 + }, + { + "dpo_loss": 9.880459785461426, + "epoch": 4.7614548889938595, + "grad_norm": 5541.828579858096, + "learning_rate": 3.1402778309014284e-08, + "logits": -0.5807673931121826, + "logps": -77.97594451904297, + "loss": 18.1421, + "objective": 20.538494110107422, + "ranking_idealized": 0.5541666746139526, + "ranking_idealized_expo": 0.5541666746139526, + "ranking_simple": 0.5541666746139526, + "regularize": 20.538494110107422, + "step": 840 + }, + { + "dpo_loss": 8.26816177368164, + "epoch": 4.7897968823807275, + "grad_norm": 5943.885633928928, + "learning_rate": 2.4054581232470785e-08, + "logits": -0.5882014036178589, + "logps": -78.15058135986328, + "loss": 18.3375, + "objective": 17.440135955810547, + "ranking_idealized": 0.4541666805744171, + "ranking_idealized_expo": 0.4541666805744171, + "ranking_simple": 0.4583333432674408, + "regularize": 17.440135955810547, + "step": 845 + }, + { + "dpo_loss": 10.461382865905762, + "epoch": 4.818138875767596, + "grad_norm": 5496.982876639107, + "learning_rate": 1.768028831677926e-08, + "logits": -0.6040247678756714, + "logps": -76.82685852050781, + "loss": 18.3796, + "objective": 16.339303970336914, + "ranking_idealized": 0.5291666388511658, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.5249999761581421, + "regularize": 16.339303970336914, + "step": 850 + }, + { + "epoch": 4.818138875767596, + "eval_dpo_loss": 114.2867202758789, + "eval_logits": -0.6522302627563477, + "eval_logps": -81.8523941040039, + "eval_loss": 223.19020080566406, + "eval_objective": 224.42820739746094, + "eval_ranking_idealized": 0.5092975497245789, + "eval_ranking_idealized_expo": 0.5092975497245789, + "eval_ranking_simple": 0.5082644820213318, + "eval_regularize": 224.42820739746094, + "eval_runtime": 260.7184, + "eval_samples_per_second": 22.208, + "eval_steps_per_second": 0.928, + "step": 850 + }, + { + "dpo_loss": 10.172054290771484, + "epoch": 4.846480869154464, + "grad_norm": 5801.469775352937, + "learning_rate": 1.2282406866966078e-08, + "logits": -0.565997302532196, + "logps": -76.97755432128906, + "loss": 18.5829, + "objective": 20.51591682434082, + "ranking_idealized": 0.5375000238418579, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5458333492279053, + "regularize": 20.51591682434082, + "step": 855 + }, + { + "dpo_loss": 8.222423553466797, + "epoch": 4.874822862541333, + "grad_norm": 5724.160408470258, + "learning_rate": 7.863060120144316e-09, + "logits": -0.5622321963310242, + "logps": -77.1912612915039, + "loss": 19.2154, + "objective": 18.061235427856445, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.5458333492279053, + "regularize": 18.061235427856445, + "step": 860 + }, + { + "dpo_loss": 9.016589164733887, + "epoch": 4.903164855928201, + "grad_norm": 5747.678123234768, + "learning_rate": 4.423986410346526e-09, + "logits": -0.5972442030906677, + "logps": -76.20024108886719, + "loss": 17.5709, + "objective": 17.75177574157715, + "ranking_idealized": 0.4958333373069763, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.4833333194255829, + "regularize": 17.75177574157715, + "step": 865 + }, + { + "dpo_loss": 8.523500442504883, + "epoch": 4.931506849315069, + "grad_norm": 5849.97189887377, + "learning_rate": 1.9665384847583622e-09, + "logits": -0.6018223166465759, + "logps": -77.82225036621094, + "loss": 17.7554, + "objective": 17.821632385253906, + "ranking_idealized": 0.5083333253860474, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.512499988079071, + "regularize": 17.821632385253906, + "step": 870 + }, + { + "dpo_loss": 9.60958480834961, + "epoch": 4.959848842701937, + "grad_norm": 5608.648575998045, + "learning_rate": 4.916829716183901e-10, + "logits": -0.5770124197006226, + "logps": -77.4490966796875, + "loss": 18.6192, + "objective": 19.857257843017578, + "ranking_idealized": 0.5458333492279053, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.5541666746139526, + "regularize": 19.857257843017578, + "step": 875 + }, + { + "dpo_loss": 10.620950698852539, + "epoch": 4.988190836088805, + "grad_norm": 5634.2530385257105, + "learning_rate": 0.0, + "logits": -0.6300147175788879, + "logps": -78.3343734741211, + "loss": 18.2825, + "objective": 19.312227249145508, + "ranking_idealized": 0.5291666388511658, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.5291666388511658, + "regularize": 19.312227249145508, + "step": 880 + }, + { + "epoch": 4.988190836088805, + "step": 880, + "total_flos": 0.0, + "train_loss": 74.13437041504817, + "train_runtime": 35293.9194, + "train_samples_per_second": 7.197, + "train_steps_per_second": 0.025 + } + ], + "logging_steps": 5, + "max_steps": 880, + "num_input_tokens_seen": 0, + "num_train_epochs": 5, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +}