{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9992126604204392, "eval_steps": 50, "global_step": 2116, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "dpo_loss": 0.6931471824645996, "epoch": 0.0009448074954727974, "grad_norm": 66.46447760146653, "learning_rate": 4.716981132075471e-10, "logits": -2.096372604370117, "logps": -90.695556640625, "loss": 0.0053, "objective": 0.004870629869401455, "ranking_idealized": 0.5, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.004867264535278082, "step": 1 }, { "dpo_loss": 0.6931397914886475, "epoch": 0.004724037477363987, "grad_norm": 73.1061214549564, "learning_rate": 2.358490566037736e-09, "logits": -2.2148256301879883, "logps": -89.18419647216797, "loss": 0.0056, "objective": 0.006199519615620375, "ranking_idealized": 0.5, "ranking_idealized_expo": 0.3958333432674408, "ranking_simple": 0.3958333432674408, "regularize": 0.0061979773454368114, "step": 5 }, { "dpo_loss": 0.6931113600730896, "epoch": 0.009448074954727975, "grad_norm": 66.26130454557787, "learning_rate": 4.716981132075472e-09, "logits": -2.1163227558135986, "logps": -90.67784118652344, "loss": 0.0056, "objective": 0.0063161361031234264, "ranking_idealized": 0.5333333611488342, "ranking_idealized_expo": 0.4166666567325592, "ranking_simple": 0.4166666567325592, "regularize": 0.006314346566796303, "step": 10 }, { "dpo_loss": 0.6930894255638123, "epoch": 0.014172112432091962, "grad_norm": 65.86685369935576, "learning_rate": 7.075471698113207e-09, "logits": -2.0237159729003906, "logps": -88.98513793945312, "loss": 0.0052, "objective": 0.004170234780758619, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5166666507720947, "regularize": 0.004166428931057453, "step": 15 }, { "dpo_loss": 0.6932118535041809, "epoch": 0.01889614990945595, "grad_norm": 67.08170030108543, "learning_rate": 9.433962264150943e-09, "logits": -2.220696449279785, "logps": -91.20802307128906, "loss": 0.0056, "objective": 0.0056175715290009975, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.46666666865348816, "ranking_simple": 0.46666666865348816, "regularize": 0.00561108160763979, "step": 20 }, { "dpo_loss": 0.6930363774299622, "epoch": 0.023620187386819935, "grad_norm": 64.75938875868191, "learning_rate": 1.1792452830188679e-08, "logits": -2.184110164642334, "logps": -93.84049224853516, "loss": 0.005, "objective": 0.005908097140491009, "ranking_idealized": 0.699999988079071, "ranking_idealized_expo": 0.6166666746139526, "ranking_simple": 0.6166666746139526, "regularize": 0.0059060631319880486, "step": 25 }, { "dpo_loss": 0.6930921673774719, "epoch": 0.028344224864183924, "grad_norm": 70.85746649835617, "learning_rate": 1.4150943396226414e-08, "logits": -2.048970937728882, "logps": -92.22063446044922, "loss": 0.0062, "objective": 0.006645068060606718, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.0066424161195755005, "step": 30 }, { "dpo_loss": 0.6927813291549683, "epoch": 0.03306826234154791, "grad_norm": 69.39212454087999, "learning_rate": 1.6509433962264148e-08, "logits": -2.164494276046753, "logps": -90.56898498535156, "loss": 0.006, "objective": 0.0044576446525752544, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.5666666626930237, "regularize": 0.004453611560165882, "step": 35 }, { "dpo_loss": 0.6927517056465149, "epoch": 0.0377922998189119, "grad_norm": 79.75758352153623, "learning_rate": 1.8867924528301887e-08, "logits": -2.2105553150177, "logps": -94.57971954345703, "loss": 0.0058, "objective": 0.004820433910936117, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.5833333134651184, "regularize": 0.004817112348973751, "step": 40 }, { "dpo_loss": 0.6930319666862488, "epoch": 0.04251633729627589, "grad_norm": 64.62991156662962, "learning_rate": 2.1226415094339622e-08, "logits": -2.149827241897583, "logps": -90.14302062988281, "loss": 0.0057, "objective": 0.004925203043967485, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.4833333194255829, "regularize": 0.004921570885926485, "step": 45 }, { "dpo_loss": 0.692544162273407, "epoch": 0.04724037477363987, "grad_norm": 68.06296242846598, "learning_rate": 2.3584905660377358e-08, "logits": -2.1563477516174316, "logps": -91.66796875, "loss": 0.006, "objective": 0.006787313614040613, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.550000011920929, "regularize": 0.006785000674426556, "step": 50 }, { "epoch": 0.04724037477363987, "eval_dpo_loss": 0.6929724812507629, "eval_logits": -1.9958003759384155, "eval_logps": -98.61515808105469, "eval_loss": 0.0059888348914682865, "eval_objective": 0.006077947095036507, "eval_ranking_idealized": 0.6022099256515503, "eval_ranking_idealized_expo": 0.5207182168960571, "eval_ranking_simple": 0.5179557800292969, "eval_regularize": 0.006075054872781038, "eval_runtime": 445.1462, "eval_samples_per_second": 13.007, "eval_steps_per_second": 3.253, "step": 50 }, { "dpo_loss": 0.6929494738578796, "epoch": 0.05196441225100386, "grad_norm": 73.41432282855463, "learning_rate": 2.5943396226415093e-08, "logits": -2.1937828063964844, "logps": -93.48242950439453, "loss": 0.006, "objective": 0.005628067534416914, "ranking_idealized": 0.46666666865348816, "ranking_idealized_expo": 0.4166666567325592, "ranking_simple": 0.4166666567325592, "regularize": 0.005625530146062374, "step": 55 }, { "dpo_loss": 0.692658543586731, "epoch": 0.05668844972836785, "grad_norm": 65.33336896476966, "learning_rate": 2.830188679245283e-08, "logits": -2.2241933345794678, "logps": -89.14469146728516, "loss": 0.0058, "objective": 0.004998047836124897, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.6166666746139526, "ranking_simple": 0.6166666746139526, "regularize": 0.004994309972971678, "step": 60 }, { "dpo_loss": 0.692764163017273, "epoch": 0.06141248720573183, "grad_norm": 66.55102685698147, "learning_rate": 3.0660377358490564e-08, "logits": -2.2217800617218018, "logps": -89.03886413574219, "loss": 0.0074, "objective": 0.0076709226705133915, "ranking_idealized": 0.5, "ranking_idealized_expo": 0.38333332538604736, "ranking_simple": 0.38333332538604736, "regularize": 0.007668651174753904, "step": 65 }, { "dpo_loss": 0.6925709843635559, "epoch": 0.06613652468309582, "grad_norm": 75.22580153097067, "learning_rate": 3.3018867924528296e-08, "logits": -2.1472768783569336, "logps": -94.85504913330078, "loss": 0.0076, "objective": 0.007108477409929037, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5166666507720947, "regularize": 0.007105442229658365, "step": 70 }, { "dpo_loss": 0.6925690770149231, "epoch": 0.0708605621604598, "grad_norm": 79.40443090905357, "learning_rate": 3.5377358490566035e-08, "logits": -2.206367015838623, "logps": -92.62692260742188, "loss": 0.0071, "objective": 0.007145268842577934, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5166666507720947, "regularize": 0.00714282738044858, "step": 75 }, { "dpo_loss": 0.6921057105064392, "epoch": 0.0755845996378238, "grad_norm": 76.34709727963714, "learning_rate": 3.7735849056603774e-08, "logits": -2.1775050163269043, "logps": -94.01787567138672, "loss": 0.0066, "objective": 0.006093442440032959, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.5666666626930237, "regularize": 0.006089916918426752, "step": 80 }, { "dpo_loss": 0.6927501559257507, "epoch": 0.08030863711518778, "grad_norm": 76.24460999547686, "learning_rate": 4.009433962264151e-08, "logits": -2.1956326961517334, "logps": -92.5903091430664, "loss": 0.0069, "objective": 0.0067410701885819435, "ranking_idealized": 0.699999988079071, "ranking_idealized_expo": 0.6000000238418579, "ranking_simple": 0.6000000238418579, "regularize": 0.006739132571965456, "step": 85 }, { "dpo_loss": 0.6902840733528137, "epoch": 0.08503267459255177, "grad_norm": 65.10729628861057, "learning_rate": 4.2452830188679244e-08, "logits": -2.2021093368530273, "logps": -96.31147766113281, "loss": 0.0075, "objective": 0.008030267432332039, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.5833333134651184, "regularize": 0.008029019460082054, "step": 90 }, { "dpo_loss": 0.6926410794258118, "epoch": 0.08975671206991576, "grad_norm": 71.92277477052832, "learning_rate": 4.481132075471698e-08, "logits": -2.1148061752319336, "logps": -89.49421691894531, "loss": 0.007, "objective": 0.0068625533021986485, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.550000011920929, "regularize": 0.006859814748167992, "step": 95 }, { "dpo_loss": 0.6942407488822937, "epoch": 0.09448074954727974, "grad_norm": 65.63913294191785, "learning_rate": 4.7169811320754715e-08, "logits": -2.2743189334869385, "logps": -94.41613006591797, "loss": 0.0092, "objective": 0.012641828507184982, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.5333333611488342, "regularize": 0.012639058753848076, "step": 100 }, { "epoch": 0.09448074954727974, "eval_dpo_loss": 0.6929203867912292, "eval_logits": -1.995413899421692, "eval_logps": -98.7889404296875, "eval_loss": 0.00729329651221633, "eval_objective": 0.00731161143630743, "eval_ranking_idealized": 0.6022099256515503, "eval_ranking_idealized_expo": 0.5207182168960571, "eval_ranking_simple": 0.5186464190483093, "eval_regularize": 0.00730866938829422, "eval_runtime": 447.0397, "eval_samples_per_second": 12.952, "eval_steps_per_second": 3.239, "step": 100 }, { "dpo_loss": 0.6924384832382202, "epoch": 0.09920478702464373, "grad_norm": 67.55366715626052, "learning_rate": 4.9528301886792454e-08, "logits": -2.088653802871704, "logps": -92.43064880371094, "loss": 0.009, "objective": 0.00849145371466875, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5166666507720947, "regularize": 0.008488119579851627, "step": 105 }, { "dpo_loss": 0.6923626661300659, "epoch": 0.10392882450200772, "grad_norm": 68.4026236372278, "learning_rate": 5.1886792452830186e-08, "logits": -2.1796536445617676, "logps": -91.34872436523438, "loss": 0.0083, "objective": 0.0080822529271245, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.4833333194255829, "regularize": 0.008080901578068733, "step": 110 }, { "dpo_loss": 0.6901246309280396, "epoch": 0.1086528619793717, "grad_norm": 64.06144820478669, "learning_rate": 5.4245283018867925e-08, "logits": -2.1858596801757812, "logps": -89.73294830322266, "loss": 0.0097, "objective": 0.013094036839902401, "ranking_idealized": 0.5333333611488342, "ranking_idealized_expo": 0.46666666865348816, "ranking_simple": 0.46666666865348816, "regularize": 0.013092008419334888, "step": 115 }, { "dpo_loss": 0.6923102736473083, "epoch": 0.1133768994567357, "grad_norm": 61.237648061668985, "learning_rate": 5.660377358490566e-08, "logits": -2.1982421875, "logps": -94.54019927978516, "loss": 0.0085, "objective": 0.008155700750648975, "ranking_idealized": 0.7166666388511658, "ranking_idealized_expo": 0.6666666865348816, "ranking_simple": 0.6666666865348816, "regularize": 0.00815290305763483, "step": 120 }, { "dpo_loss": 0.6917387247085571, "epoch": 0.11810093693409968, "grad_norm": 73.42929721502428, "learning_rate": 5.8962264150943396e-08, "logits": -2.1935439109802246, "logps": -95.18013000488281, "loss": 0.0103, "objective": 0.010648042894899845, "ranking_idealized": 0.5333333611488342, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.4833333194255829, "regularize": 0.010645588859915733, "step": 125 }, { "dpo_loss": 0.6919466853141785, "epoch": 0.12282497441146366, "grad_norm": 68.99586341226502, "learning_rate": 6.132075471698113e-08, "logits": -2.1257071495056152, "logps": -97.63853454589844, "loss": 0.0103, "objective": 0.009905511513352394, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.6000000238418579, "ranking_simple": 0.6000000238418579, "regularize": 0.009904048405587673, "step": 130 }, { "dpo_loss": 0.6923593878746033, "epoch": 0.12754901188882764, "grad_norm": 56.22865998140236, "learning_rate": 6.367924528301887e-08, "logits": -2.1036369800567627, "logps": -91.52125549316406, "loss": 0.0098, "objective": 0.009057187475264072, "ranking_idealized": 0.5333333611488342, "ranking_idealized_expo": 0.46666666865348816, "ranking_simple": 0.46666666865348816, "regularize": 0.009054058231413364, "step": 135 }, { "dpo_loss": 0.6894025206565857, "epoch": 0.13227304936619164, "grad_norm": 64.3061621244485, "learning_rate": 6.603773584905659e-08, "logits": -2.246990442276001, "logps": -92.88211059570312, "loss": 0.0113, "objective": 0.011589973233640194, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5333333611488342, "regularize": 0.011587386019527912, "step": 140 }, { "dpo_loss": 0.6934190988540649, "epoch": 0.13699708684355563, "grad_norm": 74.71610410718624, "learning_rate": 6.839622641509434e-08, "logits": -2.102886199951172, "logps": -90.7693099975586, "loss": 0.0123, "objective": 0.00984902959316969, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.5, "ranking_simple": 0.4833333194255829, "regularize": 0.009846380911767483, "step": 145 }, { "dpo_loss": 0.6908667087554932, "epoch": 0.1417211243209196, "grad_norm": 76.42928931969817, "learning_rate": 7.075471698113207e-08, "logits": -2.238095998764038, "logps": -94.04148864746094, "loss": 0.0142, "objective": 0.014535349793732166, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5166666507720947, "regularize": 0.014532845467329025, "step": 150 }, { "epoch": 0.1417211243209196, "eval_dpo_loss": 0.693015992641449, "eval_logits": -1.9985584020614624, "eval_logps": -98.66200256347656, "eval_loss": 0.009166295640170574, "eval_objective": 0.00932803563773632, "eval_ranking_idealized": 0.6022099256515503, "eval_ranking_idealized_expo": 0.5207182168960571, "eval_ranking_simple": 0.5186464190483093, "eval_regularize": 0.009325726889073849, "eval_runtime": 446.6869, "eval_samples_per_second": 12.962, "eval_steps_per_second": 3.242, "step": 150 }, { "dpo_loss": 0.6918679475784302, "epoch": 0.1464451617982836, "grad_norm": 80.89548200084305, "learning_rate": 7.311320754716981e-08, "logits": -2.1368908882141113, "logps": -90.76949310302734, "loss": 0.0131, "objective": 0.011844370514154434, "ranking_idealized": 0.699999988079071, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.5833333134651184, "regularize": 0.011842181906104088, "step": 155 }, { "dpo_loss": 0.6904442310333252, "epoch": 0.1511691992756476, "grad_norm": 70.5889289835053, "learning_rate": 7.547169811320755e-08, "logits": -2.216298818588257, "logps": -94.7828140258789, "loss": 0.0132, "objective": 0.012807334773242474, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.012806250713765621, "step": 160 }, { "dpo_loss": 0.6893646121025085, "epoch": 0.15589323675301156, "grad_norm": 70.53443315770147, "learning_rate": 7.783018867924527e-08, "logits": -2.2371225357055664, "logps": -91.81126403808594, "loss": 0.0123, "objective": 0.014454828575253487, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.46666666865348816, "ranking_simple": 0.46666666865348816, "regularize": 0.01445354800671339, "step": 165 }, { "dpo_loss": 0.6928521990776062, "epoch": 0.16061727423037556, "grad_norm": 78.26059021460965, "learning_rate": 8.018867924528302e-08, "logits": -2.215101480484009, "logps": -92.55703735351562, "loss": 0.0141, "objective": 0.012299363501369953, "ranking_idealized": 0.699999988079071, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.5833333134651184, "regularize": 0.012298095040023327, "step": 170 }, { "dpo_loss": 0.6916810274124146, "epoch": 0.16534131170773955, "grad_norm": 62.838329994801875, "learning_rate": 8.254716981132075e-08, "logits": -2.1370534896850586, "logps": -91.3881607055664, "loss": 0.014, "objective": 0.015395297668874264, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.5666666626930237, "regularize": 0.015393855981528759, "step": 175 }, { "dpo_loss": 0.6918050050735474, "epoch": 0.17006534918510355, "grad_norm": 73.17909568701033, "learning_rate": 8.490566037735849e-08, "logits": -2.281663417816162, "logps": -94.94965362548828, "loss": 0.0138, "objective": 0.017136668786406517, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5166666507720947, "regularize": 0.017135154455900192, "step": 180 }, { "dpo_loss": 0.69007408618927, "epoch": 0.17478938666246752, "grad_norm": 62.37985427959411, "learning_rate": 8.726415094339621e-08, "logits": -2.1940929889678955, "logps": -93.47923278808594, "loss": 0.0159, "objective": 0.017906082794070244, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5166666507720947, "regularize": 0.017902227118611336, "step": 185 }, { "dpo_loss": 0.6921120882034302, "epoch": 0.1795134241398315, "grad_norm": 70.11549174821204, "learning_rate": 8.962264150943397e-08, "logits": -2.1571404933929443, "logps": -87.7894287109375, "loss": 0.0144, "objective": 0.01052508968859911, "ranking_idealized": 0.6666666865348816, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5166666507720947, "regularize": 0.010523774661123753, "step": 190 }, { "dpo_loss": 0.6912659406661987, "epoch": 0.1842374616171955, "grad_norm": 78.18173910687094, "learning_rate": 9.198113207547169e-08, "logits": -2.1662251949310303, "logps": -90.77739715576172, "loss": 0.0139, "objective": 0.01253608800470829, "ranking_idealized": 0.7166666388511658, "ranking_idealized_expo": 0.6000000238418579, "ranking_simple": 0.6000000238418579, "regularize": 0.012534084729850292, "step": 195 }, { "dpo_loss": 0.6948674321174622, "epoch": 0.18896149909455948, "grad_norm": 74.9573189190215, "learning_rate": 9.433962264150943e-08, "logits": -2.1426119804382324, "logps": -93.58480072021484, "loss": 0.0173, "objective": 0.018598254770040512, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.018596837297081947, "step": 200 }, { "epoch": 0.18896149909455948, "eval_dpo_loss": 0.6928583383560181, "eval_logits": -1.9957162141799927, "eval_logps": -98.79460144042969, "eval_loss": 0.009674900211393833, "eval_objective": 0.009766080416738987, "eval_ranking_idealized": 0.6022099256515503, "eval_ranking_idealized_expo": 0.5207182168960571, "eval_ranking_simple": 0.5172652006149292, "eval_regularize": 0.009763876907527447, "eval_runtime": 457.4524, "eval_samples_per_second": 12.657, "eval_steps_per_second": 3.165, "step": 200 }, { "dpo_loss": 0.6902305483818054, "epoch": 0.19368553657192347, "grad_norm": 69.58780380589624, "learning_rate": 9.669811320754716e-08, "logits": -2.137186288833618, "logps": -90.56549835205078, "loss": 0.0203, "objective": 0.016451861709356308, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.016451073810458183, "step": 205 }, { "dpo_loss": 0.693261444568634, "epoch": 0.19840957404928747, "grad_norm": 62.13478304132868, "learning_rate": 9.905660377358491e-08, "logits": -2.0685033798217773, "logps": -92.36515045166016, "loss": 0.0198, "objective": 0.02386774867773056, "ranking_idealized": 0.5166666507720947, "ranking_idealized_expo": 0.46666666865348816, "ranking_simple": 0.44999998807907104, "regularize": 0.023865588009357452, "step": 210 }, { "dpo_loss": 0.6884204149246216, "epoch": 0.20313361152665144, "grad_norm": 60.854335858242926, "learning_rate": 9.999938744161562e-08, "logits": -2.1262550354003906, "logps": -92.82433319091797, "loss": 0.0173, "objective": 0.020975453779101372, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.5833333134651184, "regularize": 0.020974494516849518, "step": 215 }, { "dpo_loss": 0.6918230652809143, "epoch": 0.20785764900401543, "grad_norm": 67.33088424835829, "learning_rate": 9.999564408362053e-08, "logits": -2.169888734817505, "logps": -88.9549331665039, "loss": 0.0187, "objective": 0.015909165143966675, "ranking_idealized": 0.5333333611488342, "ranking_idealized_expo": 0.4333333373069763, "ranking_simple": 0.4333333373069763, "regularize": 0.015907270833849907, "step": 220 }, { "dpo_loss": 0.689198911190033, "epoch": 0.21258168648137943, "grad_norm": 70.77403555695805, "learning_rate": 9.998849793231472e-08, "logits": -2.099949359893799, "logps": -91.25509643554688, "loss": 0.0218, "objective": 0.02295403741300106, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.46666666865348816, "ranking_simple": 0.46666666865348816, "regularize": 0.022952700033783913, "step": 225 }, { "dpo_loss": 0.6939520239830017, "epoch": 0.2173057239587434, "grad_norm": 80.6144373753939, "learning_rate": 9.997794947407808e-08, "logits": -2.1620826721191406, "logps": -94.76692962646484, "loss": 0.0239, "objective": 0.026342039927840233, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5333333611488342, "regularize": 0.026341309770941734, "step": 230 }, { "dpo_loss": 0.688434362411499, "epoch": 0.2220297614361074, "grad_norm": 62.83216394514196, "learning_rate": 9.996399942685763e-08, "logits": -2.2001922130584717, "logps": -90.64398956298828, "loss": 0.0197, "objective": 0.02317703142762184, "ranking_idealized": 0.6666666865348816, "ranking_idealized_expo": 0.6000000238418579, "ranking_simple": 0.6000000238418579, "regularize": 0.023176301270723343, "step": 235 }, { "dpo_loss": 0.6894098520278931, "epoch": 0.2267537989134714, "grad_norm": 69.59337954721937, "learning_rate": 9.994664874011862e-08, "logits": -2.1389572620391846, "logps": -91.78279113769531, "loss": 0.0225, "objective": 0.02215094491839409, "ranking_idealized": 0.5333333611488342, "ranking_idealized_expo": 0.46666666865348816, "ranking_simple": 0.46666666865348816, "regularize": 0.02214915119111538, "step": 240 }, { "dpo_loss": 0.692762017250061, "epoch": 0.23147783639083536, "grad_norm": 70.45563918532531, "learning_rate": 9.992589859477995e-08, "logits": -2.0933754444122314, "logps": -92.82108306884766, "loss": 0.0215, "objective": 0.02130296640098095, "ranking_idealized": 0.6666666865348816, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.550000011920929, "regularize": 0.021301595494151115, "step": 245 }, { "dpo_loss": 0.6907398104667664, "epoch": 0.23620187386819935, "grad_norm": 68.46129704284265, "learning_rate": 9.990175040313376e-08, "logits": -2.0800933837890625, "logps": -88.17916870117188, "loss": 0.0245, "objective": 0.022766491398215294, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.5833333134651184, "regularize": 0.02276558242738247, "step": 250 }, { "epoch": 0.23620187386819935, "eval_dpo_loss": 0.6929402351379395, "eval_logits": -1.995142936706543, "eval_logps": -98.64155578613281, "eval_loss": 0.012135702185332775, "eval_objective": 0.012087649665772915, "eval_ranking_idealized": 0.6022099256515503, "eval_ranking_idealized_expo": 0.5207182168960571, "eval_ranking_simple": 0.5186464190483093, "eval_regularize": 0.012085951864719391, "eval_runtime": 446.4062, "eval_samples_per_second": 12.97, "eval_steps_per_second": 3.244, "step": 250 }, { "dpo_loss": 0.6918343901634216, "epoch": 0.24092591134556335, "grad_norm": 67.22846019191091, "learning_rate": 9.987420580874936e-08, "logits": -2.116420269012451, "logps": -90.83265686035156, "loss": 0.0239, "objective": 0.019956286996603012, "ranking_idealized": 0.6666666865348816, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5666666626930237, "regularize": 0.019955595955252647, "step": 255 }, { "dpo_loss": 0.688471257686615, "epoch": 0.24564994882292732, "grad_norm": 70.37729127367317, "learning_rate": 9.98432666863613e-08, "logits": -2.240382194519043, "logps": -94.62246704101562, "loss": 0.0228, "objective": 0.026484496891498566, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5, "regularize": 0.026483872905373573, "step": 260 }, { "dpo_loss": 0.6923092603683472, "epoch": 0.25037398630029134, "grad_norm": 66.24088589632522, "learning_rate": 9.980893514174179e-08, "logits": -2.2002015113830566, "logps": -92.85660552978516, "loss": 0.0214, "objective": 0.016592005267739296, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5333333611488342, "regularize": 0.01658981665968895, "step": 265 }, { "dpo_loss": 0.69212406873703, "epoch": 0.2550980237776553, "grad_norm": 67.74325606253207, "learning_rate": 9.97712135115574e-08, "logits": -2.1255643367767334, "logps": -93.65802764892578, "loss": 0.0234, "objective": 0.029443560168147087, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.5666666626930237, "regularize": 0.02944253757596016, "step": 270 }, { "dpo_loss": 0.6937362551689148, "epoch": 0.2598220612550193, "grad_norm": 62.77300536920228, "learning_rate": 9.973010436321003e-08, "logits": -2.1865429878234863, "logps": -94.39256286621094, "loss": 0.0231, "objective": 0.024354156106710434, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5166666507720947, "regularize": 0.024351568892598152, "step": 275 }, { "dpo_loss": 0.6910390853881836, "epoch": 0.2645460987323833, "grad_norm": 62.67099824882817, "learning_rate": 9.968561049466213e-08, "logits": -2.1923298835754395, "logps": -90.44979095458984, "loss": 0.0231, "objective": 0.020880402997136116, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.44999998807907104, "ranking_simple": 0.44999998807907104, "regularize": 0.02087988331913948, "step": 280 }, { "dpo_loss": 0.6930695176124573, "epoch": 0.26927013620974727, "grad_norm": 87.62776016079746, "learning_rate": 9.963773493424628e-08, "logits": -2.2007596492767334, "logps": -95.48867797851562, "loss": 0.0265, "objective": 0.028789160773158073, "ranking_idealized": 0.5166666507720947, "ranking_idealized_expo": 0.4000000059604645, "ranking_simple": 0.4166666567325592, "regularize": 0.028786195442080498, "step": 285 }, { "dpo_loss": 0.6869128346443176, "epoch": 0.27399417368711126, "grad_norm": 72.49597645901943, "learning_rate": 9.95864809404591e-08, "logits": -2.1374075412750244, "logps": -93.5144271850586, "loss": 0.0275, "objective": 0.026990260928869247, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.5333333611488342, "regularize": 0.026989614591002464, "step": 290 }, { "dpo_loss": 0.6920251250267029, "epoch": 0.27871821116447526, "grad_norm": 77.90764756547169, "learning_rate": 9.953185200173945e-08, "logits": -2.111963987350464, "logps": -90.939453125, "loss": 0.0274, "objective": 0.022320417687296867, "ranking_idealized": 0.5166666507720947, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.4833333194255829, "regularize": 0.02231987938284874, "step": 295 }, { "dpo_loss": 0.6920836567878723, "epoch": 0.2834422486418392, "grad_norm": 68.10918361524686, "learning_rate": 9.947385183623097e-08, "logits": -2.213435173034668, "logps": -91.73210906982422, "loss": 0.0234, "objective": 0.026671167463064194, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5166666507720947, "regularize": 0.026670336723327637, "step": 300 }, { "epoch": 0.2834422486418392, "eval_dpo_loss": 0.6931979060173035, "eval_logits": -1.993965744972229, "eval_logps": -98.33208465576172, "eval_loss": 0.01361795049160719, "eval_objective": 0.01397615671157837, "eval_ranking_idealized": 0.6022099256515503, "eval_ranking_idealized_expo": 0.5207182168960571, "eval_ranking_simple": 0.5165745615959167, "eval_regularize": 0.013974088244140148, "eval_runtime": 447.4565, "eval_samples_per_second": 12.94, "eval_steps_per_second": 3.236, "step": 300 }, { "dpo_loss": 0.68857741355896, "epoch": 0.2881662861192032, "grad_norm": 68.87756454021408, "learning_rate": 9.94124843915291e-08, "logits": -2.1138675212860107, "logps": -93.91316986083984, "loss": 0.0266, "objective": 0.03335392475128174, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5166666507720947, "regularize": 0.03335336595773697, "step": 305 }, { "dpo_loss": 0.6887925863265991, "epoch": 0.2928903235965672, "grad_norm": 125.82123779225154, "learning_rate": 9.934775384441227e-08, "logits": -2.138413190841675, "logps": -90.38550567626953, "loss": 0.0265, "objective": 0.021200962364673615, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.4833333194255829, "regularize": 0.02119968645274639, "step": 310 }, { "dpo_loss": 0.6924371123313904, "epoch": 0.2976143610739312, "grad_norm": 64.16350361165954, "learning_rate": 9.92796646005578e-08, "logits": -2.1671297550201416, "logps": -93.71112823486328, "loss": 0.0255, "objective": 0.027043061330914497, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.6333333253860474, "ranking_simple": 0.6499999761581421, "regularize": 0.027042122557759285, "step": 315 }, { "dpo_loss": 0.6908622980117798, "epoch": 0.3023383985512952, "grad_norm": 73.45877351161931, "learning_rate": 9.920822129424189e-08, "logits": -2.0810954570770264, "logps": -93.48076629638672, "loss": 0.0264, "objective": 0.03161174803972244, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.4833333194255829, "regularize": 0.03161115199327469, "step": 320 }, { "dpo_loss": 0.6928619742393494, "epoch": 0.3070624360286592, "grad_norm": 73.35574157285905, "learning_rate": 9.913342878802423e-08, "logits": -2.102128505706787, "logps": -92.2950439453125, "loss": 0.0286, "objective": 0.037009891122579575, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.5666666626930237, "regularize": 0.03700947389006615, "step": 325 }, { "dpo_loss": 0.6908413767814636, "epoch": 0.3117864735060231, "grad_norm": 79.4503768102059, "learning_rate": 9.90552921724171e-08, "logits": -2.2076501846313477, "logps": -91.73526763916016, "loss": 0.0276, "objective": 0.020319262519478798, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.46666666865348816, "ranking_simple": 0.46666666865348816, "regularize": 0.020317750051617622, "step": 330 }, { "dpo_loss": 0.6880075335502625, "epoch": 0.3165105109833871, "grad_norm": 82.61039711327662, "learning_rate": 9.897381676553888e-08, "logits": -2.177678346633911, "logps": -91.5321044921875, "loss": 0.0261, "objective": 0.027510004118084908, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.5333333611488342, "regularize": 0.027509503066539764, "step": 335 }, { "dpo_loss": 0.6954006552696228, "epoch": 0.3212345484607511, "grad_norm": 69.91545242214347, "learning_rate": 9.888900811275203e-08, "logits": -2.1840174198150635, "logps": -92.2385025024414, "loss": 0.0272, "objective": 0.037196170538663864, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.46666666865348816, "ranking_simple": 0.46666666865348816, "regularize": 0.037195660173892975, "step": 340 }, { "dpo_loss": 0.6906254887580872, "epoch": 0.3259585859381151, "grad_norm": 65.03933644888008, "learning_rate": 9.880087198628577e-08, "logits": -2.153885841369629, "logps": -91.60904693603516, "loss": 0.0246, "objective": 0.0248491782695055, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.024848194792866707, "step": 345 }, { "dpo_loss": 0.6858618855476379, "epoch": 0.3306826234154791, "grad_norm": 65.04231679146841, "learning_rate": 9.870941438484314e-08, "logits": -2.1364102363586426, "logps": -89.71781921386719, "loss": 0.0262, "objective": 0.026014180853962898, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5166666507720947, "regularize": 0.026013409718871117, "step": 350 }, { "epoch": 0.3306826234154791, "eval_dpo_loss": 0.6925798058509827, "eval_logits": -1.9947079420089722, "eval_logps": -98.34574890136719, "eval_loss": 0.017829304561018944, "eval_objective": 0.018099796026945114, "eval_ranking_idealized": 0.6022099256515503, "eval_ranking_idealized_expo": 0.5207182168960571, "eval_ranking_simple": 0.5200276374816895, "eval_regularize": 0.018098480999469757, "eval_runtime": 446.968, "eval_samples_per_second": 12.954, "eval_steps_per_second": 3.24, "step": 350 }, { "dpo_loss": 0.6954938173294067, "epoch": 0.3354066608928431, "grad_norm": 80.69779694329567, "learning_rate": 9.861464153319269e-08, "logits": -2.129030466079712, "logps": -93.99476623535156, "loss": 0.0296, "objective": 0.03383675962686539, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.550000011920929, "regularize": 0.03383495658636093, "step": 355 }, { "dpo_loss": 0.6874939799308777, "epoch": 0.3401306983702071, "grad_norm": 76.1212296270761, "learning_rate": 9.85165598817449e-08, "logits": -2.1060662269592285, "logps": -94.81258392333984, "loss": 0.0314, "objective": 0.03174449875950813, "ranking_idealized": 0.5166666507720947, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5166666507720947, "regularize": 0.031743988394737244, "step": 360 }, { "dpo_loss": 0.6876952648162842, "epoch": 0.34485473584757104, "grad_norm": 63.17330963799499, "learning_rate": 9.841517610611307e-08, "logits": -2.223184823989868, "logps": -94.34845733642578, "loss": 0.0271, "objective": 0.03227417171001434, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.46666666865348816, "ranking_simple": 0.4833333194255829, "regularize": 0.03227332606911659, "step": 365 }, { "dpo_loss": 0.6924201846122742, "epoch": 0.34957877332493503, "grad_norm": 70.25439806775886, "learning_rate": 9.831049710665904e-08, "logits": -2.153981924057007, "logps": -92.24577331542969, "loss": 0.0267, "objective": 0.03014095313847065, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.46666666865348816, "ranking_simple": 0.44999998807907104, "regularize": 0.030140016227960587, "step": 370 }, { "dpo_loss": 0.6881352066993713, "epoch": 0.35430281080229903, "grad_norm": 67.1301497619948, "learning_rate": 9.820253000802345e-08, "logits": -2.1567375659942627, "logps": -90.48641204833984, "loss": 0.029, "objective": 0.034893397241830826, "ranking_idealized": 0.5333333611488342, "ranking_idealized_expo": 0.46666666865348816, "ranking_simple": 0.4833333194255829, "regularize": 0.034892160445451736, "step": 375 }, { "dpo_loss": 0.6959127187728882, "epoch": 0.359026848279663, "grad_norm": 65.92204147729184, "learning_rate": 9.809128215864095e-08, "logits": -2.1123626232147217, "logps": -90.6343002319336, "loss": 0.0274, "objective": 0.02818784862756729, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.5833333134651184, "regularize": 0.028187256306409836, "step": 380 }, { "dpo_loss": 0.6918838620185852, "epoch": 0.363750885757027, "grad_norm": 78.02594257500782, "learning_rate": 9.797676113023989e-08, "logits": -2.1536900997161865, "logps": -91.86727905273438, "loss": 0.0271, "objective": 0.024834012612700462, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5166666507720947, "regularize": 0.024833252653479576, "step": 385 }, { "dpo_loss": 0.6891117095947266, "epoch": 0.368474923234391, "grad_norm": 77.62275101385076, "learning_rate": 9.785897471732711e-08, "logits": -2.220367193222046, "logps": -93.69184875488281, "loss": 0.0315, "objective": 0.04405975714325905, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.5333333611488342, "regularize": 0.04405748099088669, "step": 390 }, { "dpo_loss": 0.6931707859039307, "epoch": 0.37319896071175496, "grad_norm": 70.95977856029823, "learning_rate": 9.773793093665739e-08, "logits": -2.188248872756958, "logps": -90.29833984375, "loss": 0.0304, "objective": 0.0337492860853672, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.03374841436743736, "step": 395 }, { "dpo_loss": 0.6871830224990845, "epoch": 0.37792299818911895, "grad_norm": 71.69739058358107, "learning_rate": 9.76136380266878e-08, "logits": -2.155177354812622, "logps": -92.64527130126953, "loss": 0.0315, "objective": 0.031953115016222, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.03195144981145859, "step": 400 }, { "epoch": 0.37792299818911895, "eval_dpo_loss": 0.6926193833351135, "eval_logits": -1.9940829277038574, "eval_logps": -98.11278533935547, "eval_loss": 0.01653479039669037, "eval_objective": 0.01635783165693283, "eval_ranking_idealized": 0.6022099256515503, "eval_ranking_idealized_expo": 0.5207182168960571, "eval_ranking_simple": 0.5200276374816895, "eval_regularize": 0.01635659858584404, "eval_runtime": 446.4503, "eval_samples_per_second": 12.969, "eval_steps_per_second": 3.243, "step": 400 }, { "dpo_loss": 0.6941009759902954, "epoch": 0.38264703566648295, "grad_norm": 62.224154885036064, "learning_rate": 9.748610444701694e-08, "logits": -2.1617021560668945, "logps": -90.76243591308594, "loss": 0.0293, "objective": 0.02944065071642399, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.550000011920929, "regularize": 0.02944008819758892, "step": 405 }, { "dpo_loss": 0.6934362649917603, "epoch": 0.38737107314384694, "grad_norm": 71.86989684775978, "learning_rate": 9.735533887780928e-08, "logits": -2.1968331336975098, "logps": -94.97209930419922, "loss": 0.0284, "objective": 0.02905886620283127, "ranking_idealized": 0.46666666865348816, "ranking_idealized_expo": 0.4000000059604645, "ranking_simple": 0.4000000059604645, "regularize": 0.029057972133159637, "step": 410 }, { "dpo_loss": 0.6933155059814453, "epoch": 0.39209511062121094, "grad_norm": 69.39777880403724, "learning_rate": 9.722135021920426e-08, "logits": -2.1606533527374268, "logps": -90.09014129638672, "loss": 0.0288, "objective": 0.025797124952077866, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.4833333194255829, "regularize": 0.025796448811888695, "step": 415 }, { "dpo_loss": 0.6879637837409973, "epoch": 0.39681914809857494, "grad_norm": 65.66563021960218, "learning_rate": 9.708414759071057e-08, "logits": -2.192812204360962, "logps": -90.60978698730469, "loss": 0.0293, "objective": 0.028554469347000122, "ranking_idealized": 0.5333333611488342, "ranking_idealized_expo": 0.4333333373069763, "ranking_simple": 0.4333333373069763, "regularize": 0.028553970158100128, "step": 420 }, { "dpo_loss": 0.6905270218849182, "epoch": 0.4015431855759389, "grad_norm": 73.81480159017963, "learning_rate": 9.694374033058549e-08, "logits": -2.1698479652404785, "logps": -92.80254364013672, "loss": 0.03, "objective": 0.032098546624183655, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.550000011920929, "regularize": 0.0320979543030262, "step": 425 }, { "dpo_loss": 0.6938761472702026, "epoch": 0.4062672230533029, "grad_norm": 76.38130749325458, "learning_rate": 9.680013799519926e-08, "logits": -2.2929608821868896, "logps": -92.20008087158203, "loss": 0.0332, "objective": 0.03666527569293976, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5333333611488342, "regularize": 0.036664899438619614, "step": 430 }, { "dpo_loss": 0.6846203207969666, "epoch": 0.41099126053066687, "grad_norm": 62.016252142417606, "learning_rate": 9.665335035838468e-08, "logits": -2.1209442615509033, "logps": -91.99024963378906, "loss": 0.0308, "objective": 0.04143450781702995, "ranking_idealized": 0.7333333492279053, "ranking_idealized_expo": 0.6166666746139526, "ranking_simple": 0.6166666746139526, "regularize": 0.04143408685922623, "step": 435 }, { "dpo_loss": 0.692919909954071, "epoch": 0.41571529800803086, "grad_norm": 83.01421114100998, "learning_rate": 9.650338741077189e-08, "logits": -2.1818275451660156, "logps": -88.64667510986328, "loss": 0.0282, "objective": 0.020624225959181786, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.5666666626930237, "regularize": 0.02062271721661091, "step": 440 }, { "dpo_loss": 0.6882209777832031, "epoch": 0.42043933548539486, "grad_norm": 69.91926112096503, "learning_rate": 9.635025935910839e-08, "logits": -2.078962564468384, "logps": -94.8196792602539, "loss": 0.03, "objective": 0.029924126341938972, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.6000000238418579, "ranking_simple": 0.6000000238418579, "regularize": 0.029923679307103157, "step": 445 }, { "dpo_loss": 0.6905581951141357, "epoch": 0.42516337296275886, "grad_norm": 68.70387033094245, "learning_rate": 9.619397662556434e-08, "logits": -2.1093952655792236, "logps": -90.6502914428711, "loss": 0.0294, "objective": 0.028780171647667885, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.44999998807907104, "ranking_simple": 0.46666666865348816, "regularize": 0.028779106214642525, "step": 450 }, { "epoch": 0.42516337296275886, "eval_dpo_loss": 0.6923617124557495, "eval_logits": -1.994999885559082, "eval_logps": -98.37866973876953, "eval_loss": 0.014490882866084576, "eval_objective": 0.014800351113080978, "eval_ranking_idealized": 0.6022099256515503, "eval_ranking_idealized_expo": 0.5207182168960571, "eval_ranking_simple": 0.5186464190483093, "eval_regularize": 0.014799040742218494, "eval_runtime": 444.0476, "eval_samples_per_second": 13.039, "eval_steps_per_second": 3.261, "step": 450 }, { "dpo_loss": 0.6909880042076111, "epoch": 0.42988741044012285, "grad_norm": 68.38544757200873, "learning_rate": 9.60345498470232e-08, "logits": -2.158226728439331, "logps": -90.17542266845703, "loss": 0.0255, "objective": 0.02339431643486023, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.550000011920929, "regularize": 0.023393774405121803, "step": 455 }, { "dpo_loss": 0.6882848739624023, "epoch": 0.4346114479174868, "grad_norm": 60.565560674058744, "learning_rate": 9.58719898743578e-08, "logits": -2.1875061988830566, "logps": -93.89222717285156, "loss": 0.0289, "objective": 0.03423256427049637, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5333333611488342, "regularize": 0.034231893718242645, "step": 460 }, { "dpo_loss": 0.6911517977714539, "epoch": 0.4393354853948508, "grad_norm": 68.08241104925199, "learning_rate": 9.57063077716918e-08, "logits": -2.1419482231140137, "logps": -94.25173950195312, "loss": 0.0304, "objective": 0.02857878990471363, "ranking_idealized": 0.699999988079071, "ranking_idealized_expo": 0.6666666865348816, "ranking_simple": 0.6666666865348816, "regularize": 0.028578022494912148, "step": 465 }, { "dpo_loss": 0.6899906396865845, "epoch": 0.4440595228722148, "grad_norm": 75.2887054128907, "learning_rate": 9.553751481564658e-08, "logits": -2.0578720569610596, "logps": -88.11711120605469, "loss": 0.0318, "objective": 0.02341555431485176, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.6166666746139526, "ranking_simple": 0.6166666746139526, "regularize": 0.02341252751648426, "step": 470 }, { "dpo_loss": 0.6940017938613892, "epoch": 0.4487835603495788, "grad_norm": 61.894078510860375, "learning_rate": 9.536562249457386e-08, "logits": -2.1432507038116455, "logps": -91.78999328613281, "loss": 0.0276, "objective": 0.02927049808204174, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5333333611488342, "regularize": 0.029269874095916748, "step": 475 }, { "dpo_loss": 0.6929585933685303, "epoch": 0.4535075978269428, "grad_norm": 69.75674817980894, "learning_rate": 9.51906425077736e-08, "logits": -2.1212713718414307, "logps": -91.61197662353516, "loss": 0.0303, "objective": 0.028644824400544167, "ranking_idealized": 0.5333333611488342, "ranking_idealized_expo": 0.4333333373069763, "ranking_simple": 0.44999998807907104, "regularize": 0.028642630204558372, "step": 480 }, { "dpo_loss": 0.6894620656967163, "epoch": 0.4582316353043068, "grad_norm": 68.06555771230744, "learning_rate": 9.501258676469798e-08, "logits": -2.2252414226531982, "logps": -92.49252319335938, "loss": 0.0273, "objective": 0.029991615563631058, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.6000000238418579, "ranking_simple": 0.6000000238418579, "regularize": 0.02999037504196167, "step": 485 }, { "dpo_loss": 0.6851038336753845, "epoch": 0.4629556727816707, "grad_norm": 68.81350143691881, "learning_rate": 9.483146738414056e-08, "logits": -2.1528584957122803, "logps": -93.01960754394531, "loss": 0.0298, "objective": 0.03543411195278168, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.5166666507720947, "regularize": 0.03543229401111603, "step": 490 }, { "dpo_loss": 0.6880256533622742, "epoch": 0.4676797102590347, "grad_norm": 63.11314302096046, "learning_rate": 9.46472966934116e-08, "logits": -2.1136722564697266, "logps": -90.93292999267578, "loss": 0.0346, "objective": 0.03396356850862503, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5166666507720947, "regularize": 0.03396301716566086, "step": 495 }, { "dpo_loss": 0.6870742440223694, "epoch": 0.4724037477363987, "grad_norm": 73.04815070979407, "learning_rate": 9.446008722749906e-08, "logits": -2.2441928386688232, "logps": -95.81112670898438, "loss": 0.032, "objective": 0.026291660964488983, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.6000000238418579, "regularize": 0.026291247457265854, "step": 500 }, { "epoch": 0.4724037477363987, "eval_dpo_loss": 0.6924601793289185, "eval_logits": -1.9919774532318115, "eval_logps": -98.64570617675781, "eval_loss": 0.013912476599216461, "eval_objective": 0.0139460489153862, "eval_ranking_idealized": 0.6022099256515503, "eval_ranking_idealized_expo": 0.5207182168960571, "eval_ranking_simple": 0.519336998462677, "eval_regularize": 0.013944561593234539, "eval_runtime": 444.8021, "eval_samples_per_second": 13.017, "eval_steps_per_second": 3.255, "step": 500 }, { "dpo_loss": 0.6906213164329529, "epoch": 0.4771277852137627, "grad_norm": 75.29236969807971, "learning_rate": 9.426985172821529e-08, "logits": -2.225041151046753, "logps": -90.76871490478516, "loss": 0.0313, "objective": 0.034964669495821, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.03496433049440384, "step": 505 }, { "dpo_loss": 0.6928242444992065, "epoch": 0.4818518226911267, "grad_norm": 71.72745918834562, "learning_rate": 9.407660314333001e-08, "logits": -2.0290334224700928, "logps": -92.85369110107422, "loss": 0.0322, "objective": 0.039766810834407806, "ranking_idealized": 0.7166666388511658, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.5833333134651184, "regularize": 0.03976641967892647, "step": 510 }, { "dpo_loss": 0.6923685073852539, "epoch": 0.4865758601684907, "grad_norm": 61.131599755380016, "learning_rate": 9.388035462568891e-08, "logits": -2.147352933883667, "logps": -91.90682220458984, "loss": 0.0331, "objective": 0.035778481513261795, "ranking_idealized": 0.6666666865348816, "ranking_idealized_expo": 0.6000000238418579, "ranking_simple": 0.5666666626930237, "regularize": 0.035777974873781204, "step": 515 }, { "dpo_loss": 0.6962333917617798, "epoch": 0.49129989764585463, "grad_norm": 65.42531713674678, "learning_rate": 9.368111953231848e-08, "logits": -2.1052534580230713, "logps": -92.43571472167969, "loss": 0.0302, "objective": 0.030195049941539764, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.6166666746139526, "ranking_simple": 0.6166666746139526, "regularize": 0.030194593593478203, "step": 520 }, { "dpo_loss": 0.6958824396133423, "epoch": 0.49602393512321863, "grad_norm": 68.86518401985903, "learning_rate": 9.347891142351692e-08, "logits": -2.1327033042907715, "logps": -95.017578125, "loss": 0.0343, "objective": 0.033561404794454575, "ranking_idealized": 0.5166666507720947, "ranking_idealized_expo": 0.46666666865348816, "ranking_simple": 0.44999998807907104, "regularize": 0.033560872077941895, "step": 525 }, { "dpo_loss": 0.692583441734314, "epoch": 0.5007479726005827, "grad_norm": 68.03560111972747, "learning_rate": 9.327374406193124e-08, "logits": -2.1641759872436523, "logps": -92.0415267944336, "loss": 0.032, "objective": 0.03345762938261032, "ranking_idealized": 0.6666666865348816, "ranking_idealized_expo": 0.6000000238418579, "ranking_simple": 0.6000000238418579, "regularize": 0.03345634788274765, "step": 530 }, { "dpo_loss": 0.6896530389785767, "epoch": 0.5054720100779466, "grad_norm": 66.90019999237873, "learning_rate": 9.306563141162044e-08, "logits": -2.1231565475463867, "logps": -91.51903533935547, "loss": 0.0298, "objective": 0.03302415460348129, "ranking_idealized": 0.6666666865348816, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5166666507720947, "regularize": 0.03302332013845444, "step": 535 }, { "dpo_loss": 0.6914986371994019, "epoch": 0.5101960475553106, "grad_norm": 58.738988827079176, "learning_rate": 9.285458763710523e-08, "logits": -2.147346019744873, "logps": -93.07068634033203, "loss": 0.0341, "objective": 0.03528103977441788, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.4833333194255829, "regularize": 0.035279832780361176, "step": 540 }, { "dpo_loss": 0.6926673650741577, "epoch": 0.5149200850326746, "grad_norm": 65.83406826541673, "learning_rate": 9.264062710240386e-08, "logits": -2.1894426345825195, "logps": -97.09349822998047, "loss": 0.0276, "objective": 0.029730303213000298, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.46666666865348816, "ranking_simple": 0.46666666865348816, "regularize": 0.02972940169274807, "step": 545 }, { "dpo_loss": 0.6896089315414429, "epoch": 0.5196441225100386, "grad_norm": 80.74824222454775, "learning_rate": 9.242376437005448e-08, "logits": -2.1549692153930664, "logps": -93.35411834716797, "loss": 0.0314, "objective": 0.0288882777094841, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5166666507720947, "regularize": 0.028887782245874405, "step": 550 }, { "epoch": 0.5196441225100386, "eval_dpo_loss": 0.6926965713500977, "eval_logits": -1.9942920207977295, "eval_logps": -98.96892547607422, "eval_loss": 0.013568516820669174, "eval_objective": 0.013540062122046947, "eval_ranking_idealized": 0.6022099256515503, "eval_ranking_idealized_expo": 0.5207182168960571, "eval_ranking_simple": 0.5186464190483093, "eval_regularize": 0.013538442552089691, "eval_runtime": 444.65, "eval_samples_per_second": 13.021, "eval_steps_per_second": 3.256, "step": 550 }, { "dpo_loss": 0.6818323135375977, "epoch": 0.5243681599874026, "grad_norm": 71.45675055284568, "learning_rate": 9.22040142001241e-08, "logits": -2.1764817237854004, "logps": -92.4581298828125, "loss": 0.0336, "objective": 0.04190651327371597, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.550000011920929, "regularize": 0.04190612956881523, "step": 555 }, { "dpo_loss": 0.6884815692901611, "epoch": 0.5290921974647665, "grad_norm": 72.10264430141908, "learning_rate": 9.198139154920388e-08, "logits": -2.2008354663848877, "logps": -90.6949234008789, "loss": 0.0344, "objective": 0.034483686089515686, "ranking_idealized": 0.5333333611488342, "ranking_idealized_expo": 0.46666666865348816, "ranking_simple": 0.46666666865348816, "regularize": 0.03448285162448883, "step": 560 }, { "dpo_loss": 0.6909436583518982, "epoch": 0.5338162349421306, "grad_norm": 60.56883204825771, "learning_rate": 9.175591156939118e-08, "logits": -2.1834826469421387, "logps": -94.38992309570312, "loss": 0.03, "objective": 0.02786482684314251, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.5666666626930237, "regularize": 0.02786322310566902, "step": 565 }, { "dpo_loss": 0.6896558403968811, "epoch": 0.5385402724194945, "grad_norm": 63.27876500292638, "learning_rate": 9.152758960725829e-08, "logits": -2.0850472450256348, "logps": -90.94063568115234, "loss": 0.0305, "objective": 0.03306278958916664, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5166666507720947, "regularize": 0.03306075185537338, "step": 570 }, { "dpo_loss": 0.6910984516143799, "epoch": 0.5432643098968585, "grad_norm": 64.97441175201224, "learning_rate": 9.129644120280797e-08, "logits": -2.215700387954712, "logps": -93.2086181640625, "loss": 0.0363, "objective": 0.03730526939034462, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.46666666865348816, "ranking_simple": 0.46666666865348816, "regularize": 0.03730436787009239, "step": 575 }, { "dpo_loss": 0.6901772618293762, "epoch": 0.5479883473742225, "grad_norm": 60.84920518500226, "learning_rate": 9.106248208841567e-08, "logits": -2.077465534210205, "logps": -89.92863464355469, "loss": 0.0301, "objective": 0.029211556538939476, "ranking_idealized": 0.5166666507720947, "ranking_idealized_expo": 0.46666666865348816, "ranking_simple": 0.46666666865348816, "regularize": 0.02920910157263279, "step": 580 }, { "dpo_loss": 0.6912521123886108, "epoch": 0.5527123848515865, "grad_norm": 61.38401518242899, "learning_rate": 9.082572818775884e-08, "logits": -2.0964841842651367, "logps": -96.6317138671875, "loss": 0.0311, "objective": 0.0291235763579607, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5166666507720947, "regularize": 0.029122162610292435, "step": 585 }, { "dpo_loss": 0.6879535913467407, "epoch": 0.5574364223289505, "grad_norm": 70.97177434877489, "learning_rate": 9.058619561473306e-08, "logits": -2.1359400749206543, "logps": -91.66080474853516, "loss": 0.0309, "objective": 0.0273025743663311, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5166666507720947, "regularize": 0.02730202116072178, "step": 590 }, { "dpo_loss": 0.69169682264328, "epoch": 0.5621604598063145, "grad_norm": 66.67475961321956, "learning_rate": 9.034390067235538e-08, "logits": -2.122257947921753, "logps": -93.28813934326172, "loss": 0.0292, "objective": 0.03189357370138168, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.03189240023493767, "step": 595 }, { "dpo_loss": 0.692051351070404, "epoch": 0.5668844972836784, "grad_norm": 66.82237885750858, "learning_rate": 9.009885985165465e-08, "logits": -2.1968979835510254, "logps": -91.0505599975586, "loss": 0.0311, "objective": 0.027313487604260445, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5166666507720947, "regularize": 0.027312906458973885, "step": 600 }, { "epoch": 0.5668844972836784, "eval_dpo_loss": 0.6925215721130371, "eval_logits": -1.9967907667160034, "eval_logps": -98.12234497070312, "eval_loss": 0.014187943190336227, "eval_objective": 0.014395096339285374, "eval_ranking_idealized": 0.6022099256515503, "eval_ranking_idealized_expo": 0.5207182168960571, "eval_ranking_simple": 0.5186464190483093, "eval_regularize": 0.014393283054232597, "eval_runtime": 445.7504, "eval_samples_per_second": 12.989, "eval_steps_per_second": 3.248, "step": 600 }, { "dpo_loss": 0.6929703950881958, "epoch": 0.5716085347610425, "grad_norm": 64.74275314805836, "learning_rate": 8.985108983054912e-08, "logits": -2.0810482501983643, "logps": -92.1054916381836, "loss": 0.0343, "objective": 0.03585705906152725, "ranking_idealized": 0.5, "ranking_idealized_expo": 0.4166666567325592, "ranking_simple": 0.4166666567325592, "regularize": 0.03585506230592728, "step": 605 }, { "dpo_loss": 0.6893709897994995, "epoch": 0.5763325722384064, "grad_norm": 63.69006739310455, "learning_rate": 8.960060747271137e-08, "logits": -2.1485848426818848, "logps": -92.45893096923828, "loss": 0.0292, "objective": 0.03595684841275215, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.4833333194255829, "regularize": 0.03595583513379097, "step": 610 }, { "dpo_loss": 0.6894236207008362, "epoch": 0.5810566097157704, "grad_norm": 63.68171123239859, "learning_rate": 8.934742982642041e-08, "logits": -2.2213053703308105, "logps": -92.00927734375, "loss": 0.0289, "objective": 0.030653396621346474, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.4333333373069763, "ranking_simple": 0.44999998807907104, "regularize": 0.030652187764644623, "step": 615 }, { "dpo_loss": 0.6907939314842224, "epoch": 0.5857806471931344, "grad_norm": 64.38726305381529, "learning_rate": 8.90915741234015e-08, "logits": -2.22101092338562, "logps": -93.67798614501953, "loss": 0.0293, "objective": 0.028055744245648384, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.5666666626930237, "regularize": 0.028055019676685333, "step": 620 }, { "dpo_loss": 0.6883565783500671, "epoch": 0.5905046846704984, "grad_norm": 69.11436679749976, "learning_rate": 8.883305777765317e-08, "logits": -2.095867395401001, "logps": -95.01261138916016, "loss": 0.0311, "objective": 0.033847175538539886, "ranking_idealized": 0.5166666507720947, "ranking_idealized_expo": 0.4333333373069763, "ranking_simple": 0.4333333373069763, "regularize": 0.03384659066796303, "step": 625 }, { "dpo_loss": 0.6883829236030579, "epoch": 0.5952287221478624, "grad_norm": 74.04617527963917, "learning_rate": 8.857189838426216e-08, "logits": -2.183093547821045, "logps": -92.19212341308594, "loss": 0.0332, "objective": 0.03131110966205597, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.5666666626930237, "regularize": 0.03130975365638733, "step": 630 }, { "dpo_loss": 0.6901324987411499, "epoch": 0.5999527596252263, "grad_norm": 63.254018805089515, "learning_rate": 8.83081137182057e-08, "logits": -2.137653112411499, "logps": -92.25005340576172, "loss": 0.028, "objective": 0.02667616680264473, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.550000011920929, "regularize": 0.026675010100007057, "step": 635 }, { "dpo_loss": 0.6854274868965149, "epoch": 0.6046767971025904, "grad_norm": 67.682861674831, "learning_rate": 8.804172173314183e-08, "logits": -2.1525957584381104, "logps": -96.51889038085938, "loss": 0.0305, "objective": 0.026471592485904694, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.46666666865348816, "regularize": 0.026470640674233437, "step": 640 }, { "dpo_loss": 0.6916467547416687, "epoch": 0.6094008345799543, "grad_norm": 74.11851413570284, "learning_rate": 8.777274056018745e-08, "logits": -2.0791733264923096, "logps": -90.76611328125, "loss": 0.0275, "objective": 0.025933992117643356, "ranking_idealized": 0.5333333611488342, "ranking_idealized_expo": 0.4333333373069763, "ranking_simple": 0.4333333373069763, "regularize": 0.025932662189006805, "step": 645 }, { "dpo_loss": 0.6861349940299988, "epoch": 0.6141248720573184, "grad_norm": 71.7083018593228, "learning_rate": 8.750118850668412e-08, "logits": -2.0774688720703125, "logps": -91.57192993164062, "loss": 0.0333, "objective": 0.03247459605336189, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5, "regularize": 0.03247232735157013, "step": 650 }, { "epoch": 0.6141248720573184, "eval_dpo_loss": 0.6926119327545166, "eval_logits": -1.993467926979065, "eval_logps": -98.69168853759766, "eval_loss": 0.014501783065497875, "eval_objective": 0.014641453512012959, "eval_ranking_idealized": 0.6022099256515503, "eval_ranking_idealized_expo": 0.5207182168960571, "eval_ranking_simple": 0.5179557800292969, "eval_regularize": 0.014639632776379585, "eval_runtime": 445.8707, "eval_samples_per_second": 12.986, "eval_steps_per_second": 3.248, "step": 650 }, { "dpo_loss": 0.6902641654014587, "epoch": 0.6188489095346823, "grad_norm": 58.31607902985929, "learning_rate": 8.722708405495222e-08, "logits": -2.2487266063690186, "logps": -89.10828399658203, "loss": 0.0289, "objective": 0.02854420617222786, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.44999998807907104, "ranking_simple": 0.4333333373069763, "regularize": 0.028543464839458466, "step": 655 }, { "dpo_loss": 0.6865178942680359, "epoch": 0.6235729470120462, "grad_norm": 65.43564652911088, "learning_rate": 8.695044586103295e-08, "logits": -2.105522394180298, "logps": -94.11585998535156, "loss": 0.0288, "objective": 0.027172502130270004, "ranking_idealized": 0.4333333373069763, "ranking_idealized_expo": 0.4000000059604645, "ranking_simple": 0.4000000059604645, "regularize": 0.027170367538928986, "step": 660 }, { "dpo_loss": 0.6929230093955994, "epoch": 0.6282969844894103, "grad_norm": 75.08767896831156, "learning_rate": 8.667129275341853e-08, "logits": -2.261946201324463, "logps": -90.70641326904297, "loss": 0.036, "objective": 0.03217438980937004, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.550000011920929, "regularize": 0.03217388316988945, "step": 665 }, { "dpo_loss": 0.6909863948822021, "epoch": 0.6330210219667742, "grad_norm": 61.63435170443301, "learning_rate": 8.638964373177073e-08, "logits": -2.0806498527526855, "logps": -93.46875762939453, "loss": 0.0283, "objective": 0.03122856095433235, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.550000011920929, "regularize": 0.03122722916305065, "step": 670 }, { "dpo_loss": 0.6940844058990479, "epoch": 0.6377450594441383, "grad_norm": 65.12038155568429, "learning_rate": 8.610551796562768e-08, "logits": -2.2103471755981445, "logps": -92.73240661621094, "loss": 0.0319, "objective": 0.03719858080148697, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.03719812259078026, "step": 675 }, { "dpo_loss": 0.6896507740020752, "epoch": 0.6424690969215022, "grad_norm": 65.07489196501477, "learning_rate": 8.581893479309924e-08, "logits": -2.2053842544555664, "logps": -93.24919128417969, "loss": 0.0265, "objective": 0.023518383502960205, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.4833333194255829, "regularize": 0.023516135290265083, "step": 680 }, { "dpo_loss": 0.6927950382232666, "epoch": 0.6471931343988663, "grad_norm": 70.32780445036671, "learning_rate": 8.552991371955072e-08, "logits": -2.296104907989502, "logps": -92.59764099121094, "loss": 0.0318, "objective": 0.034257274121046066, "ranking_idealized": 0.6833333373069763, "ranking_idealized_expo": 0.6166666746139526, "ranking_simple": 0.6166666746139526, "regularize": 0.03425610437989235, "step": 685 }, { "dpo_loss": 0.692520797252655, "epoch": 0.6519171718762302, "grad_norm": 70.14652096802335, "learning_rate": 8.523847441627536e-08, "logits": -2.193286657333374, "logps": -94.4785385131836, "loss": 0.0326, "objective": 0.03835910186171532, "ranking_idealized": 0.5333333611488342, "ranking_idealized_expo": 0.4166666567325592, "ranking_simple": 0.4000000059604645, "regularize": 0.03835882246494293, "step": 690 }, { "dpo_loss": 0.6892996430397034, "epoch": 0.6566412093535942, "grad_norm": 73.28164754238634, "learning_rate": 8.494463671915546e-08, "logits": -2.1629860401153564, "logps": -93.7652359008789, "loss": 0.0265, "objective": 0.026581525802612305, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.4166666567325592, "ranking_simple": 0.4000000059604645, "regularize": 0.026580767706036568, "step": 695 }, { "dpo_loss": 0.6907955408096313, "epoch": 0.6613652468309582, "grad_norm": 66.01685046319234, "learning_rate": 8.464842062731234e-08, "logits": -2.2634246349334717, "logps": -91.12454986572266, "loss": 0.028, "objective": 0.028724508360028267, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5166666507720947, "regularize": 0.028722405433654785, "step": 700 }, { "epoch": 0.6613652468309582, "eval_dpo_loss": 0.6929543018341064, "eval_logits": -1.9953092336654663, "eval_logps": -98.67767333984375, "eval_loss": 0.013822407461702824, "eval_objective": 0.014025083743035793, "eval_ranking_idealized": 0.6022099256515503, "eval_ranking_idealized_expo": 0.5207182168960571, "eval_ranking_simple": 0.519336998462677, "eval_regularize": 0.01402334589511156, "eval_runtime": 446.1644, "eval_samples_per_second": 12.977, "eval_steps_per_second": 3.245, "step": 700 }, { "dpo_loss": 0.6904506683349609, "epoch": 0.6660892843083221, "grad_norm": 64.45584344371969, "learning_rate": 8.434984630174508e-08, "logits": -2.223440408706665, "logps": -94.05587005615234, "loss": 0.0302, "objective": 0.029637468978762627, "ranking_idealized": 0.6833333373069763, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.5666666626930237, "regularize": 0.029636209830641747, "step": 705 }, { "dpo_loss": 0.6879330277442932, "epoch": 0.6708133217856862, "grad_norm": 67.77900053910153, "learning_rate": 8.404893406395842e-08, "logits": -2.1772301197052, "logps": -93.94538879394531, "loss": 0.0323, "objective": 0.030688025057315826, "ranking_idealized": 0.5166666507720947, "ranking_idealized_expo": 0.4166666567325592, "ranking_simple": 0.4166666567325592, "regularize": 0.030687240883708, "step": 710 }, { "dpo_loss": 0.6863754987716675, "epoch": 0.6755373592630501, "grad_norm": 78.62750388933773, "learning_rate": 8.37457043945796e-08, "logits": -2.1862614154815674, "logps": -88.71346282958984, "loss": 0.0319, "objective": 0.034725725650787354, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.4833333194255829, "regularize": 0.03472534194588661, "step": 715 }, { "dpo_loss": 0.6946022510528564, "epoch": 0.6802613967404142, "grad_norm": 63.01104309220956, "learning_rate": 8.344017793196442e-08, "logits": -2.1920392513275146, "logps": -90.14446258544922, "loss": 0.0265, "objective": 0.025683369487524033, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.5666666626930237, "regularize": 0.025682510808110237, "step": 720 }, { "dpo_loss": 0.6887207627296448, "epoch": 0.6849854342177781, "grad_norm": 75.04026894879733, "learning_rate": 8.313237547079252e-08, "logits": -2.10304594039917, "logps": -90.62553405761719, "loss": 0.0292, "objective": 0.029727067798376083, "ranking_idealized": 0.6833333373069763, "ranking_idealized_expo": 0.6166666746139526, "ranking_simple": 0.6166666746139526, "regularize": 0.0297266636043787, "step": 725 }, { "dpo_loss": 0.6926788687705994, "epoch": 0.6897094716951421, "grad_norm": 67.22966096550593, "learning_rate": 8.282231796065213e-08, "logits": -2.1637871265411377, "logps": -91.91923522949219, "loss": 0.0265, "objective": 0.02672416716814041, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.5833333134651184, "regularize": 0.026723742485046387, "step": 730 }, { "dpo_loss": 0.688262403011322, "epoch": 0.6944335091725061, "grad_norm": 64.56167756628024, "learning_rate": 8.251002650461411e-08, "logits": -2.1801397800445557, "logps": -93.63780212402344, "loss": 0.0294, "objective": 0.029570966958999634, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.4833333194255829, "regularize": 0.029570437967777252, "step": 735 }, { "dpo_loss": 0.6889380216598511, "epoch": 0.6991575466498701, "grad_norm": 76.20522666744934, "learning_rate": 8.219552235779577e-08, "logits": -2.1762733459472656, "logps": -93.22509765625, "loss": 0.0341, "objective": 0.03592396527528763, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.5833333134651184, "regularize": 0.035923395305871964, "step": 740 }, { "dpo_loss": 0.6904739141464233, "epoch": 0.7038815841272341, "grad_norm": 61.170974083082186, "learning_rate": 8.187882692591406e-08, "logits": -2.148138999938965, "logps": -91.92343139648438, "loss": 0.0298, "objective": 0.027687864378094673, "ranking_idealized": 0.5, "ranking_idealized_expo": 0.4333333373069763, "ranking_simple": 0.4166666567325592, "regularize": 0.027687139809131622, "step": 745 }, { "dpo_loss": 0.6924771666526794, "epoch": 0.7086056216045981, "grad_norm": 86.99376248944333, "learning_rate": 8.155996176382873e-08, "logits": -2.2314558029174805, "logps": -92.25162506103516, "loss": 0.0319, "objective": 0.033008575439453125, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.46666666865348816, "ranking_simple": 0.4833333194255829, "regularize": 0.03300632908940315, "step": 750 }, { "epoch": 0.7086056216045981, "eval_dpo_loss": 0.6925805807113647, "eval_logits": -1.9951562881469727, "eval_logps": -98.77120208740234, "eval_loss": 0.014676159247756004, "eval_objective": 0.014523538760840893, "eval_ranking_idealized": 0.6022099256515503, "eval_ranking_idealized_expo": 0.5207182168960571, "eval_ranking_simple": 0.5179557800292969, "eval_regularize": 0.014521739445626736, "eval_runtime": 446.8328, "eval_samples_per_second": 12.958, "eval_steps_per_second": 3.241, "step": 750 }, { "dpo_loss": 0.6896133422851562, "epoch": 0.713329659081962, "grad_norm": 65.62950261910362, "learning_rate": 8.123894857407532e-08, "logits": -2.175105571746826, "logps": -92.83119201660156, "loss": 0.0297, "objective": 0.02945883385837078, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.4833333194255829, "regularize": 0.029456783086061478, "step": 755 }, { "dpo_loss": 0.6876399517059326, "epoch": 0.718053696559326, "grad_norm": 80.69728652803693, "learning_rate": 8.091580920538789e-08, "logits": -2.2073442935943604, "logps": -90.69680786132812, "loss": 0.0284, "objective": 0.029233213514089584, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.4333333373069763, "ranking_simple": 0.4333333373069763, "regularize": 0.029232406988739967, "step": 760 }, { "dpo_loss": 0.6934041380882263, "epoch": 0.72277773403669, "grad_norm": 59.09750353750027, "learning_rate": 8.059056565121216e-08, "logits": -2.2103536128997803, "logps": -91.05927276611328, "loss": 0.0275, "objective": 0.026471644639968872, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.38333332538604736, "ranking_simple": 0.38333332538604736, "regularize": 0.02647002600133419, "step": 765 }, { "dpo_loss": 0.6914010643959045, "epoch": 0.727501771514054, "grad_norm": 80.2131293813357, "learning_rate": 8.026324004820844e-08, "logits": -2.1993424892425537, "logps": -91.33180236816406, "loss": 0.0329, "objective": 0.03441242873668671, "ranking_idealized": 0.5333333611488342, "ranking_idealized_expo": 0.44999998807907104, "ranking_simple": 0.44999998807907104, "regularize": 0.03441200777888298, "step": 770 }, { "dpo_loss": 0.693133533000946, "epoch": 0.732225808991418, "grad_norm": 65.96085059361222, "learning_rate": 7.993385467474502e-08, "logits": -2.2453505992889404, "logps": -94.63382720947266, "loss": 0.0376, "objective": 0.032590463757514954, "ranking_idealized": 0.5333333611488342, "ranking_idealized_expo": 0.46666666865348816, "ranking_simple": 0.4833333194255829, "regularize": 0.032589759677648544, "step": 775 }, { "dpo_loss": 0.6881453394889832, "epoch": 0.736949846468782, "grad_norm": 66.36692143180971, "learning_rate": 7.960243194938191e-08, "logits": -2.1516549587249756, "logps": -94.29581451416016, "loss": 0.0322, "objective": 0.030854353681206703, "ranking_idealized": 0.5333333611488342, "ranking_idealized_expo": 0.44999998807907104, "ranking_simple": 0.44999998807907104, "regularize": 0.030853325501084328, "step": 780 }, { "dpo_loss": 0.6893908381462097, "epoch": 0.741673883946146, "grad_norm": 73.24038753014604, "learning_rate": 7.926899442934488e-08, "logits": -2.1456098556518555, "logps": -93.58820343017578, "loss": 0.0301, "objective": 0.030529705807566643, "ranking_idealized": 0.6666666865348816, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.5833333134651184, "regularize": 0.030527813360095024, "step": 785 }, { "dpo_loss": 0.6873824000358582, "epoch": 0.7463979214235099, "grad_norm": 61.386663619909456, "learning_rate": 7.893356480899029e-08, "logits": -2.202815055847168, "logps": -89.73310089111328, "loss": 0.0284, "objective": 0.026303457096219063, "ranking_idealized": 0.7166666388511658, "ranking_idealized_expo": 0.6333333253860474, "ranking_simple": 0.6333333253860474, "regularize": 0.02630232647061348, "step": 790 }, { "dpo_loss": 0.6941797733306885, "epoch": 0.751121958900874, "grad_norm": 74.12319085244714, "learning_rate": 7.85961659182604e-08, "logits": -2.2534494400024414, "logps": -92.2616195678711, "loss": 0.0317, "objective": 0.028791796416044235, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.6166666746139526, "ranking_simple": 0.6166666746139526, "regularize": 0.028791317716240883, "step": 795 }, { "dpo_loss": 0.6879761815071106, "epoch": 0.7558459963782379, "grad_norm": 66.76187019896828, "learning_rate": 7.825682072112959e-08, "logits": -2.152491807937622, "logps": -90.43921661376953, "loss": 0.0297, "objective": 0.031770989298820496, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.5666666626930237, "regularize": 0.031770527362823486, "step": 800 }, { "epoch": 0.7558459963782379, "eval_dpo_loss": 0.6929048299789429, "eval_logits": -1.9949605464935303, "eval_logps": -98.13481903076172, "eval_loss": 0.015697013586759567, "eval_objective": 0.016285618767142296, "eval_ranking_idealized": 0.6022099256515503, "eval_ranking_idealized_expo": 0.5207182168960571, "eval_ranking_simple": 0.5186464190483093, "eval_regularize": 0.01628427766263485, "eval_runtime": 446.756, "eval_samples_per_second": 12.96, "eval_steps_per_second": 3.241, "step": 800 }, { "dpo_loss": 0.6914661526679993, "epoch": 0.760570033855602, "grad_norm": 61.85563229584601, "learning_rate": 7.79155523140413e-08, "logits": -2.1741960048675537, "logps": -94.34522247314453, "loss": 0.0341, "objective": 0.03409460559487343, "ranking_idealized": 0.46666666865348816, "ranking_idealized_expo": 0.38333332538604736, "ranking_simple": 0.38333332538604736, "regularize": 0.03409397229552269, "step": 805 }, { "dpo_loss": 0.6910237669944763, "epoch": 0.7652940713329659, "grad_norm": 66.02922525617878, "learning_rate": 7.757238392433613e-08, "logits": -2.218034267425537, "logps": -91.0445327758789, "loss": 0.0309, "objective": 0.02644220180809498, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.550000011920929, "regularize": 0.026441721245646477, "step": 810 }, { "dpo_loss": 0.6903732419013977, "epoch": 0.77001810881033, "grad_norm": 71.25069648841824, "learning_rate": 7.722733890867088e-08, "logits": -2.13299298286438, "logps": -94.09717559814453, "loss": 0.0306, "objective": 0.027522355318069458, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5166666507720947, "regularize": 0.027521483600139618, "step": 815 }, { "dpo_loss": 0.692348062992096, "epoch": 0.7747421462876939, "grad_norm": 62.71232207694529, "learning_rate": 7.688044075142886e-08, "logits": -2.2638330459594727, "logps": -89.2739486694336, "loss": 0.0265, "objective": 0.024408848956227303, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.024408036842942238, "step": 820 }, { "dpo_loss": 0.6927106976509094, "epoch": 0.7794661837650578, "grad_norm": 68.11094417791882, "learning_rate": 7.653171306312161e-08, "logits": -2.155310869216919, "logps": -92.2811508178711, "loss": 0.0314, "objective": 0.029093213379383087, "ranking_idealized": 0.5333333611488342, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.4833333194255829, "regularize": 0.029092345386743546, "step": 825 }, { "dpo_loss": 0.6911001801490784, "epoch": 0.7841902212424219, "grad_norm": 66.75059983982347, "learning_rate": 7.618117957878178e-08, "logits": -2.236713409423828, "logps": -93.50963592529297, "loss": 0.0363, "objective": 0.033676620572805405, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.46666666865348816, "ranking_simple": 0.4833333194255829, "regularize": 0.03367554768919945, "step": 830 }, { "dpo_loss": 0.6890848278999329, "epoch": 0.7889142587197858, "grad_norm": 59.842310291910785, "learning_rate": 7.582886415634773e-08, "logits": -2.1434099674224854, "logps": -89.62670135498047, "loss": 0.0261, "objective": 0.032363876700401306, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.5833333134651184, "regularize": 0.032363247126340866, "step": 835 }, { "dpo_loss": 0.6889583468437195, "epoch": 0.7936382961971499, "grad_norm": 60.528801427412084, "learning_rate": 7.547479077503975e-08, "logits": -2.0931692123413086, "logps": -90.27711486816406, "loss": 0.0286, "objective": 0.032331857830286026, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.550000011920929, "regularize": 0.03233127295970917, "step": 840 }, { "dpo_loss": 0.6943262815475464, "epoch": 0.7983623336745138, "grad_norm": 66.0119748602127, "learning_rate": 7.511898353372797e-08, "logits": -2.21136212348938, "logps": -91.64664459228516, "loss": 0.0279, "objective": 0.03553476184606552, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.5666666626930237, "regularize": 0.03553430363535881, "step": 845 }, { "dpo_loss": 0.6893811821937561, "epoch": 0.8030863711518778, "grad_norm": 64.38574308982342, "learning_rate": 7.476146664929213e-08, "logits": -2.2435154914855957, "logps": -92.36274719238281, "loss": 0.0286, "objective": 0.0326698012650013, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.44999998807907104, "ranking_simple": 0.46666666865348816, "regularize": 0.03266819566488266, "step": 850 }, { "epoch": 0.8030863711518778, "eval_dpo_loss": 0.6928204894065857, "eval_logits": -1.9953876733779907, "eval_logps": -98.59400939941406, "eval_loss": 0.012405806221067905, "eval_objective": 0.012502364814281464, "eval_ranking_idealized": 0.6022099256515503, "eval_ranking_idealized_expo": 0.5207182168960571, "eval_ranking_simple": 0.5172652006149292, "eval_regularize": 0.012500518001616001, "eval_runtime": 453.0739, "eval_samples_per_second": 12.779, "eval_steps_per_second": 3.196, "step": 850 }, { "dpo_loss": 0.6887614130973816, "epoch": 0.8078104086292418, "grad_norm": 68.55505260723746, "learning_rate": 7.440226445497333e-08, "logits": -2.201233386993408, "logps": -92.58832550048828, "loss": 0.0274, "objective": 0.026876337826251984, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5333333611488342, "regularize": 0.0268756952136755, "step": 855 }, { "dpo_loss": 0.6904102563858032, "epoch": 0.8125344461066057, "grad_norm": 71.78389198214526, "learning_rate": 7.404140139871796e-08, "logits": -2.231065273284912, "logps": -95.48096466064453, "loss": 0.0317, "objective": 0.03271006420254707, "ranking_idealized": 0.6833333373069763, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.550000011920929, "regularize": 0.032709524035453796, "step": 860 }, { "dpo_loss": 0.6954269409179688, "epoch": 0.8172584835839698, "grad_norm": 66.09070940831865, "learning_rate": 7.36789020415136e-08, "logits": -2.1372532844543457, "logps": -91.0411605834961, "loss": 0.027, "objective": 0.03049122728407383, "ranking_idealized": 0.7166666388511658, "ranking_idealized_expo": 0.6000000238418579, "ranking_simple": 0.6000000238418579, "regularize": 0.03049064427614212, "step": 865 }, { "dpo_loss": 0.6953790187835693, "epoch": 0.8219825210613337, "grad_norm": 61.58311436161328, "learning_rate": 7.331479105571739e-08, "logits": -2.1517558097839355, "logps": -88.9326171875, "loss": 0.0281, "objective": 0.02535523846745491, "ranking_idealized": 0.5, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.025354215875267982, "step": 870 }, { "dpo_loss": 0.6913577318191528, "epoch": 0.8267065585386978, "grad_norm": 61.45457019245544, "learning_rate": 7.294909322337688e-08, "logits": -2.0830719470977783, "logps": -95.3221664428711, "loss": 0.0267, "objective": 0.02792440913617611, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.6000000238418579, "ranking_simple": 0.5833333134651184, "regularize": 0.0279233455657959, "step": 875 }, { "dpo_loss": 0.6916779279708862, "epoch": 0.8314305960160617, "grad_norm": 69.76363049930384, "learning_rate": 7.258183343454319e-08, "logits": -2.276218891143799, "logps": -91.77556610107422, "loss": 0.029, "objective": 0.02677006646990776, "ranking_idealized": 0.699999988079071, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.5666666626930237, "regularize": 0.02676946483552456, "step": 880 }, { "dpo_loss": 0.6923359036445618, "epoch": 0.8361546334934257, "grad_norm": 65.0950944876437, "learning_rate": 7.221303668557696e-08, "logits": -2.1599981784820557, "logps": -90.72624969482422, "loss": 0.025, "objective": 0.026336384937167168, "ranking_idealized": 0.5166666507720947, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.026335686445236206, "step": 885 }, { "dpo_loss": 0.692168116569519, "epoch": 0.8408786709707897, "grad_norm": 63.609027029284135, "learning_rate": 7.184272807744725e-08, "logits": -2.1683857440948486, "logps": -92.93081665039062, "loss": 0.0278, "objective": 0.03211880847811699, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5166666507720947, "regularize": 0.03211786970496178, "step": 890 }, { "dpo_loss": 0.6949544548988342, "epoch": 0.8456027084481537, "grad_norm": 71.19347753450685, "learning_rate": 7.147093281402281e-08, "logits": -2.2566373348236084, "logps": -91.25569915771484, "loss": 0.0287, "objective": 0.022946473211050034, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.4833333194255829, "regularize": 0.022945420816540718, "step": 895 }, { "dpo_loss": 0.6891117095947266, "epoch": 0.8503267459255177, "grad_norm": 73.13390434400947, "learning_rate": 7.109767620035688e-08, "logits": -2.1637258529663086, "logps": -95.48709869384766, "loss": 0.0285, "objective": 0.030570391565561295, "ranking_idealized": 0.7166666388511658, "ranking_idealized_expo": 0.6499999761581421, "ranking_simple": 0.6499999761581421, "regularize": 0.030569853261113167, "step": 900 }, { "epoch": 0.8503267459255177, "eval_dpo_loss": 0.69291752576828, "eval_logits": -1.993115782737732, "eval_logps": -98.94220733642578, "eval_loss": 0.011713932268321514, "eval_objective": 0.011828156188130379, "eval_ranking_idealized": 0.6022099256515503, "eval_ranking_idealized_expo": 0.5207182168960571, "eval_ranking_simple": 0.5165745615959167, "eval_regularize": 0.01182604394853115, "eval_runtime": 445.945, "eval_samples_per_second": 12.984, "eval_steps_per_second": 3.247, "step": 900 }, { "dpo_loss": 0.6910974383354187, "epoch": 0.8550507834028817, "grad_norm": 72.76821991685014, "learning_rate": 7.072298364096485e-08, "logits": -2.094447374343872, "logps": -89.45642852783203, "loss": 0.0266, "objective": 0.027411019429564476, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.550000011920929, "regularize": 0.02741014026105404, "step": 905 }, { "dpo_loss": 0.6891763806343079, "epoch": 0.8597748208802457, "grad_norm": 71.26536161303537, "learning_rate": 7.034688063809511e-08, "logits": -2.1282496452331543, "logps": -91.80699157714844, "loss": 0.0298, "objective": 0.025737237185239792, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.5333333611488342, "regularize": 0.025736594572663307, "step": 910 }, { "dpo_loss": 0.6862377524375916, "epoch": 0.8644988583576096, "grad_norm": 69.04287293810181, "learning_rate": 6.996939278999337e-08, "logits": -2.152179479598999, "logps": -94.09297180175781, "loss": 0.0263, "objective": 0.027985723689198494, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.4833333194255829, "regularize": 0.02798387221992016, "step": 915 }, { "dpo_loss": 0.690563440322876, "epoch": 0.8692228958349736, "grad_norm": 60.9102612710031, "learning_rate": 6.959054578916042e-08, "logits": -2.1106715202331543, "logps": -90.7065658569336, "loss": 0.0281, "objective": 0.02792646363377571, "ranking_idealized": 0.6666666865348816, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.6000000238418579, "regularize": 0.02792549505829811, "step": 920 }, { "dpo_loss": 0.6911750435829163, "epoch": 0.8739469333123376, "grad_norm": 62.2660390306647, "learning_rate": 6.921036542060343e-08, "logits": -1.9987537860870361, "logps": -89.90222930908203, "loss": 0.0251, "objective": 0.01876661367714405, "ranking_idealized": 0.6833333373069763, "ranking_idealized_expo": 0.6333333253860474, "ranking_simple": 0.6333333253860474, "regularize": 0.018765147775411606, "step": 925 }, { "dpo_loss": 0.6897457242012024, "epoch": 0.8786709707897016, "grad_norm": 59.1678071537183, "learning_rate": 6.882887756008093e-08, "logits": -2.111668825149536, "logps": -87.85643768310547, "loss": 0.0257, "objective": 0.02605438232421875, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.550000011920929, "regularize": 0.026053914800286293, "step": 930 }, { "dpo_loss": 0.6957460045814514, "epoch": 0.8833950082670656, "grad_norm": 64.76262904725678, "learning_rate": 6.844610817234172e-08, "logits": -2.093857765197754, "logps": -92.8622055053711, "loss": 0.0273, "objective": 0.027011338621377945, "ranking_idealized": 0.5333333611488342, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.46666666865348816, "regularize": 0.027010958641767502, "step": 935 }, { "dpo_loss": 0.6905789971351624, "epoch": 0.8881190457444296, "grad_norm": 70.7130564090627, "learning_rate": 6.806208330935765e-08, "logits": -2.2473738193511963, "logps": -91.84986114501953, "loss": 0.0268, "objective": 0.027645627036690712, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.5666666626930237, "regularize": 0.02764386683702469, "step": 940 }, { "dpo_loss": 0.6940723061561584, "epoch": 0.8928430832217935, "grad_norm": 60.88887279226543, "learning_rate": 6.767682910855045e-08, "logits": -2.287950038909912, "logps": -89.53514862060547, "loss": 0.0261, "objective": 0.028092078864574432, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.46666666865348816, "ranking_simple": 0.46666666865348816, "regularize": 0.0280916728079319, "step": 945 }, { "dpo_loss": 0.694039523601532, "epoch": 0.8975671206991576, "grad_norm": 67.50050200668711, "learning_rate": 6.729037179101287e-08, "logits": -2.304736614227295, "logps": -93.9173812866211, "loss": 0.0248, "objective": 0.02201911062002182, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.46666666865348816, "ranking_simple": 0.4833333194255829, "regularize": 0.02201777510344982, "step": 950 }, { "epoch": 0.8975671206991576, "eval_dpo_loss": 0.6931836009025574, "eval_logits": -1.9902262687683105, "eval_logps": -98.64472198486328, "eval_loss": 0.015600171871483326, "eval_objective": 0.015454174019396305, "eval_ranking_idealized": 0.6022099256515503, "eval_ranking_idealized_expo": 0.5207182168960571, "eval_ranking_simple": 0.5172652006149292, "eval_regularize": 0.015452706255018711, "eval_runtime": 446.0966, "eval_samples_per_second": 12.979, "eval_steps_per_second": 3.246, "step": 950 }, { "dpo_loss": 0.6894887089729309, "epoch": 0.9022911581765215, "grad_norm": 62.475301521026935, "learning_rate": 6.690273765972383e-08, "logits": -2.1381261348724365, "logps": -90.50852966308594, "loss": 0.0247, "objective": 0.029864691197872162, "ranking_idealized": 0.7166666388511658, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.6000000238418579, "regularize": 0.02986370399594307, "step": 955 }, { "dpo_loss": 0.6904736161231995, "epoch": 0.9070151956538856, "grad_norm": 71.36008054276485, "learning_rate": 6.651395309775836e-08, "logits": -2.161102294921875, "logps": -94.71805572509766, "loss": 0.0273, "objective": 0.03055974654853344, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.5833333134651184, "regularize": 0.03055933117866516, "step": 960 }, { "dpo_loss": 0.6911565661430359, "epoch": 0.9117392331312495, "grad_norm": 75.9289905899972, "learning_rate": 6.612404456649187e-08, "logits": -2.174187660217285, "logps": -90.80412292480469, "loss": 0.0255, "objective": 0.02420150302350521, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5166666507720947, "regularize": 0.024200933054089546, "step": 965 }, { "dpo_loss": 0.6908090114593506, "epoch": 0.9164632706086135, "grad_norm": 66.41988248188461, "learning_rate": 6.573303860379914e-08, "logits": -2.258518695831299, "logps": -91.59303283691406, "loss": 0.0271, "objective": 0.02120455540716648, "ranking_idealized": 0.5, "ranking_idealized_expo": 0.44999998807907104, "ranking_simple": 0.4333333373069763, "regularize": 0.02120377868413925, "step": 970 }, { "dpo_loss": 0.686578631401062, "epoch": 0.9211873080859775, "grad_norm": 68.57226169783617, "learning_rate": 6.534096182224808e-08, "logits": -2.0389044284820557, "logps": -94.76436614990234, "loss": 0.0299, "objective": 0.029232459142804146, "ranking_idealized": 0.4833333194255829, "ranking_idealized_expo": 0.4000000059604645, "ranking_simple": 0.4000000059604645, "regularize": 0.029231999069452286, "step": 975 }, { "dpo_loss": 0.6919539570808411, "epoch": 0.9259113455633414, "grad_norm": 69.41679502492849, "learning_rate": 6.494784090728851e-08, "logits": -2.2500946521759033, "logps": -96.09117126464844, "loss": 0.0262, "objective": 0.026704249903559685, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5333333611488342, "regularize": 0.0267037320882082, "step": 980 }, { "dpo_loss": 0.6925919055938721, "epoch": 0.9306353830407055, "grad_norm": 64.18653487464074, "learning_rate": 6.455370261543578e-08, "logits": -2.1756606101989746, "logps": -93.87403106689453, "loss": 0.0251, "objective": 0.025608109310269356, "ranking_idealized": 0.6833333373069763, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5166666507720947, "regularize": 0.025606883689761162, "step": 985 }, { "dpo_loss": 0.6908634901046753, "epoch": 0.9353594205180694, "grad_norm": 61.06423988779827, "learning_rate": 6.415857377244979e-08, "logits": -2.1095356941223145, "logps": -88.83150482177734, "loss": 0.0251, "objective": 0.026077650487422943, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5333333611488342, "regularize": 0.02607729658484459, "step": 990 }, { "dpo_loss": 0.689622700214386, "epoch": 0.9400834579954335, "grad_norm": 68.1910690637634, "learning_rate": 6.376248127150908e-08, "logits": -2.150278329849243, "logps": -91.8506088256836, "loss": 0.0269, "objective": 0.025722531601786613, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.5666666626930237, "regularize": 0.025721782818436623, "step": 995 }, { "dpo_loss": 0.6903151273727417, "epoch": 0.9448074954727974, "grad_norm": 64.10384868193276, "learning_rate": 6.33654520713805e-08, "logits": -2.1098899841308594, "logps": -93.50405883789062, "loss": 0.0272, "objective": 0.02512853965163231, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5333333611488342, "regularize": 0.0251275934278965, "step": 1000 }, { "epoch": 0.9448074954727974, "eval_dpo_loss": 0.6931213736534119, "eval_logits": -1.9906424283981323, "eval_logps": -98.12418365478516, "eval_loss": 0.01257664430886507, "eval_objective": 0.012785565108060837, "eval_ranking_idealized": 0.6022099256515503, "eval_ranking_idealized_expo": 0.5207182168960571, "eval_ranking_simple": 0.5179557800292969, "eval_regularize": 0.012783819809556007, "eval_runtime": 444.658, "eval_samples_per_second": 13.021, "eval_steps_per_second": 3.256, "step": 1000 }, { "dpo_loss": 0.6847264170646667, "epoch": 0.9495315329501613, "grad_norm": 68.90168504337811, "learning_rate": 6.296751319458434e-08, "logits": -2.1259357929229736, "logps": -91.41114044189453, "loss": 0.0298, "objective": 0.03322311118245125, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.03322221338748932, "step": 1005 }, { "dpo_loss": 0.6920087933540344, "epoch": 0.9542555704275254, "grad_norm": 64.89339434605618, "learning_rate": 6.256869172555513e-08, "logits": -2.1444926261901855, "logps": -91.69261932373047, "loss": 0.0264, "objective": 0.02593044377863407, "ranking_idealized": 0.4833333194255829, "ranking_idealized_expo": 0.44999998807907104, "ranking_simple": 0.4333333373069763, "regularize": 0.025929344817996025, "step": 1010 }, { "dpo_loss": 0.6885382533073425, "epoch": 0.9589796079048893, "grad_norm": 71.3368687595913, "learning_rate": 6.216901480879819e-08, "logits": -2.0881664752960205, "logps": -90.8614501953125, "loss": 0.0236, "objective": 0.021669141948223114, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5166666507720947, "regularize": 0.021668575704097748, "step": 1015 }, { "dpo_loss": 0.6919686794281006, "epoch": 0.9637036453822534, "grad_norm": 68.26105061311512, "learning_rate": 6.176850964704212e-08, "logits": -2.129828453063965, "logps": -95.18238830566406, "loss": 0.0226, "objective": 0.025728456676006317, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.5666666626930237, "regularize": 0.02572786808013916, "step": 1020 }, { "dpo_loss": 0.6866704225540161, "epoch": 0.9684276828596173, "grad_norm": 61.40903124064667, "learning_rate": 6.136720349938743e-08, "logits": -2.2576215267181396, "logps": -94.10470581054688, "loss": 0.0257, "objective": 0.022323768585920334, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5166666507720947, "regularize": 0.022323111072182655, "step": 1025 }, { "dpo_loss": 0.69089674949646, "epoch": 0.9731517203369814, "grad_norm": 63.691297756435006, "learning_rate": 6.096512367945113e-08, "logits": -2.113276243209839, "logps": -90.31819152832031, "loss": 0.0244, "objective": 0.022346744313836098, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5333333611488342, "regularize": 0.02234587073326111, "step": 1030 }, { "dpo_loss": 0.6910406351089478, "epoch": 0.9778757578143453, "grad_norm": 65.44632044043543, "learning_rate": 6.056229755350772e-08, "logits": -2.147958517074585, "logps": -93.92337036132812, "loss": 0.0234, "objective": 0.02298605814576149, "ranking_idealized": 0.6833333373069763, "ranking_idealized_expo": 0.6166666746139526, "ranking_simple": 0.6166666746139526, "regularize": 0.022985396906733513, "step": 1035 }, { "dpo_loss": 0.68792724609375, "epoch": 0.9825997952917093, "grad_norm": 76.3390109961412, "learning_rate": 6.01587525386267e-08, "logits": -2.1341538429260254, "logps": -90.12808227539062, "loss": 0.0253, "objective": 0.02918338030576706, "ranking_idealized": 0.5333333611488342, "ranking_idealized_expo": 0.46666666865348816, "ranking_simple": 0.46666666865348816, "regularize": 0.029182856902480125, "step": 1040 }, { "dpo_loss": 0.6883952021598816, "epoch": 0.9873238327690733, "grad_norm": 62.771275862582634, "learning_rate": 5.975451610080642e-08, "logits": -2.1016061305999756, "logps": -92.14013671875, "loss": 0.0243, "objective": 0.02377927675843239, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.4833333194255829, "regularize": 0.023778444156050682, "step": 1045 }, { "dpo_loss": 0.6908401846885681, "epoch": 0.9920478702464373, "grad_norm": 61.00213004445293, "learning_rate": 5.9349615753104655e-08, "logits": -2.1279587745666504, "logps": -97.24657440185547, "loss": 0.0215, "objective": 0.021991008892655373, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.5666666626930237, "regularize": 0.02199002355337143, "step": 1050 }, { "epoch": 0.9920478702464373, "eval_dpo_loss": 0.6927458047866821, "eval_logits": -1.991100788116455, "eval_logps": -98.33568572998047, "eval_loss": 0.01325704250484705, "eval_objective": 0.013452271930873394, "eval_ranking_idealized": 0.6022099256515503, "eval_ranking_idealized_expo": 0.5207182168960571, "eval_ranking_simple": 0.5179557800292969, "eval_regularize": 0.01345061045140028, "eval_runtime": 451.6429, "eval_samples_per_second": 12.82, "eval_steps_per_second": 3.206, "step": 1050 }, { "dpo_loss": 0.6905434131622314, "epoch": 0.9967719077238013, "grad_norm": 69.09205896680123, "learning_rate": 5.894407905376616e-08, "logits": -2.2125723361968994, "logps": -90.43359375, "loss": 0.0256, "objective": 0.022856025025248528, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.46666666865348816, "ranking_simple": 0.46666666865348816, "regularize": 0.022855514660477638, "step": 1055 }, { "dpo_loss": 0.6896554231643677, "epoch": 1.0014959452011654, "grad_norm": 71.18743706384133, "learning_rate": 5.853793360434687e-08, "logits": -2.095319986343384, "logps": -92.04082489013672, "loss": 0.0261, "objective": 0.025890907272696495, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.550000011920929, "regularize": 0.025890160351991653, "step": 1060 }, { "dpo_loss": 0.6926552057266235, "epoch": 1.0062199826785292, "grad_norm": 68.03463485699294, "learning_rate": 5.813120704783539e-08, "logits": -2.1974759101867676, "logps": -91.99174499511719, "loss": 0.0258, "objective": 0.026935292407870293, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.5666666626930237, "regularize": 0.026934677734971046, "step": 1065 }, { "dpo_loss": 0.6922659277915955, "epoch": 1.0109440201558932, "grad_norm": 84.22043191701646, "learning_rate": 5.772392706677148e-08, "logits": -2.0366082191467285, "logps": -93.32905578613281, "loss": 0.0257, "objective": 0.026561260223388672, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.5833333134651184, "regularize": 0.026560688391327858, "step": 1070 }, { "dpo_loss": 0.6917497515678406, "epoch": 1.0156680576332573, "grad_norm": 73.42469612086076, "learning_rate": 5.7316121381361984e-08, "logits": -2.204793691635132, "logps": -94.83844757080078, "loss": 0.0269, "objective": 0.029431190341711044, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.4333333373069763, "ranking_simple": 0.46666666865348816, "regularize": 0.029430601745843887, "step": 1075 }, { "dpo_loss": 0.683849573135376, "epoch": 1.0203920951106211, "grad_norm": 69.22367476602035, "learning_rate": 5.690781774759411e-08, "logits": -2.2117373943328857, "logps": -94.56703186035156, "loss": 0.0276, "objective": 0.030149787664413452, "ranking_idealized": 0.5333333611488342, "ranking_idealized_expo": 0.44999998807907104, "ranking_simple": 0.46666666865348816, "regularize": 0.030148986726999283, "step": 1080 }, { "dpo_loss": 0.6918656229972839, "epoch": 1.0251161325879852, "grad_norm": 67.88816058800367, "learning_rate": 5.649904395534636e-08, "logits": -2.1058478355407715, "logps": -94.81607055664062, "loss": 0.0249, "objective": 0.025079350918531418, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.4833333194255829, "regularize": 0.025078853592276573, "step": 1085 }, { "dpo_loss": 0.6931339502334595, "epoch": 1.0298401700653492, "grad_norm": 71.9727166820056, "learning_rate": 5.6089827826497026e-08, "logits": -2.2008562088012695, "logps": -93.15959930419922, "loss": 0.0233, "objective": 0.024377651512622833, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.4333333373069763, "ranking_simple": 0.44999998807907104, "regularize": 0.024377118796110153, "step": 1090 }, { "dpo_loss": 0.6928921341896057, "epoch": 1.0345642075427133, "grad_norm": 64.563119648666, "learning_rate": 5.568019721303068e-08, "logits": -2.146667957305908, "logps": -95.26697540283203, "loss": 0.0232, "objective": 0.022780917584896088, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5166666507720947, "regularize": 0.022780440747737885, "step": 1095 }, { "dpo_loss": 0.6914113759994507, "epoch": 1.039288245020077, "grad_norm": 70.34249089351219, "learning_rate": 5.527017999514239e-08, "logits": -2.1238293647766113, "logps": -90.81373596191406, "loss": 0.0242, "objective": 0.019299499690532684, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.019298112019896507, "step": 1100 }, { "epoch": 1.039288245020077, "eval_dpo_loss": 0.692744791507721, "eval_logits": -1.9881205558776855, "eval_logps": -98.5120849609375, "eval_loss": 0.012820076197385788, "eval_objective": 0.012685425579547882, "eval_ranking_idealized": 0.6022099256515503, "eval_ranking_idealized_expo": 0.5207182168960571, "eval_ranking_simple": 0.5179557800292969, "eval_regularize": 0.012683761306107044, "eval_runtime": 445.6772, "eval_samples_per_second": 12.991, "eval_steps_per_second": 3.249, "step": 1100 }, { "dpo_loss": 0.6895564198493958, "epoch": 1.0440122824974412, "grad_norm": 69.83976607143053, "learning_rate": 5.4859804079340266e-08, "logits": -2.158614158630371, "logps": -88.3459243774414, "loss": 0.0234, "objective": 0.024073513224720955, "ranking_idealized": 0.5333333611488342, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.4833333194255829, "regularize": 0.024072829633951187, "step": 1105 }, { "dpo_loss": 0.6888210773468018, "epoch": 1.0487363199748052, "grad_norm": 78.2561714772354, "learning_rate": 5.444909739654602e-08, "logits": -2.2234978675842285, "logps": -92.2721939086914, "loss": 0.025, "objective": 0.025087477639317513, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.4333333373069763, "ranking_simple": 0.44999998807907104, "regularize": 0.025086527690291405, "step": 1110 }, { "dpo_loss": 0.6934700608253479, "epoch": 1.053460357452169, "grad_norm": 67.69947794878017, "learning_rate": 5.4038087900193974e-08, "logits": -2.0514726638793945, "logps": -92.03214263916016, "loss": 0.0243, "objective": 0.020962979644536972, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.44999998807907104, "ranking_simple": 0.44999998807907104, "regularize": 0.020961163565516472, "step": 1115 }, { "dpo_loss": 0.6921118497848511, "epoch": 1.058184394929533, "grad_norm": 60.76759737021519, "learning_rate": 5.362680356432846e-08, "logits": -2.082772731781006, "logps": -91.51958465576172, "loss": 0.0226, "objective": 0.02336716279387474, "ranking_idealized": 0.5333333611488342, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.4833333194255829, "regularize": 0.023366322740912437, "step": 1120 }, { "dpo_loss": 0.6900876760482788, "epoch": 1.0629084324068971, "grad_norm": 89.61755393855444, "learning_rate": 5.321527238169992e-08, "logits": -2.137908935546875, "logps": -94.91239166259766, "loss": 0.0258, "objective": 0.0195107851177454, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.46666666865348816, "ranking_simple": 0.46666666865348816, "regularize": 0.01950863003730774, "step": 1125 }, { "dpo_loss": 0.6918936371803284, "epoch": 1.067632469884261, "grad_norm": 60.32020595039731, "learning_rate": 5.280352236185959e-08, "logits": -2.223163604736328, "logps": -94.0035629272461, "loss": 0.0198, "objective": 0.020925112068653107, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.550000011920929, "regularize": 0.020924491807818413, "step": 1130 }, { "dpo_loss": 0.6936992406845093, "epoch": 1.072356507361625, "grad_norm": 68.85146603912719, "learning_rate": 5.239158152925319e-08, "logits": -2.089085102081299, "logps": -89.82282257080078, "loss": 0.0232, "objective": 0.02565601095557213, "ranking_idealized": 0.5333333611488342, "ranking_idealized_expo": 0.46666666865348816, "ranking_simple": 0.46666666865348816, "regularize": 0.025654161348938942, "step": 1135 }, { "dpo_loss": 0.6891080737113953, "epoch": 1.077080544838989, "grad_norm": 63.283810030024625, "learning_rate": 5.197947792131348e-08, "logits": -2.201981782913208, "logps": -93.19425964355469, "loss": 0.0252, "objective": 0.02514718845486641, "ranking_idealized": 0.6666666865348816, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.5833333134651184, "regularize": 0.025146154686808586, "step": 1140 }, { "dpo_loss": 0.6902083158493042, "epoch": 1.0818045823163531, "grad_norm": 65.2698459996647, "learning_rate": 5.1567239586552e-08, "logits": -2.185304880142212, "logps": -90.79157257080078, "loss": 0.0219, "objective": 0.021113820374011993, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5166666507720947, "regularize": 0.021113011986017227, "step": 1145 }, { "dpo_loss": 0.6910417675971985, "epoch": 1.086528619793717, "grad_norm": 65.02233557080358, "learning_rate": 5.115489458265005e-08, "logits": -2.1189420223236084, "logps": -94.17777252197266, "loss": 0.0248, "objective": 0.023960812017321587, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5333333611488342, "regularize": 0.02395929954946041, "step": 1150 }, { "epoch": 1.086528619793717, "eval_dpo_loss": 0.6928678154945374, "eval_logits": -1.9900000095367432, "eval_logps": -98.37399291992188, "eval_loss": 0.01211391482502222, "eval_objective": 0.012414646334946156, "eval_ranking_idealized": 0.6022099256515503, "eval_ranking_idealized_expo": 0.5207182168960571, "eval_ranking_simple": 0.5179557800292969, "eval_regularize": 0.0124129019677639, "eval_runtime": 446.4802, "eval_samples_per_second": 12.968, "eval_steps_per_second": 3.243, "step": 1150 }, { "dpo_loss": 0.6903362274169922, "epoch": 1.091252657271081, "grad_norm": 65.0696209993928, "learning_rate": 5.0742470974549036e-08, "logits": -2.100759744644165, "logps": -90.93352508544922, "loss": 0.0233, "objective": 0.020316295325756073, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5333333611488342, "regularize": 0.020315730944275856, "step": 1155 }, { "dpo_loss": 0.6921348571777344, "epoch": 1.095976694748445, "grad_norm": 68.18545652779869, "learning_rate": 5.032999683254028e-08, "logits": -2.1389288902282715, "logps": -90.10498046875, "loss": 0.0247, "objective": 0.021737979725003242, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.5833333134651184, "regularize": 0.021737171337008476, "step": 1160 }, { "dpo_loss": 0.69456547498703, "epoch": 1.100700732225809, "grad_norm": 73.7471857905923, "learning_rate": 4.991750023035455e-08, "logits": -2.124562978744507, "logps": -90.93301391601562, "loss": 0.024, "objective": 0.02404080517590046, "ranking_idealized": 0.6666666865348816, "ranking_idealized_expo": 0.6000000238418579, "ranking_simple": 0.6000000238418579, "regularize": 0.02403992973268032, "step": 1165 }, { "dpo_loss": 0.6940571069717407, "epoch": 1.105424769703173, "grad_norm": 67.32632108670151, "learning_rate": 4.950500924325127e-08, "logits": -2.2072455883026123, "logps": -94.81490325927734, "loss": 0.0255, "objective": 0.026444217190146446, "ranking_idealized": 0.4833333194255829, "ranking_idealized_expo": 0.4166666567325592, "ranking_simple": 0.4333333373069763, "regularize": 0.026443663984537125, "step": 1170 }, { "dpo_loss": 0.692010223865509, "epoch": 1.110148807180537, "grad_norm": 60.40484735228916, "learning_rate": 4.909255194610773e-08, "logits": -2.1622235774993896, "logps": -90.50462341308594, "loss": 0.0217, "objective": 0.01786196231842041, "ranking_idealized": 0.5333333611488342, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.5, "regularize": 0.01785971410572529, "step": 1175 }, { "dpo_loss": 0.6931031942367554, "epoch": 1.114872844657901, "grad_norm": 59.38906623535502, "learning_rate": 4.8680156411508193e-08, "logits": -2.16975474357605, "logps": -89.3101806640625, "loss": 0.0243, "objective": 0.01995784044265747, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.44999998807907104, "ranking_simple": 0.44999998807907104, "regularize": 0.019954947754740715, "step": 1180 }, { "dpo_loss": 0.690836489200592, "epoch": 1.1195968821352649, "grad_norm": 68.89482324651908, "learning_rate": 4.826785070783326e-08, "logits": -2.058103084564209, "logps": -91.24579620361328, "loss": 0.0233, "objective": 0.022483140230178833, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.6166666746139526, "ranking_simple": 0.6166666746139526, "regularize": 0.02248191460967064, "step": 1185 }, { "dpo_loss": 0.6914324164390564, "epoch": 1.124320919612629, "grad_norm": 61.796191905639844, "learning_rate": 4.7855662897349464e-08, "logits": -2.1661124229431152, "logps": -91.51298522949219, "loss": 0.0226, "objective": 0.018483061343431473, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5166666507720947, "regularize": 0.018482128158211708, "step": 1190 }, { "dpo_loss": 0.6916797161102295, "epoch": 1.129044957089993, "grad_norm": 63.413323128276396, "learning_rate": 4.744362103429933e-08, "logits": -2.204550266265869, "logps": -90.19840240478516, "loss": 0.0244, "objective": 0.025325793772935867, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.6000000238418579, "ranking_simple": 0.6000000238418579, "regularize": 0.02532271295785904, "step": 1195 }, { "dpo_loss": 0.6876908540725708, "epoch": 1.1337689945673568, "grad_norm": 66.08019689679915, "learning_rate": 4.703175316299196e-08, "logits": -2.147653341293335, "logps": -94.6951904296875, "loss": 0.0238, "objective": 0.022918345406651497, "ranking_idealized": 0.699999988079071, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.5666666626930237, "regularize": 0.022917365655303, "step": 1200 }, { "epoch": 1.1337689945673568, "eval_dpo_loss": 0.6931039094924927, "eval_logits": -1.988110899925232, "eval_logps": -98.65231323242188, "eval_loss": 0.013076459057629108, "eval_objective": 0.013204570859670639, "eval_ranking_idealized": 0.6022099256515503, "eval_ranking_idealized_expo": 0.5207182168960571, "eval_ranking_simple": 0.5186464190483093, "eval_regularize": 0.013202749192714691, "eval_runtime": 446.1352, "eval_samples_per_second": 12.978, "eval_steps_per_second": 3.246, "step": 1200 }, { "dpo_loss": 0.6895859837532043, "epoch": 1.1384930320447209, "grad_norm": 59.82159099357435, "learning_rate": 4.662008731589424e-08, "logits": -2.2835893630981445, "logps": -93.41173553466797, "loss": 0.0238, "objective": 0.01953883096575737, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5166666507720947, "regularize": 0.019538111984729767, "step": 1205 }, { "dpo_loss": 0.6878269910812378, "epoch": 1.143217069522085, "grad_norm": 64.13908923913202, "learning_rate": 4.6208651511722916e-08, "logits": -2.107128381729126, "logps": -95.02501678466797, "loss": 0.0225, "objective": 0.02028297260403633, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.44999998807907104, "ranking_simple": 0.4833333194255829, "regularize": 0.020282411947846413, "step": 1210 }, { "dpo_loss": 0.6913109421730042, "epoch": 1.147941106999449, "grad_norm": 64.08267971583916, "learning_rate": 4.579747375353763e-08, "logits": -2.152080774307251, "logps": -93.3291244506836, "loss": 0.0225, "objective": 0.02415003441274166, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.4833333194255829, "regularize": 0.024148976430296898, "step": 1215 }, { "dpo_loss": 0.6908139586448669, "epoch": 1.1526651444768128, "grad_norm": 61.90111498957097, "learning_rate": 4.5386582026834904e-08, "logits": -2.2181687355041504, "logps": -91.48564147949219, "loss": 0.0211, "objective": 0.01728188991546631, "ranking_idealized": 0.5333333611488342, "ranking_idealized_expo": 0.44999998807907104, "ranking_simple": 0.44999998807907104, "regularize": 0.017280517145991325, "step": 1220 }, { "dpo_loss": 0.6892186999320984, "epoch": 1.1573891819541768, "grad_norm": 67.90586520467559, "learning_rate": 4.497600429764349e-08, "logits": -2.0878336429595947, "logps": -92.51049041748047, "loss": 0.023, "objective": 0.021265888586640358, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5, "regularize": 0.021264949813485146, "step": 1225 }, { "dpo_loss": 0.6902192831039429, "epoch": 1.162113219431541, "grad_norm": 69.62062658592635, "learning_rate": 4.456576851062089e-08, "logits": -2.1754400730133057, "logps": -89.9488296508789, "loss": 0.0202, "objective": 0.015848658978939056, "ranking_idealized": 0.6666666865348816, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.5666666626930237, "regularize": 0.015847818925976753, "step": 1230 }, { "dpo_loss": 0.6919657588005066, "epoch": 1.1668372569089047, "grad_norm": 76.5213820049961, "learning_rate": 4.4155902587151404e-08, "logits": -2.2707595825195312, "logps": -89.4555435180664, "loss": 0.0231, "objective": 0.02043619193136692, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5333333611488342, "regularize": 0.020434273406863213, "step": 1235 }, { "dpo_loss": 0.6887553930282593, "epoch": 1.1715612943862688, "grad_norm": 68.85342051503754, "learning_rate": 4.374643442344576e-08, "logits": -2.1689367294311523, "logps": -91.3507080078125, "loss": 0.022, "objective": 0.02212885580956936, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.5833333134651184, "regularize": 0.02212790958583355, "step": 1240 }, { "dpo_loss": 0.6907090544700623, "epoch": 1.1762853318636328, "grad_norm": 63.9895529963574, "learning_rate": 4.333739188864243e-08, "logits": -2.116743564605713, "logps": -89.35691833496094, "loss": 0.0231, "objective": 0.02183300256729126, "ranking_idealized": 0.5333333611488342, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.021832352504134178, "step": 1245 }, { "dpo_loss": 0.6873170733451843, "epoch": 1.1810093693409969, "grad_norm": 62.000344928337206, "learning_rate": 4.292880282291083e-08, "logits": -2.0605881214141846, "logps": -89.00260925292969, "loss": 0.0213, "objective": 0.016554510220885277, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.4833333194255829, "regularize": 0.016552967950701714, "step": 1250 }, { "epoch": 1.1810093693409969, "eval_dpo_loss": 0.6929011344909668, "eval_logits": -1.9892135858535767, "eval_logps": -98.38199615478516, "eval_loss": 0.011604293249547482, "eval_objective": 0.011785290203988552, "eval_ranking_idealized": 0.6022099256515503, "eval_ranking_idealized_expo": 0.5207182168960571, "eval_ranking_simple": 0.5186464190483093, "eval_regularize": 0.011783457361161709, "eval_runtime": 446.5262, "eval_samples_per_second": 12.967, "eval_steps_per_second": 3.243, "step": 1250 }, { "dpo_loss": 0.6883438229560852, "epoch": 1.1857334068183607, "grad_norm": 76.14193268278439, "learning_rate": 4.2520695035556444e-08, "logits": -2.238811731338501, "logps": -93.17744445800781, "loss": 0.0227, "objective": 0.02147439494729042, "ranking_idealized": 0.4166666567325592, "ranking_idealized_expo": 0.4000000059604645, "ranking_simple": 0.4000000059604645, "regularize": 0.021473314613103867, "step": 1255 }, { "dpo_loss": 0.6898403763771057, "epoch": 1.1904574442957248, "grad_norm": 72.14873570982253, "learning_rate": 4.211309630312812e-08, "logits": -2.186509847640991, "logps": -92.71757507324219, "loss": 0.0221, "objective": 0.024685295298695564, "ranking_idealized": 0.6666666865348816, "ranking_idealized_expo": 0.6166666746139526, "ranking_simple": 0.6000000238418579, "regularize": 0.02468358352780342, "step": 1260 }, { "dpo_loss": 0.6934939026832581, "epoch": 1.1951814817730888, "grad_norm": 76.43192783166353, "learning_rate": 4.1706034367527484e-08, "logits": -2.2296221256256104, "logps": -90.43429565429688, "loss": 0.0216, "objective": 0.017879430204629898, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.017877496778964996, "step": 1265 }, { "dpo_loss": 0.6907635927200317, "epoch": 1.1999055192504526, "grad_norm": 63.68697185614875, "learning_rate": 4.12995369341208e-08, "logits": -2.2198116779327393, "logps": -89.92237854003906, "loss": 0.0186, "objective": 0.019899163395166397, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.019898338243365288, "step": 1270 }, { "dpo_loss": 0.6885548233985901, "epoch": 1.2046295567278167, "grad_norm": 67.30508547665502, "learning_rate": 4.0893631669853315e-08, "logits": -2.2070553302764893, "logps": -91.00017547607422, "loss": 0.0213, "objective": 0.0249098539352417, "ranking_idealized": 0.6833333373069763, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.5833333134651184, "regularize": 0.024909144267439842, "step": 1275 }, { "dpo_loss": 0.6864418387413025, "epoch": 1.2093535942051807, "grad_norm": 58.37239845591427, "learning_rate": 4.048834620136618e-08, "logits": -2.157111406326294, "logps": -90.54441833496094, "loss": 0.0216, "objective": 0.024475712329149246, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.5833333134651184, "regularize": 0.02447471395134926, "step": 1280 }, { "dpo_loss": 0.6923628449440002, "epoch": 1.2140776316825446, "grad_norm": 62.390419253758, "learning_rate": 4.0083708113116125e-08, "logits": -2.20066499710083, "logps": -91.0829849243164, "loss": 0.0194, "objective": 0.017484158277511597, "ranking_idealized": 0.699999988079071, "ranking_idealized_expo": 0.6499999761581421, "ranking_simple": 0.6499999761581421, "regularize": 0.017483441159129143, "step": 1285 }, { "dpo_loss": 0.6917316317558289, "epoch": 1.2188016691599086, "grad_norm": 62.080679222088676, "learning_rate": 3.9679744945498026e-08, "logits": -2.0995683670043945, "logps": -89.5858154296875, "loss": 0.0215, "objective": 0.019274141639471054, "ranking_idealized": 0.5333333611488342, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.01927088387310505, "step": 1290 }, { "dpo_loss": 0.6907222867012024, "epoch": 1.2235257066372727, "grad_norm": 69.35505760627515, "learning_rate": 3.9276484192970427e-08, "logits": -2.0752005577087402, "logps": -88.9092788696289, "loss": 0.0202, "objective": 0.01790427789092064, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.5833333134651184, "regularize": 0.017903130501508713, "step": 1295 }, { "dpo_loss": 0.6888744831085205, "epoch": 1.2282497441146367, "grad_norm": 64.13387937854051, "learning_rate": 3.887395330218428e-08, "logits": -2.088010549545288, "logps": -93.63009643554688, "loss": 0.0213, "objective": 0.01880194991827011, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.5666666626930237, "regularize": 0.018800783902406693, "step": 1300 }, { "epoch": 1.2282497441146367, "eval_dpo_loss": 0.6930280923843384, "eval_logits": -1.9901354312896729, "eval_logps": -98.35194396972656, "eval_loss": 0.010079173371195793, "eval_objective": 0.010304316878318787, "eval_ranking_idealized": 0.6022099256515503, "eval_ranking_idealized_expo": 0.5207182168960571, "eval_ranking_simple": 0.5179557800292969, "eval_regularize": 0.010302502661943436, "eval_runtime": 446.7461, "eval_samples_per_second": 12.96, "eval_steps_per_second": 3.241, "step": 1300 }, { "dpo_loss": 0.6898637413978577, "epoch": 1.2329737815920006, "grad_norm": 64.4984732207436, "learning_rate": 3.847217967011481e-08, "logits": -2.107663154602051, "logps": -90.70755004882812, "loss": 0.0188, "objective": 0.020963182672858238, "ranking_idealized": 0.4333333373069763, "ranking_idealized_expo": 0.4000000059604645, "ranking_simple": 0.4000000059604645, "regularize": 0.020961280912160873, "step": 1305 }, { "dpo_loss": 0.6896305084228516, "epoch": 1.2376978190693646, "grad_norm": 65.1483177905117, "learning_rate": 3.807119064219686e-08, "logits": -2.1721391677856445, "logps": -89.53897094726562, "loss": 0.0187, "objective": 0.022347215563058853, "ranking_idealized": 0.6833333373069763, "ranking_idealized_expo": 0.6000000238418579, "ranking_simple": 0.6000000238418579, "regularize": 0.02234494686126709, "step": 1310 }, { "dpo_loss": 0.6885426044464111, "epoch": 1.2424218565467287, "grad_norm": 67.6302362675184, "learning_rate": 3.7671013510463685e-08, "logits": -2.1977858543395996, "logps": -90.79574584960938, "loss": 0.0254, "objective": 0.021330129355192184, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.550000011920929, "regularize": 0.02132764458656311, "step": 1315 }, { "dpo_loss": 0.6933093667030334, "epoch": 1.2471458940240927, "grad_norm": 63.75946991656521, "learning_rate": 3.727167551168947e-08, "logits": -2.229327917098999, "logps": -91.51911163330078, "loss": 0.0225, "objective": 0.01876850612461567, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.46666666865348816, "ranking_simple": 0.46666666865348816, "regularize": 0.01876780204474926, "step": 1320 }, { "dpo_loss": 0.6886643171310425, "epoch": 1.2518699315014565, "grad_norm": 61.93022907625324, "learning_rate": 3.687320382553547e-08, "logits": -2.1852173805236816, "logps": -94.50476837158203, "loss": 0.0213, "objective": 0.019077714532613754, "ranking_idealized": 0.6666666865348816, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.5666666626930237, "regularize": 0.01907687447965145, "step": 1325 }, { "dpo_loss": 0.690542459487915, "epoch": 1.2565939689788206, "grad_norm": 61.862265225762556, "learning_rate": 3.6475625572700156e-08, "logits": -2.1126949787139893, "logps": -93.08950805664062, "loss": 0.0201, "objective": 0.020278314128518105, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5166666507720947, "regularize": 0.020277447998523712, "step": 1330 }, { "dpo_loss": 0.6890642046928406, "epoch": 1.2613180064561846, "grad_norm": 65.89840804248666, "learning_rate": 3.607896781307333e-08, "logits": -2.0817463397979736, "logps": -93.81517791748047, "loss": 0.0186, "objective": 0.018369121477007866, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.44999998807907104, "ranking_simple": 0.44999998807907104, "regularize": 0.01836659200489521, "step": 1335 }, { "dpo_loss": 0.6935243606567383, "epoch": 1.2660420439335485, "grad_norm": 61.73867224830495, "learning_rate": 3.5683257543894376e-08, "logits": -2.158568859100342, "logps": -91.2880859375, "loss": 0.0199, "objective": 0.02297687530517578, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5166666507720947, "regularize": 0.022976113483309746, "step": 1340 }, { "dpo_loss": 0.6870158910751343, "epoch": 1.2707660814109125, "grad_norm": 55.58597973847245, "learning_rate": 3.528852169791474e-08, "logits": -2.130025625228882, "logps": -92.3035888671875, "loss": 0.0191, "objective": 0.018635360524058342, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.5666666626930237, "regularize": 0.018634265288710594, "step": 1345 }, { "dpo_loss": 0.6925967335700989, "epoch": 1.2754901188882766, "grad_norm": 60.169587280433916, "learning_rate": 3.489478714156493e-08, "logits": -2.1538307666778564, "logps": -90.29386901855469, "loss": 0.0191, "objective": 0.01989407278597355, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.5833333134651184, "regularize": 0.01989228092133999, "step": 1350 }, { "epoch": 1.2754901188882766, "eval_dpo_loss": 0.6928868293762207, "eval_logits": -1.9894771575927734, "eval_logps": -98.17080688476562, "eval_loss": 0.010478594340384007, "eval_objective": 0.01073493529111147, "eval_ranking_idealized": 0.6022099256515503, "eval_ranking_idealized_expo": 0.5207182168960571, "eval_ranking_simple": 0.5186464190483093, "eval_regularize": 0.01073309313505888, "eval_runtime": 446.5009, "eval_samples_per_second": 12.967, "eval_steps_per_second": 3.243, "step": 1350 }, { "dpo_loss": 0.6914752721786499, "epoch": 1.2802141563656404, "grad_norm": 63.02270194963686, "learning_rate": 3.450208067312586e-08, "logits": -2.1492412090301514, "logps": -89.29891967773438, "loss": 0.019, "objective": 0.017367955297231674, "ranking_idealized": 0.5, "ranking_idealized_expo": 0.46666666865348816, "ranking_simple": 0.46666666865348816, "regularize": 0.01736696995794773, "step": 1355 }, { "dpo_loss": 0.6908875703811646, "epoch": 1.2849381938430045, "grad_norm": 66.84237286020337, "learning_rate": 3.411042902090492e-08, "logits": -2.2156636714935303, "logps": -91.21939849853516, "loss": 0.0183, "objective": 0.016017399728298187, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5166666507720947, "regularize": 0.016014499589800835, "step": 1360 }, { "dpo_loss": 0.6913639307022095, "epoch": 1.2896622313203685, "grad_norm": 76.89545336747237, "learning_rate": 3.3719858841416836e-08, "logits": -2.1557438373565674, "logps": -92.71784210205078, "loss": 0.0196, "objective": 0.019386129453778267, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5166666507720947, "regularize": 0.019384343177080154, "step": 1365 }, { "dpo_loss": 0.6889938712120056, "epoch": 1.2943862687977323, "grad_norm": 56.519950070251724, "learning_rate": 3.333039671756934e-08, "logits": -2.055145502090454, "logps": -90.29429626464844, "loss": 0.0191, "objective": 0.019297009333968163, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.5, "regularize": 0.01929611526429653, "step": 1370 }, { "dpo_loss": 0.689259946346283, "epoch": 1.2991103062750964, "grad_norm": 73.9021579793652, "learning_rate": 3.294206915685392e-08, "logits": -2.2613272666931152, "logps": -95.13359832763672, "loss": 0.0184, "objective": 0.015245441347360611, "ranking_idealized": 0.6666666865348816, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.5833333134651184, "regularize": 0.01524441223591566, "step": 1375 }, { "dpo_loss": 0.6903609037399292, "epoch": 1.3038343437524604, "grad_norm": 63.39814646096018, "learning_rate": 3.2554902589541666e-08, "logits": -2.1530189514160156, "logps": -90.94368743896484, "loss": 0.0174, "objective": 0.017877008765935898, "ranking_idealized": 0.7166666388511658, "ranking_idealized_expo": 0.6499999761581421, "ranking_simple": 0.6499999761581421, "regularize": 0.017875246703624725, "step": 1380 }, { "dpo_loss": 0.6925813555717468, "epoch": 1.3085583812298245, "grad_norm": 67.03876383480757, "learning_rate": 3.216892336688435e-08, "logits": -2.162677526473999, "logps": -91.54695892333984, "loss": 0.0196, "objective": 0.019883565604686737, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.019882572814822197, "step": 1385 }, { "dpo_loss": 0.690301775932312, "epoch": 1.3132824187071885, "grad_norm": 65.28692296573145, "learning_rate": 3.1784157759320954e-08, "logits": -2.116351842880249, "logps": -91.4957504272461, "loss": 0.018, "objective": 0.015376557596027851, "ranking_idealized": 0.6833333373069763, "ranking_idealized_expo": 0.6333333253860474, "ranking_simple": 0.6333333253860474, "regularize": 0.015375560149550438, "step": 1390 }, { "dpo_loss": 0.6893251538276672, "epoch": 1.3180064561845524, "grad_norm": 62.76399037621205, "learning_rate": 3.140063195468962e-08, "logits": -2.163536548614502, "logps": -92.03372192382812, "loss": 0.0177, "objective": 0.017055170610547066, "ranking_idealized": 0.5333333611488342, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.017053287476301193, "step": 1395 }, { "dpo_loss": 0.6904376745223999, "epoch": 1.3227304936619164, "grad_norm": 66.03868596298913, "learning_rate": 3.101837205644531e-08, "logits": -2.1526260375976562, "logps": -91.71768951416016, "loss": 0.0183, "objective": 0.017484767362475395, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.017483625560998917, "step": 1400 }, { "epoch": 1.3227304936619164, "eval_dpo_loss": 0.6928439736366272, "eval_logits": -1.9895795583724976, "eval_logps": -98.29888916015625, "eval_loss": 0.009779366664588451, "eval_objective": 0.009947007521986961, "eval_ranking_idealized": 0.6022099256515503, "eval_ranking_idealized_expo": 0.5207182168960571, "eval_ranking_simple": 0.5179557800292969, "eval_regularize": 0.009945098310709, "eval_runtime": 445.875, "eval_samples_per_second": 12.986, "eval_steps_per_second": 3.248, "step": 1400 }, { "dpo_loss": 0.6910092234611511, "epoch": 1.3274545311392805, "grad_norm": 60.84233260746348, "learning_rate": 3.063740408188308e-08, "logits": -2.129271984100342, "logps": -88.37971496582031, "loss": 0.0189, "objective": 0.02033030241727829, "ranking_idealized": 0.6833333373069763, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.5666666626930237, "regularize": 0.020329667255282402, "step": 1405 }, { "dpo_loss": 0.68757563829422, "epoch": 1.3321785686166443, "grad_norm": 66.02099749274366, "learning_rate": 3.0257753960367374e-08, "logits": -2.15506911277771, "logps": -94.14618682861328, "loss": 0.0203, "objective": 0.022749019786715508, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5166666507720947, "regularize": 0.022747376933693886, "step": 1410 }, { "dpo_loss": 0.6922497153282166, "epoch": 1.3369026060940084, "grad_norm": 69.66802628356685, "learning_rate": 2.987944753156717e-08, "logits": -2.111666440963745, "logps": -91.04405975341797, "loss": 0.0182, "objective": 0.0166956577450037, "ranking_idealized": 0.6666666865348816, "ranking_idealized_expo": 0.6499999761581421, "ranking_simple": 0.6499999761581421, "regularize": 0.016694890335202217, "step": 1415 }, { "dpo_loss": 0.6890503168106079, "epoch": 1.3416266435713724, "grad_norm": 60.95975646160118, "learning_rate": 2.9502510543697322e-08, "logits": -2.153930902481079, "logps": -90.40367889404297, "loss": 0.0168, "objective": 0.01781788095831871, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.5, "ranking_simple": 0.4833333194255829, "regularize": 0.017816245555877686, "step": 1420 }, { "dpo_loss": 0.6874597668647766, "epoch": 1.3463506810487362, "grad_norm": 59.58581498665142, "learning_rate": 2.912696865176607e-08, "logits": -2.243446111679077, "logps": -96.16178894042969, "loss": 0.0177, "objective": 0.023015646263957024, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.6166666746139526, "ranking_simple": 0.6000000238418579, "regularize": 0.02301453799009323, "step": 1425 }, { "dpo_loss": 0.6897552013397217, "epoch": 1.3510747185261003, "grad_norm": 60.74669762947064, "learning_rate": 2.875284741582892e-08, "logits": -2.1864495277404785, "logps": -96.33065032958984, "loss": 0.017, "objective": 0.015690678730607033, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.4833333194255829, "regularize": 0.015688156709074974, "step": 1430 }, { "dpo_loss": 0.6917933821678162, "epoch": 1.3557987560034643, "grad_norm": 52.60951888043, "learning_rate": 2.838017229924894e-08, "logits": -2.142225742340088, "logps": -92.60111999511719, "loss": 0.0173, "objective": 0.01731746830046177, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.550000011920929, "regularize": 0.01731485314667225, "step": 1435 }, { "dpo_loss": 0.6895142793655396, "epoch": 1.3605227934808282, "grad_norm": 72.2797304152481, "learning_rate": 2.8008968666963817e-08, "logits": -2.1638967990875244, "logps": -92.39698028564453, "loss": 0.0165, "objective": 0.017737431451678276, "ranking_idealized": 0.5166666507720947, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.46666666865348816, "regularize": 0.017736157402396202, "step": 1440 }, { "dpo_loss": 0.689642608165741, "epoch": 1.3652468309581922, "grad_norm": 79.06644823842738, "learning_rate": 2.763926178375929e-08, "logits": -2.190371036529541, "logps": -91.85442352294922, "loss": 0.0148, "objective": 0.01207685936242342, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.550000011920929, "regularize": 0.012074961327016354, "step": 1445 }, { "dpo_loss": 0.690252423286438, "epoch": 1.3699708684355563, "grad_norm": 61.578467113272715, "learning_rate": 2.7271076812549688e-08, "logits": -2.2324106693267822, "logps": -91.74723815917969, "loss": 0.0173, "objective": 0.020336376503109932, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.4333333373069763, "ranking_simple": 0.44999998807907104, "regularize": 0.02033485844731331, "step": 1450 }, { "epoch": 1.3699708684355563, "eval_dpo_loss": 0.6928586959838867, "eval_logits": -1.9887967109680176, "eval_logps": -98.44745635986328, "eval_loss": 0.01200713962316513, "eval_objective": 0.012023248709738255, "eval_ranking_idealized": 0.6022099256515503, "eval_ranking_idealized_expo": 0.5207182168960571, "eval_ranking_simple": 0.519336998462677, "eval_regularize": 0.012021254748106003, "eval_runtime": 446.5411, "eval_samples_per_second": 12.966, "eval_steps_per_second": 3.243, "step": 1450 }, { "dpo_loss": 0.6905301809310913, "epoch": 1.3746949059129203, "grad_norm": 63.36787972947676, "learning_rate": 2.6904438812665275e-08, "logits": -2.1534087657928467, "logps": -90.40296936035156, "loss": 0.0161, "objective": 0.01465876679867506, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5333333611488342, "regularize": 0.01465718261897564, "step": 1455 }, { "dpo_loss": 0.6913805603981018, "epoch": 1.3794189433902841, "grad_norm": 82.33504933507443, "learning_rate": 2.6539372738146693e-08, "logits": -2.2307941913604736, "logps": -95.2313003540039, "loss": 0.0175, "objective": 0.01627987250685692, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5166666507720947, "regularize": 0.01627855747938156, "step": 1460 }, { "dpo_loss": 0.6937485933303833, "epoch": 1.3841429808676482, "grad_norm": 77.80619617469834, "learning_rate": 2.6175903436046474e-08, "logits": -2.127424716949463, "logps": -93.49677276611328, "loss": 0.019, "objective": 0.018087979406118393, "ranking_idealized": 0.7333333492279053, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.5666666626930237, "regularize": 0.018087027594447136, "step": 1465 }, { "dpo_loss": 0.6909429430961609, "epoch": 1.3888670183450122, "grad_norm": 65.449053741861, "learning_rate": 2.5814055644738007e-08, "logits": -2.13195538520813, "logps": -92.86261749267578, "loss": 0.018, "objective": 0.01916462555527687, "ranking_idealized": 0.6833333373069763, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.5666666626930237, "regularize": 0.019163591787219048, "step": 1470 }, { "dpo_loss": 0.6916597485542297, "epoch": 1.3935910558223763, "grad_norm": 64.31813956475592, "learning_rate": 2.545385399223171e-08, "logits": -2.1243183612823486, "logps": -91.4704818725586, "loss": 0.016, "objective": 0.01847274787724018, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.46666666865348816, "ranking_simple": 0.46666666865348816, "regularize": 0.01847095414996147, "step": 1475 }, { "dpo_loss": 0.6896728277206421, "epoch": 1.3983150932997401, "grad_norm": 68.83830299489735, "learning_rate": 2.5095322994498846e-08, "logits": -2.1868510246276855, "logps": -88.3174819946289, "loss": 0.0148, "objective": 0.014686751179397106, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5333333611488342, "regularize": 0.014684156514704227, "step": 1480 }, { "dpo_loss": 0.691864550113678, "epoch": 1.4030391307771042, "grad_norm": 64.34780184550137, "learning_rate": 2.4738487053802913e-08, "logits": -2.121894359588623, "logps": -90.51294708251953, "loss": 0.0178, "objective": 0.017730435356497765, "ranking_idealized": 0.699999988079071, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.5666666626930237, "regularize": 0.017729543149471283, "step": 1485 }, { "dpo_loss": 0.6895692944526672, "epoch": 1.4077631682544682, "grad_norm": 76.56054457201986, "learning_rate": 2.4383370457038788e-08, "logits": -2.287666082382202, "logps": -91.52227020263672, "loss": 0.0178, "objective": 0.020892778411507607, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.46666666865348816, "regularize": 0.020891882479190826, "step": 1490 }, { "dpo_loss": 0.6884461641311646, "epoch": 1.412487205731832, "grad_norm": 66.46105530145252, "learning_rate": 2.4029997374079687e-08, "logits": -2.1489083766937256, "logps": -94.47865295410156, "loss": 0.0164, "objective": 0.017250513657927513, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.5833333134651184, "regularize": 0.01724918559193611, "step": 1495 }, { "dpo_loss": 0.6900500059127808, "epoch": 1.4172112432091961, "grad_norm": 65.6967093839243, "learning_rate": 2.3678391856132203e-08, "logits": -2.1476263999938965, "logps": -92.45713806152344, "loss": 0.0171, "objective": 0.017398254945874214, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.01739557646214962, "step": 1500 }, { "epoch": 1.4172112432091961, "eval_dpo_loss": 0.6928529143333435, "eval_logits": -1.989194393157959, "eval_logps": -98.49781036376953, "eval_loss": 0.009285102598369122, "eval_objective": 0.009339064359664917, "eval_ranking_idealized": 0.6022099256515503, "eval_ranking_idealized_expo": 0.5207182168960571, "eval_ranking_simple": 0.5186464190483093, "eval_regularize": 0.009336920455098152, "eval_runtime": 454.5105, "eval_samples_per_second": 12.739, "eval_steps_per_second": 3.186, "step": 1500 }, { "dpo_loss": 0.6893900632858276, "epoch": 1.4219352806865602, "grad_norm": 66.7330764289215, "learning_rate": 2.3328577834099238e-08, "logits": -2.1472671031951904, "logps": -93.55850982666016, "loss": 0.0159, "objective": 0.01486156228929758, "ranking_idealized": 0.6666666865348816, "ranking_idealized_expo": 0.6000000238418579, "ranking_simple": 0.6000000238418579, "regularize": 0.014860122464597225, "step": 1505 }, { "dpo_loss": 0.6900672912597656, "epoch": 1.426659318163924, "grad_norm": 68.69500961540548, "learning_rate": 2.2980579116951266e-08, "logits": -2.205268144607544, "logps": -94.00220489501953, "loss": 0.0165, "objective": 0.015583300963044167, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.5666666626930237, "regularize": 0.015581325627863407, "step": 1510 }, { "dpo_loss": 0.6928814053535461, "epoch": 1.431383355641288, "grad_norm": 66.06160630988346, "learning_rate": 2.263441939010586e-08, "logits": -2.205580234527588, "logps": -94.78508758544922, "loss": 0.0176, "objective": 0.020189667120575905, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.5833333134651184, "regularize": 0.020186880603432655, "step": 1515 }, { "dpo_loss": 0.6897247433662415, "epoch": 1.436107393118652, "grad_norm": 64.2991321127621, "learning_rate": 2.2290122213815603e-08, "logits": -2.1570937633514404, "logps": -93.27649688720703, "loss": 0.0151, "objective": 0.015933455899357796, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5333333611488342, "regularize": 0.01593177765607834, "step": 1520 }, { "dpo_loss": 0.6920062899589539, "epoch": 1.440831430596016, "grad_norm": 82.55367392818718, "learning_rate": 2.194771102156456e-08, "logits": -2.079338550567627, "logps": -90.78583526611328, "loss": 0.0161, "objective": 0.012961748987436295, "ranking_idealized": 0.6666666865348816, "ranking_idealized_expo": 0.6000000238418579, "ranking_simple": 0.6000000238418579, "regularize": 0.012958657927811146, "step": 1525 }, { "dpo_loss": 0.6941262483596802, "epoch": 1.44555546807338, "grad_norm": 64.16042224652975, "learning_rate": 2.1607209118473314e-08, "logits": -2.1198713779449463, "logps": -92.36068725585938, "loss": 0.0175, "objective": 0.013944637961685658, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.46666666865348816, "ranking_simple": 0.46666666865348816, "regularize": 0.013943412341177464, "step": 1530 }, { "dpo_loss": 0.6908197999000549, "epoch": 1.450279505550744, "grad_norm": 73.78678695774653, "learning_rate": 2.1268639679712813e-08, "logits": -2.197909355163574, "logps": -94.17871856689453, "loss": 0.0175, "objective": 0.020089728757739067, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.020088987424969673, "step": 1535 }, { "dpo_loss": 0.6900218725204468, "epoch": 1.455003543028108, "grad_norm": 60.95051574524286, "learning_rate": 2.0932025748927014e-08, "logits": -2.0492825508117676, "logps": -90.19515228271484, "loss": 0.0156, "objective": 0.01438401360064745, "ranking_idealized": 0.6833333373069763, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.5833333134651184, "regularize": 0.014382914640009403, "step": 1540 }, { "dpo_loss": 0.6942049264907837, "epoch": 1.4597275805054721, "grad_norm": 64.3944580161131, "learning_rate": 2.0597390236664474e-08, "logits": -2.1208300590515137, "logps": -92.43262481689453, "loss": 0.0188, "objective": 0.02223369851708412, "ranking_idealized": 0.5333333611488342, "ranking_idealized_expo": 0.4000000059604645, "ranking_simple": 0.4000000059604645, "regularize": 0.022232500836253166, "step": 1545 }, { "dpo_loss": 0.6913332343101501, "epoch": 1.464451617982836, "grad_norm": 65.89315791512014, "learning_rate": 2.026475591881906e-08, "logits": -2.080505609512329, "logps": -91.72013092041016, "loss": 0.0164, "objective": 0.015918172895908356, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.550000011920929, "regularize": 0.01591550186276436, "step": 1550 }, { "epoch": 1.464451617982836, "eval_dpo_loss": 0.6928287148475647, "eval_logits": -1.9898428916931152, "eval_logps": -98.48872375488281, "eval_loss": 0.009991789236664772, "eval_objective": 0.010131197050213814, "eval_ranking_idealized": 0.6022099256515503, "eval_ranking_idealized_expo": 0.5207182168960571, "eval_ranking_simple": 0.5179557800292969, "eval_regularize": 0.010128886438906193, "eval_runtime": 446.2677, "eval_samples_per_second": 12.974, "eval_steps_per_second": 3.245, "step": 1550 }, { "dpo_loss": 0.6896055936813354, "epoch": 1.4691756554602, "grad_norm": 72.03048326532462, "learning_rate": 1.9934145435079702e-08, "logits": -2.2838551998138428, "logps": -92.67180633544922, "loss": 0.0163, "objective": 0.01897108368575573, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.6000000238418579, "ranking_simple": 0.6000000238418579, "regularize": 0.018967188894748688, "step": 1555 }, { "dpo_loss": 0.6906958222389221, "epoch": 1.473899692937564, "grad_norm": 73.82372228393551, "learning_rate": 1.9605581287389632e-08, "logits": -2.126072645187378, "logps": -91.18004608154297, "loss": 0.017, "objective": 0.01644645445048809, "ranking_idealized": 0.5333333611488342, "ranking_idealized_expo": 0.4166666567325592, "ranking_simple": 0.4166666567325592, "regularize": 0.01644515059888363, "step": 1560 }, { "dpo_loss": 0.6889926791191101, "epoch": 1.478623730414928, "grad_norm": 64.09272915756699, "learning_rate": 1.92790858384147e-08, "logits": -2.13236927986145, "logps": -92.78182983398438, "loss": 0.0154, "objective": 0.016674092039465904, "ranking_idealized": 0.6833333373069763, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5166666507720947, "regularize": 0.01667255535721779, "step": 1565 }, { "dpo_loss": 0.6913832426071167, "epoch": 1.483347767892292, "grad_norm": 66.43479163914742, "learning_rate": 1.895468131002143e-08, "logits": -2.0496039390563965, "logps": -92.02823638916016, "loss": 0.0135, "objective": 0.014378294348716736, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5333333611488342, "regularize": 0.014377345331013203, "step": 1570 }, { "dpo_loss": 0.6909292340278625, "epoch": 1.488071805369656, "grad_norm": 74.75415511259118, "learning_rate": 1.863238978176455e-08, "logits": -2.2580831050872803, "logps": -91.48379516601562, "loss": 0.014, "objective": 0.014301776885986328, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.5833333134651184, "regularize": 0.014300725422799587, "step": 1575 }, { "dpo_loss": 0.6893575191497803, "epoch": 1.4927958428470198, "grad_norm": 58.43183435208023, "learning_rate": 1.831223318938419e-08, "logits": -2.16597843170166, "logps": -93.69886016845703, "loss": 0.0152, "objective": 0.014309495687484741, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.46666666865348816, "ranking_simple": 0.46666666865348816, "regularize": 0.014308045618236065, "step": 1580 }, { "dpo_loss": 0.6888726949691772, "epoch": 1.4975198803243839, "grad_norm": 73.21921475273949, "learning_rate": 1.7994233323312913e-08, "logits": -2.2151682376861572, "logps": -89.77079772949219, "loss": 0.0151, "objective": 0.017258943989872932, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.5833333134651184, "regularize": 0.01725638099014759, "step": 1585 }, { "dpo_loss": 0.6907024383544922, "epoch": 1.502243917801748, "grad_norm": 63.078892059144785, "learning_rate": 1.767841182719262e-08, "logits": -2.1972243785858154, "logps": -95.78870391845703, "loss": 0.0132, "objective": 0.011403498239815235, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.5666666626930237, "regularize": 0.011402006261050701, "step": 1590 }, { "dpo_loss": 0.690780520439148, "epoch": 1.5069679552791118, "grad_norm": 72.75847192003988, "learning_rate": 1.7364790196401436e-08, "logits": -2.1997883319854736, "logps": -92.46896362304688, "loss": 0.0157, "objective": 0.017867466434836388, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5333333611488342, "regularize": 0.017865275964140892, "step": 1595 }, { "dpo_loss": 0.6890024542808533, "epoch": 1.511691992756476, "grad_norm": 88.44042595685, "learning_rate": 1.705338977659071e-08, "logits": -2.1227810382843018, "logps": -93.08589172363281, "loss": 0.0165, "objective": 0.013732160441577435, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.550000011920929, "regularize": 0.013729160651564598, "step": 1600 }, { "epoch": 1.511691992756476, "eval_dpo_loss": 0.6928747296333313, "eval_logits": -1.9891741275787354, "eval_logps": -98.44180297851562, "eval_loss": 0.009673803113400936, "eval_objective": 0.009624399244785309, "eval_ranking_idealized": 0.6022099256515503, "eval_ranking_idealized_expo": 0.5207182168960571, "eval_ranking_simple": 0.5186464190483093, "eval_regularize": 0.009622092358767986, "eval_runtime": 451.0127, "eval_samples_per_second": 12.838, "eval_steps_per_second": 3.211, "step": 1600 }, { "dpo_loss": 0.6902530193328857, "epoch": 1.5164160302338399, "grad_norm": 71.99972413373813, "learning_rate": 1.6744231762232176e-08, "logits": -2.190404176712036, "logps": -92.08161163330078, "loss": 0.0141, "objective": 0.013264096342027187, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.46666666865348816, "ranking_simple": 0.46666666865348816, "regularize": 0.013262915425002575, "step": 1605 }, { "dpo_loss": 0.6898870468139648, "epoch": 1.5211400677112037, "grad_norm": 74.10537546046199, "learning_rate": 1.6437337195175428e-08, "logits": -2.19018816947937, "logps": -92.24858856201172, "loss": 0.0144, "objective": 0.014542266726493835, "ranking_idealized": 0.6666666865348816, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.550000011920929, "regularize": 0.014540906064212322, "step": 1610 }, { "dpo_loss": 0.6916466951370239, "epoch": 1.525864105188568, "grad_norm": 58.38920752518266, "learning_rate": 1.613272696321576e-08, "logits": -2.136929750442505, "logps": -92.16728973388672, "loss": 0.0139, "objective": 0.015932830050587654, "ranking_idealized": 0.6666666865348816, "ranking_idealized_expo": 0.6000000238418579, "ranking_simple": 0.6000000238418579, "regularize": 0.015931693837046623, "step": 1615 }, { "dpo_loss": 0.6908102631568909, "epoch": 1.5305881426659318, "grad_norm": 63.00313504444051, "learning_rate": 1.5830421798672566e-08, "logits": -2.118234872817993, "logps": -90.81103515625, "loss": 0.0158, "objective": 0.013052166439592838, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.46666666865348816, "ranking_simple": 0.46666666865348816, "regularize": 0.013051275163888931, "step": 1620 }, { "dpo_loss": 0.6895550489425659, "epoch": 1.5353121801432958, "grad_norm": 58.07839320165127, "learning_rate": 1.5530442276978155e-08, "logits": -2.139536142349243, "logps": -90.25867462158203, "loss": 0.0148, "objective": 0.014852885156869888, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5166666507720947, "regularize": 0.014851867221295834, "step": 1625 }, { "dpo_loss": 0.6881382465362549, "epoch": 1.54003621762066, "grad_norm": 68.39570526618726, "learning_rate": 1.523280881527743e-08, "logits": -2.058195114135742, "logps": -92.33999633789062, "loss": 0.0153, "objective": 0.018310803920030594, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.5666666626930237, "regularize": 0.018308250233530998, "step": 1630 }, { "dpo_loss": 0.6918675303459167, "epoch": 1.5447602550980237, "grad_norm": 71.28198542563501, "learning_rate": 1.4937541671038245e-08, "logits": -2.0989344120025635, "logps": -94.7214584350586, "loss": 0.0141, "objective": 0.016751030460000038, "ranking_idealized": 0.699999988079071, "ranking_idealized_expo": 0.6333333253860474, "ranking_simple": 0.6333333253860474, "regularize": 0.016748782247304916, "step": 1635 }, { "dpo_loss": 0.6916669011116028, "epoch": 1.5494842925753878, "grad_norm": 55.738628042601434, "learning_rate": 1.4644660940672625e-08, "logits": -2.154735803604126, "logps": -93.92298126220703, "loss": 0.0156, "objective": 0.012084761634469032, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.46666666865348816, "ranking_simple": 0.46666666865348816, "regularize": 0.01208337489515543, "step": 1640 }, { "dpo_loss": 0.6894612908363342, "epoch": 1.5542083300527518, "grad_norm": 62.99549337373731, "learning_rate": 1.435418655816899e-08, "logits": -2.169052839279175, "logps": -91.41073608398438, "loss": 0.0143, "objective": 0.014856117777526379, "ranking_idealized": 0.4833333194255829, "ranking_idealized_expo": 0.44999998807907104, "ranking_simple": 0.44999998807907104, "regularize": 0.014855272136628628, "step": 1645 }, { "dpo_loss": 0.6909356713294983, "epoch": 1.5589323675301157, "grad_norm": 67.0305140958766, "learning_rate": 1.4066138293735408e-08, "logits": -2.098741292953491, "logps": -92.3939208984375, "loss": 0.0128, "objective": 0.012405160814523697, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.550000011920929, "regularize": 0.012403340078890324, "step": 1650 }, { "epoch": 1.5589323675301157, "eval_dpo_loss": 0.6927416324615479, "eval_logits": -1.9889006614685059, "eval_logps": -98.3604736328125, "eval_loss": 0.010028412565588951, "eval_objective": 0.010088582523167133, "eval_ranking_idealized": 0.6022099256515503, "eval_ranking_idealized_expo": 0.5207182168960571, "eval_ranking_simple": 0.5179557800292969, "eval_regularize": 0.010086326859891415, "eval_runtime": 446.4522, "eval_samples_per_second": 12.969, "eval_steps_per_second": 3.243, "step": 1650 }, { "dpo_loss": 0.6893053650856018, "epoch": 1.5636564050074797, "grad_norm": 63.18957665484938, "learning_rate": 1.3780535752453976e-08, "logits": -2.115309000015259, "logps": -92.01270294189453, "loss": 0.016, "objective": 0.01488524954766035, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.5666666626930237, "regularize": 0.014881560578942299, "step": 1655 }, { "dpo_loss": 0.6926845908164978, "epoch": 1.5683804424848438, "grad_norm": 58.824443635483924, "learning_rate": 1.34973983729465e-08, "logits": -2.0248100757598877, "logps": -91.5145263671875, "loss": 0.0155, "objective": 0.016196589916944504, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.4833333194255829, "regularize": 0.01619427278637886, "step": 1660 }, { "dpo_loss": 0.6904213428497314, "epoch": 1.5731044799622076, "grad_norm": 62.30456762761477, "learning_rate": 1.3216745426051451e-08, "logits": -2.12634015083313, "logps": -93.39350891113281, "loss": 0.0136, "objective": 0.012430812232196331, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5333333611488342, "regularize": 0.012427231296896935, "step": 1665 }, { "dpo_loss": 0.6912954449653625, "epoch": 1.5778285174395716, "grad_norm": 69.75551627833396, "learning_rate": 1.293859601351232e-08, "logits": -2.253553867340088, "logps": -94.33558654785156, "loss": 0.0154, "objective": 0.015208045952022076, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5166666507720947, "regularize": 0.015205765143036842, "step": 1670 }, { "dpo_loss": 0.6908196806907654, "epoch": 1.5825525549169357, "grad_norm": 66.72338681583422, "learning_rate": 1.266296906667762e-08, "logits": -2.232628107070923, "logps": -92.84732818603516, "loss": 0.0154, "objective": 0.014128237962722778, "ranking_idealized": 0.4166666567325592, "ranking_idealized_expo": 0.4166666567325592, "ranking_simple": 0.4166666567325592, "regularize": 0.014126955531537533, "step": 1675 }, { "dpo_loss": 0.6899545192718506, "epoch": 1.5872765923942995, "grad_norm": 63.2891144846116, "learning_rate": 1.238988334521226e-08, "logits": -2.1890323162078857, "logps": -95.2237777709961, "loss": 0.0152, "objective": 0.014830714091658592, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.4833333194255829, "regularize": 0.014829723164439201, "step": 1680 }, { "dpo_loss": 0.6891339421272278, "epoch": 1.5920006298716638, "grad_norm": 59.6793164031309, "learning_rate": 1.2119357435820816e-08, "logits": -2.1683380603790283, "logps": -89.5069580078125, "loss": 0.0118, "objective": 0.01023172028362751, "ranking_idealized": 0.46666666865348816, "ranking_idealized_expo": 0.3166666626930237, "ranking_simple": 0.3166666626930237, "regularize": 0.010230082087218761, "step": 1685 }, { "dpo_loss": 0.6914657354354858, "epoch": 1.5967246673490276, "grad_norm": 61.16970720231615, "learning_rate": 1.1851409750982438e-08, "logits": -2.128115653991699, "logps": -92.26956176757812, "loss": 0.013, "objective": 0.013722400180995464, "ranking_idealized": 0.699999988079071, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.6000000238418579, "regularize": 0.01371851097792387, "step": 1690 }, { "dpo_loss": 0.6910730600357056, "epoch": 1.6014487048263917, "grad_norm": 70.19949003863668, "learning_rate": 1.1586058527697707e-08, "logits": -2.1930956840515137, "logps": -91.90154266357422, "loss": 0.0136, "objective": 0.01538047008216381, "ranking_idealized": 0.6666666865348816, "ranking_idealized_expo": 0.6000000238418579, "ranking_simple": 0.6000000238418579, "regularize": 0.015379803255200386, "step": 1695 }, { "dpo_loss": 0.6901703476905823, "epoch": 1.6061727423037557, "grad_norm": 68.99239991969064, "learning_rate": 1.1323321826247346e-08, "logits": -2.1464996337890625, "logps": -93.59435272216797, "loss": 0.0132, "objective": 0.014362351037561893, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5166666507720947, "regularize": 0.014360878616571426, "step": 1700 }, { "epoch": 1.6061727423037557, "eval_dpo_loss": 0.6927831768989563, "eval_logits": -1.9890520572662354, "eval_logps": -98.40552520751953, "eval_loss": 0.008960912004113197, "eval_objective": 0.008905092254281044, "eval_ranking_idealized": 0.6022099256515503, "eval_ranking_idealized_expo": 0.5207182168960571, "eval_ranking_simple": 0.5179557800292969, "eval_regularize": 0.008902397006750107, "eval_runtime": 446.3386, "eval_samples_per_second": 12.972, "eval_steps_per_second": 3.244, "step": 1700 }, { "dpo_loss": 0.6905626654624939, "epoch": 1.6108967797811196, "grad_norm": 61.071862990680025, "learning_rate": 1.1063217528963042e-08, "logits": -2.1819908618927, "logps": -95.61785125732422, "loss": 0.0138, "objective": 0.014233703725039959, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5, "regularize": 0.014231013134121895, "step": 1705 }, { "dpo_loss": 0.6933135986328125, "epoch": 1.6156208172584836, "grad_norm": 65.29758946742726, "learning_rate": 1.0805763339010326e-08, "logits": -2.13582444190979, "logps": -92.55400848388672, "loss": 0.0135, "objective": 0.014589487574994564, "ranking_idealized": 0.6833333373069763, "ranking_idealized_expo": 0.6499999761581421, "ranking_simple": 0.6499999761581421, "regularize": 0.014586606994271278, "step": 1710 }, { "dpo_loss": 0.6906417608261108, "epoch": 1.6203448547358477, "grad_norm": 65.78620291334798, "learning_rate": 1.0550976779183651e-08, "logits": -2.1200928688049316, "logps": -95.07630920410156, "loss": 0.0136, "objective": 0.014620447531342506, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5166666507720947, "regularize": 0.014618877321481705, "step": 1715 }, { "dpo_loss": 0.6903366446495056, "epoch": 1.6250688922132115, "grad_norm": 61.420985867018985, "learning_rate": 1.02988751907138e-08, "logits": -2.196997880935669, "logps": -89.81111145019531, "loss": 0.0124, "objective": 0.01165497861802578, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.5666666626930237, "regularize": 0.011652585119009018, "step": 1720 }, { "dpo_loss": 0.6910706758499146, "epoch": 1.6297929296905755, "grad_norm": 69.65328151335441, "learning_rate": 1.0049475732087559e-08, "logits": -2.1551551818847656, "logps": -92.8335952758789, "loss": 0.0137, "objective": 0.011478899046778679, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.0114761833101511, "step": 1725 }, { "dpo_loss": 0.6897426247596741, "epoch": 1.6345169671679396, "grad_norm": 70.43018567128607, "learning_rate": 9.802795377879903e-09, "logits": -2.243594169616699, "logps": -91.26370239257812, "loss": 0.0128, "objective": 0.012597540393471718, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5333333611488342, "regularize": 0.012595159001648426, "step": 1730 }, { "dpo_loss": 0.6892535090446472, "epoch": 1.6392410046453034, "grad_norm": 63.82988388608345, "learning_rate": 9.558850917598716e-09, "logits": -2.070333957672119, "logps": -92.88044738769531, "loss": 0.0123, "objective": 0.011261907406151295, "ranking_idealized": 0.5166666507720947, "ranking_idealized_expo": 0.4333333373069763, "ranking_simple": 0.44999998807907104, "regularize": 0.011259406805038452, "step": 1735 }, { "dpo_loss": 0.6904967427253723, "epoch": 1.6439650421226675, "grad_norm": 57.01783294970417, "learning_rate": 9.31765895454199e-09, "logits": -2.178701877593994, "logps": -92.82861328125, "loss": 0.0128, "objective": 0.013563827611505985, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.5666666626930237, "regularize": 0.013562088832259178, "step": 1740 }, { "dpo_loss": 0.6911302208900452, "epoch": 1.6486890796000315, "grad_norm": 67.14492805040307, "learning_rate": 9.079235904667825e-09, "logits": -2.160048484802246, "logps": -97.07341766357422, "loss": 0.0132, "objective": 0.011855502612888813, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.5833333134651184, "regularize": 0.011852098628878593, "step": 1745 }, { "dpo_loss": 0.6899723410606384, "epoch": 1.6534131170773954, "grad_norm": 61.803443023637456, "learning_rate": 8.84359799547712e-09, "logits": -2.1978349685668945, "logps": -92.97909545898438, "loss": 0.0133, "objective": 0.01489060465246439, "ranking_idealized": 0.5166666507720947, "ranking_idealized_expo": 0.44999998807907104, "ranking_simple": 0.44999998807907104, "regularize": 0.014889102429151535, "step": 1750 }, { "epoch": 1.6534131170773954, "eval_dpo_loss": 0.6928050518035889, "eval_logits": -1.988478183746338, "eval_logps": -98.41736602783203, "eval_loss": 0.009351465851068497, "eval_objective": 0.009363526478409767, "eval_ranking_idealized": 0.6022099256515503, "eval_ranking_idealized_expo": 0.5207182168960571, "eval_ranking_simple": 0.5179557800292969, "eval_regularize": 0.009361328557133675, "eval_runtime": 445.0354, "eval_samples_per_second": 13.01, "eval_steps_per_second": 3.254, "step": 1750 }, { "dpo_loss": 0.6889625787734985, "epoch": 1.6581371545547596, "grad_norm": 69.19823709150691, "learning_rate": 8.6107612649091e-09, "logits": -2.17798113822937, "logps": -90.93826293945312, "loss": 0.0136, "objective": 0.014197876676917076, "ranking_idealized": 0.5166666507720947, "ranking_idealized_expo": 0.44999998807907104, "ranking_simple": 0.4333333373069763, "regularize": 0.014196816831827164, "step": 1755 }, { "dpo_loss": 0.6894813776016235, "epoch": 1.6628611920321235, "grad_norm": 70.33904785688647, "learning_rate": 8.380741560249726e-09, "logits": -2.2228808403015137, "logps": -91.38529205322266, "loss": 0.0136, "objective": 0.0157302338629961, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5166666507720947, "regularize": 0.015728596597909927, "step": 1760 }, { "dpo_loss": 0.6904619336128235, "epoch": 1.6675852295094873, "grad_norm": 66.62367568306153, "learning_rate": 8.153554537053149e-09, "logits": -2.1843998432159424, "logps": -91.0223159790039, "loss": 0.0144, "objective": 0.012193134985864162, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.46666666865348816, "ranking_simple": 0.46666666865348816, "regularize": 0.01219152845442295, "step": 1765 }, { "dpo_loss": 0.691467821598053, "epoch": 1.6723092669868516, "grad_norm": 63.63343895864985, "learning_rate": 7.929215658076093e-09, "logits": -2.2059359550476074, "logps": -92.13400268554688, "loss": 0.0114, "objective": 0.01266545057296753, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.012664331123232841, "step": 1770 }, { "dpo_loss": 0.6895557045936584, "epoch": 1.6770333044642154, "grad_norm": 61.16013319033806, "learning_rate": 7.707740192225515e-09, "logits": -2.257197380065918, "logps": -93.42992401123047, "loss": 0.0128, "objective": 0.014360646717250347, "ranking_idealized": 0.6666666865348816, "ranking_idealized_expo": 0.6666666865348816, "ranking_simple": 0.6666666865348816, "regularize": 0.014359161257743835, "step": 1775 }, { "dpo_loss": 0.691416323184967, "epoch": 1.6817573419415794, "grad_norm": 68.36063699136817, "learning_rate": 7.4891432135193e-09, "logits": -2.251347303390503, "logps": -91.27902221679688, "loss": 0.0141, "objective": 0.013033194467425346, "ranking_idealized": 0.6666666865348816, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5333333611488342, "regularize": 0.013032082468271255, "step": 1780 }, { "dpo_loss": 0.6915625929832458, "epoch": 1.6864813794189435, "grad_norm": 64.60918615534963, "learning_rate": 7.273439600060344e-09, "logits": -2.165839195251465, "logps": -93.43399810791016, "loss": 0.0158, "objective": 0.016496647149324417, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.550000011920929, "regularize": 0.016495179384946823, "step": 1785 }, { "dpo_loss": 0.6900497078895569, "epoch": 1.6912054168963073, "grad_norm": 59.4845336366133, "learning_rate": 7.060644033023894e-09, "logits": -2.1421496868133545, "logps": -89.32416534423828, "loss": 0.0132, "objective": 0.011856761761009693, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.011854317970573902, "step": 1790 }, { "dpo_loss": 0.6918495297431946, "epoch": 1.6959294543736714, "grad_norm": 62.10968860121513, "learning_rate": 6.850770995658372e-09, "logits": -2.145258665084839, "logps": -91.16167449951172, "loss": 0.0135, "objective": 0.011840968392789364, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5333333611488342, "regularize": 0.011839687824249268, "step": 1795 }, { "dpo_loss": 0.6910142302513123, "epoch": 1.7006534918510354, "grad_norm": 63.84635783884053, "learning_rate": 6.6438347722995445e-09, "logits": -2.2468461990356445, "logps": -91.39185333251953, "loss": 0.0138, "objective": 0.010580122470855713, "ranking_idealized": 0.5, "ranking_idealized_expo": 0.44999998807907104, "ranking_simple": 0.44999998807907104, "regularize": 0.010576292872428894, "step": 1800 }, { "epoch": 1.7006534918510354, "eval_dpo_loss": 0.6928184628486633, "eval_logits": -1.9885612726211548, "eval_logps": -98.35978698730469, "eval_loss": 0.009589685127139091, "eval_objective": 0.009668215177953243, "eval_ranking_idealized": 0.6022099256515503, "eval_ranking_idealized_expo": 0.5207182168960571, "eval_ranking_simple": 0.5179557800292969, "eval_regularize": 0.00966621097177267, "eval_runtime": 448.9142, "eval_samples_per_second": 12.898, "eval_steps_per_second": 3.226, "step": 1800 }, { "dpo_loss": 0.6905153393745422, "epoch": 1.7053775293283993, "grad_norm": 69.0367318972444, "learning_rate": 6.43984944739836e-09, "logits": -2.206124782562256, "logps": -93.37840270996094, "loss": 0.0133, "objective": 0.013736736960709095, "ranking_idealized": 0.5333333611488342, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.4833333194255829, "regularize": 0.013735477812588215, "step": 1805 }, { "dpo_loss": 0.6886168718338013, "epoch": 1.7101015668057633, "grad_norm": 60.061121059879774, "learning_rate": 6.238828904562315e-09, "logits": -2.238112449645996, "logps": -93.0470199584961, "loss": 0.0146, "objective": 0.015197351574897766, "ranking_idealized": 0.6666666865348816, "ranking_idealized_expo": 0.6666666865348816, "ranking_simple": 0.6666666865348816, "regularize": 0.015195462852716446, "step": 1810 }, { "dpo_loss": 0.6895704865455627, "epoch": 1.7148256042831274, "grad_norm": 62.370319050099795, "learning_rate": 6.040786825610517e-09, "logits": -2.18115234375, "logps": -92.52540588378906, "loss": 0.0127, "objective": 0.015414653345942497, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.015413584187626839, "step": 1815 }, { "dpo_loss": 0.6913415193557739, "epoch": 1.7195496417604912, "grad_norm": 68.7151840413099, "learning_rate": 5.845736689642472e-09, "logits": -2.2472267150878906, "logps": -93.7915267944336, "loss": 0.015, "objective": 0.01595698669552803, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5166666507720947, "regularize": 0.01595553196966648, "step": 1820 }, { "dpo_loss": 0.6893234848976135, "epoch": 1.7242736792378552, "grad_norm": 61.62237599260527, "learning_rate": 5.653691772120672e-09, "logits": -2.2202160358428955, "logps": -91.71192932128906, "loss": 0.0125, "objective": 0.013099589385092258, "ranking_idealized": 0.699999988079071, "ranking_idealized_expo": 0.6333333253860474, "ranking_simple": 0.6333333253860474, "regularize": 0.013098122552037239, "step": 1825 }, { "dpo_loss": 0.6918686032295227, "epoch": 1.7289977167152193, "grad_norm": 73.26853232568251, "learning_rate": 5.464665143967051e-09, "logits": -2.095928907394409, "logps": -92.31403350830078, "loss": 0.0132, "objective": 0.014795850031077862, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.46666666865348816, "regularize": 0.01479416061192751, "step": 1830 }, { "dpo_loss": 0.6898643374443054, "epoch": 1.7337217541925831, "grad_norm": 58.513985312428574, "learning_rate": 5.278669670673347e-09, "logits": -2.1115658283233643, "logps": -91.03611755371094, "loss": 0.0117, "objective": 0.0112331947311759, "ranking_idealized": 0.699999988079071, "ranking_idealized_expo": 0.6000000238418579, "ranking_simple": 0.6000000238418579, "regularize": 0.011231665499508381, "step": 1835 }, { "dpo_loss": 0.6913903951644897, "epoch": 1.7384457916699474, "grad_norm": 63.11876863535166, "learning_rate": 5.095718011425454e-09, "logits": -2.168307304382324, "logps": -91.80027770996094, "loss": 0.0135, "objective": 0.014516009949147701, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.5666666626930237, "regularize": 0.01451488770544529, "step": 1840 }, { "dpo_loss": 0.689866304397583, "epoch": 1.7431698291473112, "grad_norm": 72.43548684342062, "learning_rate": 4.9158226182418104e-09, "logits": -2.1879196166992188, "logps": -88.85045623779297, "loss": 0.0151, "objective": 0.014930271543562412, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.4833333194255829, "regularize": 0.014927973970770836, "step": 1845 }, { "dpo_loss": 0.689470648765564, "epoch": 1.7478938666246753, "grad_norm": 65.9250612110063, "learning_rate": 4.738995735125894e-09, "logits": -2.074735164642334, "logps": -94.42037963867188, "loss": 0.0122, "objective": 0.011366092599928379, "ranking_idealized": 0.6833333373069763, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5333333611488342, "regularize": 0.011363506317138672, "step": 1850 }, { "epoch": 1.7478938666246753, "eval_dpo_loss": 0.6928740739822388, "eval_logits": -1.9887943267822266, "eval_logps": -98.4156723022461, "eval_loss": 0.009023972786962986, "eval_objective": 0.00906344410032034, "eval_ranking_idealized": 0.6022099256515503, "eval_ranking_idealized_expo": 0.5207182168960571, "eval_ranking_simple": 0.5179557800292969, "eval_regularize": 0.009061108343303204, "eval_runtime": 445.128, "eval_samples_per_second": 13.007, "eval_steps_per_second": 3.253, "step": 1850 }, { "dpo_loss": 0.6925438046455383, "epoch": 1.7526179041020393, "grad_norm": 60.51205917887876, "learning_rate": 4.565249397232923e-09, "logits": -2.1302504539489746, "logps": -89.6479263305664, "loss": 0.0119, "objective": 0.012942561879754066, "ranking_idealized": 0.6666666865348816, "ranking_idealized_expo": 0.6333333253860474, "ranking_simple": 0.6333333253860474, "regularize": 0.012941225431859493, "step": 1855 }, { "dpo_loss": 0.6919233798980713, "epoch": 1.7573419415794032, "grad_norm": 73.74409349566294, "learning_rate": 4.394595430050613e-09, "logits": -2.1312899589538574, "logps": -92.23778533935547, "loss": 0.0134, "objective": 0.013376330956816673, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.013374886475503445, "step": 1860 }, { "dpo_loss": 0.6894384026527405, "epoch": 1.7620659790567672, "grad_norm": 62.88647078101833, "learning_rate": 4.2270454485944125e-09, "logits": -2.0558435916900635, "logps": -91.38388061523438, "loss": 0.0107, "objective": 0.010976298712193966, "ranking_idealized": 0.7666666507720947, "ranking_idealized_expo": 0.6666666865348816, "ranking_simple": 0.6666666865348816, "regularize": 0.010971426963806152, "step": 1865 }, { "dpo_loss": 0.6887921094894409, "epoch": 1.7667900165341313, "grad_norm": 87.932062525531, "learning_rate": 4.062610856616922e-09, "logits": -2.1452383995056152, "logps": -92.77478790283203, "loss": 0.013, "objective": 0.017829876393079758, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.44999998807907104, "ranking_simple": 0.44999998807907104, "regularize": 0.017827244475483894, "step": 1870 }, { "dpo_loss": 0.6901695132255554, "epoch": 1.771514054011495, "grad_norm": 59.56034569340847, "learning_rate": 3.901302845831728e-09, "logits": -2.1918883323669434, "logps": -91.66542053222656, "loss": 0.0103, "objective": 0.00952277984470129, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.550000011920929, "regularize": 0.009518155828118324, "step": 1875 }, { "dpo_loss": 0.6916844844818115, "epoch": 1.7762380914888591, "grad_norm": 59.31587765757665, "learning_rate": 3.743132395151705e-09, "logits": -2.1958460807800293, "logps": -92.2247085571289, "loss": 0.0132, "objective": 0.014002182520925999, "ranking_idealized": 0.5166666507720947, "ranking_idealized_expo": 0.44999998807907104, "ranking_simple": 0.44999998807907104, "regularize": 0.014000624418258667, "step": 1880 }, { "dpo_loss": 0.6910093426704407, "epoch": 1.7809621289662232, "grad_norm": 65.01445310326804, "learning_rate": 3.5881102699417463e-09, "logits": -2.15521240234375, "logps": -92.9655990600586, "loss": 0.0122, "objective": 0.013613136485219002, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.013612000271677971, "step": 1885 }, { "dpo_loss": 0.688242495059967, "epoch": 1.785686166443587, "grad_norm": 68.21241149596634, "learning_rate": 3.4362470212860483e-09, "logits": -2.2397677898406982, "logps": -92.33454132080078, "loss": 0.0101, "objective": 0.011641601100564003, "ranking_idealized": 0.6833333373069763, "ranking_idealized_expo": 0.6000000238418579, "ranking_simple": 0.6000000238418579, "regularize": 0.011638239957392216, "step": 1890 }, { "dpo_loss": 0.691038966178894, "epoch": 1.790410203920951, "grad_norm": 68.20823651098704, "learning_rate": 3.2875529852700148e-09, "logits": -2.2314653396606445, "logps": -86.68601989746094, "loss": 0.0131, "objective": 0.01244689617305994, "ranking_idealized": 0.5, "ranking_idealized_expo": 0.4333333373069763, "ranking_simple": 0.4333333373069763, "regularize": 0.012444679625332355, "step": 1895 }, { "dpo_loss": 0.6882398128509521, "epoch": 1.7951342413983151, "grad_norm": 74.00426423645611, "learning_rate": 3.142038282276732e-09, "logits": -2.1504063606262207, "logps": -92.22313690185547, "loss": 0.0128, "objective": 0.013669600710272789, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.550000011920929, "regularize": 0.013666595332324505, "step": 1900 }, { "epoch": 1.7951342413983151, "eval_dpo_loss": 0.6928610801696777, "eval_logits": -1.9891064167022705, "eval_logps": -98.42906951904297, "eval_loss": 0.008886425755918026, "eval_objective": 0.0089500043541193, "eval_ranking_idealized": 0.6022099256515503, "eval_ranking_idealized_expo": 0.5207182168960571, "eval_ranking_simple": 0.5179557800292969, "eval_regularize": 0.008947666734457016, "eval_runtime": 445.1268, "eval_samples_per_second": 13.008, "eval_steps_per_second": 3.253, "step": 1900 }, { "dpo_loss": 0.6886274218559265, "epoch": 1.799858278875679, "grad_norm": 70.48280427657417, "learning_rate": 2.9997128162981835e-09, "logits": -2.061340808868408, "logps": -92.88438415527344, "loss": 0.0124, "objective": 0.011484592221677303, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.5666666626930237, "regularize": 0.01148195844143629, "step": 1905 }, { "dpo_loss": 0.6910132169723511, "epoch": 1.8045823163530432, "grad_norm": 69.16766112778924, "learning_rate": 2.8605862742611453e-09, "logits": -2.2064709663391113, "logps": -93.21188354492188, "loss": 0.0131, "objective": 0.013887956738471985, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.013886776752769947, "step": 1910 }, { "dpo_loss": 0.6885823011398315, "epoch": 1.809306353830407, "grad_norm": 60.392467741599056, "learning_rate": 2.724668125367896e-09, "logits": -2.063930034637451, "logps": -93.89399719238281, "loss": 0.0118, "objective": 0.012090322561562061, "ranking_idealized": 0.5, "ranking_idealized_expo": 0.46666666865348816, "ranking_simple": 0.46666666865348816, "regularize": 0.012088621035218239, "step": 1915 }, { "dpo_loss": 0.691336452960968, "epoch": 1.8140303913077709, "grad_norm": 64.14689736170588, "learning_rate": 2.591967620451707e-09, "logits": -2.2195615768432617, "logps": -95.23002624511719, "loss": 0.0126, "objective": 0.011388556100428104, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.550000011920929, "regularize": 0.011385311372578144, "step": 1920 }, { "dpo_loss": 0.6888495683670044, "epoch": 1.8187544287851352, "grad_norm": 61.363679552810055, "learning_rate": 2.462493791347231e-09, "logits": -2.1906003952026367, "logps": -93.10894012451172, "loss": 0.0109, "objective": 0.012360634282231331, "ranking_idealized": 0.6666666865348816, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.5666666626930237, "regularize": 0.012358280830085278, "step": 1925 }, { "dpo_loss": 0.6920725107192993, "epoch": 1.823478466262499, "grad_norm": 62.3361060804096, "learning_rate": 2.3362554502757536e-09, "logits": -2.174255132675171, "logps": -91.97770690917969, "loss": 0.0133, "objective": 0.0109206298366189, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5166666507720947, "regularize": 0.010918223299086094, "step": 1930 }, { "dpo_loss": 0.6925593614578247, "epoch": 1.828202503739863, "grad_norm": 62.11529994850697, "learning_rate": 2.213261189245458e-09, "logits": -2.0892937183380127, "logps": -95.28164672851562, "loss": 0.0128, "objective": 0.0141153484582901, "ranking_idealized": 0.7166666388511658, "ranking_idealized_expo": 0.6166666746139526, "ranking_simple": 0.6333333253860474, "regularize": 0.01411362923681736, "step": 1935 }, { "dpo_loss": 0.6907196044921875, "epoch": 1.832926541217227, "grad_norm": 75.31073605655762, "learning_rate": 2.093519379466602e-09, "logits": -2.219423770904541, "logps": -93.87139129638672, "loss": 0.0136, "objective": 0.012250066734850407, "ranking_idealized": 0.6666666865348816, "ranking_idealized_expo": 0.6333333253860474, "ranking_simple": 0.6333333253860474, "regularize": 0.01224894542247057, "step": 1940 }, { "dpo_loss": 0.6914030909538269, "epoch": 1.837650578694591, "grad_norm": 59.36851585608135, "learning_rate": 1.9770381707817696e-09, "logits": -2.181976318359375, "logps": -89.93838500976562, "loss": 0.014, "objective": 0.01064326148480177, "ranking_idealized": 0.7333333492279053, "ranking_idealized_expo": 0.6000000238418579, "ranking_simple": 0.6000000238418579, "regularize": 0.010639534331858158, "step": 1945 }, { "dpo_loss": 0.6927531361579895, "epoch": 1.842374616171955, "grad_norm": 62.813994604552214, "learning_rate": 1.8638254911111816e-09, "logits": -2.1365416049957275, "logps": -93.81517028808594, "loss": 0.0133, "objective": 0.016254087910056114, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5166666507720947, "regularize": 0.01625274494290352, "step": 1950 }, { "epoch": 1.842374616171955, "eval_dpo_loss": 0.6928887367248535, "eval_logits": -1.9892430305480957, "eval_logps": -98.45298767089844, "eval_loss": 0.008893881924450397, "eval_objective": 0.008952551521360874, "eval_ranking_idealized": 0.6022099256515503, "eval_ranking_idealized_expo": 0.5207182168960571, "eval_ranking_simple": 0.5179557800292969, "eval_regularize": 0.008950230665504932, "eval_runtime": 445.189, "eval_samples_per_second": 13.006, "eval_steps_per_second": 3.253, "step": 1950 }, { "dpo_loss": 0.6905800104141235, "epoch": 1.847098653649319, "grad_norm": 62.33667060485866, "learning_rate": 1.7538890459131094e-09, "logits": -2.0535969734191895, "logps": -92.17353057861328, "loss": 0.0125, "objective": 0.0103539377450943, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5333333611488342, "regularize": 0.010349558666348457, "step": 1955 }, { "dpo_loss": 0.6896222233772278, "epoch": 1.8518226911266829, "grad_norm": 63.19798951059373, "learning_rate": 1.647236317659423e-09, "logits": -2.209256172180176, "logps": -90.65841674804688, "loss": 0.0117, "objective": 0.012652536854147911, "ranking_idealized": 0.699999988079071, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.5833333134651184, "regularize": 0.012651127763092518, "step": 1960 }, { "dpo_loss": 0.6895377039909363, "epoch": 1.856546728604047, "grad_norm": 72.48024831454246, "learning_rate": 1.5438745653263086e-09, "logits": -2.0985636711120605, "logps": -91.15563201904297, "loss": 0.0112, "objective": 0.009469871409237385, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.5833333134651184, "regularize": 0.009467961266636848, "step": 1965 }, { "dpo_loss": 0.6904885172843933, "epoch": 1.861270766081411, "grad_norm": 58.60723650611681, "learning_rate": 1.4438108239002322e-09, "logits": -2.138218641281128, "logps": -93.22808074951172, "loss": 0.013, "objective": 0.013735007494688034, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5166666507720947, "regularize": 0.01373241189867258, "step": 1970 }, { "dpo_loss": 0.6905199885368347, "epoch": 1.8659948035587748, "grad_norm": 75.78083265362716, "learning_rate": 1.3470519038991268e-09, "logits": -2.1427910327911377, "logps": -94.4820556640625, "loss": 0.0133, "objective": 0.016799870878458023, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.5666666626930237, "regularize": 0.016797440126538277, "step": 1975 }, { "dpo_loss": 0.692724347114563, "epoch": 1.8707188410361388, "grad_norm": 67.62639102220639, "learning_rate": 1.253604390908819e-09, "logits": -2.068427562713623, "logps": -91.92679595947266, "loss": 0.0118, "objective": 0.012069400399923325, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.5833333134651184, "regularize": 0.012066869996488094, "step": 1980 }, { "dpo_loss": 0.6905953288078308, "epoch": 1.8754428785135029, "grad_norm": 67.47097990851285, "learning_rate": 1.1634746451348487e-09, "logits": -2.1265487670898438, "logps": -91.03251647949219, "loss": 0.0117, "objective": 0.007730665151029825, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.4833333194255829, "regularize": 0.007728379685431719, "step": 1985 }, { "dpo_loss": 0.6893682479858398, "epoch": 1.8801669159908667, "grad_norm": 66.59499294449671, "learning_rate": 1.0766688009695545e-09, "logits": -2.200402021408081, "logps": -91.0616226196289, "loss": 0.0121, "objective": 0.011565645225346088, "ranking_idealized": 0.699999988079071, "ranking_idealized_expo": 0.6499999761581421, "ranking_simple": 0.6499999761581421, "regularize": 0.01156419888138771, "step": 1990 }, { "dpo_loss": 0.6916558742523193, "epoch": 1.884890953468231, "grad_norm": 59.74243706815504, "learning_rate": 9.931927665745521e-10, "logits": -2.1524574756622314, "logps": -90.13037109375, "loss": 0.0111, "objective": 0.012568699195981026, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.4000000059604645, "ranking_simple": 0.4000000059604645, "regularize": 0.012566999532282352, "step": 1995 }, { "dpo_loss": 0.6858018636703491, "epoch": 1.8896149909455948, "grad_norm": 67.18519308744443, "learning_rate": 9.130522234786498e-10, "logits": -2.1586594581604004, "logps": -87.96702575683594, "loss": 0.012, "objective": 0.01719100959599018, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.5666666626930237, "regularize": 0.017189564183354378, "step": 2000 }, { "epoch": 1.8896149909455948, "eval_dpo_loss": 0.6928969025611877, "eval_logits": -1.9894063472747803, "eval_logps": -98.4583511352539, "eval_loss": 0.008732160553336143, "eval_objective": 0.008810975588858128, "eval_ranking_idealized": 0.6022099256515503, "eval_ranking_idealized_expo": 0.5207182168960571, "eval_ranking_simple": 0.5179557800292969, "eval_regularize": 0.008808653801679611, "eval_runtime": 446.6812, "eval_samples_per_second": 12.962, "eval_steps_per_second": 3.242, "step": 2000 }, { "dpo_loss": 0.6894116997718811, "epoch": 1.8943390284229589, "grad_norm": 63.88750093236265, "learning_rate": 8.36252626191103e-10, "logits": -2.1512064933776855, "logps": -90.55884552001953, "loss": 0.0112, "objective": 0.011560056358575821, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.4333333373069763, "ranking_simple": 0.4333333373069763, "regularize": 0.011556769721210003, "step": 2005 }, { "dpo_loss": 0.691102147102356, "epoch": 1.899063065900323, "grad_norm": 66.23415561126623, "learning_rate": 7.627992018304163e-10, "logits": -2.1225428581237793, "logps": -90.3371810913086, "loss": 0.012, "objective": 0.009777167811989784, "ranking_idealized": 0.5333333611488342, "ranking_idealized_expo": 0.38333332538604736, "ranking_simple": 0.38333332538604736, "regularize": 0.009775066748261452, "step": 2010 }, { "dpo_loss": 0.6881377100944519, "epoch": 1.9037871033776868, "grad_norm": 69.19769212044956, "learning_rate": 6.926969497685397e-10, "logits": -2.1484780311584473, "logps": -90.53960418701172, "loss": 0.0128, "objective": 0.011675039306282997, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5166666507720947, "regularize": 0.01167262066155672, "step": 2015 }, { "dpo_loss": 0.6911852955818176, "epoch": 1.9085111408550508, "grad_norm": 54.24867081294276, "learning_rate": 6.259506412906401e-10, "logits": -2.036067008972168, "logps": -94.92063903808594, "loss": 0.0114, "objective": 0.011328586377203465, "ranking_idealized": 0.4833333194255829, "ranking_idealized_expo": 0.44999998807907104, "ranking_simple": 0.44999998807907104, "regularize": 0.011327385902404785, "step": 2020 }, { "dpo_loss": 0.6897438168525696, "epoch": 1.9132351783324149, "grad_norm": 66.21589409339711, "learning_rate": 5.625648192703114e-10, "logits": -2.100722074508667, "logps": -88.59317779541016, "loss": 0.0111, "objective": 0.01007751189172268, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.550000011920929, "regularize": 0.010075613856315613, "step": 2025 }, { "dpo_loss": 0.6879320740699768, "epoch": 1.9179592158097787, "grad_norm": 97.2021444500617, "learning_rate": 5.025437978604219e-10, "logits": -2.174182653427124, "logps": -98.01433563232422, "loss": 0.0128, "objective": 0.015440816059708595, "ranking_idealized": 0.699999988079071, "ranking_idealized_expo": 0.6333333253860474, "ranking_simple": 0.6333333253860474, "regularize": 0.01543727982789278, "step": 2030 }, { "dpo_loss": 0.6889265775680542, "epoch": 1.9226832532871427, "grad_norm": 60.36698105450304, "learning_rate": 4.458916621994713e-10, "logits": -2.1146934032440186, "logps": -96.32176208496094, "loss": 0.013, "objective": 0.012396620586514473, "ranking_idealized": 0.7833333611488342, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.5833333134651184, "regularize": 0.012394459918141365, "step": 2035 }, { "dpo_loss": 0.6921891570091248, "epoch": 1.9274072907645068, "grad_norm": 62.70952327603329, "learning_rate": 3.9261226813353533e-10, "logits": -2.2713499069213867, "logps": -94.95305633544922, "loss": 0.0111, "objective": 0.012650835327804089, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5166666507720947, "regularize": 0.012648210860788822, "step": 2040 }, { "dpo_loss": 0.6918179988861084, "epoch": 1.9321313282418706, "grad_norm": 60.71087366165103, "learning_rate": 3.4270924195384246e-10, "logits": -2.072065830230713, "logps": -93.6068344116211, "loss": 0.0112, "objective": 0.010315236635506153, "ranking_idealized": 0.5333333611488342, "ranking_idealized_expo": 0.46666666865348816, "ranking_simple": 0.46666666865348816, "regularize": 0.010312527418136597, "step": 2045 }, { "dpo_loss": 0.6912323236465454, "epoch": 1.9368553657192347, "grad_norm": 66.0225333978305, "learning_rate": 2.9618598014997107e-10, "logits": -2.1574909687042236, "logps": -93.39547729492188, "loss": 0.0119, "objective": 0.012261408381164074, "ranking_idealized": 0.5, "ranking_idealized_expo": 0.4166666567325592, "ranking_simple": 0.4166666567325592, "regularize": 0.012259239330887794, "step": 2050 }, { "epoch": 1.9368553657192347, "eval_dpo_loss": 0.6928916573524475, "eval_logits": -1.989404320716858, "eval_logps": -98.45708465576172, "eval_loss": 0.008758697658777237, "eval_objective": 0.008833469823002815, "eval_ranking_idealized": 0.6022099256515503, "eval_ranking_idealized_expo": 0.5207182168960571, "eval_ranking_simple": 0.5179557800292969, "eval_regularize": 0.008830932900309563, "eval_runtime": 446.0684, "eval_samples_per_second": 12.98, "eval_steps_per_second": 3.246, "step": 2050 }, { "dpo_loss": 0.6905261874198914, "epoch": 1.9415794031965987, "grad_norm": 72.43113470913532, "learning_rate": 2.5304564917865145e-10, "logits": -2.20497465133667, "logps": -93.12974548339844, "loss": 0.0106, "objective": 0.011251457966864109, "ranking_idealized": 0.7333333492279053, "ranking_idealized_expo": 0.6833333373069763, "ranking_simple": 0.6833333373069763, "regularize": 0.011249854229390621, "step": 2055 }, { "dpo_loss": 0.6920349597930908, "epoch": 1.9463034406739625, "grad_norm": 63.18535597705442, "learning_rate": 2.132911852482766e-10, "logits": -2.2412173748016357, "logps": -90.34603881835938, "loss": 0.0125, "objective": 0.013630078174173832, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.550000011920929, "regularize": 0.013628335669636726, "step": 2060 }, { "dpo_loss": 0.691752016544342, "epoch": 1.9510274781513268, "grad_norm": 69.39489329530257, "learning_rate": 1.7692529411904578e-10, "logits": -2.248879909515381, "logps": -96.89351654052734, "loss": 0.012, "objective": 0.011724698357284069, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.5666666626930237, "regularize": 0.011722984723746777, "step": 2065 }, { "dpo_loss": 0.6898305416107178, "epoch": 1.9557515156286907, "grad_norm": 64.01320477510419, "learning_rate": 1.4395045091880608e-10, "logits": -2.133453845977783, "logps": -89.6153564453125, "loss": 0.0122, "objective": 0.013158326968550682, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5333333611488342, "regularize": 0.013156161643564701, "step": 2070 }, { "dpo_loss": 0.6894416213035583, "epoch": 1.9604755531060545, "grad_norm": 70.45537055239048, "learning_rate": 1.1436889997460397e-10, "logits": -2.1818015575408936, "logps": -87.75623321533203, "loss": 0.0122, "objective": 0.01147166732698679, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5333333611488342, "regularize": 0.0114695830270648, "step": 2075 }, { "dpo_loss": 0.689843475818634, "epoch": 1.9651995905834188, "grad_norm": 72.42995706576724, "learning_rate": 8.818265465991292e-11, "logits": -2.1221158504486084, "logps": -92.03446960449219, "loss": 0.0116, "objective": 0.01287260465323925, "ranking_idealized": 0.6666666865348816, "ranking_idealized_expo": 0.6000000238418579, "ranking_simple": 0.6000000238418579, "regularize": 0.012871033512055874, "step": 2080 }, { "dpo_loss": 0.6901150345802307, "epoch": 1.9699236280607826, "grad_norm": 67.99462621349231, "learning_rate": 6.539349725760423e-11, "logits": -2.2134578227996826, "logps": -94.01133728027344, "loss": 0.0138, "objective": 0.014971112832427025, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.5666666626930237, "regularize": 0.014969981275498867, "step": 2085 }, { "dpo_loss": 0.6904923915863037, "epoch": 1.9746476655381466, "grad_norm": 64.88891143293061, "learning_rate": 4.600297883866067e-11, "logits": -2.1357476711273193, "logps": -88.28942108154297, "loss": 0.0103, "objective": 0.008000458590686321, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.00799502618610859, "step": 2090 }, { "dpo_loss": 0.691422700881958, "epoch": 1.9793717030155107, "grad_norm": 59.76257934662463, "learning_rate": 3.0012419156572044e-11, "logits": -2.106537103652954, "logps": -92.17442321777344, "loss": 0.0128, "objective": 0.012720795348286629, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.5666666626930237, "regularize": 0.0127179604023695, "step": 2095 }, { "dpo_loss": 0.6881771087646484, "epoch": 1.9840957404928745, "grad_norm": 65.98055833646369, "learning_rate": 1.7422906557557073e-11, "logits": -2.096862316131592, "logps": -96.3370132446289, "loss": 0.0116, "objective": 0.01271964143961668, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.4833333194255829, "regularize": 0.01271754689514637, "step": 2100 }, { "epoch": 1.9840957404928745, "eval_dpo_loss": 0.6928920149803162, "eval_logits": -1.9894039630889893, "eval_logps": -98.45726013183594, "eval_loss": 0.008759252727031708, "eval_objective": 0.008833796717226505, "eval_ranking_idealized": 0.6022099256515503, "eval_ranking_idealized_expo": 0.5207182168960571, "eval_ranking_simple": 0.5179557800292969, "eval_regularize": 0.008831293322145939, "eval_runtime": 446.6316, "eval_samples_per_second": 12.964, "eval_steps_per_second": 3.242, "step": 2100 }, { "dpo_loss": 0.69146329164505, "epoch": 1.9888197779702386, "grad_norm": 64.49594040581191, "learning_rate": 8.235297906444837e-12, "logits": -2.149221181869507, "logps": -88.66574096679688, "loss": 0.0128, "objective": 0.01396742183715105, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.013964297249913216, "step": 2105 }, { "dpo_loss": 0.6907679438591003, "epoch": 1.9935438154476026, "grad_norm": 70.66784622308231, "learning_rate": 2.450218528377013e-12, "logits": -2.137131452560425, "logps": -93.39989471435547, "loss": 0.0112, "objective": 0.010235412046313286, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.46666666865348816, "regularize": 0.010233612731099129, "step": 2110 }, { "dpo_loss": 0.6910449266433716, "epoch": 1.9982678529249664, "grad_norm": 60.167547577945186, "learning_rate": 6.806216624188899e-14, "logits": -2.124387502670288, "logps": -93.0544204711914, "loss": 0.014, "objective": 0.009751829318702221, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.4833333194255829, "regularize": 0.009749443270266056, "step": 2115 }, { "epoch": 1.9992126604204392, "step": 2116, "total_flos": 0.0, "train_loss": 0.020640970417914562, "train_runtime": 38688.7685, "train_samples_per_second": 2.626, "train_steps_per_second": 0.055 } ], "logging_steps": 5, "max_steps": 2116, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }