qwen2.5-0.5b-expo-L1EXPO / trainer_state.json
hZzy's picture
Model save
145aae2 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.9992126604204392,
"eval_steps": 50,
"global_step": 2116,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"dpo_loss": 0.6931471824645996,
"epoch": 0.0009448074954727974,
"grad_norm": 66.46447760146653,
"learning_rate": 4.716981132075471e-10,
"logits": -2.096372604370117,
"logps": -90.695556640625,
"loss": 0.0053,
"objective": 0.004870629869401455,
"ranking_idealized": 0.5,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5,
"regularize": 0.004867264535278082,
"step": 1
},
{
"dpo_loss": 0.6931397914886475,
"epoch": 0.004724037477363987,
"grad_norm": 73.1061214549564,
"learning_rate": 2.358490566037736e-09,
"logits": -2.2148256301879883,
"logps": -89.18419647216797,
"loss": 0.0056,
"objective": 0.006199519615620375,
"ranking_idealized": 0.5,
"ranking_idealized_expo": 0.3958333432674408,
"ranking_simple": 0.3958333432674408,
"regularize": 0.0061979773454368114,
"step": 5
},
{
"dpo_loss": 0.6931113600730896,
"epoch": 0.009448074954727975,
"grad_norm": 66.26130454557787,
"learning_rate": 4.716981132075472e-09,
"logits": -2.1163227558135986,
"logps": -90.67784118652344,
"loss": 0.0056,
"objective": 0.0063161361031234264,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.4166666567325592,
"ranking_simple": 0.4166666567325592,
"regularize": 0.006314346566796303,
"step": 10
},
{
"dpo_loss": 0.6930894255638123,
"epoch": 0.014172112432091962,
"grad_norm": 65.86685369935576,
"learning_rate": 7.075471698113207e-09,
"logits": -2.0237159729003906,
"logps": -88.98513793945312,
"loss": 0.0052,
"objective": 0.004170234780758619,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5166666507720947,
"regularize": 0.004166428931057453,
"step": 15
},
{
"dpo_loss": 0.6932118535041809,
"epoch": 0.01889614990945595,
"grad_norm": 67.08170030108543,
"learning_rate": 9.433962264150943e-09,
"logits": -2.220696449279785,
"logps": -91.20802307128906,
"loss": 0.0056,
"objective": 0.0056175715290009975,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.46666666865348816,
"regularize": 0.00561108160763979,
"step": 20
},
{
"dpo_loss": 0.6930363774299622,
"epoch": 0.023620187386819935,
"grad_norm": 64.75938875868191,
"learning_rate": 1.1792452830188679e-08,
"logits": -2.184110164642334,
"logps": -93.84049224853516,
"loss": 0.005,
"objective": 0.005908097140491009,
"ranking_idealized": 0.699999988079071,
"ranking_idealized_expo": 0.6166666746139526,
"ranking_simple": 0.6166666746139526,
"regularize": 0.0059060631319880486,
"step": 25
},
{
"dpo_loss": 0.6930921673774719,
"epoch": 0.028344224864183924,
"grad_norm": 70.85746649835617,
"learning_rate": 1.4150943396226414e-08,
"logits": -2.048970937728882,
"logps": -92.22063446044922,
"loss": 0.0062,
"objective": 0.006645068060606718,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5,
"regularize": 0.0066424161195755005,
"step": 30
},
{
"dpo_loss": 0.6927813291549683,
"epoch": 0.03306826234154791,
"grad_norm": 69.39212454087999,
"learning_rate": 1.6509433962264148e-08,
"logits": -2.164494276046753,
"logps": -90.56898498535156,
"loss": 0.006,
"objective": 0.0044576446525752544,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.5666666626930237,
"regularize": 0.004453611560165882,
"step": 35
},
{
"dpo_loss": 0.6927517056465149,
"epoch": 0.0377922998189119,
"grad_norm": 79.75758352153623,
"learning_rate": 1.8867924528301887e-08,
"logits": -2.2105553150177,
"logps": -94.57971954345703,
"loss": 0.0058,
"objective": 0.004820433910936117,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.5833333134651184,
"regularize": 0.004817112348973751,
"step": 40
},
{
"dpo_loss": 0.6930319666862488,
"epoch": 0.04251633729627589,
"grad_norm": 64.62991156662962,
"learning_rate": 2.1226415094339622e-08,
"logits": -2.149827241897583,
"logps": -90.14302062988281,
"loss": 0.0057,
"objective": 0.004925203043967485,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.4833333194255829,
"regularize": 0.004921570885926485,
"step": 45
},
{
"dpo_loss": 0.692544162273407,
"epoch": 0.04724037477363987,
"grad_norm": 68.06296242846598,
"learning_rate": 2.3584905660377358e-08,
"logits": -2.1563477516174316,
"logps": -91.66796875,
"loss": 0.006,
"objective": 0.006787313614040613,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.550000011920929,
"regularize": 0.006785000674426556,
"step": 50
},
{
"epoch": 0.04724037477363987,
"eval_dpo_loss": 0.6929724812507629,
"eval_logits": -1.9958003759384155,
"eval_logps": -98.61515808105469,
"eval_loss": 0.0059888348914682865,
"eval_objective": 0.006077947095036507,
"eval_ranking_idealized": 0.6022099256515503,
"eval_ranking_idealized_expo": 0.5207182168960571,
"eval_ranking_simple": 0.5179557800292969,
"eval_regularize": 0.006075054872781038,
"eval_runtime": 445.1462,
"eval_samples_per_second": 13.007,
"eval_steps_per_second": 3.253,
"step": 50
},
{
"dpo_loss": 0.6929494738578796,
"epoch": 0.05196441225100386,
"grad_norm": 73.41432282855463,
"learning_rate": 2.5943396226415093e-08,
"logits": -2.1937828063964844,
"logps": -93.48242950439453,
"loss": 0.006,
"objective": 0.005628067534416914,
"ranking_idealized": 0.46666666865348816,
"ranking_idealized_expo": 0.4166666567325592,
"ranking_simple": 0.4166666567325592,
"regularize": 0.005625530146062374,
"step": 55
},
{
"dpo_loss": 0.692658543586731,
"epoch": 0.05668844972836785,
"grad_norm": 65.33336896476966,
"learning_rate": 2.830188679245283e-08,
"logits": -2.2241933345794678,
"logps": -89.14469146728516,
"loss": 0.0058,
"objective": 0.004998047836124897,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.6166666746139526,
"ranking_simple": 0.6166666746139526,
"regularize": 0.004994309972971678,
"step": 60
},
{
"dpo_loss": 0.692764163017273,
"epoch": 0.06141248720573183,
"grad_norm": 66.55102685698147,
"learning_rate": 3.0660377358490564e-08,
"logits": -2.2217800617218018,
"logps": -89.03886413574219,
"loss": 0.0074,
"objective": 0.0076709226705133915,
"ranking_idealized": 0.5,
"ranking_idealized_expo": 0.38333332538604736,
"ranking_simple": 0.38333332538604736,
"regularize": 0.007668651174753904,
"step": 65
},
{
"dpo_loss": 0.6925709843635559,
"epoch": 0.06613652468309582,
"grad_norm": 75.22580153097067,
"learning_rate": 3.3018867924528296e-08,
"logits": -2.1472768783569336,
"logps": -94.85504913330078,
"loss": 0.0076,
"objective": 0.007108477409929037,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5166666507720947,
"regularize": 0.007105442229658365,
"step": 70
},
{
"dpo_loss": 0.6925690770149231,
"epoch": 0.0708605621604598,
"grad_norm": 79.40443090905357,
"learning_rate": 3.5377358490566035e-08,
"logits": -2.206367015838623,
"logps": -92.62692260742188,
"loss": 0.0071,
"objective": 0.007145268842577934,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5166666507720947,
"regularize": 0.00714282738044858,
"step": 75
},
{
"dpo_loss": 0.6921057105064392,
"epoch": 0.0755845996378238,
"grad_norm": 76.34709727963714,
"learning_rate": 3.7735849056603774e-08,
"logits": -2.1775050163269043,
"logps": -94.01787567138672,
"loss": 0.0066,
"objective": 0.006093442440032959,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.5666666626930237,
"regularize": 0.006089916918426752,
"step": 80
},
{
"dpo_loss": 0.6927501559257507,
"epoch": 0.08030863711518778,
"grad_norm": 76.24460999547686,
"learning_rate": 4.009433962264151e-08,
"logits": -2.1956326961517334,
"logps": -92.5903091430664,
"loss": 0.0069,
"objective": 0.0067410701885819435,
"ranking_idealized": 0.699999988079071,
"ranking_idealized_expo": 0.6000000238418579,
"ranking_simple": 0.6000000238418579,
"regularize": 0.006739132571965456,
"step": 85
},
{
"dpo_loss": 0.6902840733528137,
"epoch": 0.08503267459255177,
"grad_norm": 65.10729628861057,
"learning_rate": 4.2452830188679244e-08,
"logits": -2.2021093368530273,
"logps": -96.31147766113281,
"loss": 0.0075,
"objective": 0.008030267432332039,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.5833333134651184,
"regularize": 0.008029019460082054,
"step": 90
},
{
"dpo_loss": 0.6926410794258118,
"epoch": 0.08975671206991576,
"grad_norm": 71.92277477052832,
"learning_rate": 4.481132075471698e-08,
"logits": -2.1148061752319336,
"logps": -89.49421691894531,
"loss": 0.007,
"objective": 0.0068625533021986485,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.550000011920929,
"regularize": 0.006859814748167992,
"step": 95
},
{
"dpo_loss": 0.6942407488822937,
"epoch": 0.09448074954727974,
"grad_norm": 65.63913294191785,
"learning_rate": 4.7169811320754715e-08,
"logits": -2.2743189334869385,
"logps": -94.41613006591797,
"loss": 0.0092,
"objective": 0.012641828507184982,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.5333333611488342,
"regularize": 0.012639058753848076,
"step": 100
},
{
"epoch": 0.09448074954727974,
"eval_dpo_loss": 0.6929203867912292,
"eval_logits": -1.995413899421692,
"eval_logps": -98.7889404296875,
"eval_loss": 0.00729329651221633,
"eval_objective": 0.00731161143630743,
"eval_ranking_idealized": 0.6022099256515503,
"eval_ranking_idealized_expo": 0.5207182168960571,
"eval_ranking_simple": 0.5186464190483093,
"eval_regularize": 0.00730866938829422,
"eval_runtime": 447.0397,
"eval_samples_per_second": 12.952,
"eval_steps_per_second": 3.239,
"step": 100
},
{
"dpo_loss": 0.6924384832382202,
"epoch": 0.09920478702464373,
"grad_norm": 67.55366715626052,
"learning_rate": 4.9528301886792454e-08,
"logits": -2.088653802871704,
"logps": -92.43064880371094,
"loss": 0.009,
"objective": 0.00849145371466875,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5166666507720947,
"regularize": 0.008488119579851627,
"step": 105
},
{
"dpo_loss": 0.6923626661300659,
"epoch": 0.10392882450200772,
"grad_norm": 68.4026236372278,
"learning_rate": 5.1886792452830186e-08,
"logits": -2.1796536445617676,
"logps": -91.34872436523438,
"loss": 0.0083,
"objective": 0.0080822529271245,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.4833333194255829,
"regularize": 0.008080901578068733,
"step": 110
},
{
"dpo_loss": 0.6901246309280396,
"epoch": 0.1086528619793717,
"grad_norm": 64.06144820478669,
"learning_rate": 5.4245283018867925e-08,
"logits": -2.1858596801757812,
"logps": -89.73294830322266,
"loss": 0.0097,
"objective": 0.013094036839902401,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.46666666865348816,
"regularize": 0.013092008419334888,
"step": 115
},
{
"dpo_loss": 0.6923102736473083,
"epoch": 0.1133768994567357,
"grad_norm": 61.237648061668985,
"learning_rate": 5.660377358490566e-08,
"logits": -2.1982421875,
"logps": -94.54019927978516,
"loss": 0.0085,
"objective": 0.008155700750648975,
"ranking_idealized": 0.7166666388511658,
"ranking_idealized_expo": 0.6666666865348816,
"ranking_simple": 0.6666666865348816,
"regularize": 0.00815290305763483,
"step": 120
},
{
"dpo_loss": 0.6917387247085571,
"epoch": 0.11810093693409968,
"grad_norm": 73.42929721502428,
"learning_rate": 5.8962264150943396e-08,
"logits": -2.1935439109802246,
"logps": -95.18013000488281,
"loss": 0.0103,
"objective": 0.010648042894899845,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.4833333194255829,
"regularize": 0.010645588859915733,
"step": 125
},
{
"dpo_loss": 0.6919466853141785,
"epoch": 0.12282497441146366,
"grad_norm": 68.99586341226502,
"learning_rate": 6.132075471698113e-08,
"logits": -2.1257071495056152,
"logps": -97.63853454589844,
"loss": 0.0103,
"objective": 0.009905511513352394,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.6000000238418579,
"ranking_simple": 0.6000000238418579,
"regularize": 0.009904048405587673,
"step": 130
},
{
"dpo_loss": 0.6923593878746033,
"epoch": 0.12754901188882764,
"grad_norm": 56.22865998140236,
"learning_rate": 6.367924528301887e-08,
"logits": -2.1036369800567627,
"logps": -91.52125549316406,
"loss": 0.0098,
"objective": 0.009057187475264072,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.46666666865348816,
"regularize": 0.009054058231413364,
"step": 135
},
{
"dpo_loss": 0.6894025206565857,
"epoch": 0.13227304936619164,
"grad_norm": 64.3061621244485,
"learning_rate": 6.603773584905659e-08,
"logits": -2.246990442276001,
"logps": -92.88211059570312,
"loss": 0.0113,
"objective": 0.011589973233640194,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5333333611488342,
"regularize": 0.011587386019527912,
"step": 140
},
{
"dpo_loss": 0.6934190988540649,
"epoch": 0.13699708684355563,
"grad_norm": 74.71610410718624,
"learning_rate": 6.839622641509434e-08,
"logits": -2.102886199951172,
"logps": -90.7693099975586,
"loss": 0.0123,
"objective": 0.00984902959316969,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.4833333194255829,
"regularize": 0.009846380911767483,
"step": 145
},
{
"dpo_loss": 0.6908667087554932,
"epoch": 0.1417211243209196,
"grad_norm": 76.42928931969817,
"learning_rate": 7.075471698113207e-08,
"logits": -2.238095998764038,
"logps": -94.04148864746094,
"loss": 0.0142,
"objective": 0.014535349793732166,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5166666507720947,
"regularize": 0.014532845467329025,
"step": 150
},
{
"epoch": 0.1417211243209196,
"eval_dpo_loss": 0.693015992641449,
"eval_logits": -1.9985584020614624,
"eval_logps": -98.66200256347656,
"eval_loss": 0.009166295640170574,
"eval_objective": 0.00932803563773632,
"eval_ranking_idealized": 0.6022099256515503,
"eval_ranking_idealized_expo": 0.5207182168960571,
"eval_ranking_simple": 0.5186464190483093,
"eval_regularize": 0.009325726889073849,
"eval_runtime": 446.6869,
"eval_samples_per_second": 12.962,
"eval_steps_per_second": 3.242,
"step": 150
},
{
"dpo_loss": 0.6918679475784302,
"epoch": 0.1464451617982836,
"grad_norm": 80.89548200084305,
"learning_rate": 7.311320754716981e-08,
"logits": -2.1368908882141113,
"logps": -90.76949310302734,
"loss": 0.0131,
"objective": 0.011844370514154434,
"ranking_idealized": 0.699999988079071,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.5833333134651184,
"regularize": 0.011842181906104088,
"step": 155
},
{
"dpo_loss": 0.6904442310333252,
"epoch": 0.1511691992756476,
"grad_norm": 70.5889289835053,
"learning_rate": 7.547169811320755e-08,
"logits": -2.216298818588257,
"logps": -94.7828140258789,
"loss": 0.0132,
"objective": 0.012807334773242474,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5,
"regularize": 0.012806250713765621,
"step": 160
},
{
"dpo_loss": 0.6893646121025085,
"epoch": 0.15589323675301156,
"grad_norm": 70.53443315770147,
"learning_rate": 7.783018867924527e-08,
"logits": -2.2371225357055664,
"logps": -91.81126403808594,
"loss": 0.0123,
"objective": 0.014454828575253487,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.46666666865348816,
"regularize": 0.01445354800671339,
"step": 165
},
{
"dpo_loss": 0.6928521990776062,
"epoch": 0.16061727423037556,
"grad_norm": 78.26059021460965,
"learning_rate": 8.018867924528302e-08,
"logits": -2.215101480484009,
"logps": -92.55703735351562,
"loss": 0.0141,
"objective": 0.012299363501369953,
"ranking_idealized": 0.699999988079071,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.5833333134651184,
"regularize": 0.012298095040023327,
"step": 170
},
{
"dpo_loss": 0.6916810274124146,
"epoch": 0.16534131170773955,
"grad_norm": 62.838329994801875,
"learning_rate": 8.254716981132075e-08,
"logits": -2.1370534896850586,
"logps": -91.3881607055664,
"loss": 0.014,
"objective": 0.015395297668874264,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.5666666626930237,
"regularize": 0.015393855981528759,
"step": 175
},
{
"dpo_loss": 0.6918050050735474,
"epoch": 0.17006534918510355,
"grad_norm": 73.17909568701033,
"learning_rate": 8.490566037735849e-08,
"logits": -2.281663417816162,
"logps": -94.94965362548828,
"loss": 0.0138,
"objective": 0.017136668786406517,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5166666507720947,
"regularize": 0.017135154455900192,
"step": 180
},
{
"dpo_loss": 0.69007408618927,
"epoch": 0.17478938666246752,
"grad_norm": 62.37985427959411,
"learning_rate": 8.726415094339621e-08,
"logits": -2.1940929889678955,
"logps": -93.47923278808594,
"loss": 0.0159,
"objective": 0.017906082794070244,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5166666507720947,
"regularize": 0.017902227118611336,
"step": 185
},
{
"dpo_loss": 0.6921120882034302,
"epoch": 0.1795134241398315,
"grad_norm": 70.11549174821204,
"learning_rate": 8.962264150943397e-08,
"logits": -2.1571404933929443,
"logps": -87.7894287109375,
"loss": 0.0144,
"objective": 0.01052508968859911,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5166666507720947,
"regularize": 0.010523774661123753,
"step": 190
},
{
"dpo_loss": 0.6912659406661987,
"epoch": 0.1842374616171955,
"grad_norm": 78.18173910687094,
"learning_rate": 9.198113207547169e-08,
"logits": -2.1662251949310303,
"logps": -90.77739715576172,
"loss": 0.0139,
"objective": 0.01253608800470829,
"ranking_idealized": 0.7166666388511658,
"ranking_idealized_expo": 0.6000000238418579,
"ranking_simple": 0.6000000238418579,
"regularize": 0.012534084729850292,
"step": 195
},
{
"dpo_loss": 0.6948674321174622,
"epoch": 0.18896149909455948,
"grad_norm": 74.9573189190215,
"learning_rate": 9.433962264150943e-08,
"logits": -2.1426119804382324,
"logps": -93.58480072021484,
"loss": 0.0173,
"objective": 0.018598254770040512,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5,
"regularize": 0.018596837297081947,
"step": 200
},
{
"epoch": 0.18896149909455948,
"eval_dpo_loss": 0.6928583383560181,
"eval_logits": -1.9957162141799927,
"eval_logps": -98.79460144042969,
"eval_loss": 0.009674900211393833,
"eval_objective": 0.009766080416738987,
"eval_ranking_idealized": 0.6022099256515503,
"eval_ranking_idealized_expo": 0.5207182168960571,
"eval_ranking_simple": 0.5172652006149292,
"eval_regularize": 0.009763876907527447,
"eval_runtime": 457.4524,
"eval_samples_per_second": 12.657,
"eval_steps_per_second": 3.165,
"step": 200
},
{
"dpo_loss": 0.6902305483818054,
"epoch": 0.19368553657192347,
"grad_norm": 69.58780380589624,
"learning_rate": 9.669811320754716e-08,
"logits": -2.137186288833618,
"logps": -90.56549835205078,
"loss": 0.0203,
"objective": 0.016451861709356308,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5,
"regularize": 0.016451073810458183,
"step": 205
},
{
"dpo_loss": 0.693261444568634,
"epoch": 0.19840957404928747,
"grad_norm": 62.13478304132868,
"learning_rate": 9.905660377358491e-08,
"logits": -2.0685033798217773,
"logps": -92.36515045166016,
"loss": 0.0198,
"objective": 0.02386774867773056,
"ranking_idealized": 0.5166666507720947,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.44999998807907104,
"regularize": 0.023865588009357452,
"step": 210
},
{
"dpo_loss": 0.6884204149246216,
"epoch": 0.20313361152665144,
"grad_norm": 60.854335858242926,
"learning_rate": 9.999938744161562e-08,
"logits": -2.1262550354003906,
"logps": -92.82433319091797,
"loss": 0.0173,
"objective": 0.020975453779101372,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.5833333134651184,
"regularize": 0.020974494516849518,
"step": 215
},
{
"dpo_loss": 0.6918230652809143,
"epoch": 0.20785764900401543,
"grad_norm": 67.33088424835829,
"learning_rate": 9.999564408362053e-08,
"logits": -2.169888734817505,
"logps": -88.9549331665039,
"loss": 0.0187,
"objective": 0.015909165143966675,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.4333333373069763,
"ranking_simple": 0.4333333373069763,
"regularize": 0.015907270833849907,
"step": 220
},
{
"dpo_loss": 0.689198911190033,
"epoch": 0.21258168648137943,
"grad_norm": 70.77403555695805,
"learning_rate": 9.998849793231472e-08,
"logits": -2.099949359893799,
"logps": -91.25509643554688,
"loss": 0.0218,
"objective": 0.02295403741300106,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.46666666865348816,
"regularize": 0.022952700033783913,
"step": 225
},
{
"dpo_loss": 0.6939520239830017,
"epoch": 0.2173057239587434,
"grad_norm": 80.6144373753939,
"learning_rate": 9.997794947407808e-08,
"logits": -2.1620826721191406,
"logps": -94.76692962646484,
"loss": 0.0239,
"objective": 0.026342039927840233,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5333333611488342,
"regularize": 0.026341309770941734,
"step": 230
},
{
"dpo_loss": 0.688434362411499,
"epoch": 0.2220297614361074,
"grad_norm": 62.83216394514196,
"learning_rate": 9.996399942685763e-08,
"logits": -2.2001922130584717,
"logps": -90.64398956298828,
"loss": 0.0197,
"objective": 0.02317703142762184,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.6000000238418579,
"ranking_simple": 0.6000000238418579,
"regularize": 0.023176301270723343,
"step": 235
},
{
"dpo_loss": 0.6894098520278931,
"epoch": 0.2267537989134714,
"grad_norm": 69.59337954721937,
"learning_rate": 9.994664874011862e-08,
"logits": -2.1389572620391846,
"logps": -91.78279113769531,
"loss": 0.0225,
"objective": 0.02215094491839409,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.46666666865348816,
"regularize": 0.02214915119111538,
"step": 240
},
{
"dpo_loss": 0.692762017250061,
"epoch": 0.23147783639083536,
"grad_norm": 70.45563918532531,
"learning_rate": 9.992589859477995e-08,
"logits": -2.0933754444122314,
"logps": -92.82108306884766,
"loss": 0.0215,
"objective": 0.02130296640098095,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.550000011920929,
"regularize": 0.021301595494151115,
"step": 245
},
{
"dpo_loss": 0.6907398104667664,
"epoch": 0.23620187386819935,
"grad_norm": 68.46129704284265,
"learning_rate": 9.990175040313376e-08,
"logits": -2.0800933837890625,
"logps": -88.17916870117188,
"loss": 0.0245,
"objective": 0.022766491398215294,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.5833333134651184,
"regularize": 0.02276558242738247,
"step": 250
},
{
"epoch": 0.23620187386819935,
"eval_dpo_loss": 0.6929402351379395,
"eval_logits": -1.995142936706543,
"eval_logps": -98.64155578613281,
"eval_loss": 0.012135702185332775,
"eval_objective": 0.012087649665772915,
"eval_ranking_idealized": 0.6022099256515503,
"eval_ranking_idealized_expo": 0.5207182168960571,
"eval_ranking_simple": 0.5186464190483093,
"eval_regularize": 0.012085951864719391,
"eval_runtime": 446.4062,
"eval_samples_per_second": 12.97,
"eval_steps_per_second": 3.244,
"step": 250
},
{
"dpo_loss": 0.6918343901634216,
"epoch": 0.24092591134556335,
"grad_norm": 67.22846019191091,
"learning_rate": 9.987420580874936e-08,
"logits": -2.116420269012451,
"logps": -90.83265686035156,
"loss": 0.0239,
"objective": 0.019956286996603012,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5666666626930237,
"regularize": 0.019955595955252647,
"step": 255
},
{
"dpo_loss": 0.688471257686615,
"epoch": 0.24564994882292732,
"grad_norm": 70.37729127367317,
"learning_rate": 9.98432666863613e-08,
"logits": -2.240382194519043,
"logps": -94.62246704101562,
"loss": 0.0228,
"objective": 0.026484496891498566,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5,
"regularize": 0.026483872905373573,
"step": 260
},
{
"dpo_loss": 0.6923092603683472,
"epoch": 0.25037398630029134,
"grad_norm": 66.24088589632522,
"learning_rate": 9.980893514174179e-08,
"logits": -2.2002015113830566,
"logps": -92.85660552978516,
"loss": 0.0214,
"objective": 0.016592005267739296,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5333333611488342,
"regularize": 0.01658981665968895,
"step": 265
},
{
"dpo_loss": 0.69212406873703,
"epoch": 0.2550980237776553,
"grad_norm": 67.74325606253207,
"learning_rate": 9.97712135115574e-08,
"logits": -2.1255643367767334,
"logps": -93.65802764892578,
"loss": 0.0234,
"objective": 0.029443560168147087,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.5666666626930237,
"regularize": 0.02944253757596016,
"step": 270
},
{
"dpo_loss": 0.6937362551689148,
"epoch": 0.2598220612550193,
"grad_norm": 62.77300536920228,
"learning_rate": 9.973010436321003e-08,
"logits": -2.1865429878234863,
"logps": -94.39256286621094,
"loss": 0.0231,
"objective": 0.024354156106710434,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5166666507720947,
"regularize": 0.024351568892598152,
"step": 275
},
{
"dpo_loss": 0.6910390853881836,
"epoch": 0.2645460987323833,
"grad_norm": 62.67099824882817,
"learning_rate": 9.968561049466213e-08,
"logits": -2.1923298835754395,
"logps": -90.44979095458984,
"loss": 0.0231,
"objective": 0.020880402997136116,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.44999998807907104,
"ranking_simple": 0.44999998807907104,
"regularize": 0.02087988331913948,
"step": 280
},
{
"dpo_loss": 0.6930695176124573,
"epoch": 0.26927013620974727,
"grad_norm": 87.62776016079746,
"learning_rate": 9.963773493424628e-08,
"logits": -2.2007596492767334,
"logps": -95.48867797851562,
"loss": 0.0265,
"objective": 0.028789160773158073,
"ranking_idealized": 0.5166666507720947,
"ranking_idealized_expo": 0.4000000059604645,
"ranking_simple": 0.4166666567325592,
"regularize": 0.028786195442080498,
"step": 285
},
{
"dpo_loss": 0.6869128346443176,
"epoch": 0.27399417368711126,
"grad_norm": 72.49597645901943,
"learning_rate": 9.95864809404591e-08,
"logits": -2.1374075412750244,
"logps": -93.5144271850586,
"loss": 0.0275,
"objective": 0.026990260928869247,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.5333333611488342,
"regularize": 0.026989614591002464,
"step": 290
},
{
"dpo_loss": 0.6920251250267029,
"epoch": 0.27871821116447526,
"grad_norm": 77.90764756547169,
"learning_rate": 9.953185200173945e-08,
"logits": -2.111963987350464,
"logps": -90.939453125,
"loss": 0.0274,
"objective": 0.022320417687296867,
"ranking_idealized": 0.5166666507720947,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.4833333194255829,
"regularize": 0.02231987938284874,
"step": 295
},
{
"dpo_loss": 0.6920836567878723,
"epoch": 0.2834422486418392,
"grad_norm": 68.10918361524686,
"learning_rate": 9.947385183623097e-08,
"logits": -2.213435173034668,
"logps": -91.73210906982422,
"loss": 0.0234,
"objective": 0.026671167463064194,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5166666507720947,
"regularize": 0.026670336723327637,
"step": 300
},
{
"epoch": 0.2834422486418392,
"eval_dpo_loss": 0.6931979060173035,
"eval_logits": -1.993965744972229,
"eval_logps": -98.33208465576172,
"eval_loss": 0.01361795049160719,
"eval_objective": 0.01397615671157837,
"eval_ranking_idealized": 0.6022099256515503,
"eval_ranking_idealized_expo": 0.5207182168960571,
"eval_ranking_simple": 0.5165745615959167,
"eval_regularize": 0.013974088244140148,
"eval_runtime": 447.4565,
"eval_samples_per_second": 12.94,
"eval_steps_per_second": 3.236,
"step": 300
},
{
"dpo_loss": 0.68857741355896,
"epoch": 0.2881662861192032,
"grad_norm": 68.87756454021408,
"learning_rate": 9.94124843915291e-08,
"logits": -2.1138675212860107,
"logps": -93.91316986083984,
"loss": 0.0266,
"objective": 0.03335392475128174,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5166666507720947,
"regularize": 0.03335336595773697,
"step": 305
},
{
"dpo_loss": 0.6887925863265991,
"epoch": 0.2928903235965672,
"grad_norm": 125.82123779225154,
"learning_rate": 9.934775384441227e-08,
"logits": -2.138413190841675,
"logps": -90.38550567626953,
"loss": 0.0265,
"objective": 0.021200962364673615,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.4833333194255829,
"regularize": 0.02119968645274639,
"step": 310
},
{
"dpo_loss": 0.6924371123313904,
"epoch": 0.2976143610739312,
"grad_norm": 64.16350361165954,
"learning_rate": 9.92796646005578e-08,
"logits": -2.1671297550201416,
"logps": -93.71112823486328,
"loss": 0.0255,
"objective": 0.027043061330914497,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.6333333253860474,
"ranking_simple": 0.6499999761581421,
"regularize": 0.027042122557759285,
"step": 315
},
{
"dpo_loss": 0.6908622980117798,
"epoch": 0.3023383985512952,
"grad_norm": 73.45877351161931,
"learning_rate": 9.920822129424189e-08,
"logits": -2.0810954570770264,
"logps": -93.48076629638672,
"loss": 0.0264,
"objective": 0.03161174803972244,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.4833333194255829,
"regularize": 0.03161115199327469,
"step": 320
},
{
"dpo_loss": 0.6928619742393494,
"epoch": 0.3070624360286592,
"grad_norm": 73.35574157285905,
"learning_rate": 9.913342878802423e-08,
"logits": -2.102128505706787,
"logps": -92.2950439453125,
"loss": 0.0286,
"objective": 0.037009891122579575,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.5666666626930237,
"regularize": 0.03700947389006615,
"step": 325
},
{
"dpo_loss": 0.6908413767814636,
"epoch": 0.3117864735060231,
"grad_norm": 79.4503768102059,
"learning_rate": 9.90552921724171e-08,
"logits": -2.2076501846313477,
"logps": -91.73526763916016,
"loss": 0.0276,
"objective": 0.020319262519478798,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.46666666865348816,
"regularize": 0.020317750051617622,
"step": 330
},
{
"dpo_loss": 0.6880075335502625,
"epoch": 0.3165105109833871,
"grad_norm": 82.61039711327662,
"learning_rate": 9.897381676553888e-08,
"logits": -2.177678346633911,
"logps": -91.5321044921875,
"loss": 0.0261,
"objective": 0.027510004118084908,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.5333333611488342,
"regularize": 0.027509503066539764,
"step": 335
},
{
"dpo_loss": 0.6954006552696228,
"epoch": 0.3212345484607511,
"grad_norm": 69.91545242214347,
"learning_rate": 9.888900811275203e-08,
"logits": -2.1840174198150635,
"logps": -92.2385025024414,
"loss": 0.0272,
"objective": 0.037196170538663864,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.46666666865348816,
"regularize": 0.037195660173892975,
"step": 340
},
{
"dpo_loss": 0.6906254887580872,
"epoch": 0.3259585859381151,
"grad_norm": 65.03933644888008,
"learning_rate": 9.880087198628577e-08,
"logits": -2.153885841369629,
"logps": -91.60904693603516,
"loss": 0.0246,
"objective": 0.0248491782695055,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5,
"regularize": 0.024848194792866707,
"step": 345
},
{
"dpo_loss": 0.6858618855476379,
"epoch": 0.3306826234154791,
"grad_norm": 65.04231679146841,
"learning_rate": 9.870941438484314e-08,
"logits": -2.1364102363586426,
"logps": -89.71781921386719,
"loss": 0.0262,
"objective": 0.026014180853962898,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5166666507720947,
"regularize": 0.026013409718871117,
"step": 350
},
{
"epoch": 0.3306826234154791,
"eval_dpo_loss": 0.6925798058509827,
"eval_logits": -1.9947079420089722,
"eval_logps": -98.34574890136719,
"eval_loss": 0.017829304561018944,
"eval_objective": 0.018099796026945114,
"eval_ranking_idealized": 0.6022099256515503,
"eval_ranking_idealized_expo": 0.5207182168960571,
"eval_ranking_simple": 0.5200276374816895,
"eval_regularize": 0.018098480999469757,
"eval_runtime": 446.968,
"eval_samples_per_second": 12.954,
"eval_steps_per_second": 3.24,
"step": 350
},
{
"dpo_loss": 0.6954938173294067,
"epoch": 0.3354066608928431,
"grad_norm": 80.69779694329567,
"learning_rate": 9.861464153319269e-08,
"logits": -2.129030466079712,
"logps": -93.99476623535156,
"loss": 0.0296,
"objective": 0.03383675962686539,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.550000011920929,
"regularize": 0.03383495658636093,
"step": 355
},
{
"dpo_loss": 0.6874939799308777,
"epoch": 0.3401306983702071,
"grad_norm": 76.1212296270761,
"learning_rate": 9.85165598817449e-08,
"logits": -2.1060662269592285,
"logps": -94.81258392333984,
"loss": 0.0314,
"objective": 0.03174449875950813,
"ranking_idealized": 0.5166666507720947,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5166666507720947,
"regularize": 0.031743988394737244,
"step": 360
},
{
"dpo_loss": 0.6876952648162842,
"epoch": 0.34485473584757104,
"grad_norm": 63.17330963799499,
"learning_rate": 9.841517610611307e-08,
"logits": -2.223184823989868,
"logps": -94.34845733642578,
"loss": 0.0271,
"objective": 0.03227417171001434,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.4833333194255829,
"regularize": 0.03227332606911659,
"step": 365
},
{
"dpo_loss": 0.6924201846122742,
"epoch": 0.34957877332493503,
"grad_norm": 70.25439806775886,
"learning_rate": 9.831049710665904e-08,
"logits": -2.153981924057007,
"logps": -92.24577331542969,
"loss": 0.0267,
"objective": 0.03014095313847065,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.44999998807907104,
"regularize": 0.030140016227960587,
"step": 370
},
{
"dpo_loss": 0.6881352066993713,
"epoch": 0.35430281080229903,
"grad_norm": 67.1301497619948,
"learning_rate": 9.820253000802345e-08,
"logits": -2.1567375659942627,
"logps": -90.48641204833984,
"loss": 0.029,
"objective": 0.034893397241830826,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.4833333194255829,
"regularize": 0.034892160445451736,
"step": 375
},
{
"dpo_loss": 0.6959127187728882,
"epoch": 0.359026848279663,
"grad_norm": 65.92204147729184,
"learning_rate": 9.809128215864095e-08,
"logits": -2.1123626232147217,
"logps": -90.6343002319336,
"loss": 0.0274,
"objective": 0.02818784862756729,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.5833333134651184,
"regularize": 0.028187256306409836,
"step": 380
},
{
"dpo_loss": 0.6918838620185852,
"epoch": 0.363750885757027,
"grad_norm": 78.02594257500782,
"learning_rate": 9.797676113023989e-08,
"logits": -2.1536900997161865,
"logps": -91.86727905273438,
"loss": 0.0271,
"objective": 0.024834012612700462,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5166666507720947,
"regularize": 0.024833252653479576,
"step": 385
},
{
"dpo_loss": 0.6891117095947266,
"epoch": 0.368474923234391,
"grad_norm": 77.62275101385076,
"learning_rate": 9.785897471732711e-08,
"logits": -2.220367193222046,
"logps": -93.69184875488281,
"loss": 0.0315,
"objective": 0.04405975714325905,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.5333333611488342,
"regularize": 0.04405748099088669,
"step": 390
},
{
"dpo_loss": 0.6931707859039307,
"epoch": 0.37319896071175496,
"grad_norm": 70.95977856029823,
"learning_rate": 9.773793093665739e-08,
"logits": -2.188248872756958,
"logps": -90.29833984375,
"loss": 0.0304,
"objective": 0.0337492860853672,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5,
"regularize": 0.03374841436743736,
"step": 395
},
{
"dpo_loss": 0.6871830224990845,
"epoch": 0.37792299818911895,
"grad_norm": 71.69739058358107,
"learning_rate": 9.76136380266878e-08,
"logits": -2.155177354812622,
"logps": -92.64527130126953,
"loss": 0.0315,
"objective": 0.031953115016222,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5,
"regularize": 0.03195144981145859,
"step": 400
},
{
"epoch": 0.37792299818911895,
"eval_dpo_loss": 0.6926193833351135,
"eval_logits": -1.9940829277038574,
"eval_logps": -98.11278533935547,
"eval_loss": 0.01653479039669037,
"eval_objective": 0.01635783165693283,
"eval_ranking_idealized": 0.6022099256515503,
"eval_ranking_idealized_expo": 0.5207182168960571,
"eval_ranking_simple": 0.5200276374816895,
"eval_regularize": 0.01635659858584404,
"eval_runtime": 446.4503,
"eval_samples_per_second": 12.969,
"eval_steps_per_second": 3.243,
"step": 400
},
{
"dpo_loss": 0.6941009759902954,
"epoch": 0.38264703566648295,
"grad_norm": 62.224154885036064,
"learning_rate": 9.748610444701694e-08,
"logits": -2.1617021560668945,
"logps": -90.76243591308594,
"loss": 0.0293,
"objective": 0.02944065071642399,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.550000011920929,
"regularize": 0.02944008819758892,
"step": 405
},
{
"dpo_loss": 0.6934362649917603,
"epoch": 0.38737107314384694,
"grad_norm": 71.86989684775978,
"learning_rate": 9.735533887780928e-08,
"logits": -2.1968331336975098,
"logps": -94.97209930419922,
"loss": 0.0284,
"objective": 0.02905886620283127,
"ranking_idealized": 0.46666666865348816,
"ranking_idealized_expo": 0.4000000059604645,
"ranking_simple": 0.4000000059604645,
"regularize": 0.029057972133159637,
"step": 410
},
{
"dpo_loss": 0.6933155059814453,
"epoch": 0.39209511062121094,
"grad_norm": 69.39777880403724,
"learning_rate": 9.722135021920426e-08,
"logits": -2.1606533527374268,
"logps": -90.09014129638672,
"loss": 0.0288,
"objective": 0.025797124952077866,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.4833333194255829,
"regularize": 0.025796448811888695,
"step": 415
},
{
"dpo_loss": 0.6879637837409973,
"epoch": 0.39681914809857494,
"grad_norm": 65.66563021960218,
"learning_rate": 9.708414759071057e-08,
"logits": -2.192812204360962,
"logps": -90.60978698730469,
"loss": 0.0293,
"objective": 0.028554469347000122,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.4333333373069763,
"ranking_simple": 0.4333333373069763,
"regularize": 0.028553970158100128,
"step": 420
},
{
"dpo_loss": 0.6905270218849182,
"epoch": 0.4015431855759389,
"grad_norm": 73.81480159017963,
"learning_rate": 9.694374033058549e-08,
"logits": -2.1698479652404785,
"logps": -92.80254364013672,
"loss": 0.03,
"objective": 0.032098546624183655,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.550000011920929,
"regularize": 0.0320979543030262,
"step": 425
},
{
"dpo_loss": 0.6938761472702026,
"epoch": 0.4062672230533029,
"grad_norm": 76.38130749325458,
"learning_rate": 9.680013799519926e-08,
"logits": -2.2929608821868896,
"logps": -92.20008087158203,
"loss": 0.0332,
"objective": 0.03666527569293976,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5333333611488342,
"regularize": 0.036664899438619614,
"step": 430
},
{
"dpo_loss": 0.6846203207969666,
"epoch": 0.41099126053066687,
"grad_norm": 62.016252142417606,
"learning_rate": 9.665335035838468e-08,
"logits": -2.1209442615509033,
"logps": -91.99024963378906,
"loss": 0.0308,
"objective": 0.04143450781702995,
"ranking_idealized": 0.7333333492279053,
"ranking_idealized_expo": 0.6166666746139526,
"ranking_simple": 0.6166666746139526,
"regularize": 0.04143408685922623,
"step": 435
},
{
"dpo_loss": 0.692919909954071,
"epoch": 0.41571529800803086,
"grad_norm": 83.01421114100998,
"learning_rate": 9.650338741077189e-08,
"logits": -2.1818275451660156,
"logps": -88.64667510986328,
"loss": 0.0282,
"objective": 0.020624225959181786,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.5666666626930237,
"regularize": 0.02062271721661091,
"step": 440
},
{
"dpo_loss": 0.6882209777832031,
"epoch": 0.42043933548539486,
"grad_norm": 69.91926112096503,
"learning_rate": 9.635025935910839e-08,
"logits": -2.078962564468384,
"logps": -94.8196792602539,
"loss": 0.03,
"objective": 0.029924126341938972,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.6000000238418579,
"ranking_simple": 0.6000000238418579,
"regularize": 0.029923679307103157,
"step": 445
},
{
"dpo_loss": 0.6905581951141357,
"epoch": 0.42516337296275886,
"grad_norm": 68.70387033094245,
"learning_rate": 9.619397662556434e-08,
"logits": -2.1093952655792236,
"logps": -90.6502914428711,
"loss": 0.0294,
"objective": 0.028780171647667885,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.44999998807907104,
"ranking_simple": 0.46666666865348816,
"regularize": 0.028779106214642525,
"step": 450
},
{
"epoch": 0.42516337296275886,
"eval_dpo_loss": 0.6923617124557495,
"eval_logits": -1.994999885559082,
"eval_logps": -98.37866973876953,
"eval_loss": 0.014490882866084576,
"eval_objective": 0.014800351113080978,
"eval_ranking_idealized": 0.6022099256515503,
"eval_ranking_idealized_expo": 0.5207182168960571,
"eval_ranking_simple": 0.5186464190483093,
"eval_regularize": 0.014799040742218494,
"eval_runtime": 444.0476,
"eval_samples_per_second": 13.039,
"eval_steps_per_second": 3.261,
"step": 450
},
{
"dpo_loss": 0.6909880042076111,
"epoch": 0.42988741044012285,
"grad_norm": 68.38544757200873,
"learning_rate": 9.60345498470232e-08,
"logits": -2.158226728439331,
"logps": -90.17542266845703,
"loss": 0.0255,
"objective": 0.02339431643486023,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.550000011920929,
"regularize": 0.023393774405121803,
"step": 455
},
{
"dpo_loss": 0.6882848739624023,
"epoch": 0.4346114479174868,
"grad_norm": 60.565560674058744,
"learning_rate": 9.58719898743578e-08,
"logits": -2.1875061988830566,
"logps": -93.89222717285156,
"loss": 0.0289,
"objective": 0.03423256427049637,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5333333611488342,
"regularize": 0.034231893718242645,
"step": 460
},
{
"dpo_loss": 0.6911517977714539,
"epoch": 0.4393354853948508,
"grad_norm": 68.08241104925199,
"learning_rate": 9.57063077716918e-08,
"logits": -2.1419482231140137,
"logps": -94.25173950195312,
"loss": 0.0304,
"objective": 0.02857878990471363,
"ranking_idealized": 0.699999988079071,
"ranking_idealized_expo": 0.6666666865348816,
"ranking_simple": 0.6666666865348816,
"regularize": 0.028578022494912148,
"step": 465
},
{
"dpo_loss": 0.6899906396865845,
"epoch": 0.4440595228722148,
"grad_norm": 75.2887054128907,
"learning_rate": 9.553751481564658e-08,
"logits": -2.0578720569610596,
"logps": -88.11711120605469,
"loss": 0.0318,
"objective": 0.02341555431485176,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.6166666746139526,
"ranking_simple": 0.6166666746139526,
"regularize": 0.02341252751648426,
"step": 470
},
{
"dpo_loss": 0.6940017938613892,
"epoch": 0.4487835603495788,
"grad_norm": 61.894078510860375,
"learning_rate": 9.536562249457386e-08,
"logits": -2.1432507038116455,
"logps": -91.78999328613281,
"loss": 0.0276,
"objective": 0.02927049808204174,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5333333611488342,
"regularize": 0.029269874095916748,
"step": 475
},
{
"dpo_loss": 0.6929585933685303,
"epoch": 0.4535075978269428,
"grad_norm": 69.75674817980894,
"learning_rate": 9.51906425077736e-08,
"logits": -2.1212713718414307,
"logps": -91.61197662353516,
"loss": 0.0303,
"objective": 0.028644824400544167,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.4333333373069763,
"ranking_simple": 0.44999998807907104,
"regularize": 0.028642630204558372,
"step": 480
},
{
"dpo_loss": 0.6894620656967163,
"epoch": 0.4582316353043068,
"grad_norm": 68.06555771230744,
"learning_rate": 9.501258676469798e-08,
"logits": -2.2252414226531982,
"logps": -92.49252319335938,
"loss": 0.0273,
"objective": 0.029991615563631058,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.6000000238418579,
"ranking_simple": 0.6000000238418579,
"regularize": 0.02999037504196167,
"step": 485
},
{
"dpo_loss": 0.6851038336753845,
"epoch": 0.4629556727816707,
"grad_norm": 68.81350143691881,
"learning_rate": 9.483146738414056e-08,
"logits": -2.1528584957122803,
"logps": -93.01960754394531,
"loss": 0.0298,
"objective": 0.03543411195278168,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.5166666507720947,
"regularize": 0.03543229401111603,
"step": 490
},
{
"dpo_loss": 0.6880256533622742,
"epoch": 0.4676797102590347,
"grad_norm": 63.11314302096046,
"learning_rate": 9.46472966934116e-08,
"logits": -2.1136722564697266,
"logps": -90.93292999267578,
"loss": 0.0346,
"objective": 0.03396356850862503,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5166666507720947,
"regularize": 0.03396301716566086,
"step": 495
},
{
"dpo_loss": 0.6870742440223694,
"epoch": 0.4724037477363987,
"grad_norm": 73.04815070979407,
"learning_rate": 9.446008722749906e-08,
"logits": -2.2441928386688232,
"logps": -95.81112670898438,
"loss": 0.032,
"objective": 0.026291660964488983,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.6000000238418579,
"regularize": 0.026291247457265854,
"step": 500
},
{
"epoch": 0.4724037477363987,
"eval_dpo_loss": 0.6924601793289185,
"eval_logits": -1.9919774532318115,
"eval_logps": -98.64570617675781,
"eval_loss": 0.013912476599216461,
"eval_objective": 0.0139460489153862,
"eval_ranking_idealized": 0.6022099256515503,
"eval_ranking_idealized_expo": 0.5207182168960571,
"eval_ranking_simple": 0.519336998462677,
"eval_regularize": 0.013944561593234539,
"eval_runtime": 444.8021,
"eval_samples_per_second": 13.017,
"eval_steps_per_second": 3.255,
"step": 500
},
{
"dpo_loss": 0.6906213164329529,
"epoch": 0.4771277852137627,
"grad_norm": 75.29236969807971,
"learning_rate": 9.426985172821529e-08,
"logits": -2.225041151046753,
"logps": -90.76871490478516,
"loss": 0.0313,
"objective": 0.034964669495821,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5,
"regularize": 0.03496433049440384,
"step": 505
},
{
"dpo_loss": 0.6928242444992065,
"epoch": 0.4818518226911267,
"grad_norm": 71.72745918834562,
"learning_rate": 9.407660314333001e-08,
"logits": -2.0290334224700928,
"logps": -92.85369110107422,
"loss": 0.0322,
"objective": 0.039766810834407806,
"ranking_idealized": 0.7166666388511658,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.5833333134651184,
"regularize": 0.03976641967892647,
"step": 510
},
{
"dpo_loss": 0.6923685073852539,
"epoch": 0.4865758601684907,
"grad_norm": 61.131599755380016,
"learning_rate": 9.388035462568891e-08,
"logits": -2.147352933883667,
"logps": -91.90682220458984,
"loss": 0.0331,
"objective": 0.035778481513261795,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.6000000238418579,
"ranking_simple": 0.5666666626930237,
"regularize": 0.035777974873781204,
"step": 515
},
{
"dpo_loss": 0.6962333917617798,
"epoch": 0.49129989764585463,
"grad_norm": 65.42531713674678,
"learning_rate": 9.368111953231848e-08,
"logits": -2.1052534580230713,
"logps": -92.43571472167969,
"loss": 0.0302,
"objective": 0.030195049941539764,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.6166666746139526,
"ranking_simple": 0.6166666746139526,
"regularize": 0.030194593593478203,
"step": 520
},
{
"dpo_loss": 0.6958824396133423,
"epoch": 0.49602393512321863,
"grad_norm": 68.86518401985903,
"learning_rate": 9.347891142351692e-08,
"logits": -2.1327033042907715,
"logps": -95.017578125,
"loss": 0.0343,
"objective": 0.033561404794454575,
"ranking_idealized": 0.5166666507720947,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.44999998807907104,
"regularize": 0.033560872077941895,
"step": 525
},
{
"dpo_loss": 0.692583441734314,
"epoch": 0.5007479726005827,
"grad_norm": 68.03560111972747,
"learning_rate": 9.327374406193124e-08,
"logits": -2.1641759872436523,
"logps": -92.0415267944336,
"loss": 0.032,
"objective": 0.03345762938261032,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.6000000238418579,
"ranking_simple": 0.6000000238418579,
"regularize": 0.03345634788274765,
"step": 530
},
{
"dpo_loss": 0.6896530389785767,
"epoch": 0.5054720100779466,
"grad_norm": 66.90019999237873,
"learning_rate": 9.306563141162044e-08,
"logits": -2.1231565475463867,
"logps": -91.51903533935547,
"loss": 0.0298,
"objective": 0.03302415460348129,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5166666507720947,
"regularize": 0.03302332013845444,
"step": 535
},
{
"dpo_loss": 0.6914986371994019,
"epoch": 0.5101960475553106,
"grad_norm": 58.738988827079176,
"learning_rate": 9.285458763710523e-08,
"logits": -2.147346019744873,
"logps": -93.07068634033203,
"loss": 0.0341,
"objective": 0.03528103977441788,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.4833333194255829,
"regularize": 0.035279832780361176,
"step": 540
},
{
"dpo_loss": 0.6926673650741577,
"epoch": 0.5149200850326746,
"grad_norm": 65.83406826541673,
"learning_rate": 9.264062710240386e-08,
"logits": -2.1894426345825195,
"logps": -97.09349822998047,
"loss": 0.0276,
"objective": 0.029730303213000298,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.46666666865348816,
"regularize": 0.02972940169274807,
"step": 545
},
{
"dpo_loss": 0.6896089315414429,
"epoch": 0.5196441225100386,
"grad_norm": 80.74824222454775,
"learning_rate": 9.242376437005448e-08,
"logits": -2.1549692153930664,
"logps": -93.35411834716797,
"loss": 0.0314,
"objective": 0.0288882777094841,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5166666507720947,
"regularize": 0.028887782245874405,
"step": 550
},
{
"epoch": 0.5196441225100386,
"eval_dpo_loss": 0.6926965713500977,
"eval_logits": -1.9942920207977295,
"eval_logps": -98.96892547607422,
"eval_loss": 0.013568516820669174,
"eval_objective": 0.013540062122046947,
"eval_ranking_idealized": 0.6022099256515503,
"eval_ranking_idealized_expo": 0.5207182168960571,
"eval_ranking_simple": 0.5186464190483093,
"eval_regularize": 0.013538442552089691,
"eval_runtime": 444.65,
"eval_samples_per_second": 13.021,
"eval_steps_per_second": 3.256,
"step": 550
},
{
"dpo_loss": 0.6818323135375977,
"epoch": 0.5243681599874026,
"grad_norm": 71.45675055284568,
"learning_rate": 9.22040142001241e-08,
"logits": -2.1764817237854004,
"logps": -92.4581298828125,
"loss": 0.0336,
"objective": 0.04190651327371597,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.550000011920929,
"regularize": 0.04190612956881523,
"step": 555
},
{
"dpo_loss": 0.6884815692901611,
"epoch": 0.5290921974647665,
"grad_norm": 72.10264430141908,
"learning_rate": 9.198139154920388e-08,
"logits": -2.2008354663848877,
"logps": -90.6949234008789,
"loss": 0.0344,
"objective": 0.034483686089515686,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.46666666865348816,
"regularize": 0.03448285162448883,
"step": 560
},
{
"dpo_loss": 0.6909436583518982,
"epoch": 0.5338162349421306,
"grad_norm": 60.56883204825771,
"learning_rate": 9.175591156939118e-08,
"logits": -2.1834826469421387,
"logps": -94.38992309570312,
"loss": 0.03,
"objective": 0.02786482684314251,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.5666666626930237,
"regularize": 0.02786322310566902,
"step": 565
},
{
"dpo_loss": 0.6896558403968811,
"epoch": 0.5385402724194945,
"grad_norm": 63.27876500292638,
"learning_rate": 9.152758960725829e-08,
"logits": -2.0850472450256348,
"logps": -90.94063568115234,
"loss": 0.0305,
"objective": 0.03306278958916664,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5166666507720947,
"regularize": 0.03306075185537338,
"step": 570
},
{
"dpo_loss": 0.6910984516143799,
"epoch": 0.5432643098968585,
"grad_norm": 64.97441175201224,
"learning_rate": 9.129644120280797e-08,
"logits": -2.215700387954712,
"logps": -93.2086181640625,
"loss": 0.0363,
"objective": 0.03730526939034462,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.46666666865348816,
"regularize": 0.03730436787009239,
"step": 575
},
{
"dpo_loss": 0.6901772618293762,
"epoch": 0.5479883473742225,
"grad_norm": 60.84920518500226,
"learning_rate": 9.106248208841567e-08,
"logits": -2.077465534210205,
"logps": -89.92863464355469,
"loss": 0.0301,
"objective": 0.029211556538939476,
"ranking_idealized": 0.5166666507720947,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.46666666865348816,
"regularize": 0.02920910157263279,
"step": 580
},
{
"dpo_loss": 0.6912521123886108,
"epoch": 0.5527123848515865,
"grad_norm": 61.38401518242899,
"learning_rate": 9.082572818775884e-08,
"logits": -2.0964841842651367,
"logps": -96.6317138671875,
"loss": 0.0311,
"objective": 0.0291235763579607,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5166666507720947,
"regularize": 0.029122162610292435,
"step": 585
},
{
"dpo_loss": 0.6879535913467407,
"epoch": 0.5574364223289505,
"grad_norm": 70.97177434877489,
"learning_rate": 9.058619561473306e-08,
"logits": -2.1359400749206543,
"logps": -91.66080474853516,
"loss": 0.0309,
"objective": 0.0273025743663311,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5166666507720947,
"regularize": 0.02730202116072178,
"step": 590
},
{
"dpo_loss": 0.69169682264328,
"epoch": 0.5621604598063145,
"grad_norm": 66.67475961321956,
"learning_rate": 9.034390067235538e-08,
"logits": -2.122257947921753,
"logps": -93.28813934326172,
"loss": 0.0292,
"objective": 0.03189357370138168,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5,
"regularize": 0.03189240023493767,
"step": 595
},
{
"dpo_loss": 0.692051351070404,
"epoch": 0.5668844972836784,
"grad_norm": 66.82237885750858,
"learning_rate": 9.009885985165465e-08,
"logits": -2.1968979835510254,
"logps": -91.0505599975586,
"loss": 0.0311,
"objective": 0.027313487604260445,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5166666507720947,
"regularize": 0.027312906458973885,
"step": 600
},
{
"epoch": 0.5668844972836784,
"eval_dpo_loss": 0.6925215721130371,
"eval_logits": -1.9967907667160034,
"eval_logps": -98.12234497070312,
"eval_loss": 0.014187943190336227,
"eval_objective": 0.014395096339285374,
"eval_ranking_idealized": 0.6022099256515503,
"eval_ranking_idealized_expo": 0.5207182168960571,
"eval_ranking_simple": 0.5186464190483093,
"eval_regularize": 0.014393283054232597,
"eval_runtime": 445.7504,
"eval_samples_per_second": 12.989,
"eval_steps_per_second": 3.248,
"step": 600
},
{
"dpo_loss": 0.6929703950881958,
"epoch": 0.5716085347610425,
"grad_norm": 64.74275314805836,
"learning_rate": 8.985108983054912e-08,
"logits": -2.0810482501983643,
"logps": -92.1054916381836,
"loss": 0.0343,
"objective": 0.03585705906152725,
"ranking_idealized": 0.5,
"ranking_idealized_expo": 0.4166666567325592,
"ranking_simple": 0.4166666567325592,
"regularize": 0.03585506230592728,
"step": 605
},
{
"dpo_loss": 0.6893709897994995,
"epoch": 0.5763325722384064,
"grad_norm": 63.69006739310455,
"learning_rate": 8.960060747271137e-08,
"logits": -2.1485848426818848,
"logps": -92.45893096923828,
"loss": 0.0292,
"objective": 0.03595684841275215,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.4833333194255829,
"regularize": 0.03595583513379097,
"step": 610
},
{
"dpo_loss": 0.6894236207008362,
"epoch": 0.5810566097157704,
"grad_norm": 63.68171123239859,
"learning_rate": 8.934742982642041e-08,
"logits": -2.2213053703308105,
"logps": -92.00927734375,
"loss": 0.0289,
"objective": 0.030653396621346474,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.4333333373069763,
"ranking_simple": 0.44999998807907104,
"regularize": 0.030652187764644623,
"step": 615
},
{
"dpo_loss": 0.6907939314842224,
"epoch": 0.5857806471931344,
"grad_norm": 64.38726305381529,
"learning_rate": 8.90915741234015e-08,
"logits": -2.22101092338562,
"logps": -93.67798614501953,
"loss": 0.0293,
"objective": 0.028055744245648384,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.5666666626930237,
"regularize": 0.028055019676685333,
"step": 620
},
{
"dpo_loss": 0.6883565783500671,
"epoch": 0.5905046846704984,
"grad_norm": 69.11436679749976,
"learning_rate": 8.883305777765317e-08,
"logits": -2.095867395401001,
"logps": -95.01261138916016,
"loss": 0.0311,
"objective": 0.033847175538539886,
"ranking_idealized": 0.5166666507720947,
"ranking_idealized_expo": 0.4333333373069763,
"ranking_simple": 0.4333333373069763,
"regularize": 0.03384659066796303,
"step": 625
},
{
"dpo_loss": 0.6883829236030579,
"epoch": 0.5952287221478624,
"grad_norm": 74.04617527963917,
"learning_rate": 8.857189838426216e-08,
"logits": -2.183093547821045,
"logps": -92.19212341308594,
"loss": 0.0332,
"objective": 0.03131110966205597,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.5666666626930237,
"regularize": 0.03130975365638733,
"step": 630
},
{
"dpo_loss": 0.6901324987411499,
"epoch": 0.5999527596252263,
"grad_norm": 63.254018805089515,
"learning_rate": 8.83081137182057e-08,
"logits": -2.137653112411499,
"logps": -92.25005340576172,
"loss": 0.028,
"objective": 0.02667616680264473,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.550000011920929,
"regularize": 0.026675010100007057,
"step": 635
},
{
"dpo_loss": 0.6854274868965149,
"epoch": 0.6046767971025904,
"grad_norm": 67.682861674831,
"learning_rate": 8.804172173314183e-08,
"logits": -2.1525957584381104,
"logps": -96.51889038085938,
"loss": 0.0305,
"objective": 0.026471592485904694,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.46666666865348816,
"regularize": 0.026470640674233437,
"step": 640
},
{
"dpo_loss": 0.6916467547416687,
"epoch": 0.6094008345799543,
"grad_norm": 74.11851413570284,
"learning_rate": 8.777274056018745e-08,
"logits": -2.0791733264923096,
"logps": -90.76611328125,
"loss": 0.0275,
"objective": 0.025933992117643356,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.4333333373069763,
"ranking_simple": 0.4333333373069763,
"regularize": 0.025932662189006805,
"step": 645
},
{
"dpo_loss": 0.6861349940299988,
"epoch": 0.6141248720573184,
"grad_norm": 71.7083018593228,
"learning_rate": 8.750118850668412e-08,
"logits": -2.0774688720703125,
"logps": -91.57192993164062,
"loss": 0.0333,
"objective": 0.03247459605336189,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5,
"regularize": 0.03247232735157013,
"step": 650
},
{
"epoch": 0.6141248720573184,
"eval_dpo_loss": 0.6926119327545166,
"eval_logits": -1.993467926979065,
"eval_logps": -98.69168853759766,
"eval_loss": 0.014501783065497875,
"eval_objective": 0.014641453512012959,
"eval_ranking_idealized": 0.6022099256515503,
"eval_ranking_idealized_expo": 0.5207182168960571,
"eval_ranking_simple": 0.5179557800292969,
"eval_regularize": 0.014639632776379585,
"eval_runtime": 445.8707,
"eval_samples_per_second": 12.986,
"eval_steps_per_second": 3.248,
"step": 650
},
{
"dpo_loss": 0.6902641654014587,
"epoch": 0.6188489095346823,
"grad_norm": 58.31607902985929,
"learning_rate": 8.722708405495222e-08,
"logits": -2.2487266063690186,
"logps": -89.10828399658203,
"loss": 0.0289,
"objective": 0.02854420617222786,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.44999998807907104,
"ranking_simple": 0.4333333373069763,
"regularize": 0.028543464839458466,
"step": 655
},
{
"dpo_loss": 0.6865178942680359,
"epoch": 0.6235729470120462,
"grad_norm": 65.43564652911088,
"learning_rate": 8.695044586103295e-08,
"logits": -2.105522394180298,
"logps": -94.11585998535156,
"loss": 0.0288,
"objective": 0.027172502130270004,
"ranking_idealized": 0.4333333373069763,
"ranking_idealized_expo": 0.4000000059604645,
"ranking_simple": 0.4000000059604645,
"regularize": 0.027170367538928986,
"step": 660
},
{
"dpo_loss": 0.6929230093955994,
"epoch": 0.6282969844894103,
"grad_norm": 75.08767896831156,
"learning_rate": 8.667129275341853e-08,
"logits": -2.261946201324463,
"logps": -90.70641326904297,
"loss": 0.036,
"objective": 0.03217438980937004,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.550000011920929,
"regularize": 0.03217388316988945,
"step": 665
},
{
"dpo_loss": 0.6909863948822021,
"epoch": 0.6330210219667742,
"grad_norm": 61.63435170443301,
"learning_rate": 8.638964373177073e-08,
"logits": -2.0806498527526855,
"logps": -93.46875762939453,
"loss": 0.0283,
"objective": 0.03122856095433235,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.550000011920929,
"regularize": 0.03122722916305065,
"step": 670
},
{
"dpo_loss": 0.6940844058990479,
"epoch": 0.6377450594441383,
"grad_norm": 65.12038155568429,
"learning_rate": 8.610551796562768e-08,
"logits": -2.2103471755981445,
"logps": -92.73240661621094,
"loss": 0.0319,
"objective": 0.03719858080148697,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5,
"regularize": 0.03719812259078026,
"step": 675
},
{
"dpo_loss": 0.6896507740020752,
"epoch": 0.6424690969215022,
"grad_norm": 65.07489196501477,
"learning_rate": 8.581893479309924e-08,
"logits": -2.2053842544555664,
"logps": -93.24919128417969,
"loss": 0.0265,
"objective": 0.023518383502960205,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.4833333194255829,
"regularize": 0.023516135290265083,
"step": 680
},
{
"dpo_loss": 0.6927950382232666,
"epoch": 0.6471931343988663,
"grad_norm": 70.32780445036671,
"learning_rate": 8.552991371955072e-08,
"logits": -2.296104907989502,
"logps": -92.59764099121094,
"loss": 0.0318,
"objective": 0.034257274121046066,
"ranking_idealized": 0.6833333373069763,
"ranking_idealized_expo": 0.6166666746139526,
"ranking_simple": 0.6166666746139526,
"regularize": 0.03425610437989235,
"step": 685
},
{
"dpo_loss": 0.692520797252655,
"epoch": 0.6519171718762302,
"grad_norm": 70.14652096802335,
"learning_rate": 8.523847441627536e-08,
"logits": -2.193286657333374,
"logps": -94.4785385131836,
"loss": 0.0326,
"objective": 0.03835910186171532,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.4166666567325592,
"ranking_simple": 0.4000000059604645,
"regularize": 0.03835882246494293,
"step": 690
},
{
"dpo_loss": 0.6892996430397034,
"epoch": 0.6566412093535942,
"grad_norm": 73.28164754238634,
"learning_rate": 8.494463671915546e-08,
"logits": -2.1629860401153564,
"logps": -93.7652359008789,
"loss": 0.0265,
"objective": 0.026581525802612305,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.4166666567325592,
"ranking_simple": 0.4000000059604645,
"regularize": 0.026580767706036568,
"step": 695
},
{
"dpo_loss": 0.6907955408096313,
"epoch": 0.6613652468309582,
"grad_norm": 66.01685046319234,
"learning_rate": 8.464842062731234e-08,
"logits": -2.2634246349334717,
"logps": -91.12454986572266,
"loss": 0.028,
"objective": 0.028724508360028267,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5166666507720947,
"regularize": 0.028722405433654785,
"step": 700
},
{
"epoch": 0.6613652468309582,
"eval_dpo_loss": 0.6929543018341064,
"eval_logits": -1.9953092336654663,
"eval_logps": -98.67767333984375,
"eval_loss": 0.013822407461702824,
"eval_objective": 0.014025083743035793,
"eval_ranking_idealized": 0.6022099256515503,
"eval_ranking_idealized_expo": 0.5207182168960571,
"eval_ranking_simple": 0.519336998462677,
"eval_regularize": 0.01402334589511156,
"eval_runtime": 446.1644,
"eval_samples_per_second": 12.977,
"eval_steps_per_second": 3.245,
"step": 700
},
{
"dpo_loss": 0.6904506683349609,
"epoch": 0.6660892843083221,
"grad_norm": 64.45584344371969,
"learning_rate": 8.434984630174508e-08,
"logits": -2.223440408706665,
"logps": -94.05587005615234,
"loss": 0.0302,
"objective": 0.029637468978762627,
"ranking_idealized": 0.6833333373069763,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.5666666626930237,
"regularize": 0.029636209830641747,
"step": 705
},
{
"dpo_loss": 0.6879330277442932,
"epoch": 0.6708133217856862,
"grad_norm": 67.77900053910153,
"learning_rate": 8.404893406395842e-08,
"logits": -2.1772301197052,
"logps": -93.94538879394531,
"loss": 0.0323,
"objective": 0.030688025057315826,
"ranking_idealized": 0.5166666507720947,
"ranking_idealized_expo": 0.4166666567325592,
"ranking_simple": 0.4166666567325592,
"regularize": 0.030687240883708,
"step": 710
},
{
"dpo_loss": 0.6863754987716675,
"epoch": 0.6755373592630501,
"grad_norm": 78.62750388933773,
"learning_rate": 8.37457043945796e-08,
"logits": -2.1862614154815674,
"logps": -88.71346282958984,
"loss": 0.0319,
"objective": 0.034725725650787354,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.4833333194255829,
"regularize": 0.03472534194588661,
"step": 715
},
{
"dpo_loss": 0.6946022510528564,
"epoch": 0.6802613967404142,
"grad_norm": 63.01104309220956,
"learning_rate": 8.344017793196442e-08,
"logits": -2.1920392513275146,
"logps": -90.14446258544922,
"loss": 0.0265,
"objective": 0.025683369487524033,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.5666666626930237,
"regularize": 0.025682510808110237,
"step": 720
},
{
"dpo_loss": 0.6887207627296448,
"epoch": 0.6849854342177781,
"grad_norm": 75.04026894879733,
"learning_rate": 8.313237547079252e-08,
"logits": -2.10304594039917,
"logps": -90.62553405761719,
"loss": 0.0292,
"objective": 0.029727067798376083,
"ranking_idealized": 0.6833333373069763,
"ranking_idealized_expo": 0.6166666746139526,
"ranking_simple": 0.6166666746139526,
"regularize": 0.0297266636043787,
"step": 725
},
{
"dpo_loss": 0.6926788687705994,
"epoch": 0.6897094716951421,
"grad_norm": 67.22966096550593,
"learning_rate": 8.282231796065213e-08,
"logits": -2.1637871265411377,
"logps": -91.91923522949219,
"loss": 0.0265,
"objective": 0.02672416716814041,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.5833333134651184,
"regularize": 0.026723742485046387,
"step": 730
},
{
"dpo_loss": 0.688262403011322,
"epoch": 0.6944335091725061,
"grad_norm": 64.56167756628024,
"learning_rate": 8.251002650461411e-08,
"logits": -2.1801397800445557,
"logps": -93.63780212402344,
"loss": 0.0294,
"objective": 0.029570966958999634,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.4833333194255829,
"regularize": 0.029570437967777252,
"step": 735
},
{
"dpo_loss": 0.6889380216598511,
"epoch": 0.6991575466498701,
"grad_norm": 76.20522666744934,
"learning_rate": 8.219552235779577e-08,
"logits": -2.1762733459472656,
"logps": -93.22509765625,
"loss": 0.0341,
"objective": 0.03592396527528763,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.5833333134651184,
"regularize": 0.035923395305871964,
"step": 740
},
{
"dpo_loss": 0.6904739141464233,
"epoch": 0.7038815841272341,
"grad_norm": 61.170974083082186,
"learning_rate": 8.187882692591406e-08,
"logits": -2.148138999938965,
"logps": -91.92343139648438,
"loss": 0.0298,
"objective": 0.027687864378094673,
"ranking_idealized": 0.5,
"ranking_idealized_expo": 0.4333333373069763,
"ranking_simple": 0.4166666567325592,
"regularize": 0.027687139809131622,
"step": 745
},
{
"dpo_loss": 0.6924771666526794,
"epoch": 0.7086056216045981,
"grad_norm": 86.99376248944333,
"learning_rate": 8.155996176382873e-08,
"logits": -2.2314558029174805,
"logps": -92.25162506103516,
"loss": 0.0319,
"objective": 0.033008575439453125,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.4833333194255829,
"regularize": 0.03300632908940315,
"step": 750
},
{
"epoch": 0.7086056216045981,
"eval_dpo_loss": 0.6925805807113647,
"eval_logits": -1.9951562881469727,
"eval_logps": -98.77120208740234,
"eval_loss": 0.014676159247756004,
"eval_objective": 0.014523538760840893,
"eval_ranking_idealized": 0.6022099256515503,
"eval_ranking_idealized_expo": 0.5207182168960571,
"eval_ranking_simple": 0.5179557800292969,
"eval_regularize": 0.014521739445626736,
"eval_runtime": 446.8328,
"eval_samples_per_second": 12.958,
"eval_steps_per_second": 3.241,
"step": 750
},
{
"dpo_loss": 0.6896133422851562,
"epoch": 0.713329659081962,
"grad_norm": 65.62950261910362,
"learning_rate": 8.123894857407532e-08,
"logits": -2.175105571746826,
"logps": -92.83119201660156,
"loss": 0.0297,
"objective": 0.02945883385837078,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.4833333194255829,
"regularize": 0.029456783086061478,
"step": 755
},
{
"dpo_loss": 0.6876399517059326,
"epoch": 0.718053696559326,
"grad_norm": 80.69728652803693,
"learning_rate": 8.091580920538789e-08,
"logits": -2.2073442935943604,
"logps": -90.69680786132812,
"loss": 0.0284,
"objective": 0.029233213514089584,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.4333333373069763,
"ranking_simple": 0.4333333373069763,
"regularize": 0.029232406988739967,
"step": 760
},
{
"dpo_loss": 0.6934041380882263,
"epoch": 0.72277773403669,
"grad_norm": 59.09750353750027,
"learning_rate": 8.059056565121216e-08,
"logits": -2.2103536128997803,
"logps": -91.05927276611328,
"loss": 0.0275,
"objective": 0.026471644639968872,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.38333332538604736,
"ranking_simple": 0.38333332538604736,
"regularize": 0.02647002600133419,
"step": 765
},
{
"dpo_loss": 0.6914010643959045,
"epoch": 0.727501771514054,
"grad_norm": 80.2131293813357,
"learning_rate": 8.026324004820844e-08,
"logits": -2.1993424892425537,
"logps": -91.33180236816406,
"loss": 0.0329,
"objective": 0.03441242873668671,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.44999998807907104,
"ranking_simple": 0.44999998807907104,
"regularize": 0.03441200777888298,
"step": 770
},
{
"dpo_loss": 0.693133533000946,
"epoch": 0.732225808991418,
"grad_norm": 65.96085059361222,
"learning_rate": 7.993385467474502e-08,
"logits": -2.2453505992889404,
"logps": -94.63382720947266,
"loss": 0.0376,
"objective": 0.032590463757514954,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.4833333194255829,
"regularize": 0.032589759677648544,
"step": 775
},
{
"dpo_loss": 0.6881453394889832,
"epoch": 0.736949846468782,
"grad_norm": 66.36692143180971,
"learning_rate": 7.960243194938191e-08,
"logits": -2.1516549587249756,
"logps": -94.29581451416016,
"loss": 0.0322,
"objective": 0.030854353681206703,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.44999998807907104,
"ranking_simple": 0.44999998807907104,
"regularize": 0.030853325501084328,
"step": 780
},
{
"dpo_loss": 0.6893908381462097,
"epoch": 0.741673883946146,
"grad_norm": 73.24038753014604,
"learning_rate": 7.926899442934488e-08,
"logits": -2.1456098556518555,
"logps": -93.58820343017578,
"loss": 0.0301,
"objective": 0.030529705807566643,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.5833333134651184,
"regularize": 0.030527813360095024,
"step": 785
},
{
"dpo_loss": 0.6873824000358582,
"epoch": 0.7463979214235099,
"grad_norm": 61.386663619909456,
"learning_rate": 7.893356480899029e-08,
"logits": -2.202815055847168,
"logps": -89.73310089111328,
"loss": 0.0284,
"objective": 0.026303457096219063,
"ranking_idealized": 0.7166666388511658,
"ranking_idealized_expo": 0.6333333253860474,
"ranking_simple": 0.6333333253860474,
"regularize": 0.02630232647061348,
"step": 790
},
{
"dpo_loss": 0.6941797733306885,
"epoch": 0.751121958900874,
"grad_norm": 74.12319085244714,
"learning_rate": 7.85961659182604e-08,
"logits": -2.2534494400024414,
"logps": -92.2616195678711,
"loss": 0.0317,
"objective": 0.028791796416044235,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.6166666746139526,
"ranking_simple": 0.6166666746139526,
"regularize": 0.028791317716240883,
"step": 795
},
{
"dpo_loss": 0.6879761815071106,
"epoch": 0.7558459963782379,
"grad_norm": 66.76187019896828,
"learning_rate": 7.825682072112959e-08,
"logits": -2.152491807937622,
"logps": -90.43921661376953,
"loss": 0.0297,
"objective": 0.031770989298820496,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.5666666626930237,
"regularize": 0.031770527362823486,
"step": 800
},
{
"epoch": 0.7558459963782379,
"eval_dpo_loss": 0.6929048299789429,
"eval_logits": -1.9949605464935303,
"eval_logps": -98.13481903076172,
"eval_loss": 0.015697013586759567,
"eval_objective": 0.016285618767142296,
"eval_ranking_idealized": 0.6022099256515503,
"eval_ranking_idealized_expo": 0.5207182168960571,
"eval_ranking_simple": 0.5186464190483093,
"eval_regularize": 0.01628427766263485,
"eval_runtime": 446.756,
"eval_samples_per_second": 12.96,
"eval_steps_per_second": 3.241,
"step": 800
},
{
"dpo_loss": 0.6914661526679993,
"epoch": 0.760570033855602,
"grad_norm": 61.85563229584601,
"learning_rate": 7.79155523140413e-08,
"logits": -2.1741960048675537,
"logps": -94.34522247314453,
"loss": 0.0341,
"objective": 0.03409460559487343,
"ranking_idealized": 0.46666666865348816,
"ranking_idealized_expo": 0.38333332538604736,
"ranking_simple": 0.38333332538604736,
"regularize": 0.03409397229552269,
"step": 805
},
{
"dpo_loss": 0.6910237669944763,
"epoch": 0.7652940713329659,
"grad_norm": 66.02922525617878,
"learning_rate": 7.757238392433613e-08,
"logits": -2.218034267425537,
"logps": -91.0445327758789,
"loss": 0.0309,
"objective": 0.02644220180809498,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.550000011920929,
"regularize": 0.026441721245646477,
"step": 810
},
{
"dpo_loss": 0.6903732419013977,
"epoch": 0.77001810881033,
"grad_norm": 71.25069648841824,
"learning_rate": 7.722733890867088e-08,
"logits": -2.13299298286438,
"logps": -94.09717559814453,
"loss": 0.0306,
"objective": 0.027522355318069458,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5166666507720947,
"regularize": 0.027521483600139618,
"step": 815
},
{
"dpo_loss": 0.692348062992096,
"epoch": 0.7747421462876939,
"grad_norm": 62.71232207694529,
"learning_rate": 7.688044075142886e-08,
"logits": -2.2638330459594727,
"logps": -89.2739486694336,
"loss": 0.0265,
"objective": 0.024408848956227303,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5,
"regularize": 0.024408036842942238,
"step": 820
},
{
"dpo_loss": 0.6927106976509094,
"epoch": 0.7794661837650578,
"grad_norm": 68.11094417791882,
"learning_rate": 7.653171306312161e-08,
"logits": -2.155310869216919,
"logps": -92.2811508178711,
"loss": 0.0314,
"objective": 0.029093213379383087,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.4833333194255829,
"regularize": 0.029092345386743546,
"step": 825
},
{
"dpo_loss": 0.6911001801490784,
"epoch": 0.7841902212424219,
"grad_norm": 66.75059983982347,
"learning_rate": 7.618117957878178e-08,
"logits": -2.236713409423828,
"logps": -93.50963592529297,
"loss": 0.0363,
"objective": 0.033676620572805405,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.4833333194255829,
"regularize": 0.03367554768919945,
"step": 830
},
{
"dpo_loss": 0.6890848278999329,
"epoch": 0.7889142587197858,
"grad_norm": 59.842310291910785,
"learning_rate": 7.582886415634773e-08,
"logits": -2.1434099674224854,
"logps": -89.62670135498047,
"loss": 0.0261,
"objective": 0.032363876700401306,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.5833333134651184,
"regularize": 0.032363247126340866,
"step": 835
},
{
"dpo_loss": 0.6889583468437195,
"epoch": 0.7936382961971499,
"grad_norm": 60.528801427412084,
"learning_rate": 7.547479077503975e-08,
"logits": -2.0931692123413086,
"logps": -90.27711486816406,
"loss": 0.0286,
"objective": 0.032331857830286026,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.550000011920929,
"regularize": 0.03233127295970917,
"step": 840
},
{
"dpo_loss": 0.6943262815475464,
"epoch": 0.7983623336745138,
"grad_norm": 66.0119748602127,
"learning_rate": 7.511898353372797e-08,
"logits": -2.21136212348938,
"logps": -91.64664459228516,
"loss": 0.0279,
"objective": 0.03553476184606552,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.5666666626930237,
"regularize": 0.03553430363535881,
"step": 845
},
{
"dpo_loss": 0.6893811821937561,
"epoch": 0.8030863711518778,
"grad_norm": 64.38574308982342,
"learning_rate": 7.476146664929213e-08,
"logits": -2.2435154914855957,
"logps": -92.36274719238281,
"loss": 0.0286,
"objective": 0.0326698012650013,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.44999998807907104,
"ranking_simple": 0.46666666865348816,
"regularize": 0.03266819566488266,
"step": 850
},
{
"epoch": 0.8030863711518778,
"eval_dpo_loss": 0.6928204894065857,
"eval_logits": -1.9953876733779907,
"eval_logps": -98.59400939941406,
"eval_loss": 0.012405806221067905,
"eval_objective": 0.012502364814281464,
"eval_ranking_idealized": 0.6022099256515503,
"eval_ranking_idealized_expo": 0.5207182168960571,
"eval_ranking_simple": 0.5172652006149292,
"eval_regularize": 0.012500518001616001,
"eval_runtime": 453.0739,
"eval_samples_per_second": 12.779,
"eval_steps_per_second": 3.196,
"step": 850
},
{
"dpo_loss": 0.6887614130973816,
"epoch": 0.8078104086292418,
"grad_norm": 68.55505260723746,
"learning_rate": 7.440226445497333e-08,
"logits": -2.201233386993408,
"logps": -92.58832550048828,
"loss": 0.0274,
"objective": 0.026876337826251984,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5333333611488342,
"regularize": 0.0268756952136755,
"step": 855
},
{
"dpo_loss": 0.6904102563858032,
"epoch": 0.8125344461066057,
"grad_norm": 71.78389198214526,
"learning_rate": 7.404140139871796e-08,
"logits": -2.231065273284912,
"logps": -95.48096466064453,
"loss": 0.0317,
"objective": 0.03271006420254707,
"ranking_idealized": 0.6833333373069763,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.550000011920929,
"regularize": 0.032709524035453796,
"step": 860
},
{
"dpo_loss": 0.6954269409179688,
"epoch": 0.8172584835839698,
"grad_norm": 66.09070940831865,
"learning_rate": 7.36789020415136e-08,
"logits": -2.1372532844543457,
"logps": -91.0411605834961,
"loss": 0.027,
"objective": 0.03049122728407383,
"ranking_idealized": 0.7166666388511658,
"ranking_idealized_expo": 0.6000000238418579,
"ranking_simple": 0.6000000238418579,
"regularize": 0.03049064427614212,
"step": 865
},
{
"dpo_loss": 0.6953790187835693,
"epoch": 0.8219825210613337,
"grad_norm": 61.58311436161328,
"learning_rate": 7.331479105571739e-08,
"logits": -2.1517558097839355,
"logps": -88.9326171875,
"loss": 0.0281,
"objective": 0.02535523846745491,
"ranking_idealized": 0.5,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5,
"regularize": 0.025354215875267982,
"step": 870
},
{
"dpo_loss": 0.6913577318191528,
"epoch": 0.8267065585386978,
"grad_norm": 61.45457019245544,
"learning_rate": 7.294909322337688e-08,
"logits": -2.0830719470977783,
"logps": -95.3221664428711,
"loss": 0.0267,
"objective": 0.02792440913617611,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.6000000238418579,
"ranking_simple": 0.5833333134651184,
"regularize": 0.0279233455657959,
"step": 875
},
{
"dpo_loss": 0.6916779279708862,
"epoch": 0.8314305960160617,
"grad_norm": 69.76363049930384,
"learning_rate": 7.258183343454319e-08,
"logits": -2.276218891143799,
"logps": -91.77556610107422,
"loss": 0.029,
"objective": 0.02677006646990776,
"ranking_idealized": 0.699999988079071,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.5666666626930237,
"regularize": 0.02676946483552456,
"step": 880
},
{
"dpo_loss": 0.6923359036445618,
"epoch": 0.8361546334934257,
"grad_norm": 65.0950944876437,
"learning_rate": 7.221303668557696e-08,
"logits": -2.1599981784820557,
"logps": -90.72624969482422,
"loss": 0.025,
"objective": 0.026336384937167168,
"ranking_idealized": 0.5166666507720947,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5,
"regularize": 0.026335686445236206,
"step": 885
},
{
"dpo_loss": 0.692168116569519,
"epoch": 0.8408786709707897,
"grad_norm": 63.609027029284135,
"learning_rate": 7.184272807744725e-08,
"logits": -2.1683857440948486,
"logps": -92.93081665039062,
"loss": 0.0278,
"objective": 0.03211880847811699,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5166666507720947,
"regularize": 0.03211786970496178,
"step": 890
},
{
"dpo_loss": 0.6949544548988342,
"epoch": 0.8456027084481537,
"grad_norm": 71.19347753450685,
"learning_rate": 7.147093281402281e-08,
"logits": -2.2566373348236084,
"logps": -91.25569915771484,
"loss": 0.0287,
"objective": 0.022946473211050034,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.4833333194255829,
"regularize": 0.022945420816540718,
"step": 895
},
{
"dpo_loss": 0.6891117095947266,
"epoch": 0.8503267459255177,
"grad_norm": 73.13390434400947,
"learning_rate": 7.109767620035688e-08,
"logits": -2.1637258529663086,
"logps": -95.48709869384766,
"loss": 0.0285,
"objective": 0.030570391565561295,
"ranking_idealized": 0.7166666388511658,
"ranking_idealized_expo": 0.6499999761581421,
"ranking_simple": 0.6499999761581421,
"regularize": 0.030569853261113167,
"step": 900
},
{
"epoch": 0.8503267459255177,
"eval_dpo_loss": 0.69291752576828,
"eval_logits": -1.993115782737732,
"eval_logps": -98.94220733642578,
"eval_loss": 0.011713932268321514,
"eval_objective": 0.011828156188130379,
"eval_ranking_idealized": 0.6022099256515503,
"eval_ranking_idealized_expo": 0.5207182168960571,
"eval_ranking_simple": 0.5165745615959167,
"eval_regularize": 0.01182604394853115,
"eval_runtime": 445.945,
"eval_samples_per_second": 12.984,
"eval_steps_per_second": 3.247,
"step": 900
},
{
"dpo_loss": 0.6910974383354187,
"epoch": 0.8550507834028817,
"grad_norm": 72.76821991685014,
"learning_rate": 7.072298364096485e-08,
"logits": -2.094447374343872,
"logps": -89.45642852783203,
"loss": 0.0266,
"objective": 0.027411019429564476,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.550000011920929,
"regularize": 0.02741014026105404,
"step": 905
},
{
"dpo_loss": 0.6891763806343079,
"epoch": 0.8597748208802457,
"grad_norm": 71.26536161303537,
"learning_rate": 7.034688063809511e-08,
"logits": -2.1282496452331543,
"logps": -91.80699157714844,
"loss": 0.0298,
"objective": 0.025737237185239792,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.5333333611488342,
"regularize": 0.025736594572663307,
"step": 910
},
{
"dpo_loss": 0.6862377524375916,
"epoch": 0.8644988583576096,
"grad_norm": 69.04287293810181,
"learning_rate": 6.996939278999337e-08,
"logits": -2.152179479598999,
"logps": -94.09297180175781,
"loss": 0.0263,
"objective": 0.027985723689198494,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.4833333194255829,
"regularize": 0.02798387221992016,
"step": 915
},
{
"dpo_loss": 0.690563440322876,
"epoch": 0.8692228958349736,
"grad_norm": 60.9102612710031,
"learning_rate": 6.959054578916042e-08,
"logits": -2.1106715202331543,
"logps": -90.7065658569336,
"loss": 0.0281,
"objective": 0.02792646363377571,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.6000000238418579,
"regularize": 0.02792549505829811,
"step": 920
},
{
"dpo_loss": 0.6911750435829163,
"epoch": 0.8739469333123376,
"grad_norm": 62.2660390306647,
"learning_rate": 6.921036542060343e-08,
"logits": -1.9987537860870361,
"logps": -89.90222930908203,
"loss": 0.0251,
"objective": 0.01876661367714405,
"ranking_idealized": 0.6833333373069763,
"ranking_idealized_expo": 0.6333333253860474,
"ranking_simple": 0.6333333253860474,
"regularize": 0.018765147775411606,
"step": 925
},
{
"dpo_loss": 0.6897457242012024,
"epoch": 0.8786709707897016,
"grad_norm": 59.1678071537183,
"learning_rate": 6.882887756008093e-08,
"logits": -2.111668825149536,
"logps": -87.85643768310547,
"loss": 0.0257,
"objective": 0.02605438232421875,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.550000011920929,
"regularize": 0.026053914800286293,
"step": 930
},
{
"dpo_loss": 0.6957460045814514,
"epoch": 0.8833950082670656,
"grad_norm": 64.76262904725678,
"learning_rate": 6.844610817234172e-08,
"logits": -2.093857765197754,
"logps": -92.8622055053711,
"loss": 0.0273,
"objective": 0.027011338621377945,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.46666666865348816,
"regularize": 0.027010958641767502,
"step": 935
},
{
"dpo_loss": 0.6905789971351624,
"epoch": 0.8881190457444296,
"grad_norm": 70.7130564090627,
"learning_rate": 6.806208330935765e-08,
"logits": -2.2473738193511963,
"logps": -91.84986114501953,
"loss": 0.0268,
"objective": 0.027645627036690712,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.5666666626930237,
"regularize": 0.02764386683702469,
"step": 940
},
{
"dpo_loss": 0.6940723061561584,
"epoch": 0.8928430832217935,
"grad_norm": 60.88887279226543,
"learning_rate": 6.767682910855045e-08,
"logits": -2.287950038909912,
"logps": -89.53514862060547,
"loss": 0.0261,
"objective": 0.028092078864574432,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.46666666865348816,
"regularize": 0.0280916728079319,
"step": 945
},
{
"dpo_loss": 0.694039523601532,
"epoch": 0.8975671206991576,
"grad_norm": 67.50050200668711,
"learning_rate": 6.729037179101287e-08,
"logits": -2.304736614227295,
"logps": -93.9173812866211,
"loss": 0.0248,
"objective": 0.02201911062002182,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.4833333194255829,
"regularize": 0.02201777510344982,
"step": 950
},
{
"epoch": 0.8975671206991576,
"eval_dpo_loss": 0.6931836009025574,
"eval_logits": -1.9902262687683105,
"eval_logps": -98.64472198486328,
"eval_loss": 0.015600171871483326,
"eval_objective": 0.015454174019396305,
"eval_ranking_idealized": 0.6022099256515503,
"eval_ranking_idealized_expo": 0.5207182168960571,
"eval_ranking_simple": 0.5172652006149292,
"eval_regularize": 0.015452706255018711,
"eval_runtime": 446.0966,
"eval_samples_per_second": 12.979,
"eval_steps_per_second": 3.246,
"step": 950
},
{
"dpo_loss": 0.6894887089729309,
"epoch": 0.9022911581765215,
"grad_norm": 62.475301521026935,
"learning_rate": 6.690273765972383e-08,
"logits": -2.1381261348724365,
"logps": -90.50852966308594,
"loss": 0.0247,
"objective": 0.029864691197872162,
"ranking_idealized": 0.7166666388511658,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.6000000238418579,
"regularize": 0.02986370399594307,
"step": 955
},
{
"dpo_loss": 0.6904736161231995,
"epoch": 0.9070151956538856,
"grad_norm": 71.36008054276485,
"learning_rate": 6.651395309775836e-08,
"logits": -2.161102294921875,
"logps": -94.71805572509766,
"loss": 0.0273,
"objective": 0.03055974654853344,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.5833333134651184,
"regularize": 0.03055933117866516,
"step": 960
},
{
"dpo_loss": 0.6911565661430359,
"epoch": 0.9117392331312495,
"grad_norm": 75.9289905899972,
"learning_rate": 6.612404456649187e-08,
"logits": -2.174187660217285,
"logps": -90.80412292480469,
"loss": 0.0255,
"objective": 0.02420150302350521,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5166666507720947,
"regularize": 0.024200933054089546,
"step": 965
},
{
"dpo_loss": 0.6908090114593506,
"epoch": 0.9164632706086135,
"grad_norm": 66.41988248188461,
"learning_rate": 6.573303860379914e-08,
"logits": -2.258518695831299,
"logps": -91.59303283691406,
"loss": 0.0271,
"objective": 0.02120455540716648,
"ranking_idealized": 0.5,
"ranking_idealized_expo": 0.44999998807907104,
"ranking_simple": 0.4333333373069763,
"regularize": 0.02120377868413925,
"step": 970
},
{
"dpo_loss": 0.686578631401062,
"epoch": 0.9211873080859775,
"grad_norm": 68.57226169783617,
"learning_rate": 6.534096182224808e-08,
"logits": -2.0389044284820557,
"logps": -94.76436614990234,
"loss": 0.0299,
"objective": 0.029232459142804146,
"ranking_idealized": 0.4833333194255829,
"ranking_idealized_expo": 0.4000000059604645,
"ranking_simple": 0.4000000059604645,
"regularize": 0.029231999069452286,
"step": 975
},
{
"dpo_loss": 0.6919539570808411,
"epoch": 0.9259113455633414,
"grad_norm": 69.41679502492849,
"learning_rate": 6.494784090728851e-08,
"logits": -2.2500946521759033,
"logps": -96.09117126464844,
"loss": 0.0262,
"objective": 0.026704249903559685,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5333333611488342,
"regularize": 0.0267037320882082,
"step": 980
},
{
"dpo_loss": 0.6925919055938721,
"epoch": 0.9306353830407055,
"grad_norm": 64.18653487464074,
"learning_rate": 6.455370261543578e-08,
"logits": -2.1756606101989746,
"logps": -93.87403106689453,
"loss": 0.0251,
"objective": 0.025608109310269356,
"ranking_idealized": 0.6833333373069763,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5166666507720947,
"regularize": 0.025606883689761162,
"step": 985
},
{
"dpo_loss": 0.6908634901046753,
"epoch": 0.9353594205180694,
"grad_norm": 61.06423988779827,
"learning_rate": 6.415857377244979e-08,
"logits": -2.1095356941223145,
"logps": -88.83150482177734,
"loss": 0.0251,
"objective": 0.026077650487422943,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5333333611488342,
"regularize": 0.02607729658484459,
"step": 990
},
{
"dpo_loss": 0.689622700214386,
"epoch": 0.9400834579954335,
"grad_norm": 68.1910690637634,
"learning_rate": 6.376248127150908e-08,
"logits": -2.150278329849243,
"logps": -91.8506088256836,
"loss": 0.0269,
"objective": 0.025722531601786613,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.5666666626930237,
"regularize": 0.025721782818436623,
"step": 995
},
{
"dpo_loss": 0.6903151273727417,
"epoch": 0.9448074954727974,
"grad_norm": 64.10384868193276,
"learning_rate": 6.33654520713805e-08,
"logits": -2.1098899841308594,
"logps": -93.50405883789062,
"loss": 0.0272,
"objective": 0.02512853965163231,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5333333611488342,
"regularize": 0.0251275934278965,
"step": 1000
},
{
"epoch": 0.9448074954727974,
"eval_dpo_loss": 0.6931213736534119,
"eval_logits": -1.9906424283981323,
"eval_logps": -98.12418365478516,
"eval_loss": 0.01257664430886507,
"eval_objective": 0.012785565108060837,
"eval_ranking_idealized": 0.6022099256515503,
"eval_ranking_idealized_expo": 0.5207182168960571,
"eval_ranking_simple": 0.5179557800292969,
"eval_regularize": 0.012783819809556007,
"eval_runtime": 444.658,
"eval_samples_per_second": 13.021,
"eval_steps_per_second": 3.256,
"step": 1000
},
{
"dpo_loss": 0.6847264170646667,
"epoch": 0.9495315329501613,
"grad_norm": 68.90168504337811,
"learning_rate": 6.296751319458434e-08,
"logits": -2.1259357929229736,
"logps": -91.41114044189453,
"loss": 0.0298,
"objective": 0.03322311118245125,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5,
"regularize": 0.03322221338748932,
"step": 1005
},
{
"dpo_loss": 0.6920087933540344,
"epoch": 0.9542555704275254,
"grad_norm": 64.89339434605618,
"learning_rate": 6.256869172555513e-08,
"logits": -2.1444926261901855,
"logps": -91.69261932373047,
"loss": 0.0264,
"objective": 0.02593044377863407,
"ranking_idealized": 0.4833333194255829,
"ranking_idealized_expo": 0.44999998807907104,
"ranking_simple": 0.4333333373069763,
"regularize": 0.025929344817996025,
"step": 1010
},
{
"dpo_loss": 0.6885382533073425,
"epoch": 0.9589796079048893,
"grad_norm": 71.3368687595913,
"learning_rate": 6.216901480879819e-08,
"logits": -2.0881664752960205,
"logps": -90.8614501953125,
"loss": 0.0236,
"objective": 0.021669141948223114,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5166666507720947,
"regularize": 0.021668575704097748,
"step": 1015
},
{
"dpo_loss": 0.6919686794281006,
"epoch": 0.9637036453822534,
"grad_norm": 68.26105061311512,
"learning_rate": 6.176850964704212e-08,
"logits": -2.129828453063965,
"logps": -95.18238830566406,
"loss": 0.0226,
"objective": 0.025728456676006317,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.5666666626930237,
"regularize": 0.02572786808013916,
"step": 1020
},
{
"dpo_loss": 0.6866704225540161,
"epoch": 0.9684276828596173,
"grad_norm": 61.40903124064667,
"learning_rate": 6.136720349938743e-08,
"logits": -2.2576215267181396,
"logps": -94.10470581054688,
"loss": 0.0257,
"objective": 0.022323768585920334,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5166666507720947,
"regularize": 0.022323111072182655,
"step": 1025
},
{
"dpo_loss": 0.69089674949646,
"epoch": 0.9731517203369814,
"grad_norm": 63.691297756435006,
"learning_rate": 6.096512367945113e-08,
"logits": -2.113276243209839,
"logps": -90.31819152832031,
"loss": 0.0244,
"objective": 0.022346744313836098,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5333333611488342,
"regularize": 0.02234587073326111,
"step": 1030
},
{
"dpo_loss": 0.6910406351089478,
"epoch": 0.9778757578143453,
"grad_norm": 65.44632044043543,
"learning_rate": 6.056229755350772e-08,
"logits": -2.147958517074585,
"logps": -93.92337036132812,
"loss": 0.0234,
"objective": 0.02298605814576149,
"ranking_idealized": 0.6833333373069763,
"ranking_idealized_expo": 0.6166666746139526,
"ranking_simple": 0.6166666746139526,
"regularize": 0.022985396906733513,
"step": 1035
},
{
"dpo_loss": 0.68792724609375,
"epoch": 0.9825997952917093,
"grad_norm": 76.3390109961412,
"learning_rate": 6.01587525386267e-08,
"logits": -2.1341538429260254,
"logps": -90.12808227539062,
"loss": 0.0253,
"objective": 0.02918338030576706,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.46666666865348816,
"regularize": 0.029182856902480125,
"step": 1040
},
{
"dpo_loss": 0.6883952021598816,
"epoch": 0.9873238327690733,
"grad_norm": 62.771275862582634,
"learning_rate": 5.975451610080642e-08,
"logits": -2.1016061305999756,
"logps": -92.14013671875,
"loss": 0.0243,
"objective": 0.02377927675843239,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.4833333194255829,
"regularize": 0.023778444156050682,
"step": 1045
},
{
"dpo_loss": 0.6908401846885681,
"epoch": 0.9920478702464373,
"grad_norm": 61.00213004445293,
"learning_rate": 5.9349615753104655e-08,
"logits": -2.1279587745666504,
"logps": -97.24657440185547,
"loss": 0.0215,
"objective": 0.021991008892655373,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.5666666626930237,
"regularize": 0.02199002355337143,
"step": 1050
},
{
"epoch": 0.9920478702464373,
"eval_dpo_loss": 0.6927458047866821,
"eval_logits": -1.991100788116455,
"eval_logps": -98.33568572998047,
"eval_loss": 0.01325704250484705,
"eval_objective": 0.013452271930873394,
"eval_ranking_idealized": 0.6022099256515503,
"eval_ranking_idealized_expo": 0.5207182168960571,
"eval_ranking_simple": 0.5179557800292969,
"eval_regularize": 0.01345061045140028,
"eval_runtime": 451.6429,
"eval_samples_per_second": 12.82,
"eval_steps_per_second": 3.206,
"step": 1050
},
{
"dpo_loss": 0.6905434131622314,
"epoch": 0.9967719077238013,
"grad_norm": 69.09205896680123,
"learning_rate": 5.894407905376616e-08,
"logits": -2.2125723361968994,
"logps": -90.43359375,
"loss": 0.0256,
"objective": 0.022856025025248528,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.46666666865348816,
"regularize": 0.022855514660477638,
"step": 1055
},
{
"dpo_loss": 0.6896554231643677,
"epoch": 1.0014959452011654,
"grad_norm": 71.18743706384133,
"learning_rate": 5.853793360434687e-08,
"logits": -2.095319986343384,
"logps": -92.04082489013672,
"loss": 0.0261,
"objective": 0.025890907272696495,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.550000011920929,
"regularize": 0.025890160351991653,
"step": 1060
},
{
"dpo_loss": 0.6926552057266235,
"epoch": 1.0062199826785292,
"grad_norm": 68.03463485699294,
"learning_rate": 5.813120704783539e-08,
"logits": -2.1974759101867676,
"logps": -91.99174499511719,
"loss": 0.0258,
"objective": 0.026935292407870293,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.5666666626930237,
"regularize": 0.026934677734971046,
"step": 1065
},
{
"dpo_loss": 0.6922659277915955,
"epoch": 1.0109440201558932,
"grad_norm": 84.22043191701646,
"learning_rate": 5.772392706677148e-08,
"logits": -2.0366082191467285,
"logps": -93.32905578613281,
"loss": 0.0257,
"objective": 0.026561260223388672,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.5833333134651184,
"regularize": 0.026560688391327858,
"step": 1070
},
{
"dpo_loss": 0.6917497515678406,
"epoch": 1.0156680576332573,
"grad_norm": 73.42469612086076,
"learning_rate": 5.7316121381361984e-08,
"logits": -2.204793691635132,
"logps": -94.83844757080078,
"loss": 0.0269,
"objective": 0.029431190341711044,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.4333333373069763,
"ranking_simple": 0.46666666865348816,
"regularize": 0.029430601745843887,
"step": 1075
},
{
"dpo_loss": 0.683849573135376,
"epoch": 1.0203920951106211,
"grad_norm": 69.22367476602035,
"learning_rate": 5.690781774759411e-08,
"logits": -2.2117373943328857,
"logps": -94.56703186035156,
"loss": 0.0276,
"objective": 0.030149787664413452,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.44999998807907104,
"ranking_simple": 0.46666666865348816,
"regularize": 0.030148986726999283,
"step": 1080
},
{
"dpo_loss": 0.6918656229972839,
"epoch": 1.0251161325879852,
"grad_norm": 67.88816058800367,
"learning_rate": 5.649904395534636e-08,
"logits": -2.1058478355407715,
"logps": -94.81607055664062,
"loss": 0.0249,
"objective": 0.025079350918531418,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.4833333194255829,
"regularize": 0.025078853592276573,
"step": 1085
},
{
"dpo_loss": 0.6931339502334595,
"epoch": 1.0298401700653492,
"grad_norm": 71.9727166820056,
"learning_rate": 5.6089827826497026e-08,
"logits": -2.2008562088012695,
"logps": -93.15959930419922,
"loss": 0.0233,
"objective": 0.024377651512622833,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.4333333373069763,
"ranking_simple": 0.44999998807907104,
"regularize": 0.024377118796110153,
"step": 1090
},
{
"dpo_loss": 0.6928921341896057,
"epoch": 1.0345642075427133,
"grad_norm": 64.563119648666,
"learning_rate": 5.568019721303068e-08,
"logits": -2.146667957305908,
"logps": -95.26697540283203,
"loss": 0.0232,
"objective": 0.022780917584896088,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5166666507720947,
"regularize": 0.022780440747737885,
"step": 1095
},
{
"dpo_loss": 0.6914113759994507,
"epoch": 1.039288245020077,
"grad_norm": 70.34249089351219,
"learning_rate": 5.527017999514239e-08,
"logits": -2.1238293647766113,
"logps": -90.81373596191406,
"loss": 0.0242,
"objective": 0.019299499690532684,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5,
"regularize": 0.019298112019896507,
"step": 1100
},
{
"epoch": 1.039288245020077,
"eval_dpo_loss": 0.692744791507721,
"eval_logits": -1.9881205558776855,
"eval_logps": -98.5120849609375,
"eval_loss": 0.012820076197385788,
"eval_objective": 0.012685425579547882,
"eval_ranking_idealized": 0.6022099256515503,
"eval_ranking_idealized_expo": 0.5207182168960571,
"eval_ranking_simple": 0.5179557800292969,
"eval_regularize": 0.012683761306107044,
"eval_runtime": 445.6772,
"eval_samples_per_second": 12.991,
"eval_steps_per_second": 3.249,
"step": 1100
},
{
"dpo_loss": 0.6895564198493958,
"epoch": 1.0440122824974412,
"grad_norm": 69.83976607143053,
"learning_rate": 5.4859804079340266e-08,
"logits": -2.158614158630371,
"logps": -88.3459243774414,
"loss": 0.0234,
"objective": 0.024073513224720955,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.4833333194255829,
"regularize": 0.024072829633951187,
"step": 1105
},
{
"dpo_loss": 0.6888210773468018,
"epoch": 1.0487363199748052,
"grad_norm": 78.2561714772354,
"learning_rate": 5.444909739654602e-08,
"logits": -2.2234978675842285,
"logps": -92.2721939086914,
"loss": 0.025,
"objective": 0.025087477639317513,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.4333333373069763,
"ranking_simple": 0.44999998807907104,
"regularize": 0.025086527690291405,
"step": 1110
},
{
"dpo_loss": 0.6934700608253479,
"epoch": 1.053460357452169,
"grad_norm": 67.69947794878017,
"learning_rate": 5.4038087900193974e-08,
"logits": -2.0514726638793945,
"logps": -92.03214263916016,
"loss": 0.0243,
"objective": 0.020962979644536972,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.44999998807907104,
"ranking_simple": 0.44999998807907104,
"regularize": 0.020961163565516472,
"step": 1115
},
{
"dpo_loss": 0.6921118497848511,
"epoch": 1.058184394929533,
"grad_norm": 60.76759737021519,
"learning_rate": 5.362680356432846e-08,
"logits": -2.082772731781006,
"logps": -91.51958465576172,
"loss": 0.0226,
"objective": 0.02336716279387474,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.4833333194255829,
"regularize": 0.023366322740912437,
"step": 1120
},
{
"dpo_loss": 0.6900876760482788,
"epoch": 1.0629084324068971,
"grad_norm": 89.61755393855444,
"learning_rate": 5.321527238169992e-08,
"logits": -2.137908935546875,
"logps": -94.91239166259766,
"loss": 0.0258,
"objective": 0.0195107851177454,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.46666666865348816,
"regularize": 0.01950863003730774,
"step": 1125
},
{
"dpo_loss": 0.6918936371803284,
"epoch": 1.067632469884261,
"grad_norm": 60.32020595039731,
"learning_rate": 5.280352236185959e-08,
"logits": -2.223163604736328,
"logps": -94.0035629272461,
"loss": 0.0198,
"objective": 0.020925112068653107,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.550000011920929,
"regularize": 0.020924491807818413,
"step": 1130
},
{
"dpo_loss": 0.6936992406845093,
"epoch": 1.072356507361625,
"grad_norm": 68.85146603912719,
"learning_rate": 5.239158152925319e-08,
"logits": -2.089085102081299,
"logps": -89.82282257080078,
"loss": 0.0232,
"objective": 0.02565601095557213,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.46666666865348816,
"regularize": 0.025654161348938942,
"step": 1135
},
{
"dpo_loss": 0.6891080737113953,
"epoch": 1.077080544838989,
"grad_norm": 63.283810030024625,
"learning_rate": 5.197947792131348e-08,
"logits": -2.201981782913208,
"logps": -93.19425964355469,
"loss": 0.0252,
"objective": 0.02514718845486641,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.5833333134651184,
"regularize": 0.025146154686808586,
"step": 1140
},
{
"dpo_loss": 0.6902083158493042,
"epoch": 1.0818045823163531,
"grad_norm": 65.2698459996647,
"learning_rate": 5.1567239586552e-08,
"logits": -2.185304880142212,
"logps": -90.79157257080078,
"loss": 0.0219,
"objective": 0.021113820374011993,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5166666507720947,
"regularize": 0.021113011986017227,
"step": 1145
},
{
"dpo_loss": 0.6910417675971985,
"epoch": 1.086528619793717,
"grad_norm": 65.02233557080358,
"learning_rate": 5.115489458265005e-08,
"logits": -2.1189420223236084,
"logps": -94.17777252197266,
"loss": 0.0248,
"objective": 0.023960812017321587,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5333333611488342,
"regularize": 0.02395929954946041,
"step": 1150
},
{
"epoch": 1.086528619793717,
"eval_dpo_loss": 0.6928678154945374,
"eval_logits": -1.9900000095367432,
"eval_logps": -98.37399291992188,
"eval_loss": 0.01211391482502222,
"eval_objective": 0.012414646334946156,
"eval_ranking_idealized": 0.6022099256515503,
"eval_ranking_idealized_expo": 0.5207182168960571,
"eval_ranking_simple": 0.5179557800292969,
"eval_regularize": 0.0124129019677639,
"eval_runtime": 446.4802,
"eval_samples_per_second": 12.968,
"eval_steps_per_second": 3.243,
"step": 1150
},
{
"dpo_loss": 0.6903362274169922,
"epoch": 1.091252657271081,
"grad_norm": 65.0696209993928,
"learning_rate": 5.0742470974549036e-08,
"logits": -2.100759744644165,
"logps": -90.93352508544922,
"loss": 0.0233,
"objective": 0.020316295325756073,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5333333611488342,
"regularize": 0.020315730944275856,
"step": 1155
},
{
"dpo_loss": 0.6921348571777344,
"epoch": 1.095976694748445,
"grad_norm": 68.18545652779869,
"learning_rate": 5.032999683254028e-08,
"logits": -2.1389288902282715,
"logps": -90.10498046875,
"loss": 0.0247,
"objective": 0.021737979725003242,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.5833333134651184,
"regularize": 0.021737171337008476,
"step": 1160
},
{
"dpo_loss": 0.69456547498703,
"epoch": 1.100700732225809,
"grad_norm": 73.7471857905923,
"learning_rate": 4.991750023035455e-08,
"logits": -2.124562978744507,
"logps": -90.93301391601562,
"loss": 0.024,
"objective": 0.02404080517590046,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.6000000238418579,
"ranking_simple": 0.6000000238418579,
"regularize": 0.02403992973268032,
"step": 1165
},
{
"dpo_loss": 0.6940571069717407,
"epoch": 1.105424769703173,
"grad_norm": 67.32632108670151,
"learning_rate": 4.950500924325127e-08,
"logits": -2.2072455883026123,
"logps": -94.81490325927734,
"loss": 0.0255,
"objective": 0.026444217190146446,
"ranking_idealized": 0.4833333194255829,
"ranking_idealized_expo": 0.4166666567325592,
"ranking_simple": 0.4333333373069763,
"regularize": 0.026443663984537125,
"step": 1170
},
{
"dpo_loss": 0.692010223865509,
"epoch": 1.110148807180537,
"grad_norm": 60.40484735228916,
"learning_rate": 4.909255194610773e-08,
"logits": -2.1622235774993896,
"logps": -90.50462341308594,
"loss": 0.0217,
"objective": 0.01786196231842041,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.5,
"regularize": 0.01785971410572529,
"step": 1175
},
{
"dpo_loss": 0.6931031942367554,
"epoch": 1.114872844657901,
"grad_norm": 59.38906623535502,
"learning_rate": 4.8680156411508193e-08,
"logits": -2.16975474357605,
"logps": -89.3101806640625,
"loss": 0.0243,
"objective": 0.01995784044265747,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.44999998807907104,
"ranking_simple": 0.44999998807907104,
"regularize": 0.019954947754740715,
"step": 1180
},
{
"dpo_loss": 0.690836489200592,
"epoch": 1.1195968821352649,
"grad_norm": 68.89482324651908,
"learning_rate": 4.826785070783326e-08,
"logits": -2.058103084564209,
"logps": -91.24579620361328,
"loss": 0.0233,
"objective": 0.022483140230178833,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.6166666746139526,
"ranking_simple": 0.6166666746139526,
"regularize": 0.02248191460967064,
"step": 1185
},
{
"dpo_loss": 0.6914324164390564,
"epoch": 1.124320919612629,
"grad_norm": 61.796191905639844,
"learning_rate": 4.7855662897349464e-08,
"logits": -2.1661124229431152,
"logps": -91.51298522949219,
"loss": 0.0226,
"objective": 0.018483061343431473,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5166666507720947,
"regularize": 0.018482128158211708,
"step": 1190
},
{
"dpo_loss": 0.6916797161102295,
"epoch": 1.129044957089993,
"grad_norm": 63.413323128276396,
"learning_rate": 4.744362103429933e-08,
"logits": -2.204550266265869,
"logps": -90.19840240478516,
"loss": 0.0244,
"objective": 0.025325793772935867,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.6000000238418579,
"ranking_simple": 0.6000000238418579,
"regularize": 0.02532271295785904,
"step": 1195
},
{
"dpo_loss": 0.6876908540725708,
"epoch": 1.1337689945673568,
"grad_norm": 66.08019689679915,
"learning_rate": 4.703175316299196e-08,
"logits": -2.147653341293335,
"logps": -94.6951904296875,
"loss": 0.0238,
"objective": 0.022918345406651497,
"ranking_idealized": 0.699999988079071,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.5666666626930237,
"regularize": 0.022917365655303,
"step": 1200
},
{
"epoch": 1.1337689945673568,
"eval_dpo_loss": 0.6931039094924927,
"eval_logits": -1.988110899925232,
"eval_logps": -98.65231323242188,
"eval_loss": 0.013076459057629108,
"eval_objective": 0.013204570859670639,
"eval_ranking_idealized": 0.6022099256515503,
"eval_ranking_idealized_expo": 0.5207182168960571,
"eval_ranking_simple": 0.5186464190483093,
"eval_regularize": 0.013202749192714691,
"eval_runtime": 446.1352,
"eval_samples_per_second": 12.978,
"eval_steps_per_second": 3.246,
"step": 1200
},
{
"dpo_loss": 0.6895859837532043,
"epoch": 1.1384930320447209,
"grad_norm": 59.82159099357435,
"learning_rate": 4.662008731589424e-08,
"logits": -2.2835893630981445,
"logps": -93.41173553466797,
"loss": 0.0238,
"objective": 0.01953883096575737,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5166666507720947,
"regularize": 0.019538111984729767,
"step": 1205
},
{
"dpo_loss": 0.6878269910812378,
"epoch": 1.143217069522085,
"grad_norm": 64.13908923913202,
"learning_rate": 4.6208651511722916e-08,
"logits": -2.107128381729126,
"logps": -95.02501678466797,
"loss": 0.0225,
"objective": 0.02028297260403633,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.44999998807907104,
"ranking_simple": 0.4833333194255829,
"regularize": 0.020282411947846413,
"step": 1210
},
{
"dpo_loss": 0.6913109421730042,
"epoch": 1.147941106999449,
"grad_norm": 64.08267971583916,
"learning_rate": 4.579747375353763e-08,
"logits": -2.152080774307251,
"logps": -93.3291244506836,
"loss": 0.0225,
"objective": 0.02415003441274166,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.4833333194255829,
"regularize": 0.024148976430296898,
"step": 1215
},
{
"dpo_loss": 0.6908139586448669,
"epoch": 1.1526651444768128,
"grad_norm": 61.90111498957097,
"learning_rate": 4.5386582026834904e-08,
"logits": -2.2181687355041504,
"logps": -91.48564147949219,
"loss": 0.0211,
"objective": 0.01728188991546631,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.44999998807907104,
"ranking_simple": 0.44999998807907104,
"regularize": 0.017280517145991325,
"step": 1220
},
{
"dpo_loss": 0.6892186999320984,
"epoch": 1.1573891819541768,
"grad_norm": 67.90586520467559,
"learning_rate": 4.497600429764349e-08,
"logits": -2.0878336429595947,
"logps": -92.51049041748047,
"loss": 0.023,
"objective": 0.021265888586640358,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5,
"regularize": 0.021264949813485146,
"step": 1225
},
{
"dpo_loss": 0.6902192831039429,
"epoch": 1.162113219431541,
"grad_norm": 69.62062658592635,
"learning_rate": 4.456576851062089e-08,
"logits": -2.1754400730133057,
"logps": -89.9488296508789,
"loss": 0.0202,
"objective": 0.015848658978939056,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.5666666626930237,
"regularize": 0.015847818925976753,
"step": 1230
},
{
"dpo_loss": 0.6919657588005066,
"epoch": 1.1668372569089047,
"grad_norm": 76.5213820049961,
"learning_rate": 4.4155902587151404e-08,
"logits": -2.2707595825195312,
"logps": -89.4555435180664,
"loss": 0.0231,
"objective": 0.02043619193136692,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5333333611488342,
"regularize": 0.020434273406863213,
"step": 1235
},
{
"dpo_loss": 0.6887553930282593,
"epoch": 1.1715612943862688,
"grad_norm": 68.85342051503754,
"learning_rate": 4.374643442344576e-08,
"logits": -2.1689367294311523,
"logps": -91.3507080078125,
"loss": 0.022,
"objective": 0.02212885580956936,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.5833333134651184,
"regularize": 0.02212790958583355,
"step": 1240
},
{
"dpo_loss": 0.6907090544700623,
"epoch": 1.1762853318636328,
"grad_norm": 63.9895529963574,
"learning_rate": 4.333739188864243e-08,
"logits": -2.116743564605713,
"logps": -89.35691833496094,
"loss": 0.0231,
"objective": 0.02183300256729126,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5,
"regularize": 0.021832352504134178,
"step": 1245
},
{
"dpo_loss": 0.6873170733451843,
"epoch": 1.1810093693409969,
"grad_norm": 62.000344928337206,
"learning_rate": 4.292880282291083e-08,
"logits": -2.0605881214141846,
"logps": -89.00260925292969,
"loss": 0.0213,
"objective": 0.016554510220885277,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.4833333194255829,
"regularize": 0.016552967950701714,
"step": 1250
},
{
"epoch": 1.1810093693409969,
"eval_dpo_loss": 0.6929011344909668,
"eval_logits": -1.9892135858535767,
"eval_logps": -98.38199615478516,
"eval_loss": 0.011604293249547482,
"eval_objective": 0.011785290203988552,
"eval_ranking_idealized": 0.6022099256515503,
"eval_ranking_idealized_expo": 0.5207182168960571,
"eval_ranking_simple": 0.5186464190483093,
"eval_regularize": 0.011783457361161709,
"eval_runtime": 446.5262,
"eval_samples_per_second": 12.967,
"eval_steps_per_second": 3.243,
"step": 1250
},
{
"dpo_loss": 0.6883438229560852,
"epoch": 1.1857334068183607,
"grad_norm": 76.14193268278439,
"learning_rate": 4.2520695035556444e-08,
"logits": -2.238811731338501,
"logps": -93.17744445800781,
"loss": 0.0227,
"objective": 0.02147439494729042,
"ranking_idealized": 0.4166666567325592,
"ranking_idealized_expo": 0.4000000059604645,
"ranking_simple": 0.4000000059604645,
"regularize": 0.021473314613103867,
"step": 1255
},
{
"dpo_loss": 0.6898403763771057,
"epoch": 1.1904574442957248,
"grad_norm": 72.14873570982253,
"learning_rate": 4.211309630312812e-08,
"logits": -2.186509847640991,
"logps": -92.71757507324219,
"loss": 0.0221,
"objective": 0.024685295298695564,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.6166666746139526,
"ranking_simple": 0.6000000238418579,
"regularize": 0.02468358352780342,
"step": 1260
},
{
"dpo_loss": 0.6934939026832581,
"epoch": 1.1951814817730888,
"grad_norm": 76.43192783166353,
"learning_rate": 4.1706034367527484e-08,
"logits": -2.2296221256256104,
"logps": -90.43429565429688,
"loss": 0.0216,
"objective": 0.017879430204629898,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5,
"regularize": 0.017877496778964996,
"step": 1265
},
{
"dpo_loss": 0.6907635927200317,
"epoch": 1.1999055192504526,
"grad_norm": 63.68697185614875,
"learning_rate": 4.12995369341208e-08,
"logits": -2.2198116779327393,
"logps": -89.92237854003906,
"loss": 0.0186,
"objective": 0.019899163395166397,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5,
"regularize": 0.019898338243365288,
"step": 1270
},
{
"dpo_loss": 0.6885548233985901,
"epoch": 1.2046295567278167,
"grad_norm": 67.30508547665502,
"learning_rate": 4.0893631669853315e-08,
"logits": -2.2070553302764893,
"logps": -91.00017547607422,
"loss": 0.0213,
"objective": 0.0249098539352417,
"ranking_idealized": 0.6833333373069763,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.5833333134651184,
"regularize": 0.024909144267439842,
"step": 1275
},
{
"dpo_loss": 0.6864418387413025,
"epoch": 1.2093535942051807,
"grad_norm": 58.37239845591427,
"learning_rate": 4.048834620136618e-08,
"logits": -2.157111406326294,
"logps": -90.54441833496094,
"loss": 0.0216,
"objective": 0.024475712329149246,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.5833333134651184,
"regularize": 0.02447471395134926,
"step": 1280
},
{
"dpo_loss": 0.6923628449440002,
"epoch": 1.2140776316825446,
"grad_norm": 62.390419253758,
"learning_rate": 4.0083708113116125e-08,
"logits": -2.20066499710083,
"logps": -91.0829849243164,
"loss": 0.0194,
"objective": 0.017484158277511597,
"ranking_idealized": 0.699999988079071,
"ranking_idealized_expo": 0.6499999761581421,
"ranking_simple": 0.6499999761581421,
"regularize": 0.017483441159129143,
"step": 1285
},
{
"dpo_loss": 0.6917316317558289,
"epoch": 1.2188016691599086,
"grad_norm": 62.080679222088676,
"learning_rate": 3.9679744945498026e-08,
"logits": -2.0995683670043945,
"logps": -89.5858154296875,
"loss": 0.0215,
"objective": 0.019274141639471054,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5,
"regularize": 0.01927088387310505,
"step": 1290
},
{
"dpo_loss": 0.6907222867012024,
"epoch": 1.2235257066372727,
"grad_norm": 69.35505760627515,
"learning_rate": 3.9276484192970427e-08,
"logits": -2.0752005577087402,
"logps": -88.9092788696289,
"loss": 0.0202,
"objective": 0.01790427789092064,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.5833333134651184,
"regularize": 0.017903130501508713,
"step": 1295
},
{
"dpo_loss": 0.6888744831085205,
"epoch": 1.2282497441146367,
"grad_norm": 64.13387937854051,
"learning_rate": 3.887395330218428e-08,
"logits": -2.088010549545288,
"logps": -93.63009643554688,
"loss": 0.0213,
"objective": 0.01880194991827011,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.5666666626930237,
"regularize": 0.018800783902406693,
"step": 1300
},
{
"epoch": 1.2282497441146367,
"eval_dpo_loss": 0.6930280923843384,
"eval_logits": -1.9901354312896729,
"eval_logps": -98.35194396972656,
"eval_loss": 0.010079173371195793,
"eval_objective": 0.010304316878318787,
"eval_ranking_idealized": 0.6022099256515503,
"eval_ranking_idealized_expo": 0.5207182168960571,
"eval_ranking_simple": 0.5179557800292969,
"eval_regularize": 0.010302502661943436,
"eval_runtime": 446.7461,
"eval_samples_per_second": 12.96,
"eval_steps_per_second": 3.241,
"step": 1300
},
{
"dpo_loss": 0.6898637413978577,
"epoch": 1.2329737815920006,
"grad_norm": 64.4984732207436,
"learning_rate": 3.847217967011481e-08,
"logits": -2.107663154602051,
"logps": -90.70755004882812,
"loss": 0.0188,
"objective": 0.020963182672858238,
"ranking_idealized": 0.4333333373069763,
"ranking_idealized_expo": 0.4000000059604645,
"ranking_simple": 0.4000000059604645,
"regularize": 0.020961280912160873,
"step": 1305
},
{
"dpo_loss": 0.6896305084228516,
"epoch": 1.2376978190693646,
"grad_norm": 65.1483177905117,
"learning_rate": 3.807119064219686e-08,
"logits": -2.1721391677856445,
"logps": -89.53897094726562,
"loss": 0.0187,
"objective": 0.022347215563058853,
"ranking_idealized": 0.6833333373069763,
"ranking_idealized_expo": 0.6000000238418579,
"ranking_simple": 0.6000000238418579,
"regularize": 0.02234494686126709,
"step": 1310
},
{
"dpo_loss": 0.6885426044464111,
"epoch": 1.2424218565467287,
"grad_norm": 67.6302362675184,
"learning_rate": 3.7671013510463685e-08,
"logits": -2.1977858543395996,
"logps": -90.79574584960938,
"loss": 0.0254,
"objective": 0.021330129355192184,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.550000011920929,
"regularize": 0.02132764458656311,
"step": 1315
},
{
"dpo_loss": 0.6933093667030334,
"epoch": 1.2471458940240927,
"grad_norm": 63.75946991656521,
"learning_rate": 3.727167551168947e-08,
"logits": -2.229327917098999,
"logps": -91.51911163330078,
"loss": 0.0225,
"objective": 0.01876850612461567,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.46666666865348816,
"regularize": 0.01876780204474926,
"step": 1320
},
{
"dpo_loss": 0.6886643171310425,
"epoch": 1.2518699315014565,
"grad_norm": 61.93022907625324,
"learning_rate": 3.687320382553547e-08,
"logits": -2.1852173805236816,
"logps": -94.50476837158203,
"loss": 0.0213,
"objective": 0.019077714532613754,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.5666666626930237,
"regularize": 0.01907687447965145,
"step": 1325
},
{
"dpo_loss": 0.690542459487915,
"epoch": 1.2565939689788206,
"grad_norm": 61.862265225762556,
"learning_rate": 3.6475625572700156e-08,
"logits": -2.1126949787139893,
"logps": -93.08950805664062,
"loss": 0.0201,
"objective": 0.020278314128518105,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5166666507720947,
"regularize": 0.020277447998523712,
"step": 1330
},
{
"dpo_loss": 0.6890642046928406,
"epoch": 1.2613180064561846,
"grad_norm": 65.89840804248666,
"learning_rate": 3.607896781307333e-08,
"logits": -2.0817463397979736,
"logps": -93.81517791748047,
"loss": 0.0186,
"objective": 0.018369121477007866,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.44999998807907104,
"ranking_simple": 0.44999998807907104,
"regularize": 0.01836659200489521,
"step": 1335
},
{
"dpo_loss": 0.6935243606567383,
"epoch": 1.2660420439335485,
"grad_norm": 61.73867224830495,
"learning_rate": 3.5683257543894376e-08,
"logits": -2.158568859100342,
"logps": -91.2880859375,
"loss": 0.0199,
"objective": 0.02297687530517578,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5166666507720947,
"regularize": 0.022976113483309746,
"step": 1340
},
{
"dpo_loss": 0.6870158910751343,
"epoch": 1.2707660814109125,
"grad_norm": 55.58597973847245,
"learning_rate": 3.528852169791474e-08,
"logits": -2.130025625228882,
"logps": -92.3035888671875,
"loss": 0.0191,
"objective": 0.018635360524058342,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.5666666626930237,
"regularize": 0.018634265288710594,
"step": 1345
},
{
"dpo_loss": 0.6925967335700989,
"epoch": 1.2754901188882766,
"grad_norm": 60.169587280433916,
"learning_rate": 3.489478714156493e-08,
"logits": -2.1538307666778564,
"logps": -90.29386901855469,
"loss": 0.0191,
"objective": 0.01989407278597355,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.5833333134651184,
"regularize": 0.01989228092133999,
"step": 1350
},
{
"epoch": 1.2754901188882766,
"eval_dpo_loss": 0.6928868293762207,
"eval_logits": -1.9894771575927734,
"eval_logps": -98.17080688476562,
"eval_loss": 0.010478594340384007,
"eval_objective": 0.01073493529111147,
"eval_ranking_idealized": 0.6022099256515503,
"eval_ranking_idealized_expo": 0.5207182168960571,
"eval_ranking_simple": 0.5186464190483093,
"eval_regularize": 0.01073309313505888,
"eval_runtime": 446.5009,
"eval_samples_per_second": 12.967,
"eval_steps_per_second": 3.243,
"step": 1350
},
{
"dpo_loss": 0.6914752721786499,
"epoch": 1.2802141563656404,
"grad_norm": 63.02270194963686,
"learning_rate": 3.450208067312586e-08,
"logits": -2.1492412090301514,
"logps": -89.29891967773438,
"loss": 0.019,
"objective": 0.017367955297231674,
"ranking_idealized": 0.5,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.46666666865348816,
"regularize": 0.01736696995794773,
"step": 1355
},
{
"dpo_loss": 0.6908875703811646,
"epoch": 1.2849381938430045,
"grad_norm": 66.84237286020337,
"learning_rate": 3.411042902090492e-08,
"logits": -2.2156636714935303,
"logps": -91.21939849853516,
"loss": 0.0183,
"objective": 0.016017399728298187,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5166666507720947,
"regularize": 0.016014499589800835,
"step": 1360
},
{
"dpo_loss": 0.6913639307022095,
"epoch": 1.2896622313203685,
"grad_norm": 76.89545336747237,
"learning_rate": 3.3719858841416836e-08,
"logits": -2.1557438373565674,
"logps": -92.71784210205078,
"loss": 0.0196,
"objective": 0.019386129453778267,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5166666507720947,
"regularize": 0.019384343177080154,
"step": 1365
},
{
"dpo_loss": 0.6889938712120056,
"epoch": 1.2943862687977323,
"grad_norm": 56.519950070251724,
"learning_rate": 3.333039671756934e-08,
"logits": -2.055145502090454,
"logps": -90.29429626464844,
"loss": 0.0191,
"objective": 0.019297009333968163,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.5,
"regularize": 0.01929611526429653,
"step": 1370
},
{
"dpo_loss": 0.689259946346283,
"epoch": 1.2991103062750964,
"grad_norm": 73.9021579793652,
"learning_rate": 3.294206915685392e-08,
"logits": -2.2613272666931152,
"logps": -95.13359832763672,
"loss": 0.0184,
"objective": 0.015245441347360611,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.5833333134651184,
"regularize": 0.01524441223591566,
"step": 1375
},
{
"dpo_loss": 0.6903609037399292,
"epoch": 1.3038343437524604,
"grad_norm": 63.39814646096018,
"learning_rate": 3.2554902589541666e-08,
"logits": -2.1530189514160156,
"logps": -90.94368743896484,
"loss": 0.0174,
"objective": 0.017877008765935898,
"ranking_idealized": 0.7166666388511658,
"ranking_idealized_expo": 0.6499999761581421,
"ranking_simple": 0.6499999761581421,
"regularize": 0.017875246703624725,
"step": 1380
},
{
"dpo_loss": 0.6925813555717468,
"epoch": 1.3085583812298245,
"grad_norm": 67.03876383480757,
"learning_rate": 3.216892336688435e-08,
"logits": -2.162677526473999,
"logps": -91.54695892333984,
"loss": 0.0196,
"objective": 0.019883565604686737,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5,
"regularize": 0.019882572814822197,
"step": 1385
},
{
"dpo_loss": 0.690301775932312,
"epoch": 1.3132824187071885,
"grad_norm": 65.28692296573145,
"learning_rate": 3.1784157759320954e-08,
"logits": -2.116351842880249,
"logps": -91.4957504272461,
"loss": 0.018,
"objective": 0.015376557596027851,
"ranking_idealized": 0.6833333373069763,
"ranking_idealized_expo": 0.6333333253860474,
"ranking_simple": 0.6333333253860474,
"regularize": 0.015375560149550438,
"step": 1390
},
{
"dpo_loss": 0.6893251538276672,
"epoch": 1.3180064561845524,
"grad_norm": 62.76399037621205,
"learning_rate": 3.140063195468962e-08,
"logits": -2.163536548614502,
"logps": -92.03372192382812,
"loss": 0.0177,
"objective": 0.017055170610547066,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5,
"regularize": 0.017053287476301193,
"step": 1395
},
{
"dpo_loss": 0.6904376745223999,
"epoch": 1.3227304936619164,
"grad_norm": 66.03868596298913,
"learning_rate": 3.101837205644531e-08,
"logits": -2.1526260375976562,
"logps": -91.71768951416016,
"loss": 0.0183,
"objective": 0.017484767362475395,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5,
"regularize": 0.017483625560998917,
"step": 1400
},
{
"epoch": 1.3227304936619164,
"eval_dpo_loss": 0.6928439736366272,
"eval_logits": -1.9895795583724976,
"eval_logps": -98.29888916015625,
"eval_loss": 0.009779366664588451,
"eval_objective": 0.009947007521986961,
"eval_ranking_idealized": 0.6022099256515503,
"eval_ranking_idealized_expo": 0.5207182168960571,
"eval_ranking_simple": 0.5179557800292969,
"eval_regularize": 0.009945098310709,
"eval_runtime": 445.875,
"eval_samples_per_second": 12.986,
"eval_steps_per_second": 3.248,
"step": 1400
},
{
"dpo_loss": 0.6910092234611511,
"epoch": 1.3274545311392805,
"grad_norm": 60.84233260746348,
"learning_rate": 3.063740408188308e-08,
"logits": -2.129271984100342,
"logps": -88.37971496582031,
"loss": 0.0189,
"objective": 0.02033030241727829,
"ranking_idealized": 0.6833333373069763,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.5666666626930237,
"regularize": 0.020329667255282402,
"step": 1405
},
{
"dpo_loss": 0.68757563829422,
"epoch": 1.3321785686166443,
"grad_norm": 66.02099749274366,
"learning_rate": 3.0257753960367374e-08,
"logits": -2.15506911277771,
"logps": -94.14618682861328,
"loss": 0.0203,
"objective": 0.022749019786715508,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5166666507720947,
"regularize": 0.022747376933693886,
"step": 1410
},
{
"dpo_loss": 0.6922497153282166,
"epoch": 1.3369026060940084,
"grad_norm": 69.66802628356685,
"learning_rate": 2.987944753156717e-08,
"logits": -2.111666440963745,
"logps": -91.04405975341797,
"loss": 0.0182,
"objective": 0.0166956577450037,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.6499999761581421,
"ranking_simple": 0.6499999761581421,
"regularize": 0.016694890335202217,
"step": 1415
},
{
"dpo_loss": 0.6890503168106079,
"epoch": 1.3416266435713724,
"grad_norm": 60.95975646160118,
"learning_rate": 2.9502510543697322e-08,
"logits": -2.153930902481079,
"logps": -90.40367889404297,
"loss": 0.0168,
"objective": 0.01781788095831871,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.4833333194255829,
"regularize": 0.017816245555877686,
"step": 1420
},
{
"dpo_loss": 0.6874597668647766,
"epoch": 1.3463506810487362,
"grad_norm": 59.58581498665142,
"learning_rate": 2.912696865176607e-08,
"logits": -2.243446111679077,
"logps": -96.16178894042969,
"loss": 0.0177,
"objective": 0.023015646263957024,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.6166666746139526,
"ranking_simple": 0.6000000238418579,
"regularize": 0.02301453799009323,
"step": 1425
},
{
"dpo_loss": 0.6897552013397217,
"epoch": 1.3510747185261003,
"grad_norm": 60.74669762947064,
"learning_rate": 2.875284741582892e-08,
"logits": -2.1864495277404785,
"logps": -96.33065032958984,
"loss": 0.017,
"objective": 0.015690678730607033,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.4833333194255829,
"regularize": 0.015688156709074974,
"step": 1430
},
{
"dpo_loss": 0.6917933821678162,
"epoch": 1.3557987560034643,
"grad_norm": 52.60951888043,
"learning_rate": 2.838017229924894e-08,
"logits": -2.142225742340088,
"logps": -92.60111999511719,
"loss": 0.0173,
"objective": 0.01731746830046177,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.550000011920929,
"regularize": 0.01731485314667225,
"step": 1435
},
{
"dpo_loss": 0.6895142793655396,
"epoch": 1.3605227934808282,
"grad_norm": 72.2797304152481,
"learning_rate": 2.8008968666963817e-08,
"logits": -2.1638967990875244,
"logps": -92.39698028564453,
"loss": 0.0165,
"objective": 0.017737431451678276,
"ranking_idealized": 0.5166666507720947,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.46666666865348816,
"regularize": 0.017736157402396202,
"step": 1440
},
{
"dpo_loss": 0.689642608165741,
"epoch": 1.3652468309581922,
"grad_norm": 79.06644823842738,
"learning_rate": 2.763926178375929e-08,
"logits": -2.190371036529541,
"logps": -91.85442352294922,
"loss": 0.0148,
"objective": 0.01207685936242342,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.550000011920929,
"regularize": 0.012074961327016354,
"step": 1445
},
{
"dpo_loss": 0.690252423286438,
"epoch": 1.3699708684355563,
"grad_norm": 61.578467113272715,
"learning_rate": 2.7271076812549688e-08,
"logits": -2.2324106693267822,
"logps": -91.74723815917969,
"loss": 0.0173,
"objective": 0.020336376503109932,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.4333333373069763,
"ranking_simple": 0.44999998807907104,
"regularize": 0.02033485844731331,
"step": 1450
},
{
"epoch": 1.3699708684355563,
"eval_dpo_loss": 0.6928586959838867,
"eval_logits": -1.9887967109680176,
"eval_logps": -98.44745635986328,
"eval_loss": 0.01200713962316513,
"eval_objective": 0.012023248709738255,
"eval_ranking_idealized": 0.6022099256515503,
"eval_ranking_idealized_expo": 0.5207182168960571,
"eval_ranking_simple": 0.519336998462677,
"eval_regularize": 0.012021254748106003,
"eval_runtime": 446.5411,
"eval_samples_per_second": 12.966,
"eval_steps_per_second": 3.243,
"step": 1450
},
{
"dpo_loss": 0.6905301809310913,
"epoch": 1.3746949059129203,
"grad_norm": 63.36787972947676,
"learning_rate": 2.6904438812665275e-08,
"logits": -2.1534087657928467,
"logps": -90.40296936035156,
"loss": 0.0161,
"objective": 0.01465876679867506,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5333333611488342,
"regularize": 0.01465718261897564,
"step": 1455
},
{
"dpo_loss": 0.6913805603981018,
"epoch": 1.3794189433902841,
"grad_norm": 82.33504933507443,
"learning_rate": 2.6539372738146693e-08,
"logits": -2.2307941913604736,
"logps": -95.2313003540039,
"loss": 0.0175,
"objective": 0.01627987250685692,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5166666507720947,
"regularize": 0.01627855747938156,
"step": 1460
},
{
"dpo_loss": 0.6937485933303833,
"epoch": 1.3841429808676482,
"grad_norm": 77.80619617469834,
"learning_rate": 2.6175903436046474e-08,
"logits": -2.127424716949463,
"logps": -93.49677276611328,
"loss": 0.019,
"objective": 0.018087979406118393,
"ranking_idealized": 0.7333333492279053,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.5666666626930237,
"regularize": 0.018087027594447136,
"step": 1465
},
{
"dpo_loss": 0.6909429430961609,
"epoch": 1.3888670183450122,
"grad_norm": 65.449053741861,
"learning_rate": 2.5814055644738007e-08,
"logits": -2.13195538520813,
"logps": -92.86261749267578,
"loss": 0.018,
"objective": 0.01916462555527687,
"ranking_idealized": 0.6833333373069763,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.5666666626930237,
"regularize": 0.019163591787219048,
"step": 1470
},
{
"dpo_loss": 0.6916597485542297,
"epoch": 1.3935910558223763,
"grad_norm": 64.31813956475592,
"learning_rate": 2.545385399223171e-08,
"logits": -2.1243183612823486,
"logps": -91.4704818725586,
"loss": 0.016,
"objective": 0.01847274787724018,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.46666666865348816,
"regularize": 0.01847095414996147,
"step": 1475
},
{
"dpo_loss": 0.6896728277206421,
"epoch": 1.3983150932997401,
"grad_norm": 68.83830299489735,
"learning_rate": 2.5095322994498846e-08,
"logits": -2.1868510246276855,
"logps": -88.3174819946289,
"loss": 0.0148,
"objective": 0.014686751179397106,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5333333611488342,
"regularize": 0.014684156514704227,
"step": 1480
},
{
"dpo_loss": 0.691864550113678,
"epoch": 1.4030391307771042,
"grad_norm": 64.34780184550137,
"learning_rate": 2.4738487053802913e-08,
"logits": -2.121894359588623,
"logps": -90.51294708251953,
"loss": 0.0178,
"objective": 0.017730435356497765,
"ranking_idealized": 0.699999988079071,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.5666666626930237,
"regularize": 0.017729543149471283,
"step": 1485
},
{
"dpo_loss": 0.6895692944526672,
"epoch": 1.4077631682544682,
"grad_norm": 76.56054457201986,
"learning_rate": 2.4383370457038788e-08,
"logits": -2.287666082382202,
"logps": -91.52227020263672,
"loss": 0.0178,
"objective": 0.020892778411507607,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.46666666865348816,
"regularize": 0.020891882479190826,
"step": 1490
},
{
"dpo_loss": 0.6884461641311646,
"epoch": 1.412487205731832,
"grad_norm": 66.46105530145252,
"learning_rate": 2.4029997374079687e-08,
"logits": -2.1489083766937256,
"logps": -94.47865295410156,
"loss": 0.0164,
"objective": 0.017250513657927513,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.5833333134651184,
"regularize": 0.01724918559193611,
"step": 1495
},
{
"dpo_loss": 0.6900500059127808,
"epoch": 1.4172112432091961,
"grad_norm": 65.6967093839243,
"learning_rate": 2.3678391856132203e-08,
"logits": -2.1476263999938965,
"logps": -92.45713806152344,
"loss": 0.0171,
"objective": 0.017398254945874214,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5,
"regularize": 0.01739557646214962,
"step": 1500
},
{
"epoch": 1.4172112432091961,
"eval_dpo_loss": 0.6928529143333435,
"eval_logits": -1.989194393157959,
"eval_logps": -98.49781036376953,
"eval_loss": 0.009285102598369122,
"eval_objective": 0.009339064359664917,
"eval_ranking_idealized": 0.6022099256515503,
"eval_ranking_idealized_expo": 0.5207182168960571,
"eval_ranking_simple": 0.5186464190483093,
"eval_regularize": 0.009336920455098152,
"eval_runtime": 454.5105,
"eval_samples_per_second": 12.739,
"eval_steps_per_second": 3.186,
"step": 1500
},
{
"dpo_loss": 0.6893900632858276,
"epoch": 1.4219352806865602,
"grad_norm": 66.7330764289215,
"learning_rate": 2.3328577834099238e-08,
"logits": -2.1472671031951904,
"logps": -93.55850982666016,
"loss": 0.0159,
"objective": 0.01486156228929758,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.6000000238418579,
"ranking_simple": 0.6000000238418579,
"regularize": 0.014860122464597225,
"step": 1505
},
{
"dpo_loss": 0.6900672912597656,
"epoch": 1.426659318163924,
"grad_norm": 68.69500961540548,
"learning_rate": 2.2980579116951266e-08,
"logits": -2.205268144607544,
"logps": -94.00220489501953,
"loss": 0.0165,
"objective": 0.015583300963044167,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.5666666626930237,
"regularize": 0.015581325627863407,
"step": 1510
},
{
"dpo_loss": 0.6928814053535461,
"epoch": 1.431383355641288,
"grad_norm": 66.06160630988346,
"learning_rate": 2.263441939010586e-08,
"logits": -2.205580234527588,
"logps": -94.78508758544922,
"loss": 0.0176,
"objective": 0.020189667120575905,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.5833333134651184,
"regularize": 0.020186880603432655,
"step": 1515
},
{
"dpo_loss": 0.6897247433662415,
"epoch": 1.436107393118652,
"grad_norm": 64.2991321127621,
"learning_rate": 2.2290122213815603e-08,
"logits": -2.1570937633514404,
"logps": -93.27649688720703,
"loss": 0.0151,
"objective": 0.015933455899357796,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5333333611488342,
"regularize": 0.01593177765607834,
"step": 1520
},
{
"dpo_loss": 0.6920062899589539,
"epoch": 1.440831430596016,
"grad_norm": 82.55367392818718,
"learning_rate": 2.194771102156456e-08,
"logits": -2.079338550567627,
"logps": -90.78583526611328,
"loss": 0.0161,
"objective": 0.012961748987436295,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.6000000238418579,
"ranking_simple": 0.6000000238418579,
"regularize": 0.012958657927811146,
"step": 1525
},
{
"dpo_loss": 0.6941262483596802,
"epoch": 1.44555546807338,
"grad_norm": 64.16042224652975,
"learning_rate": 2.1607209118473314e-08,
"logits": -2.1198713779449463,
"logps": -92.36068725585938,
"loss": 0.0175,
"objective": 0.013944637961685658,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.46666666865348816,
"regularize": 0.013943412341177464,
"step": 1530
},
{
"dpo_loss": 0.6908197999000549,
"epoch": 1.450279505550744,
"grad_norm": 73.78678695774653,
"learning_rate": 2.1268639679712813e-08,
"logits": -2.197909355163574,
"logps": -94.17871856689453,
"loss": 0.0175,
"objective": 0.020089728757739067,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5,
"regularize": 0.020088987424969673,
"step": 1535
},
{
"dpo_loss": 0.6900218725204468,
"epoch": 1.455003543028108,
"grad_norm": 60.95051574524286,
"learning_rate": 2.0932025748927014e-08,
"logits": -2.0492825508117676,
"logps": -90.19515228271484,
"loss": 0.0156,
"objective": 0.01438401360064745,
"ranking_idealized": 0.6833333373069763,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.5833333134651184,
"regularize": 0.014382914640009403,
"step": 1540
},
{
"dpo_loss": 0.6942049264907837,
"epoch": 1.4597275805054721,
"grad_norm": 64.3944580161131,
"learning_rate": 2.0597390236664474e-08,
"logits": -2.1208300590515137,
"logps": -92.43262481689453,
"loss": 0.0188,
"objective": 0.02223369851708412,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.4000000059604645,
"ranking_simple": 0.4000000059604645,
"regularize": 0.022232500836253166,
"step": 1545
},
{
"dpo_loss": 0.6913332343101501,
"epoch": 1.464451617982836,
"grad_norm": 65.89315791512014,
"learning_rate": 2.026475591881906e-08,
"logits": -2.080505609512329,
"logps": -91.72013092041016,
"loss": 0.0164,
"objective": 0.015918172895908356,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.550000011920929,
"regularize": 0.01591550186276436,
"step": 1550
},
{
"epoch": 1.464451617982836,
"eval_dpo_loss": 0.6928287148475647,
"eval_logits": -1.9898428916931152,
"eval_logps": -98.48872375488281,
"eval_loss": 0.009991789236664772,
"eval_objective": 0.010131197050213814,
"eval_ranking_idealized": 0.6022099256515503,
"eval_ranking_idealized_expo": 0.5207182168960571,
"eval_ranking_simple": 0.5179557800292969,
"eval_regularize": 0.010128886438906193,
"eval_runtime": 446.2677,
"eval_samples_per_second": 12.974,
"eval_steps_per_second": 3.245,
"step": 1550
},
{
"dpo_loss": 0.6896055936813354,
"epoch": 1.4691756554602,
"grad_norm": 72.03048326532462,
"learning_rate": 1.9934145435079702e-08,
"logits": -2.2838551998138428,
"logps": -92.67180633544922,
"loss": 0.0163,
"objective": 0.01897108368575573,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.6000000238418579,
"ranking_simple": 0.6000000238418579,
"regularize": 0.018967188894748688,
"step": 1555
},
{
"dpo_loss": 0.6906958222389221,
"epoch": 1.473899692937564,
"grad_norm": 73.82372228393551,
"learning_rate": 1.9605581287389632e-08,
"logits": -2.126072645187378,
"logps": -91.18004608154297,
"loss": 0.017,
"objective": 0.01644645445048809,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.4166666567325592,
"ranking_simple": 0.4166666567325592,
"regularize": 0.01644515059888363,
"step": 1560
},
{
"dpo_loss": 0.6889926791191101,
"epoch": 1.478623730414928,
"grad_norm": 64.09272915756699,
"learning_rate": 1.92790858384147e-08,
"logits": -2.13236927986145,
"logps": -92.78182983398438,
"loss": 0.0154,
"objective": 0.016674092039465904,
"ranking_idealized": 0.6833333373069763,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5166666507720947,
"regularize": 0.01667255535721779,
"step": 1565
},
{
"dpo_loss": 0.6913832426071167,
"epoch": 1.483347767892292,
"grad_norm": 66.43479163914742,
"learning_rate": 1.895468131002143e-08,
"logits": -2.0496039390563965,
"logps": -92.02823638916016,
"loss": 0.0135,
"objective": 0.014378294348716736,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5333333611488342,
"regularize": 0.014377345331013203,
"step": 1570
},
{
"dpo_loss": 0.6909292340278625,
"epoch": 1.488071805369656,
"grad_norm": 74.75415511259118,
"learning_rate": 1.863238978176455e-08,
"logits": -2.2580831050872803,
"logps": -91.48379516601562,
"loss": 0.014,
"objective": 0.014301776885986328,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.5833333134651184,
"regularize": 0.014300725422799587,
"step": 1575
},
{
"dpo_loss": 0.6893575191497803,
"epoch": 1.4927958428470198,
"grad_norm": 58.43183435208023,
"learning_rate": 1.831223318938419e-08,
"logits": -2.16597843170166,
"logps": -93.69886016845703,
"loss": 0.0152,
"objective": 0.014309495687484741,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.46666666865348816,
"regularize": 0.014308045618236065,
"step": 1580
},
{
"dpo_loss": 0.6888726949691772,
"epoch": 1.4975198803243839,
"grad_norm": 73.21921475273949,
"learning_rate": 1.7994233323312913e-08,
"logits": -2.2151682376861572,
"logps": -89.77079772949219,
"loss": 0.0151,
"objective": 0.017258943989872932,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.5833333134651184,
"regularize": 0.01725638099014759,
"step": 1585
},
{
"dpo_loss": 0.6907024383544922,
"epoch": 1.502243917801748,
"grad_norm": 63.078892059144785,
"learning_rate": 1.767841182719262e-08,
"logits": -2.1972243785858154,
"logps": -95.78870391845703,
"loss": 0.0132,
"objective": 0.011403498239815235,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.5666666626930237,
"regularize": 0.011402006261050701,
"step": 1590
},
{
"dpo_loss": 0.690780520439148,
"epoch": 1.5069679552791118,
"grad_norm": 72.75847192003988,
"learning_rate": 1.7364790196401436e-08,
"logits": -2.1997883319854736,
"logps": -92.46896362304688,
"loss": 0.0157,
"objective": 0.017867466434836388,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5333333611488342,
"regularize": 0.017865275964140892,
"step": 1595
},
{
"dpo_loss": 0.6890024542808533,
"epoch": 1.511691992756476,
"grad_norm": 88.44042595685,
"learning_rate": 1.705338977659071e-08,
"logits": -2.1227810382843018,
"logps": -93.08589172363281,
"loss": 0.0165,
"objective": 0.013732160441577435,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.550000011920929,
"regularize": 0.013729160651564598,
"step": 1600
},
{
"epoch": 1.511691992756476,
"eval_dpo_loss": 0.6928747296333313,
"eval_logits": -1.9891741275787354,
"eval_logps": -98.44180297851562,
"eval_loss": 0.009673803113400936,
"eval_objective": 0.009624399244785309,
"eval_ranking_idealized": 0.6022099256515503,
"eval_ranking_idealized_expo": 0.5207182168960571,
"eval_ranking_simple": 0.5186464190483093,
"eval_regularize": 0.009622092358767986,
"eval_runtime": 451.0127,
"eval_samples_per_second": 12.838,
"eval_steps_per_second": 3.211,
"step": 1600
},
{
"dpo_loss": 0.6902530193328857,
"epoch": 1.5164160302338399,
"grad_norm": 71.99972413373813,
"learning_rate": 1.6744231762232176e-08,
"logits": -2.190404176712036,
"logps": -92.08161163330078,
"loss": 0.0141,
"objective": 0.013264096342027187,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.46666666865348816,
"regularize": 0.013262915425002575,
"step": 1605
},
{
"dpo_loss": 0.6898870468139648,
"epoch": 1.5211400677112037,
"grad_norm": 74.10537546046199,
"learning_rate": 1.6437337195175428e-08,
"logits": -2.19018816947937,
"logps": -92.24858856201172,
"loss": 0.0144,
"objective": 0.014542266726493835,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.550000011920929,
"regularize": 0.014540906064212322,
"step": 1610
},
{
"dpo_loss": 0.6916466951370239,
"epoch": 1.525864105188568,
"grad_norm": 58.38920752518266,
"learning_rate": 1.613272696321576e-08,
"logits": -2.136929750442505,
"logps": -92.16728973388672,
"loss": 0.0139,
"objective": 0.015932830050587654,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.6000000238418579,
"ranking_simple": 0.6000000238418579,
"regularize": 0.015931693837046623,
"step": 1615
},
{
"dpo_loss": 0.6908102631568909,
"epoch": 1.5305881426659318,
"grad_norm": 63.00313504444051,
"learning_rate": 1.5830421798672566e-08,
"logits": -2.118234872817993,
"logps": -90.81103515625,
"loss": 0.0158,
"objective": 0.013052166439592838,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.46666666865348816,
"regularize": 0.013051275163888931,
"step": 1620
},
{
"dpo_loss": 0.6895550489425659,
"epoch": 1.5353121801432958,
"grad_norm": 58.07839320165127,
"learning_rate": 1.5530442276978155e-08,
"logits": -2.139536142349243,
"logps": -90.25867462158203,
"loss": 0.0148,
"objective": 0.014852885156869888,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5166666507720947,
"regularize": 0.014851867221295834,
"step": 1625
},
{
"dpo_loss": 0.6881382465362549,
"epoch": 1.54003621762066,
"grad_norm": 68.39570526618726,
"learning_rate": 1.523280881527743e-08,
"logits": -2.058195114135742,
"logps": -92.33999633789062,
"loss": 0.0153,
"objective": 0.018310803920030594,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.5666666626930237,
"regularize": 0.018308250233530998,
"step": 1630
},
{
"dpo_loss": 0.6918675303459167,
"epoch": 1.5447602550980237,
"grad_norm": 71.28198542563501,
"learning_rate": 1.4937541671038245e-08,
"logits": -2.0989344120025635,
"logps": -94.7214584350586,
"loss": 0.0141,
"objective": 0.016751030460000038,
"ranking_idealized": 0.699999988079071,
"ranking_idealized_expo": 0.6333333253860474,
"ranking_simple": 0.6333333253860474,
"regularize": 0.016748782247304916,
"step": 1635
},
{
"dpo_loss": 0.6916669011116028,
"epoch": 1.5494842925753878,
"grad_norm": 55.738628042601434,
"learning_rate": 1.4644660940672625e-08,
"logits": -2.154735803604126,
"logps": -93.92298126220703,
"loss": 0.0156,
"objective": 0.012084761634469032,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.46666666865348816,
"regularize": 0.01208337489515543,
"step": 1640
},
{
"dpo_loss": 0.6894612908363342,
"epoch": 1.5542083300527518,
"grad_norm": 62.99549337373731,
"learning_rate": 1.435418655816899e-08,
"logits": -2.169052839279175,
"logps": -91.41073608398438,
"loss": 0.0143,
"objective": 0.014856117777526379,
"ranking_idealized": 0.4833333194255829,
"ranking_idealized_expo": 0.44999998807907104,
"ranking_simple": 0.44999998807907104,
"regularize": 0.014855272136628628,
"step": 1645
},
{
"dpo_loss": 0.6909356713294983,
"epoch": 1.5589323675301157,
"grad_norm": 67.0305140958766,
"learning_rate": 1.4066138293735408e-08,
"logits": -2.098741292953491,
"logps": -92.3939208984375,
"loss": 0.0128,
"objective": 0.012405160814523697,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.550000011920929,
"regularize": 0.012403340078890324,
"step": 1650
},
{
"epoch": 1.5589323675301157,
"eval_dpo_loss": 0.6927416324615479,
"eval_logits": -1.9889006614685059,
"eval_logps": -98.3604736328125,
"eval_loss": 0.010028412565588951,
"eval_objective": 0.010088582523167133,
"eval_ranking_idealized": 0.6022099256515503,
"eval_ranking_idealized_expo": 0.5207182168960571,
"eval_ranking_simple": 0.5179557800292969,
"eval_regularize": 0.010086326859891415,
"eval_runtime": 446.4522,
"eval_samples_per_second": 12.969,
"eval_steps_per_second": 3.243,
"step": 1650
},
{
"dpo_loss": 0.6893053650856018,
"epoch": 1.5636564050074797,
"grad_norm": 63.18957665484938,
"learning_rate": 1.3780535752453976e-08,
"logits": -2.115309000015259,
"logps": -92.01270294189453,
"loss": 0.016,
"objective": 0.01488524954766035,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.5666666626930237,
"regularize": 0.014881560578942299,
"step": 1655
},
{
"dpo_loss": 0.6926845908164978,
"epoch": 1.5683804424848438,
"grad_norm": 58.824443635483924,
"learning_rate": 1.34973983729465e-08,
"logits": -2.0248100757598877,
"logps": -91.5145263671875,
"loss": 0.0155,
"objective": 0.016196589916944504,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.4833333194255829,
"regularize": 0.01619427278637886,
"step": 1660
},
{
"dpo_loss": 0.6904213428497314,
"epoch": 1.5731044799622076,
"grad_norm": 62.30456762761477,
"learning_rate": 1.3216745426051451e-08,
"logits": -2.12634015083313,
"logps": -93.39350891113281,
"loss": 0.0136,
"objective": 0.012430812232196331,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5333333611488342,
"regularize": 0.012427231296896935,
"step": 1665
},
{
"dpo_loss": 0.6912954449653625,
"epoch": 1.5778285174395716,
"grad_norm": 69.75551627833396,
"learning_rate": 1.293859601351232e-08,
"logits": -2.253553867340088,
"logps": -94.33558654785156,
"loss": 0.0154,
"objective": 0.015208045952022076,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5166666507720947,
"regularize": 0.015205765143036842,
"step": 1670
},
{
"dpo_loss": 0.6908196806907654,
"epoch": 1.5825525549169357,
"grad_norm": 66.72338681583422,
"learning_rate": 1.266296906667762e-08,
"logits": -2.232628107070923,
"logps": -92.84732818603516,
"loss": 0.0154,
"objective": 0.014128237962722778,
"ranking_idealized": 0.4166666567325592,
"ranking_idealized_expo": 0.4166666567325592,
"ranking_simple": 0.4166666567325592,
"regularize": 0.014126955531537533,
"step": 1675
},
{
"dpo_loss": 0.6899545192718506,
"epoch": 1.5872765923942995,
"grad_norm": 63.2891144846116,
"learning_rate": 1.238988334521226e-08,
"logits": -2.1890323162078857,
"logps": -95.2237777709961,
"loss": 0.0152,
"objective": 0.014830714091658592,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.4833333194255829,
"regularize": 0.014829723164439201,
"step": 1680
},
{
"dpo_loss": 0.6891339421272278,
"epoch": 1.5920006298716638,
"grad_norm": 59.6793164031309,
"learning_rate": 1.2119357435820816e-08,
"logits": -2.1683380603790283,
"logps": -89.5069580078125,
"loss": 0.0118,
"objective": 0.01023172028362751,
"ranking_idealized": 0.46666666865348816,
"ranking_idealized_expo": 0.3166666626930237,
"ranking_simple": 0.3166666626930237,
"regularize": 0.010230082087218761,
"step": 1685
},
{
"dpo_loss": 0.6914657354354858,
"epoch": 1.5967246673490276,
"grad_norm": 61.16970720231615,
"learning_rate": 1.1851409750982438e-08,
"logits": -2.128115653991699,
"logps": -92.26956176757812,
"loss": 0.013,
"objective": 0.013722400180995464,
"ranking_idealized": 0.699999988079071,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.6000000238418579,
"regularize": 0.01371851097792387,
"step": 1690
},
{
"dpo_loss": 0.6910730600357056,
"epoch": 1.6014487048263917,
"grad_norm": 70.19949003863668,
"learning_rate": 1.1586058527697707e-08,
"logits": -2.1930956840515137,
"logps": -91.90154266357422,
"loss": 0.0136,
"objective": 0.01538047008216381,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.6000000238418579,
"ranking_simple": 0.6000000238418579,
"regularize": 0.015379803255200386,
"step": 1695
},
{
"dpo_loss": 0.6901703476905823,
"epoch": 1.6061727423037557,
"grad_norm": 68.99239991969064,
"learning_rate": 1.1323321826247346e-08,
"logits": -2.1464996337890625,
"logps": -93.59435272216797,
"loss": 0.0132,
"objective": 0.014362351037561893,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5166666507720947,
"regularize": 0.014360878616571426,
"step": 1700
},
{
"epoch": 1.6061727423037557,
"eval_dpo_loss": 0.6927831768989563,
"eval_logits": -1.9890520572662354,
"eval_logps": -98.40552520751953,
"eval_loss": 0.008960912004113197,
"eval_objective": 0.008905092254281044,
"eval_ranking_idealized": 0.6022099256515503,
"eval_ranking_idealized_expo": 0.5207182168960571,
"eval_ranking_simple": 0.5179557800292969,
"eval_regularize": 0.008902397006750107,
"eval_runtime": 446.3386,
"eval_samples_per_second": 12.972,
"eval_steps_per_second": 3.244,
"step": 1700
},
{
"dpo_loss": 0.6905626654624939,
"epoch": 1.6108967797811196,
"grad_norm": 61.071862990680025,
"learning_rate": 1.1063217528963042e-08,
"logits": -2.1819908618927,
"logps": -95.61785125732422,
"loss": 0.0138,
"objective": 0.014233703725039959,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5,
"regularize": 0.014231013134121895,
"step": 1705
},
{
"dpo_loss": 0.6933135986328125,
"epoch": 1.6156208172584836,
"grad_norm": 65.29758946742726,
"learning_rate": 1.0805763339010326e-08,
"logits": -2.13582444190979,
"logps": -92.55400848388672,
"loss": 0.0135,
"objective": 0.014589487574994564,
"ranking_idealized": 0.6833333373069763,
"ranking_idealized_expo": 0.6499999761581421,
"ranking_simple": 0.6499999761581421,
"regularize": 0.014586606994271278,
"step": 1710
},
{
"dpo_loss": 0.6906417608261108,
"epoch": 1.6203448547358477,
"grad_norm": 65.78620291334798,
"learning_rate": 1.0550976779183651e-08,
"logits": -2.1200928688049316,
"logps": -95.07630920410156,
"loss": 0.0136,
"objective": 0.014620447531342506,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5166666507720947,
"regularize": 0.014618877321481705,
"step": 1715
},
{
"dpo_loss": 0.6903366446495056,
"epoch": 1.6250688922132115,
"grad_norm": 61.420985867018985,
"learning_rate": 1.02988751907138e-08,
"logits": -2.196997880935669,
"logps": -89.81111145019531,
"loss": 0.0124,
"objective": 0.01165497861802578,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.5666666626930237,
"regularize": 0.011652585119009018,
"step": 1720
},
{
"dpo_loss": 0.6910706758499146,
"epoch": 1.6297929296905755,
"grad_norm": 69.65328151335441,
"learning_rate": 1.0049475732087559e-08,
"logits": -2.1551551818847656,
"logps": -92.8335952758789,
"loss": 0.0137,
"objective": 0.011478899046778679,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5,
"regularize": 0.0114761833101511,
"step": 1725
},
{
"dpo_loss": 0.6897426247596741,
"epoch": 1.6345169671679396,
"grad_norm": 70.43018567128607,
"learning_rate": 9.802795377879903e-09,
"logits": -2.243594169616699,
"logps": -91.26370239257812,
"loss": 0.0128,
"objective": 0.012597540393471718,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5333333611488342,
"regularize": 0.012595159001648426,
"step": 1730
},
{
"dpo_loss": 0.6892535090446472,
"epoch": 1.6392410046453034,
"grad_norm": 63.82988388608345,
"learning_rate": 9.558850917598716e-09,
"logits": -2.070333957672119,
"logps": -92.88044738769531,
"loss": 0.0123,
"objective": 0.011261907406151295,
"ranking_idealized": 0.5166666507720947,
"ranking_idealized_expo": 0.4333333373069763,
"ranking_simple": 0.44999998807907104,
"regularize": 0.011259406805038452,
"step": 1735
},
{
"dpo_loss": 0.6904967427253723,
"epoch": 1.6439650421226675,
"grad_norm": 57.01783294970417,
"learning_rate": 9.31765895454199e-09,
"logits": -2.178701877593994,
"logps": -92.82861328125,
"loss": 0.0128,
"objective": 0.013563827611505985,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.5666666626930237,
"regularize": 0.013562088832259178,
"step": 1740
},
{
"dpo_loss": 0.6911302208900452,
"epoch": 1.6486890796000315,
"grad_norm": 67.14492805040307,
"learning_rate": 9.079235904667825e-09,
"logits": -2.160048484802246,
"logps": -97.07341766357422,
"loss": 0.0132,
"objective": 0.011855502612888813,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.5833333134651184,
"regularize": 0.011852098628878593,
"step": 1745
},
{
"dpo_loss": 0.6899723410606384,
"epoch": 1.6534131170773954,
"grad_norm": 61.803443023637456,
"learning_rate": 8.84359799547712e-09,
"logits": -2.1978349685668945,
"logps": -92.97909545898438,
"loss": 0.0133,
"objective": 0.01489060465246439,
"ranking_idealized": 0.5166666507720947,
"ranking_idealized_expo": 0.44999998807907104,
"ranking_simple": 0.44999998807907104,
"regularize": 0.014889102429151535,
"step": 1750
},
{
"epoch": 1.6534131170773954,
"eval_dpo_loss": 0.6928050518035889,
"eval_logits": -1.988478183746338,
"eval_logps": -98.41736602783203,
"eval_loss": 0.009351465851068497,
"eval_objective": 0.009363526478409767,
"eval_ranking_idealized": 0.6022099256515503,
"eval_ranking_idealized_expo": 0.5207182168960571,
"eval_ranking_simple": 0.5179557800292969,
"eval_regularize": 0.009361328557133675,
"eval_runtime": 445.0354,
"eval_samples_per_second": 13.01,
"eval_steps_per_second": 3.254,
"step": 1750
},
{
"dpo_loss": 0.6889625787734985,
"epoch": 1.6581371545547596,
"grad_norm": 69.19823709150691,
"learning_rate": 8.6107612649091e-09,
"logits": -2.17798113822937,
"logps": -90.93826293945312,
"loss": 0.0136,
"objective": 0.014197876676917076,
"ranking_idealized": 0.5166666507720947,
"ranking_idealized_expo": 0.44999998807907104,
"ranking_simple": 0.4333333373069763,
"regularize": 0.014196816831827164,
"step": 1755
},
{
"dpo_loss": 0.6894813776016235,
"epoch": 1.6628611920321235,
"grad_norm": 70.33904785688647,
"learning_rate": 8.380741560249726e-09,
"logits": -2.2228808403015137,
"logps": -91.38529205322266,
"loss": 0.0136,
"objective": 0.0157302338629961,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5166666507720947,
"regularize": 0.015728596597909927,
"step": 1760
},
{
"dpo_loss": 0.6904619336128235,
"epoch": 1.6675852295094873,
"grad_norm": 66.62367568306153,
"learning_rate": 8.153554537053149e-09,
"logits": -2.1843998432159424,
"logps": -91.0223159790039,
"loss": 0.0144,
"objective": 0.012193134985864162,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.46666666865348816,
"regularize": 0.01219152845442295,
"step": 1765
},
{
"dpo_loss": 0.691467821598053,
"epoch": 1.6723092669868516,
"grad_norm": 63.63343895864985,
"learning_rate": 7.929215658076093e-09,
"logits": -2.2059359550476074,
"logps": -92.13400268554688,
"loss": 0.0114,
"objective": 0.01266545057296753,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5,
"regularize": 0.012664331123232841,
"step": 1770
},
{
"dpo_loss": 0.6895557045936584,
"epoch": 1.6770333044642154,
"grad_norm": 61.16013319033806,
"learning_rate": 7.707740192225515e-09,
"logits": -2.257197380065918,
"logps": -93.42992401123047,
"loss": 0.0128,
"objective": 0.014360646717250347,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.6666666865348816,
"ranking_simple": 0.6666666865348816,
"regularize": 0.014359161257743835,
"step": 1775
},
{
"dpo_loss": 0.691416323184967,
"epoch": 1.6817573419415794,
"grad_norm": 68.36063699136817,
"learning_rate": 7.4891432135193e-09,
"logits": -2.251347303390503,
"logps": -91.27902221679688,
"loss": 0.0141,
"objective": 0.013033194467425346,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5333333611488342,
"regularize": 0.013032082468271255,
"step": 1780
},
{
"dpo_loss": 0.6915625929832458,
"epoch": 1.6864813794189435,
"grad_norm": 64.60918615534963,
"learning_rate": 7.273439600060344e-09,
"logits": -2.165839195251465,
"logps": -93.43399810791016,
"loss": 0.0158,
"objective": 0.016496647149324417,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.550000011920929,
"regularize": 0.016495179384946823,
"step": 1785
},
{
"dpo_loss": 0.6900497078895569,
"epoch": 1.6912054168963073,
"grad_norm": 59.4845336366133,
"learning_rate": 7.060644033023894e-09,
"logits": -2.1421496868133545,
"logps": -89.32416534423828,
"loss": 0.0132,
"objective": 0.011856761761009693,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5,
"regularize": 0.011854317970573902,
"step": 1790
},
{
"dpo_loss": 0.6918495297431946,
"epoch": 1.6959294543736714,
"grad_norm": 62.10968860121513,
"learning_rate": 6.850770995658372e-09,
"logits": -2.145258665084839,
"logps": -91.16167449951172,
"loss": 0.0135,
"objective": 0.011840968392789364,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5333333611488342,
"regularize": 0.011839687824249268,
"step": 1795
},
{
"dpo_loss": 0.6910142302513123,
"epoch": 1.7006534918510354,
"grad_norm": 63.84635783884053,
"learning_rate": 6.6438347722995445e-09,
"logits": -2.2468461990356445,
"logps": -91.39185333251953,
"loss": 0.0138,
"objective": 0.010580122470855713,
"ranking_idealized": 0.5,
"ranking_idealized_expo": 0.44999998807907104,
"ranking_simple": 0.44999998807907104,
"regularize": 0.010576292872428894,
"step": 1800
},
{
"epoch": 1.7006534918510354,
"eval_dpo_loss": 0.6928184628486633,
"eval_logits": -1.9885612726211548,
"eval_logps": -98.35978698730469,
"eval_loss": 0.009589685127139091,
"eval_objective": 0.009668215177953243,
"eval_ranking_idealized": 0.6022099256515503,
"eval_ranking_idealized_expo": 0.5207182168960571,
"eval_ranking_simple": 0.5179557800292969,
"eval_regularize": 0.00966621097177267,
"eval_runtime": 448.9142,
"eval_samples_per_second": 12.898,
"eval_steps_per_second": 3.226,
"step": 1800
},
{
"dpo_loss": 0.6905153393745422,
"epoch": 1.7053775293283993,
"grad_norm": 69.0367318972444,
"learning_rate": 6.43984944739836e-09,
"logits": -2.206124782562256,
"logps": -93.37840270996094,
"loss": 0.0133,
"objective": 0.013736736960709095,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.4833333194255829,
"regularize": 0.013735477812588215,
"step": 1805
},
{
"dpo_loss": 0.6886168718338013,
"epoch": 1.7101015668057633,
"grad_norm": 60.061121059879774,
"learning_rate": 6.238828904562315e-09,
"logits": -2.238112449645996,
"logps": -93.0470199584961,
"loss": 0.0146,
"objective": 0.015197351574897766,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.6666666865348816,
"ranking_simple": 0.6666666865348816,
"regularize": 0.015195462852716446,
"step": 1810
},
{
"dpo_loss": 0.6895704865455627,
"epoch": 1.7148256042831274,
"grad_norm": 62.370319050099795,
"learning_rate": 6.040786825610517e-09,
"logits": -2.18115234375,
"logps": -92.52540588378906,
"loss": 0.0127,
"objective": 0.015414653345942497,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5,
"regularize": 0.015413584187626839,
"step": 1815
},
{
"dpo_loss": 0.6913415193557739,
"epoch": 1.7195496417604912,
"grad_norm": 68.7151840413099,
"learning_rate": 5.845736689642472e-09,
"logits": -2.2472267150878906,
"logps": -93.7915267944336,
"loss": 0.015,
"objective": 0.01595698669552803,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5166666507720947,
"regularize": 0.01595553196966648,
"step": 1820
},
{
"dpo_loss": 0.6893234848976135,
"epoch": 1.7242736792378552,
"grad_norm": 61.62237599260527,
"learning_rate": 5.653691772120672e-09,
"logits": -2.2202160358428955,
"logps": -91.71192932128906,
"loss": 0.0125,
"objective": 0.013099589385092258,
"ranking_idealized": 0.699999988079071,
"ranking_idealized_expo": 0.6333333253860474,
"ranking_simple": 0.6333333253860474,
"regularize": 0.013098122552037239,
"step": 1825
},
{
"dpo_loss": 0.6918686032295227,
"epoch": 1.7289977167152193,
"grad_norm": 73.26853232568251,
"learning_rate": 5.464665143967051e-09,
"logits": -2.095928907394409,
"logps": -92.31403350830078,
"loss": 0.0132,
"objective": 0.014795850031077862,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.46666666865348816,
"regularize": 0.01479416061192751,
"step": 1830
},
{
"dpo_loss": 0.6898643374443054,
"epoch": 1.7337217541925831,
"grad_norm": 58.513985312428574,
"learning_rate": 5.278669670673347e-09,
"logits": -2.1115658283233643,
"logps": -91.03611755371094,
"loss": 0.0117,
"objective": 0.0112331947311759,
"ranking_idealized": 0.699999988079071,
"ranking_idealized_expo": 0.6000000238418579,
"ranking_simple": 0.6000000238418579,
"regularize": 0.011231665499508381,
"step": 1835
},
{
"dpo_loss": 0.6913903951644897,
"epoch": 1.7384457916699474,
"grad_norm": 63.11876863535166,
"learning_rate": 5.095718011425454e-09,
"logits": -2.168307304382324,
"logps": -91.80027770996094,
"loss": 0.0135,
"objective": 0.014516009949147701,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.5666666626930237,
"regularize": 0.01451488770544529,
"step": 1840
},
{
"dpo_loss": 0.689866304397583,
"epoch": 1.7431698291473112,
"grad_norm": 72.43548684342062,
"learning_rate": 4.9158226182418104e-09,
"logits": -2.1879196166992188,
"logps": -88.85045623779297,
"loss": 0.0151,
"objective": 0.014930271543562412,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.4833333194255829,
"regularize": 0.014927973970770836,
"step": 1845
},
{
"dpo_loss": 0.689470648765564,
"epoch": 1.7478938666246753,
"grad_norm": 65.9250612110063,
"learning_rate": 4.738995735125894e-09,
"logits": -2.074735164642334,
"logps": -94.42037963867188,
"loss": 0.0122,
"objective": 0.011366092599928379,
"ranking_idealized": 0.6833333373069763,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5333333611488342,
"regularize": 0.011363506317138672,
"step": 1850
},
{
"epoch": 1.7478938666246753,
"eval_dpo_loss": 0.6928740739822388,
"eval_logits": -1.9887943267822266,
"eval_logps": -98.4156723022461,
"eval_loss": 0.009023972786962986,
"eval_objective": 0.00906344410032034,
"eval_ranking_idealized": 0.6022099256515503,
"eval_ranking_idealized_expo": 0.5207182168960571,
"eval_ranking_simple": 0.5179557800292969,
"eval_regularize": 0.009061108343303204,
"eval_runtime": 445.128,
"eval_samples_per_second": 13.007,
"eval_steps_per_second": 3.253,
"step": 1850
},
{
"dpo_loss": 0.6925438046455383,
"epoch": 1.7526179041020393,
"grad_norm": 60.51205917887876,
"learning_rate": 4.565249397232923e-09,
"logits": -2.1302504539489746,
"logps": -89.6479263305664,
"loss": 0.0119,
"objective": 0.012942561879754066,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.6333333253860474,
"ranking_simple": 0.6333333253860474,
"regularize": 0.012941225431859493,
"step": 1855
},
{
"dpo_loss": 0.6919233798980713,
"epoch": 1.7573419415794032,
"grad_norm": 73.74409349566294,
"learning_rate": 4.394595430050613e-09,
"logits": -2.1312899589538574,
"logps": -92.23778533935547,
"loss": 0.0134,
"objective": 0.013376330956816673,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5,
"regularize": 0.013374886475503445,
"step": 1860
},
{
"dpo_loss": 0.6894384026527405,
"epoch": 1.7620659790567672,
"grad_norm": 62.88647078101833,
"learning_rate": 4.2270454485944125e-09,
"logits": -2.0558435916900635,
"logps": -91.38388061523438,
"loss": 0.0107,
"objective": 0.010976298712193966,
"ranking_idealized": 0.7666666507720947,
"ranking_idealized_expo": 0.6666666865348816,
"ranking_simple": 0.6666666865348816,
"regularize": 0.010971426963806152,
"step": 1865
},
{
"dpo_loss": 0.6887921094894409,
"epoch": 1.7667900165341313,
"grad_norm": 87.932062525531,
"learning_rate": 4.062610856616922e-09,
"logits": -2.1452383995056152,
"logps": -92.77478790283203,
"loss": 0.013,
"objective": 0.017829876393079758,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.44999998807907104,
"ranking_simple": 0.44999998807907104,
"regularize": 0.017827244475483894,
"step": 1870
},
{
"dpo_loss": 0.6901695132255554,
"epoch": 1.771514054011495,
"grad_norm": 59.56034569340847,
"learning_rate": 3.901302845831728e-09,
"logits": -2.1918883323669434,
"logps": -91.66542053222656,
"loss": 0.0103,
"objective": 0.00952277984470129,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.550000011920929,
"regularize": 0.009518155828118324,
"step": 1875
},
{
"dpo_loss": 0.6916844844818115,
"epoch": 1.7762380914888591,
"grad_norm": 59.31587765757665,
"learning_rate": 3.743132395151705e-09,
"logits": -2.1958460807800293,
"logps": -92.2247085571289,
"loss": 0.0132,
"objective": 0.014002182520925999,
"ranking_idealized": 0.5166666507720947,
"ranking_idealized_expo": 0.44999998807907104,
"ranking_simple": 0.44999998807907104,
"regularize": 0.014000624418258667,
"step": 1880
},
{
"dpo_loss": 0.6910093426704407,
"epoch": 1.7809621289662232,
"grad_norm": 65.01445310326804,
"learning_rate": 3.5881102699417463e-09,
"logits": -2.15521240234375,
"logps": -92.9655990600586,
"loss": 0.0122,
"objective": 0.013613136485219002,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5,
"regularize": 0.013612000271677971,
"step": 1885
},
{
"dpo_loss": 0.688242495059967,
"epoch": 1.785686166443587,
"grad_norm": 68.21241149596634,
"learning_rate": 3.4362470212860483e-09,
"logits": -2.2397677898406982,
"logps": -92.33454132080078,
"loss": 0.0101,
"objective": 0.011641601100564003,
"ranking_idealized": 0.6833333373069763,
"ranking_idealized_expo": 0.6000000238418579,
"ranking_simple": 0.6000000238418579,
"regularize": 0.011638239957392216,
"step": 1890
},
{
"dpo_loss": 0.691038966178894,
"epoch": 1.790410203920951,
"grad_norm": 68.20823651098704,
"learning_rate": 3.2875529852700148e-09,
"logits": -2.2314653396606445,
"logps": -86.68601989746094,
"loss": 0.0131,
"objective": 0.01244689617305994,
"ranking_idealized": 0.5,
"ranking_idealized_expo": 0.4333333373069763,
"ranking_simple": 0.4333333373069763,
"regularize": 0.012444679625332355,
"step": 1895
},
{
"dpo_loss": 0.6882398128509521,
"epoch": 1.7951342413983151,
"grad_norm": 74.00426423645611,
"learning_rate": 3.142038282276732e-09,
"logits": -2.1504063606262207,
"logps": -92.22313690185547,
"loss": 0.0128,
"objective": 0.013669600710272789,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.550000011920929,
"regularize": 0.013666595332324505,
"step": 1900
},
{
"epoch": 1.7951342413983151,
"eval_dpo_loss": 0.6928610801696777,
"eval_logits": -1.9891064167022705,
"eval_logps": -98.42906951904297,
"eval_loss": 0.008886425755918026,
"eval_objective": 0.0089500043541193,
"eval_ranking_idealized": 0.6022099256515503,
"eval_ranking_idealized_expo": 0.5207182168960571,
"eval_ranking_simple": 0.5179557800292969,
"eval_regularize": 0.008947666734457016,
"eval_runtime": 445.1268,
"eval_samples_per_second": 13.008,
"eval_steps_per_second": 3.253,
"step": 1900
},
{
"dpo_loss": 0.6886274218559265,
"epoch": 1.799858278875679,
"grad_norm": 70.48280427657417,
"learning_rate": 2.9997128162981835e-09,
"logits": -2.061340808868408,
"logps": -92.88438415527344,
"loss": 0.0124,
"objective": 0.011484592221677303,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.5666666626930237,
"regularize": 0.01148195844143629,
"step": 1905
},
{
"dpo_loss": 0.6910132169723511,
"epoch": 1.8045823163530432,
"grad_norm": 69.16766112778924,
"learning_rate": 2.8605862742611453e-09,
"logits": -2.2064709663391113,
"logps": -93.21188354492188,
"loss": 0.0131,
"objective": 0.013887956738471985,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5,
"regularize": 0.013886776752769947,
"step": 1910
},
{
"dpo_loss": 0.6885823011398315,
"epoch": 1.809306353830407,
"grad_norm": 60.392467741599056,
"learning_rate": 2.724668125367896e-09,
"logits": -2.063930034637451,
"logps": -93.89399719238281,
"loss": 0.0118,
"objective": 0.012090322561562061,
"ranking_idealized": 0.5,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.46666666865348816,
"regularize": 0.012088621035218239,
"step": 1915
},
{
"dpo_loss": 0.691336452960968,
"epoch": 1.8140303913077709,
"grad_norm": 64.14689736170588,
"learning_rate": 2.591967620451707e-09,
"logits": -2.2195615768432617,
"logps": -95.23002624511719,
"loss": 0.0126,
"objective": 0.011388556100428104,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.550000011920929,
"regularize": 0.011385311372578144,
"step": 1920
},
{
"dpo_loss": 0.6888495683670044,
"epoch": 1.8187544287851352,
"grad_norm": 61.363679552810055,
"learning_rate": 2.462493791347231e-09,
"logits": -2.1906003952026367,
"logps": -93.10894012451172,
"loss": 0.0109,
"objective": 0.012360634282231331,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.5666666626930237,
"regularize": 0.012358280830085278,
"step": 1925
},
{
"dpo_loss": 0.6920725107192993,
"epoch": 1.823478466262499,
"grad_norm": 62.3361060804096,
"learning_rate": 2.3362554502757536e-09,
"logits": -2.174255132675171,
"logps": -91.97770690917969,
"loss": 0.0133,
"objective": 0.0109206298366189,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5166666507720947,
"regularize": 0.010918223299086094,
"step": 1930
},
{
"dpo_loss": 0.6925593614578247,
"epoch": 1.828202503739863,
"grad_norm": 62.11529994850697,
"learning_rate": 2.213261189245458e-09,
"logits": -2.0892937183380127,
"logps": -95.28164672851562,
"loss": 0.0128,
"objective": 0.0141153484582901,
"ranking_idealized": 0.7166666388511658,
"ranking_idealized_expo": 0.6166666746139526,
"ranking_simple": 0.6333333253860474,
"regularize": 0.01411362923681736,
"step": 1935
},
{
"dpo_loss": 0.6907196044921875,
"epoch": 1.832926541217227,
"grad_norm": 75.31073605655762,
"learning_rate": 2.093519379466602e-09,
"logits": -2.219423770904541,
"logps": -93.87139129638672,
"loss": 0.0136,
"objective": 0.012250066734850407,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.6333333253860474,
"ranking_simple": 0.6333333253860474,
"regularize": 0.01224894542247057,
"step": 1940
},
{
"dpo_loss": 0.6914030909538269,
"epoch": 1.837650578694591,
"grad_norm": 59.36851585608135,
"learning_rate": 1.9770381707817696e-09,
"logits": -2.181976318359375,
"logps": -89.93838500976562,
"loss": 0.014,
"objective": 0.01064326148480177,
"ranking_idealized": 0.7333333492279053,
"ranking_idealized_expo": 0.6000000238418579,
"ranking_simple": 0.6000000238418579,
"regularize": 0.010639534331858158,
"step": 1945
},
{
"dpo_loss": 0.6927531361579895,
"epoch": 1.842374616171955,
"grad_norm": 62.813994604552214,
"learning_rate": 1.8638254911111816e-09,
"logits": -2.1365416049957275,
"logps": -93.81517028808594,
"loss": 0.0133,
"objective": 0.016254087910056114,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5166666507720947,
"regularize": 0.01625274494290352,
"step": 1950
},
{
"epoch": 1.842374616171955,
"eval_dpo_loss": 0.6928887367248535,
"eval_logits": -1.9892430305480957,
"eval_logps": -98.45298767089844,
"eval_loss": 0.008893881924450397,
"eval_objective": 0.008952551521360874,
"eval_ranking_idealized": 0.6022099256515503,
"eval_ranking_idealized_expo": 0.5207182168960571,
"eval_ranking_simple": 0.5179557800292969,
"eval_regularize": 0.008950230665504932,
"eval_runtime": 445.189,
"eval_samples_per_second": 13.006,
"eval_steps_per_second": 3.253,
"step": 1950
},
{
"dpo_loss": 0.6905800104141235,
"epoch": 1.847098653649319,
"grad_norm": 62.33667060485866,
"learning_rate": 1.7538890459131094e-09,
"logits": -2.0535969734191895,
"logps": -92.17353057861328,
"loss": 0.0125,
"objective": 0.0103539377450943,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5333333611488342,
"regularize": 0.010349558666348457,
"step": 1955
},
{
"dpo_loss": 0.6896222233772278,
"epoch": 1.8518226911266829,
"grad_norm": 63.19798951059373,
"learning_rate": 1.647236317659423e-09,
"logits": -2.209256172180176,
"logps": -90.65841674804688,
"loss": 0.0117,
"objective": 0.012652536854147911,
"ranking_idealized": 0.699999988079071,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.5833333134651184,
"regularize": 0.012651127763092518,
"step": 1960
},
{
"dpo_loss": 0.6895377039909363,
"epoch": 1.856546728604047,
"grad_norm": 72.48024831454246,
"learning_rate": 1.5438745653263086e-09,
"logits": -2.0985636711120605,
"logps": -91.15563201904297,
"loss": 0.0112,
"objective": 0.009469871409237385,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.5833333134651184,
"regularize": 0.009467961266636848,
"step": 1965
},
{
"dpo_loss": 0.6904885172843933,
"epoch": 1.861270766081411,
"grad_norm": 58.60723650611681,
"learning_rate": 1.4438108239002322e-09,
"logits": -2.138218641281128,
"logps": -93.22808074951172,
"loss": 0.013,
"objective": 0.013735007494688034,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5166666507720947,
"regularize": 0.01373241189867258,
"step": 1970
},
{
"dpo_loss": 0.6905199885368347,
"epoch": 1.8659948035587748,
"grad_norm": 75.78083265362716,
"learning_rate": 1.3470519038991268e-09,
"logits": -2.1427910327911377,
"logps": -94.4820556640625,
"loss": 0.0133,
"objective": 0.016799870878458023,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.5666666626930237,
"regularize": 0.016797440126538277,
"step": 1975
},
{
"dpo_loss": 0.692724347114563,
"epoch": 1.8707188410361388,
"grad_norm": 67.62639102220639,
"learning_rate": 1.253604390908819e-09,
"logits": -2.068427562713623,
"logps": -91.92679595947266,
"loss": 0.0118,
"objective": 0.012069400399923325,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.5833333134651184,
"regularize": 0.012066869996488094,
"step": 1980
},
{
"dpo_loss": 0.6905953288078308,
"epoch": 1.8754428785135029,
"grad_norm": 67.47097990851285,
"learning_rate": 1.1634746451348487e-09,
"logits": -2.1265487670898438,
"logps": -91.03251647949219,
"loss": 0.0117,
"objective": 0.007730665151029825,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.4833333194255829,
"regularize": 0.007728379685431719,
"step": 1985
},
{
"dpo_loss": 0.6893682479858398,
"epoch": 1.8801669159908667,
"grad_norm": 66.59499294449671,
"learning_rate": 1.0766688009695545e-09,
"logits": -2.200402021408081,
"logps": -91.0616226196289,
"loss": 0.0121,
"objective": 0.011565645225346088,
"ranking_idealized": 0.699999988079071,
"ranking_idealized_expo": 0.6499999761581421,
"ranking_simple": 0.6499999761581421,
"regularize": 0.01156419888138771,
"step": 1990
},
{
"dpo_loss": 0.6916558742523193,
"epoch": 1.884890953468231,
"grad_norm": 59.74243706815504,
"learning_rate": 9.931927665745521e-10,
"logits": -2.1524574756622314,
"logps": -90.13037109375,
"loss": 0.0111,
"objective": 0.012568699195981026,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.4000000059604645,
"ranking_simple": 0.4000000059604645,
"regularize": 0.012566999532282352,
"step": 1995
},
{
"dpo_loss": 0.6858018636703491,
"epoch": 1.8896149909455948,
"grad_norm": 67.18519308744443,
"learning_rate": 9.130522234786498e-10,
"logits": -2.1586594581604004,
"logps": -87.96702575683594,
"loss": 0.012,
"objective": 0.01719100959599018,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.5666666626930237,
"regularize": 0.017189564183354378,
"step": 2000
},
{
"epoch": 1.8896149909455948,
"eval_dpo_loss": 0.6928969025611877,
"eval_logits": -1.9894063472747803,
"eval_logps": -98.4583511352539,
"eval_loss": 0.008732160553336143,
"eval_objective": 0.008810975588858128,
"eval_ranking_idealized": 0.6022099256515503,
"eval_ranking_idealized_expo": 0.5207182168960571,
"eval_ranking_simple": 0.5179557800292969,
"eval_regularize": 0.008808653801679611,
"eval_runtime": 446.6812,
"eval_samples_per_second": 12.962,
"eval_steps_per_second": 3.242,
"step": 2000
},
{
"dpo_loss": 0.6894116997718811,
"epoch": 1.8943390284229589,
"grad_norm": 63.88750093236265,
"learning_rate": 8.36252626191103e-10,
"logits": -2.1512064933776855,
"logps": -90.55884552001953,
"loss": 0.0112,
"objective": 0.011560056358575821,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.4333333373069763,
"ranking_simple": 0.4333333373069763,
"regularize": 0.011556769721210003,
"step": 2005
},
{
"dpo_loss": 0.691102147102356,
"epoch": 1.899063065900323,
"grad_norm": 66.23415561126623,
"learning_rate": 7.627992018304163e-10,
"logits": -2.1225428581237793,
"logps": -90.3371810913086,
"loss": 0.012,
"objective": 0.009777167811989784,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.38333332538604736,
"ranking_simple": 0.38333332538604736,
"regularize": 0.009775066748261452,
"step": 2010
},
{
"dpo_loss": 0.6881377100944519,
"epoch": 1.9037871033776868,
"grad_norm": 69.19769212044956,
"learning_rate": 6.926969497685397e-10,
"logits": -2.1484780311584473,
"logps": -90.53960418701172,
"loss": 0.0128,
"objective": 0.011675039306282997,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5166666507720947,
"regularize": 0.01167262066155672,
"step": 2015
},
{
"dpo_loss": 0.6911852955818176,
"epoch": 1.9085111408550508,
"grad_norm": 54.24867081294276,
"learning_rate": 6.259506412906401e-10,
"logits": -2.036067008972168,
"logps": -94.92063903808594,
"loss": 0.0114,
"objective": 0.011328586377203465,
"ranking_idealized": 0.4833333194255829,
"ranking_idealized_expo": 0.44999998807907104,
"ranking_simple": 0.44999998807907104,
"regularize": 0.011327385902404785,
"step": 2020
},
{
"dpo_loss": 0.6897438168525696,
"epoch": 1.9132351783324149,
"grad_norm": 66.21589409339711,
"learning_rate": 5.625648192703114e-10,
"logits": -2.100722074508667,
"logps": -88.59317779541016,
"loss": 0.0111,
"objective": 0.01007751189172268,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.550000011920929,
"regularize": 0.010075613856315613,
"step": 2025
},
{
"dpo_loss": 0.6879320740699768,
"epoch": 1.9179592158097787,
"grad_norm": 97.2021444500617,
"learning_rate": 5.025437978604219e-10,
"logits": -2.174182653427124,
"logps": -98.01433563232422,
"loss": 0.0128,
"objective": 0.015440816059708595,
"ranking_idealized": 0.699999988079071,
"ranking_idealized_expo": 0.6333333253860474,
"ranking_simple": 0.6333333253860474,
"regularize": 0.01543727982789278,
"step": 2030
},
{
"dpo_loss": 0.6889265775680542,
"epoch": 1.9226832532871427,
"grad_norm": 60.36698105450304,
"learning_rate": 4.458916621994713e-10,
"logits": -2.1146934032440186,
"logps": -96.32176208496094,
"loss": 0.013,
"objective": 0.012396620586514473,
"ranking_idealized": 0.7833333611488342,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.5833333134651184,
"regularize": 0.012394459918141365,
"step": 2035
},
{
"dpo_loss": 0.6921891570091248,
"epoch": 1.9274072907645068,
"grad_norm": 62.70952327603329,
"learning_rate": 3.9261226813353533e-10,
"logits": -2.2713499069213867,
"logps": -94.95305633544922,
"loss": 0.0111,
"objective": 0.012650835327804089,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5166666507720947,
"regularize": 0.012648210860788822,
"step": 2040
},
{
"dpo_loss": 0.6918179988861084,
"epoch": 1.9321313282418706,
"grad_norm": 60.71087366165103,
"learning_rate": 3.4270924195384246e-10,
"logits": -2.072065830230713,
"logps": -93.6068344116211,
"loss": 0.0112,
"objective": 0.010315236635506153,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.46666666865348816,
"regularize": 0.010312527418136597,
"step": 2045
},
{
"dpo_loss": 0.6912323236465454,
"epoch": 1.9368553657192347,
"grad_norm": 66.0225333978305,
"learning_rate": 2.9618598014997107e-10,
"logits": -2.1574909687042236,
"logps": -93.39547729492188,
"loss": 0.0119,
"objective": 0.012261408381164074,
"ranking_idealized": 0.5,
"ranking_idealized_expo": 0.4166666567325592,
"ranking_simple": 0.4166666567325592,
"regularize": 0.012259239330887794,
"step": 2050
},
{
"epoch": 1.9368553657192347,
"eval_dpo_loss": 0.6928916573524475,
"eval_logits": -1.989404320716858,
"eval_logps": -98.45708465576172,
"eval_loss": 0.008758697658777237,
"eval_objective": 0.008833469823002815,
"eval_ranking_idealized": 0.6022099256515503,
"eval_ranking_idealized_expo": 0.5207182168960571,
"eval_ranking_simple": 0.5179557800292969,
"eval_regularize": 0.008830932900309563,
"eval_runtime": 446.0684,
"eval_samples_per_second": 12.98,
"eval_steps_per_second": 3.246,
"step": 2050
},
{
"dpo_loss": 0.6905261874198914,
"epoch": 1.9415794031965987,
"grad_norm": 72.43113470913532,
"learning_rate": 2.5304564917865145e-10,
"logits": -2.20497465133667,
"logps": -93.12974548339844,
"loss": 0.0106,
"objective": 0.011251457966864109,
"ranking_idealized": 0.7333333492279053,
"ranking_idealized_expo": 0.6833333373069763,
"ranking_simple": 0.6833333373069763,
"regularize": 0.011249854229390621,
"step": 2055
},
{
"dpo_loss": 0.6920349597930908,
"epoch": 1.9463034406739625,
"grad_norm": 63.18535597705442,
"learning_rate": 2.132911852482766e-10,
"logits": -2.2412173748016357,
"logps": -90.34603881835938,
"loss": 0.0125,
"objective": 0.013630078174173832,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.550000011920929,
"regularize": 0.013628335669636726,
"step": 2060
},
{
"dpo_loss": 0.691752016544342,
"epoch": 1.9510274781513268,
"grad_norm": 69.39489329530257,
"learning_rate": 1.7692529411904578e-10,
"logits": -2.248879909515381,
"logps": -96.89351654052734,
"loss": 0.012,
"objective": 0.011724698357284069,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.5666666626930237,
"regularize": 0.011722984723746777,
"step": 2065
},
{
"dpo_loss": 0.6898305416107178,
"epoch": 1.9557515156286907,
"grad_norm": 64.01320477510419,
"learning_rate": 1.4395045091880608e-10,
"logits": -2.133453845977783,
"logps": -89.6153564453125,
"loss": 0.0122,
"objective": 0.013158326968550682,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5333333611488342,
"regularize": 0.013156161643564701,
"step": 2070
},
{
"dpo_loss": 0.6894416213035583,
"epoch": 1.9604755531060545,
"grad_norm": 70.45537055239048,
"learning_rate": 1.1436889997460397e-10,
"logits": -2.1818015575408936,
"logps": -87.75623321533203,
"loss": 0.0122,
"objective": 0.01147166732698679,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5333333611488342,
"regularize": 0.0114695830270648,
"step": 2075
},
{
"dpo_loss": 0.689843475818634,
"epoch": 1.9651995905834188,
"grad_norm": 72.42995706576724,
"learning_rate": 8.818265465991292e-11,
"logits": -2.1221158504486084,
"logps": -92.03446960449219,
"loss": 0.0116,
"objective": 0.01287260465323925,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.6000000238418579,
"ranking_simple": 0.6000000238418579,
"regularize": 0.012871033512055874,
"step": 2080
},
{
"dpo_loss": 0.6901150345802307,
"epoch": 1.9699236280607826,
"grad_norm": 67.99462621349231,
"learning_rate": 6.539349725760423e-11,
"logits": -2.2134578227996826,
"logps": -94.01133728027344,
"loss": 0.0138,
"objective": 0.014971112832427025,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.5666666626930237,
"regularize": 0.014969981275498867,
"step": 2085
},
{
"dpo_loss": 0.6904923915863037,
"epoch": 1.9746476655381466,
"grad_norm": 64.88891143293061,
"learning_rate": 4.600297883866067e-11,
"logits": -2.1357476711273193,
"logps": -88.28942108154297,
"loss": 0.0103,
"objective": 0.008000458590686321,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5,
"regularize": 0.00799502618610859,
"step": 2090
},
{
"dpo_loss": 0.691422700881958,
"epoch": 1.9793717030155107,
"grad_norm": 59.76257934662463,
"learning_rate": 3.0012419156572044e-11,
"logits": -2.106537103652954,
"logps": -92.17442321777344,
"loss": 0.0128,
"objective": 0.012720795348286629,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.5666666626930237,
"regularize": 0.0127179604023695,
"step": 2095
},
{
"dpo_loss": 0.6881771087646484,
"epoch": 1.9840957404928745,
"grad_norm": 65.98055833646369,
"learning_rate": 1.7422906557557073e-11,
"logits": -2.096862316131592,
"logps": -96.3370132446289,
"loss": 0.0116,
"objective": 0.01271964143961668,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.4833333194255829,
"regularize": 0.01271754689514637,
"step": 2100
},
{
"epoch": 1.9840957404928745,
"eval_dpo_loss": 0.6928920149803162,
"eval_logits": -1.9894039630889893,
"eval_logps": -98.45726013183594,
"eval_loss": 0.008759252727031708,
"eval_objective": 0.008833796717226505,
"eval_ranking_idealized": 0.6022099256515503,
"eval_ranking_idealized_expo": 0.5207182168960571,
"eval_ranking_simple": 0.5179557800292969,
"eval_regularize": 0.008831293322145939,
"eval_runtime": 446.6316,
"eval_samples_per_second": 12.964,
"eval_steps_per_second": 3.242,
"step": 2100
},
{
"dpo_loss": 0.69146329164505,
"epoch": 1.9888197779702386,
"grad_norm": 64.49594040581191,
"learning_rate": 8.235297906444837e-12,
"logits": -2.149221181869507,
"logps": -88.66574096679688,
"loss": 0.0128,
"objective": 0.01396742183715105,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5,
"regularize": 0.013964297249913216,
"step": 2105
},
{
"dpo_loss": 0.6907679438591003,
"epoch": 1.9935438154476026,
"grad_norm": 70.66784622308231,
"learning_rate": 2.450218528377013e-12,
"logits": -2.137131452560425,
"logps": -93.39989471435547,
"loss": 0.0112,
"objective": 0.010235412046313286,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.46666666865348816,
"regularize": 0.010233612731099129,
"step": 2110
},
{
"dpo_loss": 0.6910449266433716,
"epoch": 1.9982678529249664,
"grad_norm": 60.167547577945186,
"learning_rate": 6.806216624188899e-14,
"logits": -2.124387502670288,
"logps": -93.0544204711914,
"loss": 0.014,
"objective": 0.009751829318702221,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.4833333194255829,
"regularize": 0.009749443270266056,
"step": 2115
},
{
"epoch": 1.9992126604204392,
"step": 2116,
"total_flos": 0.0,
"train_loss": 0.020640970417914562,
"train_runtime": 38688.7685,
"train_samples_per_second": 2.626,
"train_steps_per_second": 0.055
}
],
"logging_steps": 5,
"max_steps": 2116,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}