|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9992126604204392, |
|
"eval_steps": 50, |
|
"global_step": 2116, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"dpo_loss": 0.6931471824645996, |
|
"epoch": 0.0009448074954727974, |
|
"grad_norm": 66.46447760146653, |
|
"learning_rate": 4.716981132075471e-10, |
|
"logits": -2.096372604370117, |
|
"logps": -90.695556640625, |
|
"loss": 0.0053, |
|
"objective": 0.004870629869401455, |
|
"ranking_idealized": 0.5, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.004867264535278082, |
|
"step": 1 |
|
}, |
|
{ |
|
"dpo_loss": 0.6931397914886475, |
|
"epoch": 0.004724037477363987, |
|
"grad_norm": 73.1061214549564, |
|
"learning_rate": 2.358490566037736e-09, |
|
"logits": -2.2148256301879883, |
|
"logps": -89.18419647216797, |
|
"loss": 0.0056, |
|
"objective": 0.006199519615620375, |
|
"ranking_idealized": 0.5, |
|
"ranking_idealized_expo": 0.3958333432674408, |
|
"ranking_simple": 0.3958333432674408, |
|
"regularize": 0.0061979773454368114, |
|
"step": 5 |
|
}, |
|
{ |
|
"dpo_loss": 0.6931113600730896, |
|
"epoch": 0.009448074954727975, |
|
"grad_norm": 66.26130454557787, |
|
"learning_rate": 4.716981132075472e-09, |
|
"logits": -2.1163227558135986, |
|
"logps": -90.67784118652344, |
|
"loss": 0.0056, |
|
"objective": 0.0063161361031234264, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.4166666567325592, |
|
"ranking_simple": 0.4166666567325592, |
|
"regularize": 0.006314346566796303, |
|
"step": 10 |
|
}, |
|
{ |
|
"dpo_loss": 0.6930894255638123, |
|
"epoch": 0.014172112432091962, |
|
"grad_norm": 65.86685369935576, |
|
"learning_rate": 7.075471698113207e-09, |
|
"logits": -2.0237159729003906, |
|
"logps": -88.98513793945312, |
|
"loss": 0.0052, |
|
"objective": 0.004170234780758619, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.004166428931057453, |
|
"step": 15 |
|
}, |
|
{ |
|
"dpo_loss": 0.6932118535041809, |
|
"epoch": 0.01889614990945595, |
|
"grad_norm": 67.08170030108543, |
|
"learning_rate": 9.433962264150943e-09, |
|
"logits": -2.220696449279785, |
|
"logps": -91.20802307128906, |
|
"loss": 0.0056, |
|
"objective": 0.0056175715290009975, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.46666666865348816, |
|
"regularize": 0.00561108160763979, |
|
"step": 20 |
|
}, |
|
{ |
|
"dpo_loss": 0.6930363774299622, |
|
"epoch": 0.023620187386819935, |
|
"grad_norm": 64.75938875868191, |
|
"learning_rate": 1.1792452830188679e-08, |
|
"logits": -2.184110164642334, |
|
"logps": -93.84049224853516, |
|
"loss": 0.005, |
|
"objective": 0.005908097140491009, |
|
"ranking_idealized": 0.699999988079071, |
|
"ranking_idealized_expo": 0.6166666746139526, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.0059060631319880486, |
|
"step": 25 |
|
}, |
|
{ |
|
"dpo_loss": 0.6930921673774719, |
|
"epoch": 0.028344224864183924, |
|
"grad_norm": 70.85746649835617, |
|
"learning_rate": 1.4150943396226414e-08, |
|
"logits": -2.048970937728882, |
|
"logps": -92.22063446044922, |
|
"loss": 0.0062, |
|
"objective": 0.006645068060606718, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.0066424161195755005, |
|
"step": 30 |
|
}, |
|
{ |
|
"dpo_loss": 0.6927813291549683, |
|
"epoch": 0.03306826234154791, |
|
"grad_norm": 69.39212454087999, |
|
"learning_rate": 1.6509433962264148e-08, |
|
"logits": -2.164494276046753, |
|
"logps": -90.56898498535156, |
|
"loss": 0.006, |
|
"objective": 0.0044576446525752544, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.004453611560165882, |
|
"step": 35 |
|
}, |
|
{ |
|
"dpo_loss": 0.6927517056465149, |
|
"epoch": 0.0377922998189119, |
|
"grad_norm": 79.75758352153623, |
|
"learning_rate": 1.8867924528301887e-08, |
|
"logits": -2.2105553150177, |
|
"logps": -94.57971954345703, |
|
"loss": 0.0058, |
|
"objective": 0.004820433910936117, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.004817112348973751, |
|
"step": 40 |
|
}, |
|
{ |
|
"dpo_loss": 0.6930319666862488, |
|
"epoch": 0.04251633729627589, |
|
"grad_norm": 64.62991156662962, |
|
"learning_rate": 2.1226415094339622e-08, |
|
"logits": -2.149827241897583, |
|
"logps": -90.14302062988281, |
|
"loss": 0.0057, |
|
"objective": 0.004925203043967485, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 0.004921570885926485, |
|
"step": 45 |
|
}, |
|
{ |
|
"dpo_loss": 0.692544162273407, |
|
"epoch": 0.04724037477363987, |
|
"grad_norm": 68.06296242846598, |
|
"learning_rate": 2.3584905660377358e-08, |
|
"logits": -2.1563477516174316, |
|
"logps": -91.66796875, |
|
"loss": 0.006, |
|
"objective": 0.006787313614040613, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.006785000674426556, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04724037477363987, |
|
"eval_dpo_loss": 0.6929724812507629, |
|
"eval_logits": -1.9958003759384155, |
|
"eval_logps": -98.61515808105469, |
|
"eval_loss": 0.0059888348914682865, |
|
"eval_objective": 0.006077947095036507, |
|
"eval_ranking_idealized": 0.6022099256515503, |
|
"eval_ranking_idealized_expo": 0.5207182168960571, |
|
"eval_ranking_simple": 0.5179557800292969, |
|
"eval_regularize": 0.006075054872781038, |
|
"eval_runtime": 445.1462, |
|
"eval_samples_per_second": 13.007, |
|
"eval_steps_per_second": 3.253, |
|
"step": 50 |
|
}, |
|
{ |
|
"dpo_loss": 0.6929494738578796, |
|
"epoch": 0.05196441225100386, |
|
"grad_norm": 73.41432282855463, |
|
"learning_rate": 2.5943396226415093e-08, |
|
"logits": -2.1937828063964844, |
|
"logps": -93.48242950439453, |
|
"loss": 0.006, |
|
"objective": 0.005628067534416914, |
|
"ranking_idealized": 0.46666666865348816, |
|
"ranking_idealized_expo": 0.4166666567325592, |
|
"ranking_simple": 0.4166666567325592, |
|
"regularize": 0.005625530146062374, |
|
"step": 55 |
|
}, |
|
{ |
|
"dpo_loss": 0.692658543586731, |
|
"epoch": 0.05668844972836785, |
|
"grad_norm": 65.33336896476966, |
|
"learning_rate": 2.830188679245283e-08, |
|
"logits": -2.2241933345794678, |
|
"logps": -89.14469146728516, |
|
"loss": 0.0058, |
|
"objective": 0.004998047836124897, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.6166666746139526, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.004994309972971678, |
|
"step": 60 |
|
}, |
|
{ |
|
"dpo_loss": 0.692764163017273, |
|
"epoch": 0.06141248720573183, |
|
"grad_norm": 66.55102685698147, |
|
"learning_rate": 3.0660377358490564e-08, |
|
"logits": -2.2217800617218018, |
|
"logps": -89.03886413574219, |
|
"loss": 0.0074, |
|
"objective": 0.0076709226705133915, |
|
"ranking_idealized": 0.5, |
|
"ranking_idealized_expo": 0.38333332538604736, |
|
"ranking_simple": 0.38333332538604736, |
|
"regularize": 0.007668651174753904, |
|
"step": 65 |
|
}, |
|
{ |
|
"dpo_loss": 0.6925709843635559, |
|
"epoch": 0.06613652468309582, |
|
"grad_norm": 75.22580153097067, |
|
"learning_rate": 3.3018867924528296e-08, |
|
"logits": -2.1472768783569336, |
|
"logps": -94.85504913330078, |
|
"loss": 0.0076, |
|
"objective": 0.007108477409929037, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.007105442229658365, |
|
"step": 70 |
|
}, |
|
{ |
|
"dpo_loss": 0.6925690770149231, |
|
"epoch": 0.0708605621604598, |
|
"grad_norm": 79.40443090905357, |
|
"learning_rate": 3.5377358490566035e-08, |
|
"logits": -2.206367015838623, |
|
"logps": -92.62692260742188, |
|
"loss": 0.0071, |
|
"objective": 0.007145268842577934, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.00714282738044858, |
|
"step": 75 |
|
}, |
|
{ |
|
"dpo_loss": 0.6921057105064392, |
|
"epoch": 0.0755845996378238, |
|
"grad_norm": 76.34709727963714, |
|
"learning_rate": 3.7735849056603774e-08, |
|
"logits": -2.1775050163269043, |
|
"logps": -94.01787567138672, |
|
"loss": 0.0066, |
|
"objective": 0.006093442440032959, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.006089916918426752, |
|
"step": 80 |
|
}, |
|
{ |
|
"dpo_loss": 0.6927501559257507, |
|
"epoch": 0.08030863711518778, |
|
"grad_norm": 76.24460999547686, |
|
"learning_rate": 4.009433962264151e-08, |
|
"logits": -2.1956326961517334, |
|
"logps": -92.5903091430664, |
|
"loss": 0.0069, |
|
"objective": 0.0067410701885819435, |
|
"ranking_idealized": 0.699999988079071, |
|
"ranking_idealized_expo": 0.6000000238418579, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.006739132571965456, |
|
"step": 85 |
|
}, |
|
{ |
|
"dpo_loss": 0.6902840733528137, |
|
"epoch": 0.08503267459255177, |
|
"grad_norm": 65.10729628861057, |
|
"learning_rate": 4.2452830188679244e-08, |
|
"logits": -2.2021093368530273, |
|
"logps": -96.31147766113281, |
|
"loss": 0.0075, |
|
"objective": 0.008030267432332039, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.008029019460082054, |
|
"step": 90 |
|
}, |
|
{ |
|
"dpo_loss": 0.6926410794258118, |
|
"epoch": 0.08975671206991576, |
|
"grad_norm": 71.92277477052832, |
|
"learning_rate": 4.481132075471698e-08, |
|
"logits": -2.1148061752319336, |
|
"logps": -89.49421691894531, |
|
"loss": 0.007, |
|
"objective": 0.0068625533021986485, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.006859814748167992, |
|
"step": 95 |
|
}, |
|
{ |
|
"dpo_loss": 0.6942407488822937, |
|
"epoch": 0.09448074954727974, |
|
"grad_norm": 65.63913294191785, |
|
"learning_rate": 4.7169811320754715e-08, |
|
"logits": -2.2743189334869385, |
|
"logps": -94.41613006591797, |
|
"loss": 0.0092, |
|
"objective": 0.012641828507184982, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.012639058753848076, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.09448074954727974, |
|
"eval_dpo_loss": 0.6929203867912292, |
|
"eval_logits": -1.995413899421692, |
|
"eval_logps": -98.7889404296875, |
|
"eval_loss": 0.00729329651221633, |
|
"eval_objective": 0.00731161143630743, |
|
"eval_ranking_idealized": 0.6022099256515503, |
|
"eval_ranking_idealized_expo": 0.5207182168960571, |
|
"eval_ranking_simple": 0.5186464190483093, |
|
"eval_regularize": 0.00730866938829422, |
|
"eval_runtime": 447.0397, |
|
"eval_samples_per_second": 12.952, |
|
"eval_steps_per_second": 3.239, |
|
"step": 100 |
|
}, |
|
{ |
|
"dpo_loss": 0.6924384832382202, |
|
"epoch": 0.09920478702464373, |
|
"grad_norm": 67.55366715626052, |
|
"learning_rate": 4.9528301886792454e-08, |
|
"logits": -2.088653802871704, |
|
"logps": -92.43064880371094, |
|
"loss": 0.009, |
|
"objective": 0.00849145371466875, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.008488119579851627, |
|
"step": 105 |
|
}, |
|
{ |
|
"dpo_loss": 0.6923626661300659, |
|
"epoch": 0.10392882450200772, |
|
"grad_norm": 68.4026236372278, |
|
"learning_rate": 5.1886792452830186e-08, |
|
"logits": -2.1796536445617676, |
|
"logps": -91.34872436523438, |
|
"loss": 0.0083, |
|
"objective": 0.0080822529271245, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 0.008080901578068733, |
|
"step": 110 |
|
}, |
|
{ |
|
"dpo_loss": 0.6901246309280396, |
|
"epoch": 0.1086528619793717, |
|
"grad_norm": 64.06144820478669, |
|
"learning_rate": 5.4245283018867925e-08, |
|
"logits": -2.1858596801757812, |
|
"logps": -89.73294830322266, |
|
"loss": 0.0097, |
|
"objective": 0.013094036839902401, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.46666666865348816, |
|
"regularize": 0.013092008419334888, |
|
"step": 115 |
|
}, |
|
{ |
|
"dpo_loss": 0.6923102736473083, |
|
"epoch": 0.1133768994567357, |
|
"grad_norm": 61.237648061668985, |
|
"learning_rate": 5.660377358490566e-08, |
|
"logits": -2.1982421875, |
|
"logps": -94.54019927978516, |
|
"loss": 0.0085, |
|
"objective": 0.008155700750648975, |
|
"ranking_idealized": 0.7166666388511658, |
|
"ranking_idealized_expo": 0.6666666865348816, |
|
"ranking_simple": 0.6666666865348816, |
|
"regularize": 0.00815290305763483, |
|
"step": 120 |
|
}, |
|
{ |
|
"dpo_loss": 0.6917387247085571, |
|
"epoch": 0.11810093693409968, |
|
"grad_norm": 73.42929721502428, |
|
"learning_rate": 5.8962264150943396e-08, |
|
"logits": -2.1935439109802246, |
|
"logps": -95.18013000488281, |
|
"loss": 0.0103, |
|
"objective": 0.010648042894899845, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 0.010645588859915733, |
|
"step": 125 |
|
}, |
|
{ |
|
"dpo_loss": 0.6919466853141785, |
|
"epoch": 0.12282497441146366, |
|
"grad_norm": 68.99586341226502, |
|
"learning_rate": 6.132075471698113e-08, |
|
"logits": -2.1257071495056152, |
|
"logps": -97.63853454589844, |
|
"loss": 0.0103, |
|
"objective": 0.009905511513352394, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.6000000238418579, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.009904048405587673, |
|
"step": 130 |
|
}, |
|
{ |
|
"dpo_loss": 0.6923593878746033, |
|
"epoch": 0.12754901188882764, |
|
"grad_norm": 56.22865998140236, |
|
"learning_rate": 6.367924528301887e-08, |
|
"logits": -2.1036369800567627, |
|
"logps": -91.52125549316406, |
|
"loss": 0.0098, |
|
"objective": 0.009057187475264072, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.46666666865348816, |
|
"regularize": 0.009054058231413364, |
|
"step": 135 |
|
}, |
|
{ |
|
"dpo_loss": 0.6894025206565857, |
|
"epoch": 0.13227304936619164, |
|
"grad_norm": 64.3061621244485, |
|
"learning_rate": 6.603773584905659e-08, |
|
"logits": -2.246990442276001, |
|
"logps": -92.88211059570312, |
|
"loss": 0.0113, |
|
"objective": 0.011589973233640194, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.011587386019527912, |
|
"step": 140 |
|
}, |
|
{ |
|
"dpo_loss": 0.6934190988540649, |
|
"epoch": 0.13699708684355563, |
|
"grad_norm": 74.71610410718624, |
|
"learning_rate": 6.839622641509434e-08, |
|
"logits": -2.102886199951172, |
|
"logps": -90.7693099975586, |
|
"loss": 0.0123, |
|
"objective": 0.00984902959316969, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 0.009846380911767483, |
|
"step": 145 |
|
}, |
|
{ |
|
"dpo_loss": 0.6908667087554932, |
|
"epoch": 0.1417211243209196, |
|
"grad_norm": 76.42928931969817, |
|
"learning_rate": 7.075471698113207e-08, |
|
"logits": -2.238095998764038, |
|
"logps": -94.04148864746094, |
|
"loss": 0.0142, |
|
"objective": 0.014535349793732166, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.014532845467329025, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.1417211243209196, |
|
"eval_dpo_loss": 0.693015992641449, |
|
"eval_logits": -1.9985584020614624, |
|
"eval_logps": -98.66200256347656, |
|
"eval_loss": 0.009166295640170574, |
|
"eval_objective": 0.00932803563773632, |
|
"eval_ranking_idealized": 0.6022099256515503, |
|
"eval_ranking_idealized_expo": 0.5207182168960571, |
|
"eval_ranking_simple": 0.5186464190483093, |
|
"eval_regularize": 0.009325726889073849, |
|
"eval_runtime": 446.6869, |
|
"eval_samples_per_second": 12.962, |
|
"eval_steps_per_second": 3.242, |
|
"step": 150 |
|
}, |
|
{ |
|
"dpo_loss": 0.6918679475784302, |
|
"epoch": 0.1464451617982836, |
|
"grad_norm": 80.89548200084305, |
|
"learning_rate": 7.311320754716981e-08, |
|
"logits": -2.1368908882141113, |
|
"logps": -90.76949310302734, |
|
"loss": 0.0131, |
|
"objective": 0.011844370514154434, |
|
"ranking_idealized": 0.699999988079071, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.011842181906104088, |
|
"step": 155 |
|
}, |
|
{ |
|
"dpo_loss": 0.6904442310333252, |
|
"epoch": 0.1511691992756476, |
|
"grad_norm": 70.5889289835053, |
|
"learning_rate": 7.547169811320755e-08, |
|
"logits": -2.216298818588257, |
|
"logps": -94.7828140258789, |
|
"loss": 0.0132, |
|
"objective": 0.012807334773242474, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.012806250713765621, |
|
"step": 160 |
|
}, |
|
{ |
|
"dpo_loss": 0.6893646121025085, |
|
"epoch": 0.15589323675301156, |
|
"grad_norm": 70.53443315770147, |
|
"learning_rate": 7.783018867924527e-08, |
|
"logits": -2.2371225357055664, |
|
"logps": -91.81126403808594, |
|
"loss": 0.0123, |
|
"objective": 0.014454828575253487, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.46666666865348816, |
|
"regularize": 0.01445354800671339, |
|
"step": 165 |
|
}, |
|
{ |
|
"dpo_loss": 0.6928521990776062, |
|
"epoch": 0.16061727423037556, |
|
"grad_norm": 78.26059021460965, |
|
"learning_rate": 8.018867924528302e-08, |
|
"logits": -2.215101480484009, |
|
"logps": -92.55703735351562, |
|
"loss": 0.0141, |
|
"objective": 0.012299363501369953, |
|
"ranking_idealized": 0.699999988079071, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.012298095040023327, |
|
"step": 170 |
|
}, |
|
{ |
|
"dpo_loss": 0.6916810274124146, |
|
"epoch": 0.16534131170773955, |
|
"grad_norm": 62.838329994801875, |
|
"learning_rate": 8.254716981132075e-08, |
|
"logits": -2.1370534896850586, |
|
"logps": -91.3881607055664, |
|
"loss": 0.014, |
|
"objective": 0.015395297668874264, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.015393855981528759, |
|
"step": 175 |
|
}, |
|
{ |
|
"dpo_loss": 0.6918050050735474, |
|
"epoch": 0.17006534918510355, |
|
"grad_norm": 73.17909568701033, |
|
"learning_rate": 8.490566037735849e-08, |
|
"logits": -2.281663417816162, |
|
"logps": -94.94965362548828, |
|
"loss": 0.0138, |
|
"objective": 0.017136668786406517, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.017135154455900192, |
|
"step": 180 |
|
}, |
|
{ |
|
"dpo_loss": 0.69007408618927, |
|
"epoch": 0.17478938666246752, |
|
"grad_norm": 62.37985427959411, |
|
"learning_rate": 8.726415094339621e-08, |
|
"logits": -2.1940929889678955, |
|
"logps": -93.47923278808594, |
|
"loss": 0.0159, |
|
"objective": 0.017906082794070244, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.017902227118611336, |
|
"step": 185 |
|
}, |
|
{ |
|
"dpo_loss": 0.6921120882034302, |
|
"epoch": 0.1795134241398315, |
|
"grad_norm": 70.11549174821204, |
|
"learning_rate": 8.962264150943397e-08, |
|
"logits": -2.1571404933929443, |
|
"logps": -87.7894287109375, |
|
"loss": 0.0144, |
|
"objective": 0.01052508968859911, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.010523774661123753, |
|
"step": 190 |
|
}, |
|
{ |
|
"dpo_loss": 0.6912659406661987, |
|
"epoch": 0.1842374616171955, |
|
"grad_norm": 78.18173910687094, |
|
"learning_rate": 9.198113207547169e-08, |
|
"logits": -2.1662251949310303, |
|
"logps": -90.77739715576172, |
|
"loss": 0.0139, |
|
"objective": 0.01253608800470829, |
|
"ranking_idealized": 0.7166666388511658, |
|
"ranking_idealized_expo": 0.6000000238418579, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.012534084729850292, |
|
"step": 195 |
|
}, |
|
{ |
|
"dpo_loss": 0.6948674321174622, |
|
"epoch": 0.18896149909455948, |
|
"grad_norm": 74.9573189190215, |
|
"learning_rate": 9.433962264150943e-08, |
|
"logits": -2.1426119804382324, |
|
"logps": -93.58480072021484, |
|
"loss": 0.0173, |
|
"objective": 0.018598254770040512, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.018596837297081947, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.18896149909455948, |
|
"eval_dpo_loss": 0.6928583383560181, |
|
"eval_logits": -1.9957162141799927, |
|
"eval_logps": -98.79460144042969, |
|
"eval_loss": 0.009674900211393833, |
|
"eval_objective": 0.009766080416738987, |
|
"eval_ranking_idealized": 0.6022099256515503, |
|
"eval_ranking_idealized_expo": 0.5207182168960571, |
|
"eval_ranking_simple": 0.5172652006149292, |
|
"eval_regularize": 0.009763876907527447, |
|
"eval_runtime": 457.4524, |
|
"eval_samples_per_second": 12.657, |
|
"eval_steps_per_second": 3.165, |
|
"step": 200 |
|
}, |
|
{ |
|
"dpo_loss": 0.6902305483818054, |
|
"epoch": 0.19368553657192347, |
|
"grad_norm": 69.58780380589624, |
|
"learning_rate": 9.669811320754716e-08, |
|
"logits": -2.137186288833618, |
|
"logps": -90.56549835205078, |
|
"loss": 0.0203, |
|
"objective": 0.016451861709356308, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.016451073810458183, |
|
"step": 205 |
|
}, |
|
{ |
|
"dpo_loss": 0.693261444568634, |
|
"epoch": 0.19840957404928747, |
|
"grad_norm": 62.13478304132868, |
|
"learning_rate": 9.905660377358491e-08, |
|
"logits": -2.0685033798217773, |
|
"logps": -92.36515045166016, |
|
"loss": 0.0198, |
|
"objective": 0.02386774867773056, |
|
"ranking_idealized": 0.5166666507720947, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.44999998807907104, |
|
"regularize": 0.023865588009357452, |
|
"step": 210 |
|
}, |
|
{ |
|
"dpo_loss": 0.6884204149246216, |
|
"epoch": 0.20313361152665144, |
|
"grad_norm": 60.854335858242926, |
|
"learning_rate": 9.999938744161562e-08, |
|
"logits": -2.1262550354003906, |
|
"logps": -92.82433319091797, |
|
"loss": 0.0173, |
|
"objective": 0.020975453779101372, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.020974494516849518, |
|
"step": 215 |
|
}, |
|
{ |
|
"dpo_loss": 0.6918230652809143, |
|
"epoch": 0.20785764900401543, |
|
"grad_norm": 67.33088424835829, |
|
"learning_rate": 9.999564408362053e-08, |
|
"logits": -2.169888734817505, |
|
"logps": -88.9549331665039, |
|
"loss": 0.0187, |
|
"objective": 0.015909165143966675, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.4333333373069763, |
|
"ranking_simple": 0.4333333373069763, |
|
"regularize": 0.015907270833849907, |
|
"step": 220 |
|
}, |
|
{ |
|
"dpo_loss": 0.689198911190033, |
|
"epoch": 0.21258168648137943, |
|
"grad_norm": 70.77403555695805, |
|
"learning_rate": 9.998849793231472e-08, |
|
"logits": -2.099949359893799, |
|
"logps": -91.25509643554688, |
|
"loss": 0.0218, |
|
"objective": 0.02295403741300106, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.46666666865348816, |
|
"regularize": 0.022952700033783913, |
|
"step": 225 |
|
}, |
|
{ |
|
"dpo_loss": 0.6939520239830017, |
|
"epoch": 0.2173057239587434, |
|
"grad_norm": 80.6144373753939, |
|
"learning_rate": 9.997794947407808e-08, |
|
"logits": -2.1620826721191406, |
|
"logps": -94.76692962646484, |
|
"loss": 0.0239, |
|
"objective": 0.026342039927840233, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.026341309770941734, |
|
"step": 230 |
|
}, |
|
{ |
|
"dpo_loss": 0.688434362411499, |
|
"epoch": 0.2220297614361074, |
|
"grad_norm": 62.83216394514196, |
|
"learning_rate": 9.996399942685763e-08, |
|
"logits": -2.2001922130584717, |
|
"logps": -90.64398956298828, |
|
"loss": 0.0197, |
|
"objective": 0.02317703142762184, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.6000000238418579, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.023176301270723343, |
|
"step": 235 |
|
}, |
|
{ |
|
"dpo_loss": 0.6894098520278931, |
|
"epoch": 0.2267537989134714, |
|
"grad_norm": 69.59337954721937, |
|
"learning_rate": 9.994664874011862e-08, |
|
"logits": -2.1389572620391846, |
|
"logps": -91.78279113769531, |
|
"loss": 0.0225, |
|
"objective": 0.02215094491839409, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.46666666865348816, |
|
"regularize": 0.02214915119111538, |
|
"step": 240 |
|
}, |
|
{ |
|
"dpo_loss": 0.692762017250061, |
|
"epoch": 0.23147783639083536, |
|
"grad_norm": 70.45563918532531, |
|
"learning_rate": 9.992589859477995e-08, |
|
"logits": -2.0933754444122314, |
|
"logps": -92.82108306884766, |
|
"loss": 0.0215, |
|
"objective": 0.02130296640098095, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.021301595494151115, |
|
"step": 245 |
|
}, |
|
{ |
|
"dpo_loss": 0.6907398104667664, |
|
"epoch": 0.23620187386819935, |
|
"grad_norm": 68.46129704284265, |
|
"learning_rate": 9.990175040313376e-08, |
|
"logits": -2.0800933837890625, |
|
"logps": -88.17916870117188, |
|
"loss": 0.0245, |
|
"objective": 0.022766491398215294, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.02276558242738247, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.23620187386819935, |
|
"eval_dpo_loss": 0.6929402351379395, |
|
"eval_logits": -1.995142936706543, |
|
"eval_logps": -98.64155578613281, |
|
"eval_loss": 0.012135702185332775, |
|
"eval_objective": 0.012087649665772915, |
|
"eval_ranking_idealized": 0.6022099256515503, |
|
"eval_ranking_idealized_expo": 0.5207182168960571, |
|
"eval_ranking_simple": 0.5186464190483093, |
|
"eval_regularize": 0.012085951864719391, |
|
"eval_runtime": 446.4062, |
|
"eval_samples_per_second": 12.97, |
|
"eval_steps_per_second": 3.244, |
|
"step": 250 |
|
}, |
|
{ |
|
"dpo_loss": 0.6918343901634216, |
|
"epoch": 0.24092591134556335, |
|
"grad_norm": 67.22846019191091, |
|
"learning_rate": 9.987420580874936e-08, |
|
"logits": -2.116420269012451, |
|
"logps": -90.83265686035156, |
|
"loss": 0.0239, |
|
"objective": 0.019956286996603012, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.019955595955252647, |
|
"step": 255 |
|
}, |
|
{ |
|
"dpo_loss": 0.688471257686615, |
|
"epoch": 0.24564994882292732, |
|
"grad_norm": 70.37729127367317, |
|
"learning_rate": 9.98432666863613e-08, |
|
"logits": -2.240382194519043, |
|
"logps": -94.62246704101562, |
|
"loss": 0.0228, |
|
"objective": 0.026484496891498566, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.026483872905373573, |
|
"step": 260 |
|
}, |
|
{ |
|
"dpo_loss": 0.6923092603683472, |
|
"epoch": 0.25037398630029134, |
|
"grad_norm": 66.24088589632522, |
|
"learning_rate": 9.980893514174179e-08, |
|
"logits": -2.2002015113830566, |
|
"logps": -92.85660552978516, |
|
"loss": 0.0214, |
|
"objective": 0.016592005267739296, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.01658981665968895, |
|
"step": 265 |
|
}, |
|
{ |
|
"dpo_loss": 0.69212406873703, |
|
"epoch": 0.2550980237776553, |
|
"grad_norm": 67.74325606253207, |
|
"learning_rate": 9.97712135115574e-08, |
|
"logits": -2.1255643367767334, |
|
"logps": -93.65802764892578, |
|
"loss": 0.0234, |
|
"objective": 0.029443560168147087, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.02944253757596016, |
|
"step": 270 |
|
}, |
|
{ |
|
"dpo_loss": 0.6937362551689148, |
|
"epoch": 0.2598220612550193, |
|
"grad_norm": 62.77300536920228, |
|
"learning_rate": 9.973010436321003e-08, |
|
"logits": -2.1865429878234863, |
|
"logps": -94.39256286621094, |
|
"loss": 0.0231, |
|
"objective": 0.024354156106710434, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.024351568892598152, |
|
"step": 275 |
|
}, |
|
{ |
|
"dpo_loss": 0.6910390853881836, |
|
"epoch": 0.2645460987323833, |
|
"grad_norm": 62.67099824882817, |
|
"learning_rate": 9.968561049466213e-08, |
|
"logits": -2.1923298835754395, |
|
"logps": -90.44979095458984, |
|
"loss": 0.0231, |
|
"objective": 0.020880402997136116, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.44999998807907104, |
|
"ranking_simple": 0.44999998807907104, |
|
"regularize": 0.02087988331913948, |
|
"step": 280 |
|
}, |
|
{ |
|
"dpo_loss": 0.6930695176124573, |
|
"epoch": 0.26927013620974727, |
|
"grad_norm": 87.62776016079746, |
|
"learning_rate": 9.963773493424628e-08, |
|
"logits": -2.2007596492767334, |
|
"logps": -95.48867797851562, |
|
"loss": 0.0265, |
|
"objective": 0.028789160773158073, |
|
"ranking_idealized": 0.5166666507720947, |
|
"ranking_idealized_expo": 0.4000000059604645, |
|
"ranking_simple": 0.4166666567325592, |
|
"regularize": 0.028786195442080498, |
|
"step": 285 |
|
}, |
|
{ |
|
"dpo_loss": 0.6869128346443176, |
|
"epoch": 0.27399417368711126, |
|
"grad_norm": 72.49597645901943, |
|
"learning_rate": 9.95864809404591e-08, |
|
"logits": -2.1374075412750244, |
|
"logps": -93.5144271850586, |
|
"loss": 0.0275, |
|
"objective": 0.026990260928869247, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.026989614591002464, |
|
"step": 290 |
|
}, |
|
{ |
|
"dpo_loss": 0.6920251250267029, |
|
"epoch": 0.27871821116447526, |
|
"grad_norm": 77.90764756547169, |
|
"learning_rate": 9.953185200173945e-08, |
|
"logits": -2.111963987350464, |
|
"logps": -90.939453125, |
|
"loss": 0.0274, |
|
"objective": 0.022320417687296867, |
|
"ranking_idealized": 0.5166666507720947, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 0.02231987938284874, |
|
"step": 295 |
|
}, |
|
{ |
|
"dpo_loss": 0.6920836567878723, |
|
"epoch": 0.2834422486418392, |
|
"grad_norm": 68.10918361524686, |
|
"learning_rate": 9.947385183623097e-08, |
|
"logits": -2.213435173034668, |
|
"logps": -91.73210906982422, |
|
"loss": 0.0234, |
|
"objective": 0.026671167463064194, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.026670336723327637, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.2834422486418392, |
|
"eval_dpo_loss": 0.6931979060173035, |
|
"eval_logits": -1.993965744972229, |
|
"eval_logps": -98.33208465576172, |
|
"eval_loss": 0.01361795049160719, |
|
"eval_objective": 0.01397615671157837, |
|
"eval_ranking_idealized": 0.6022099256515503, |
|
"eval_ranking_idealized_expo": 0.5207182168960571, |
|
"eval_ranking_simple": 0.5165745615959167, |
|
"eval_regularize": 0.013974088244140148, |
|
"eval_runtime": 447.4565, |
|
"eval_samples_per_second": 12.94, |
|
"eval_steps_per_second": 3.236, |
|
"step": 300 |
|
}, |
|
{ |
|
"dpo_loss": 0.68857741355896, |
|
"epoch": 0.2881662861192032, |
|
"grad_norm": 68.87756454021408, |
|
"learning_rate": 9.94124843915291e-08, |
|
"logits": -2.1138675212860107, |
|
"logps": -93.91316986083984, |
|
"loss": 0.0266, |
|
"objective": 0.03335392475128174, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.03335336595773697, |
|
"step": 305 |
|
}, |
|
{ |
|
"dpo_loss": 0.6887925863265991, |
|
"epoch": 0.2928903235965672, |
|
"grad_norm": 125.82123779225154, |
|
"learning_rate": 9.934775384441227e-08, |
|
"logits": -2.138413190841675, |
|
"logps": -90.38550567626953, |
|
"loss": 0.0265, |
|
"objective": 0.021200962364673615, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 0.02119968645274639, |
|
"step": 310 |
|
}, |
|
{ |
|
"dpo_loss": 0.6924371123313904, |
|
"epoch": 0.2976143610739312, |
|
"grad_norm": 64.16350361165954, |
|
"learning_rate": 9.92796646005578e-08, |
|
"logits": -2.1671297550201416, |
|
"logps": -93.71112823486328, |
|
"loss": 0.0255, |
|
"objective": 0.027043061330914497, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.6333333253860474, |
|
"ranking_simple": 0.6499999761581421, |
|
"regularize": 0.027042122557759285, |
|
"step": 315 |
|
}, |
|
{ |
|
"dpo_loss": 0.6908622980117798, |
|
"epoch": 0.3023383985512952, |
|
"grad_norm": 73.45877351161931, |
|
"learning_rate": 9.920822129424189e-08, |
|
"logits": -2.0810954570770264, |
|
"logps": -93.48076629638672, |
|
"loss": 0.0264, |
|
"objective": 0.03161174803972244, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 0.03161115199327469, |
|
"step": 320 |
|
}, |
|
{ |
|
"dpo_loss": 0.6928619742393494, |
|
"epoch": 0.3070624360286592, |
|
"grad_norm": 73.35574157285905, |
|
"learning_rate": 9.913342878802423e-08, |
|
"logits": -2.102128505706787, |
|
"logps": -92.2950439453125, |
|
"loss": 0.0286, |
|
"objective": 0.037009891122579575, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.03700947389006615, |
|
"step": 325 |
|
}, |
|
{ |
|
"dpo_loss": 0.6908413767814636, |
|
"epoch": 0.3117864735060231, |
|
"grad_norm": 79.4503768102059, |
|
"learning_rate": 9.90552921724171e-08, |
|
"logits": -2.2076501846313477, |
|
"logps": -91.73526763916016, |
|
"loss": 0.0276, |
|
"objective": 0.020319262519478798, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.46666666865348816, |
|
"regularize": 0.020317750051617622, |
|
"step": 330 |
|
}, |
|
{ |
|
"dpo_loss": 0.6880075335502625, |
|
"epoch": 0.3165105109833871, |
|
"grad_norm": 82.61039711327662, |
|
"learning_rate": 9.897381676553888e-08, |
|
"logits": -2.177678346633911, |
|
"logps": -91.5321044921875, |
|
"loss": 0.0261, |
|
"objective": 0.027510004118084908, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.027509503066539764, |
|
"step": 335 |
|
}, |
|
{ |
|
"dpo_loss": 0.6954006552696228, |
|
"epoch": 0.3212345484607511, |
|
"grad_norm": 69.91545242214347, |
|
"learning_rate": 9.888900811275203e-08, |
|
"logits": -2.1840174198150635, |
|
"logps": -92.2385025024414, |
|
"loss": 0.0272, |
|
"objective": 0.037196170538663864, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.46666666865348816, |
|
"regularize": 0.037195660173892975, |
|
"step": 340 |
|
}, |
|
{ |
|
"dpo_loss": 0.6906254887580872, |
|
"epoch": 0.3259585859381151, |
|
"grad_norm": 65.03933644888008, |
|
"learning_rate": 9.880087198628577e-08, |
|
"logits": -2.153885841369629, |
|
"logps": -91.60904693603516, |
|
"loss": 0.0246, |
|
"objective": 0.0248491782695055, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.024848194792866707, |
|
"step": 345 |
|
}, |
|
{ |
|
"dpo_loss": 0.6858618855476379, |
|
"epoch": 0.3306826234154791, |
|
"grad_norm": 65.04231679146841, |
|
"learning_rate": 9.870941438484314e-08, |
|
"logits": -2.1364102363586426, |
|
"logps": -89.71781921386719, |
|
"loss": 0.0262, |
|
"objective": 0.026014180853962898, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.026013409718871117, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.3306826234154791, |
|
"eval_dpo_loss": 0.6925798058509827, |
|
"eval_logits": -1.9947079420089722, |
|
"eval_logps": -98.34574890136719, |
|
"eval_loss": 0.017829304561018944, |
|
"eval_objective": 0.018099796026945114, |
|
"eval_ranking_idealized": 0.6022099256515503, |
|
"eval_ranking_idealized_expo": 0.5207182168960571, |
|
"eval_ranking_simple": 0.5200276374816895, |
|
"eval_regularize": 0.018098480999469757, |
|
"eval_runtime": 446.968, |
|
"eval_samples_per_second": 12.954, |
|
"eval_steps_per_second": 3.24, |
|
"step": 350 |
|
}, |
|
{ |
|
"dpo_loss": 0.6954938173294067, |
|
"epoch": 0.3354066608928431, |
|
"grad_norm": 80.69779694329567, |
|
"learning_rate": 9.861464153319269e-08, |
|
"logits": -2.129030466079712, |
|
"logps": -93.99476623535156, |
|
"loss": 0.0296, |
|
"objective": 0.03383675962686539, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.03383495658636093, |
|
"step": 355 |
|
}, |
|
{ |
|
"dpo_loss": 0.6874939799308777, |
|
"epoch": 0.3401306983702071, |
|
"grad_norm": 76.1212296270761, |
|
"learning_rate": 9.85165598817449e-08, |
|
"logits": -2.1060662269592285, |
|
"logps": -94.81258392333984, |
|
"loss": 0.0314, |
|
"objective": 0.03174449875950813, |
|
"ranking_idealized": 0.5166666507720947, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.031743988394737244, |
|
"step": 360 |
|
}, |
|
{ |
|
"dpo_loss": 0.6876952648162842, |
|
"epoch": 0.34485473584757104, |
|
"grad_norm": 63.17330963799499, |
|
"learning_rate": 9.841517610611307e-08, |
|
"logits": -2.223184823989868, |
|
"logps": -94.34845733642578, |
|
"loss": 0.0271, |
|
"objective": 0.03227417171001434, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 0.03227332606911659, |
|
"step": 365 |
|
}, |
|
{ |
|
"dpo_loss": 0.6924201846122742, |
|
"epoch": 0.34957877332493503, |
|
"grad_norm": 70.25439806775886, |
|
"learning_rate": 9.831049710665904e-08, |
|
"logits": -2.153981924057007, |
|
"logps": -92.24577331542969, |
|
"loss": 0.0267, |
|
"objective": 0.03014095313847065, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.44999998807907104, |
|
"regularize": 0.030140016227960587, |
|
"step": 370 |
|
}, |
|
{ |
|
"dpo_loss": 0.6881352066993713, |
|
"epoch": 0.35430281080229903, |
|
"grad_norm": 67.1301497619948, |
|
"learning_rate": 9.820253000802345e-08, |
|
"logits": -2.1567375659942627, |
|
"logps": -90.48641204833984, |
|
"loss": 0.029, |
|
"objective": 0.034893397241830826, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 0.034892160445451736, |
|
"step": 375 |
|
}, |
|
{ |
|
"dpo_loss": 0.6959127187728882, |
|
"epoch": 0.359026848279663, |
|
"grad_norm": 65.92204147729184, |
|
"learning_rate": 9.809128215864095e-08, |
|
"logits": -2.1123626232147217, |
|
"logps": -90.6343002319336, |
|
"loss": 0.0274, |
|
"objective": 0.02818784862756729, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.028187256306409836, |
|
"step": 380 |
|
}, |
|
{ |
|
"dpo_loss": 0.6918838620185852, |
|
"epoch": 0.363750885757027, |
|
"grad_norm": 78.02594257500782, |
|
"learning_rate": 9.797676113023989e-08, |
|
"logits": -2.1536900997161865, |
|
"logps": -91.86727905273438, |
|
"loss": 0.0271, |
|
"objective": 0.024834012612700462, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.024833252653479576, |
|
"step": 385 |
|
}, |
|
{ |
|
"dpo_loss": 0.6891117095947266, |
|
"epoch": 0.368474923234391, |
|
"grad_norm": 77.62275101385076, |
|
"learning_rate": 9.785897471732711e-08, |
|
"logits": -2.220367193222046, |
|
"logps": -93.69184875488281, |
|
"loss": 0.0315, |
|
"objective": 0.04405975714325905, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.04405748099088669, |
|
"step": 390 |
|
}, |
|
{ |
|
"dpo_loss": 0.6931707859039307, |
|
"epoch": 0.37319896071175496, |
|
"grad_norm": 70.95977856029823, |
|
"learning_rate": 9.773793093665739e-08, |
|
"logits": -2.188248872756958, |
|
"logps": -90.29833984375, |
|
"loss": 0.0304, |
|
"objective": 0.0337492860853672, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.03374841436743736, |
|
"step": 395 |
|
}, |
|
{ |
|
"dpo_loss": 0.6871830224990845, |
|
"epoch": 0.37792299818911895, |
|
"grad_norm": 71.69739058358107, |
|
"learning_rate": 9.76136380266878e-08, |
|
"logits": -2.155177354812622, |
|
"logps": -92.64527130126953, |
|
"loss": 0.0315, |
|
"objective": 0.031953115016222, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.03195144981145859, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.37792299818911895, |
|
"eval_dpo_loss": 0.6926193833351135, |
|
"eval_logits": -1.9940829277038574, |
|
"eval_logps": -98.11278533935547, |
|
"eval_loss": 0.01653479039669037, |
|
"eval_objective": 0.01635783165693283, |
|
"eval_ranking_idealized": 0.6022099256515503, |
|
"eval_ranking_idealized_expo": 0.5207182168960571, |
|
"eval_ranking_simple": 0.5200276374816895, |
|
"eval_regularize": 0.01635659858584404, |
|
"eval_runtime": 446.4503, |
|
"eval_samples_per_second": 12.969, |
|
"eval_steps_per_second": 3.243, |
|
"step": 400 |
|
}, |
|
{ |
|
"dpo_loss": 0.6941009759902954, |
|
"epoch": 0.38264703566648295, |
|
"grad_norm": 62.224154885036064, |
|
"learning_rate": 9.748610444701694e-08, |
|
"logits": -2.1617021560668945, |
|
"logps": -90.76243591308594, |
|
"loss": 0.0293, |
|
"objective": 0.02944065071642399, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.02944008819758892, |
|
"step": 405 |
|
}, |
|
{ |
|
"dpo_loss": 0.6934362649917603, |
|
"epoch": 0.38737107314384694, |
|
"grad_norm": 71.86989684775978, |
|
"learning_rate": 9.735533887780928e-08, |
|
"logits": -2.1968331336975098, |
|
"logps": -94.97209930419922, |
|
"loss": 0.0284, |
|
"objective": 0.02905886620283127, |
|
"ranking_idealized": 0.46666666865348816, |
|
"ranking_idealized_expo": 0.4000000059604645, |
|
"ranking_simple": 0.4000000059604645, |
|
"regularize": 0.029057972133159637, |
|
"step": 410 |
|
}, |
|
{ |
|
"dpo_loss": 0.6933155059814453, |
|
"epoch": 0.39209511062121094, |
|
"grad_norm": 69.39777880403724, |
|
"learning_rate": 9.722135021920426e-08, |
|
"logits": -2.1606533527374268, |
|
"logps": -90.09014129638672, |
|
"loss": 0.0288, |
|
"objective": 0.025797124952077866, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 0.025796448811888695, |
|
"step": 415 |
|
}, |
|
{ |
|
"dpo_loss": 0.6879637837409973, |
|
"epoch": 0.39681914809857494, |
|
"grad_norm": 65.66563021960218, |
|
"learning_rate": 9.708414759071057e-08, |
|
"logits": -2.192812204360962, |
|
"logps": -90.60978698730469, |
|
"loss": 0.0293, |
|
"objective": 0.028554469347000122, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.4333333373069763, |
|
"ranking_simple": 0.4333333373069763, |
|
"regularize": 0.028553970158100128, |
|
"step": 420 |
|
}, |
|
{ |
|
"dpo_loss": 0.6905270218849182, |
|
"epoch": 0.4015431855759389, |
|
"grad_norm": 73.81480159017963, |
|
"learning_rate": 9.694374033058549e-08, |
|
"logits": -2.1698479652404785, |
|
"logps": -92.80254364013672, |
|
"loss": 0.03, |
|
"objective": 0.032098546624183655, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.0320979543030262, |
|
"step": 425 |
|
}, |
|
{ |
|
"dpo_loss": 0.6938761472702026, |
|
"epoch": 0.4062672230533029, |
|
"grad_norm": 76.38130749325458, |
|
"learning_rate": 9.680013799519926e-08, |
|
"logits": -2.2929608821868896, |
|
"logps": -92.20008087158203, |
|
"loss": 0.0332, |
|
"objective": 0.03666527569293976, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.036664899438619614, |
|
"step": 430 |
|
}, |
|
{ |
|
"dpo_loss": 0.6846203207969666, |
|
"epoch": 0.41099126053066687, |
|
"grad_norm": 62.016252142417606, |
|
"learning_rate": 9.665335035838468e-08, |
|
"logits": -2.1209442615509033, |
|
"logps": -91.99024963378906, |
|
"loss": 0.0308, |
|
"objective": 0.04143450781702995, |
|
"ranking_idealized": 0.7333333492279053, |
|
"ranking_idealized_expo": 0.6166666746139526, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.04143408685922623, |
|
"step": 435 |
|
}, |
|
{ |
|
"dpo_loss": 0.692919909954071, |
|
"epoch": 0.41571529800803086, |
|
"grad_norm": 83.01421114100998, |
|
"learning_rate": 9.650338741077189e-08, |
|
"logits": -2.1818275451660156, |
|
"logps": -88.64667510986328, |
|
"loss": 0.0282, |
|
"objective": 0.020624225959181786, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.02062271721661091, |
|
"step": 440 |
|
}, |
|
{ |
|
"dpo_loss": 0.6882209777832031, |
|
"epoch": 0.42043933548539486, |
|
"grad_norm": 69.91926112096503, |
|
"learning_rate": 9.635025935910839e-08, |
|
"logits": -2.078962564468384, |
|
"logps": -94.8196792602539, |
|
"loss": 0.03, |
|
"objective": 0.029924126341938972, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.6000000238418579, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.029923679307103157, |
|
"step": 445 |
|
}, |
|
{ |
|
"dpo_loss": 0.6905581951141357, |
|
"epoch": 0.42516337296275886, |
|
"grad_norm": 68.70387033094245, |
|
"learning_rate": 9.619397662556434e-08, |
|
"logits": -2.1093952655792236, |
|
"logps": -90.6502914428711, |
|
"loss": 0.0294, |
|
"objective": 0.028780171647667885, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.44999998807907104, |
|
"ranking_simple": 0.46666666865348816, |
|
"regularize": 0.028779106214642525, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.42516337296275886, |
|
"eval_dpo_loss": 0.6923617124557495, |
|
"eval_logits": -1.994999885559082, |
|
"eval_logps": -98.37866973876953, |
|
"eval_loss": 0.014490882866084576, |
|
"eval_objective": 0.014800351113080978, |
|
"eval_ranking_idealized": 0.6022099256515503, |
|
"eval_ranking_idealized_expo": 0.5207182168960571, |
|
"eval_ranking_simple": 0.5186464190483093, |
|
"eval_regularize": 0.014799040742218494, |
|
"eval_runtime": 444.0476, |
|
"eval_samples_per_second": 13.039, |
|
"eval_steps_per_second": 3.261, |
|
"step": 450 |
|
}, |
|
{ |
|
"dpo_loss": 0.6909880042076111, |
|
"epoch": 0.42988741044012285, |
|
"grad_norm": 68.38544757200873, |
|
"learning_rate": 9.60345498470232e-08, |
|
"logits": -2.158226728439331, |
|
"logps": -90.17542266845703, |
|
"loss": 0.0255, |
|
"objective": 0.02339431643486023, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.023393774405121803, |
|
"step": 455 |
|
}, |
|
{ |
|
"dpo_loss": 0.6882848739624023, |
|
"epoch": 0.4346114479174868, |
|
"grad_norm": 60.565560674058744, |
|
"learning_rate": 9.58719898743578e-08, |
|
"logits": -2.1875061988830566, |
|
"logps": -93.89222717285156, |
|
"loss": 0.0289, |
|
"objective": 0.03423256427049637, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.034231893718242645, |
|
"step": 460 |
|
}, |
|
{ |
|
"dpo_loss": 0.6911517977714539, |
|
"epoch": 0.4393354853948508, |
|
"grad_norm": 68.08241104925199, |
|
"learning_rate": 9.57063077716918e-08, |
|
"logits": -2.1419482231140137, |
|
"logps": -94.25173950195312, |
|
"loss": 0.0304, |
|
"objective": 0.02857878990471363, |
|
"ranking_idealized": 0.699999988079071, |
|
"ranking_idealized_expo": 0.6666666865348816, |
|
"ranking_simple": 0.6666666865348816, |
|
"regularize": 0.028578022494912148, |
|
"step": 465 |
|
}, |
|
{ |
|
"dpo_loss": 0.6899906396865845, |
|
"epoch": 0.4440595228722148, |
|
"grad_norm": 75.2887054128907, |
|
"learning_rate": 9.553751481564658e-08, |
|
"logits": -2.0578720569610596, |
|
"logps": -88.11711120605469, |
|
"loss": 0.0318, |
|
"objective": 0.02341555431485176, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.6166666746139526, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.02341252751648426, |
|
"step": 470 |
|
}, |
|
{ |
|
"dpo_loss": 0.6940017938613892, |
|
"epoch": 0.4487835603495788, |
|
"grad_norm": 61.894078510860375, |
|
"learning_rate": 9.536562249457386e-08, |
|
"logits": -2.1432507038116455, |
|
"logps": -91.78999328613281, |
|
"loss": 0.0276, |
|
"objective": 0.02927049808204174, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.029269874095916748, |
|
"step": 475 |
|
}, |
|
{ |
|
"dpo_loss": 0.6929585933685303, |
|
"epoch": 0.4535075978269428, |
|
"grad_norm": 69.75674817980894, |
|
"learning_rate": 9.51906425077736e-08, |
|
"logits": -2.1212713718414307, |
|
"logps": -91.61197662353516, |
|
"loss": 0.0303, |
|
"objective": 0.028644824400544167, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.4333333373069763, |
|
"ranking_simple": 0.44999998807907104, |
|
"regularize": 0.028642630204558372, |
|
"step": 480 |
|
}, |
|
{ |
|
"dpo_loss": 0.6894620656967163, |
|
"epoch": 0.4582316353043068, |
|
"grad_norm": 68.06555771230744, |
|
"learning_rate": 9.501258676469798e-08, |
|
"logits": -2.2252414226531982, |
|
"logps": -92.49252319335938, |
|
"loss": 0.0273, |
|
"objective": 0.029991615563631058, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.6000000238418579, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.02999037504196167, |
|
"step": 485 |
|
}, |
|
{ |
|
"dpo_loss": 0.6851038336753845, |
|
"epoch": 0.4629556727816707, |
|
"grad_norm": 68.81350143691881, |
|
"learning_rate": 9.483146738414056e-08, |
|
"logits": -2.1528584957122803, |
|
"logps": -93.01960754394531, |
|
"loss": 0.0298, |
|
"objective": 0.03543411195278168, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.03543229401111603, |
|
"step": 490 |
|
}, |
|
{ |
|
"dpo_loss": 0.6880256533622742, |
|
"epoch": 0.4676797102590347, |
|
"grad_norm": 63.11314302096046, |
|
"learning_rate": 9.46472966934116e-08, |
|
"logits": -2.1136722564697266, |
|
"logps": -90.93292999267578, |
|
"loss": 0.0346, |
|
"objective": 0.03396356850862503, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.03396301716566086, |
|
"step": 495 |
|
}, |
|
{ |
|
"dpo_loss": 0.6870742440223694, |
|
"epoch": 0.4724037477363987, |
|
"grad_norm": 73.04815070979407, |
|
"learning_rate": 9.446008722749906e-08, |
|
"logits": -2.2441928386688232, |
|
"logps": -95.81112670898438, |
|
"loss": 0.032, |
|
"objective": 0.026291660964488983, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.026291247457265854, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.4724037477363987, |
|
"eval_dpo_loss": 0.6924601793289185, |
|
"eval_logits": -1.9919774532318115, |
|
"eval_logps": -98.64570617675781, |
|
"eval_loss": 0.013912476599216461, |
|
"eval_objective": 0.0139460489153862, |
|
"eval_ranking_idealized": 0.6022099256515503, |
|
"eval_ranking_idealized_expo": 0.5207182168960571, |
|
"eval_ranking_simple": 0.519336998462677, |
|
"eval_regularize": 0.013944561593234539, |
|
"eval_runtime": 444.8021, |
|
"eval_samples_per_second": 13.017, |
|
"eval_steps_per_second": 3.255, |
|
"step": 500 |
|
}, |
|
{ |
|
"dpo_loss": 0.6906213164329529, |
|
"epoch": 0.4771277852137627, |
|
"grad_norm": 75.29236969807971, |
|
"learning_rate": 9.426985172821529e-08, |
|
"logits": -2.225041151046753, |
|
"logps": -90.76871490478516, |
|
"loss": 0.0313, |
|
"objective": 0.034964669495821, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.03496433049440384, |
|
"step": 505 |
|
}, |
|
{ |
|
"dpo_loss": 0.6928242444992065, |
|
"epoch": 0.4818518226911267, |
|
"grad_norm": 71.72745918834562, |
|
"learning_rate": 9.407660314333001e-08, |
|
"logits": -2.0290334224700928, |
|
"logps": -92.85369110107422, |
|
"loss": 0.0322, |
|
"objective": 0.039766810834407806, |
|
"ranking_idealized": 0.7166666388511658, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.03976641967892647, |
|
"step": 510 |
|
}, |
|
{ |
|
"dpo_loss": 0.6923685073852539, |
|
"epoch": 0.4865758601684907, |
|
"grad_norm": 61.131599755380016, |
|
"learning_rate": 9.388035462568891e-08, |
|
"logits": -2.147352933883667, |
|
"logps": -91.90682220458984, |
|
"loss": 0.0331, |
|
"objective": 0.035778481513261795, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.6000000238418579, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.035777974873781204, |
|
"step": 515 |
|
}, |
|
{ |
|
"dpo_loss": 0.6962333917617798, |
|
"epoch": 0.49129989764585463, |
|
"grad_norm": 65.42531713674678, |
|
"learning_rate": 9.368111953231848e-08, |
|
"logits": -2.1052534580230713, |
|
"logps": -92.43571472167969, |
|
"loss": 0.0302, |
|
"objective": 0.030195049941539764, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.6166666746139526, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.030194593593478203, |
|
"step": 520 |
|
}, |
|
{ |
|
"dpo_loss": 0.6958824396133423, |
|
"epoch": 0.49602393512321863, |
|
"grad_norm": 68.86518401985903, |
|
"learning_rate": 9.347891142351692e-08, |
|
"logits": -2.1327033042907715, |
|
"logps": -95.017578125, |
|
"loss": 0.0343, |
|
"objective": 0.033561404794454575, |
|
"ranking_idealized": 0.5166666507720947, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.44999998807907104, |
|
"regularize": 0.033560872077941895, |
|
"step": 525 |
|
}, |
|
{ |
|
"dpo_loss": 0.692583441734314, |
|
"epoch": 0.5007479726005827, |
|
"grad_norm": 68.03560111972747, |
|
"learning_rate": 9.327374406193124e-08, |
|
"logits": -2.1641759872436523, |
|
"logps": -92.0415267944336, |
|
"loss": 0.032, |
|
"objective": 0.03345762938261032, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.6000000238418579, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.03345634788274765, |
|
"step": 530 |
|
}, |
|
{ |
|
"dpo_loss": 0.6896530389785767, |
|
"epoch": 0.5054720100779466, |
|
"grad_norm": 66.90019999237873, |
|
"learning_rate": 9.306563141162044e-08, |
|
"logits": -2.1231565475463867, |
|
"logps": -91.51903533935547, |
|
"loss": 0.0298, |
|
"objective": 0.03302415460348129, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.03302332013845444, |
|
"step": 535 |
|
}, |
|
{ |
|
"dpo_loss": 0.6914986371994019, |
|
"epoch": 0.5101960475553106, |
|
"grad_norm": 58.738988827079176, |
|
"learning_rate": 9.285458763710523e-08, |
|
"logits": -2.147346019744873, |
|
"logps": -93.07068634033203, |
|
"loss": 0.0341, |
|
"objective": 0.03528103977441788, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 0.035279832780361176, |
|
"step": 540 |
|
}, |
|
{ |
|
"dpo_loss": 0.6926673650741577, |
|
"epoch": 0.5149200850326746, |
|
"grad_norm": 65.83406826541673, |
|
"learning_rate": 9.264062710240386e-08, |
|
"logits": -2.1894426345825195, |
|
"logps": -97.09349822998047, |
|
"loss": 0.0276, |
|
"objective": 0.029730303213000298, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.46666666865348816, |
|
"regularize": 0.02972940169274807, |
|
"step": 545 |
|
}, |
|
{ |
|
"dpo_loss": 0.6896089315414429, |
|
"epoch": 0.5196441225100386, |
|
"grad_norm": 80.74824222454775, |
|
"learning_rate": 9.242376437005448e-08, |
|
"logits": -2.1549692153930664, |
|
"logps": -93.35411834716797, |
|
"loss": 0.0314, |
|
"objective": 0.0288882777094841, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.028887782245874405, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.5196441225100386, |
|
"eval_dpo_loss": 0.6926965713500977, |
|
"eval_logits": -1.9942920207977295, |
|
"eval_logps": -98.96892547607422, |
|
"eval_loss": 0.013568516820669174, |
|
"eval_objective": 0.013540062122046947, |
|
"eval_ranking_idealized": 0.6022099256515503, |
|
"eval_ranking_idealized_expo": 0.5207182168960571, |
|
"eval_ranking_simple": 0.5186464190483093, |
|
"eval_regularize": 0.013538442552089691, |
|
"eval_runtime": 444.65, |
|
"eval_samples_per_second": 13.021, |
|
"eval_steps_per_second": 3.256, |
|
"step": 550 |
|
}, |
|
{ |
|
"dpo_loss": 0.6818323135375977, |
|
"epoch": 0.5243681599874026, |
|
"grad_norm": 71.45675055284568, |
|
"learning_rate": 9.22040142001241e-08, |
|
"logits": -2.1764817237854004, |
|
"logps": -92.4581298828125, |
|
"loss": 0.0336, |
|
"objective": 0.04190651327371597, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.04190612956881523, |
|
"step": 555 |
|
}, |
|
{ |
|
"dpo_loss": 0.6884815692901611, |
|
"epoch": 0.5290921974647665, |
|
"grad_norm": 72.10264430141908, |
|
"learning_rate": 9.198139154920388e-08, |
|
"logits": -2.2008354663848877, |
|
"logps": -90.6949234008789, |
|
"loss": 0.0344, |
|
"objective": 0.034483686089515686, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.46666666865348816, |
|
"regularize": 0.03448285162448883, |
|
"step": 560 |
|
}, |
|
{ |
|
"dpo_loss": 0.6909436583518982, |
|
"epoch": 0.5338162349421306, |
|
"grad_norm": 60.56883204825771, |
|
"learning_rate": 9.175591156939118e-08, |
|
"logits": -2.1834826469421387, |
|
"logps": -94.38992309570312, |
|
"loss": 0.03, |
|
"objective": 0.02786482684314251, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.02786322310566902, |
|
"step": 565 |
|
}, |
|
{ |
|
"dpo_loss": 0.6896558403968811, |
|
"epoch": 0.5385402724194945, |
|
"grad_norm": 63.27876500292638, |
|
"learning_rate": 9.152758960725829e-08, |
|
"logits": -2.0850472450256348, |
|
"logps": -90.94063568115234, |
|
"loss": 0.0305, |
|
"objective": 0.03306278958916664, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.03306075185537338, |
|
"step": 570 |
|
}, |
|
{ |
|
"dpo_loss": 0.6910984516143799, |
|
"epoch": 0.5432643098968585, |
|
"grad_norm": 64.97441175201224, |
|
"learning_rate": 9.129644120280797e-08, |
|
"logits": -2.215700387954712, |
|
"logps": -93.2086181640625, |
|
"loss": 0.0363, |
|
"objective": 0.03730526939034462, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.46666666865348816, |
|
"regularize": 0.03730436787009239, |
|
"step": 575 |
|
}, |
|
{ |
|
"dpo_loss": 0.6901772618293762, |
|
"epoch": 0.5479883473742225, |
|
"grad_norm": 60.84920518500226, |
|
"learning_rate": 9.106248208841567e-08, |
|
"logits": -2.077465534210205, |
|
"logps": -89.92863464355469, |
|
"loss": 0.0301, |
|
"objective": 0.029211556538939476, |
|
"ranking_idealized": 0.5166666507720947, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.46666666865348816, |
|
"regularize": 0.02920910157263279, |
|
"step": 580 |
|
}, |
|
{ |
|
"dpo_loss": 0.6912521123886108, |
|
"epoch": 0.5527123848515865, |
|
"grad_norm": 61.38401518242899, |
|
"learning_rate": 9.082572818775884e-08, |
|
"logits": -2.0964841842651367, |
|
"logps": -96.6317138671875, |
|
"loss": 0.0311, |
|
"objective": 0.0291235763579607, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.029122162610292435, |
|
"step": 585 |
|
}, |
|
{ |
|
"dpo_loss": 0.6879535913467407, |
|
"epoch": 0.5574364223289505, |
|
"grad_norm": 70.97177434877489, |
|
"learning_rate": 9.058619561473306e-08, |
|
"logits": -2.1359400749206543, |
|
"logps": -91.66080474853516, |
|
"loss": 0.0309, |
|
"objective": 0.0273025743663311, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.02730202116072178, |
|
"step": 590 |
|
}, |
|
{ |
|
"dpo_loss": 0.69169682264328, |
|
"epoch": 0.5621604598063145, |
|
"grad_norm": 66.67475961321956, |
|
"learning_rate": 9.034390067235538e-08, |
|
"logits": -2.122257947921753, |
|
"logps": -93.28813934326172, |
|
"loss": 0.0292, |
|
"objective": 0.03189357370138168, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.03189240023493767, |
|
"step": 595 |
|
}, |
|
{ |
|
"dpo_loss": 0.692051351070404, |
|
"epoch": 0.5668844972836784, |
|
"grad_norm": 66.82237885750858, |
|
"learning_rate": 9.009885985165465e-08, |
|
"logits": -2.1968979835510254, |
|
"logps": -91.0505599975586, |
|
"loss": 0.0311, |
|
"objective": 0.027313487604260445, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.027312906458973885, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.5668844972836784, |
|
"eval_dpo_loss": 0.6925215721130371, |
|
"eval_logits": -1.9967907667160034, |
|
"eval_logps": -98.12234497070312, |
|
"eval_loss": 0.014187943190336227, |
|
"eval_objective": 0.014395096339285374, |
|
"eval_ranking_idealized": 0.6022099256515503, |
|
"eval_ranking_idealized_expo": 0.5207182168960571, |
|
"eval_ranking_simple": 0.5186464190483093, |
|
"eval_regularize": 0.014393283054232597, |
|
"eval_runtime": 445.7504, |
|
"eval_samples_per_second": 12.989, |
|
"eval_steps_per_second": 3.248, |
|
"step": 600 |
|
}, |
|
{ |
|
"dpo_loss": 0.6929703950881958, |
|
"epoch": 0.5716085347610425, |
|
"grad_norm": 64.74275314805836, |
|
"learning_rate": 8.985108983054912e-08, |
|
"logits": -2.0810482501983643, |
|
"logps": -92.1054916381836, |
|
"loss": 0.0343, |
|
"objective": 0.03585705906152725, |
|
"ranking_idealized": 0.5, |
|
"ranking_idealized_expo": 0.4166666567325592, |
|
"ranking_simple": 0.4166666567325592, |
|
"regularize": 0.03585506230592728, |
|
"step": 605 |
|
}, |
|
{ |
|
"dpo_loss": 0.6893709897994995, |
|
"epoch": 0.5763325722384064, |
|
"grad_norm": 63.69006739310455, |
|
"learning_rate": 8.960060747271137e-08, |
|
"logits": -2.1485848426818848, |
|
"logps": -92.45893096923828, |
|
"loss": 0.0292, |
|
"objective": 0.03595684841275215, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 0.03595583513379097, |
|
"step": 610 |
|
}, |
|
{ |
|
"dpo_loss": 0.6894236207008362, |
|
"epoch": 0.5810566097157704, |
|
"grad_norm": 63.68171123239859, |
|
"learning_rate": 8.934742982642041e-08, |
|
"logits": -2.2213053703308105, |
|
"logps": -92.00927734375, |
|
"loss": 0.0289, |
|
"objective": 0.030653396621346474, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.4333333373069763, |
|
"ranking_simple": 0.44999998807907104, |
|
"regularize": 0.030652187764644623, |
|
"step": 615 |
|
}, |
|
{ |
|
"dpo_loss": 0.6907939314842224, |
|
"epoch": 0.5857806471931344, |
|
"grad_norm": 64.38726305381529, |
|
"learning_rate": 8.90915741234015e-08, |
|
"logits": -2.22101092338562, |
|
"logps": -93.67798614501953, |
|
"loss": 0.0293, |
|
"objective": 0.028055744245648384, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.028055019676685333, |
|
"step": 620 |
|
}, |
|
{ |
|
"dpo_loss": 0.6883565783500671, |
|
"epoch": 0.5905046846704984, |
|
"grad_norm": 69.11436679749976, |
|
"learning_rate": 8.883305777765317e-08, |
|
"logits": -2.095867395401001, |
|
"logps": -95.01261138916016, |
|
"loss": 0.0311, |
|
"objective": 0.033847175538539886, |
|
"ranking_idealized": 0.5166666507720947, |
|
"ranking_idealized_expo": 0.4333333373069763, |
|
"ranking_simple": 0.4333333373069763, |
|
"regularize": 0.03384659066796303, |
|
"step": 625 |
|
}, |
|
{ |
|
"dpo_loss": 0.6883829236030579, |
|
"epoch": 0.5952287221478624, |
|
"grad_norm": 74.04617527963917, |
|
"learning_rate": 8.857189838426216e-08, |
|
"logits": -2.183093547821045, |
|
"logps": -92.19212341308594, |
|
"loss": 0.0332, |
|
"objective": 0.03131110966205597, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.03130975365638733, |
|
"step": 630 |
|
}, |
|
{ |
|
"dpo_loss": 0.6901324987411499, |
|
"epoch": 0.5999527596252263, |
|
"grad_norm": 63.254018805089515, |
|
"learning_rate": 8.83081137182057e-08, |
|
"logits": -2.137653112411499, |
|
"logps": -92.25005340576172, |
|
"loss": 0.028, |
|
"objective": 0.02667616680264473, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.026675010100007057, |
|
"step": 635 |
|
}, |
|
{ |
|
"dpo_loss": 0.6854274868965149, |
|
"epoch": 0.6046767971025904, |
|
"grad_norm": 67.682861674831, |
|
"learning_rate": 8.804172173314183e-08, |
|
"logits": -2.1525957584381104, |
|
"logps": -96.51889038085938, |
|
"loss": 0.0305, |
|
"objective": 0.026471592485904694, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.46666666865348816, |
|
"regularize": 0.026470640674233437, |
|
"step": 640 |
|
}, |
|
{ |
|
"dpo_loss": 0.6916467547416687, |
|
"epoch": 0.6094008345799543, |
|
"grad_norm": 74.11851413570284, |
|
"learning_rate": 8.777274056018745e-08, |
|
"logits": -2.0791733264923096, |
|
"logps": -90.76611328125, |
|
"loss": 0.0275, |
|
"objective": 0.025933992117643356, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.4333333373069763, |
|
"ranking_simple": 0.4333333373069763, |
|
"regularize": 0.025932662189006805, |
|
"step": 645 |
|
}, |
|
{ |
|
"dpo_loss": 0.6861349940299988, |
|
"epoch": 0.6141248720573184, |
|
"grad_norm": 71.7083018593228, |
|
"learning_rate": 8.750118850668412e-08, |
|
"logits": -2.0774688720703125, |
|
"logps": -91.57192993164062, |
|
"loss": 0.0333, |
|
"objective": 0.03247459605336189, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.03247232735157013, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.6141248720573184, |
|
"eval_dpo_loss": 0.6926119327545166, |
|
"eval_logits": -1.993467926979065, |
|
"eval_logps": -98.69168853759766, |
|
"eval_loss": 0.014501783065497875, |
|
"eval_objective": 0.014641453512012959, |
|
"eval_ranking_idealized": 0.6022099256515503, |
|
"eval_ranking_idealized_expo": 0.5207182168960571, |
|
"eval_ranking_simple": 0.5179557800292969, |
|
"eval_regularize": 0.014639632776379585, |
|
"eval_runtime": 445.8707, |
|
"eval_samples_per_second": 12.986, |
|
"eval_steps_per_second": 3.248, |
|
"step": 650 |
|
}, |
|
{ |
|
"dpo_loss": 0.6902641654014587, |
|
"epoch": 0.6188489095346823, |
|
"grad_norm": 58.31607902985929, |
|
"learning_rate": 8.722708405495222e-08, |
|
"logits": -2.2487266063690186, |
|
"logps": -89.10828399658203, |
|
"loss": 0.0289, |
|
"objective": 0.02854420617222786, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.44999998807907104, |
|
"ranking_simple": 0.4333333373069763, |
|
"regularize": 0.028543464839458466, |
|
"step": 655 |
|
}, |
|
{ |
|
"dpo_loss": 0.6865178942680359, |
|
"epoch": 0.6235729470120462, |
|
"grad_norm": 65.43564652911088, |
|
"learning_rate": 8.695044586103295e-08, |
|
"logits": -2.105522394180298, |
|
"logps": -94.11585998535156, |
|
"loss": 0.0288, |
|
"objective": 0.027172502130270004, |
|
"ranking_idealized": 0.4333333373069763, |
|
"ranking_idealized_expo": 0.4000000059604645, |
|
"ranking_simple": 0.4000000059604645, |
|
"regularize": 0.027170367538928986, |
|
"step": 660 |
|
}, |
|
{ |
|
"dpo_loss": 0.6929230093955994, |
|
"epoch": 0.6282969844894103, |
|
"grad_norm": 75.08767896831156, |
|
"learning_rate": 8.667129275341853e-08, |
|
"logits": -2.261946201324463, |
|
"logps": -90.70641326904297, |
|
"loss": 0.036, |
|
"objective": 0.03217438980937004, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.03217388316988945, |
|
"step": 665 |
|
}, |
|
{ |
|
"dpo_loss": 0.6909863948822021, |
|
"epoch": 0.6330210219667742, |
|
"grad_norm": 61.63435170443301, |
|
"learning_rate": 8.638964373177073e-08, |
|
"logits": -2.0806498527526855, |
|
"logps": -93.46875762939453, |
|
"loss": 0.0283, |
|
"objective": 0.03122856095433235, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.03122722916305065, |
|
"step": 670 |
|
}, |
|
{ |
|
"dpo_loss": 0.6940844058990479, |
|
"epoch": 0.6377450594441383, |
|
"grad_norm": 65.12038155568429, |
|
"learning_rate": 8.610551796562768e-08, |
|
"logits": -2.2103471755981445, |
|
"logps": -92.73240661621094, |
|
"loss": 0.0319, |
|
"objective": 0.03719858080148697, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.03719812259078026, |
|
"step": 675 |
|
}, |
|
{ |
|
"dpo_loss": 0.6896507740020752, |
|
"epoch": 0.6424690969215022, |
|
"grad_norm": 65.07489196501477, |
|
"learning_rate": 8.581893479309924e-08, |
|
"logits": -2.2053842544555664, |
|
"logps": -93.24919128417969, |
|
"loss": 0.0265, |
|
"objective": 0.023518383502960205, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 0.023516135290265083, |
|
"step": 680 |
|
}, |
|
{ |
|
"dpo_loss": 0.6927950382232666, |
|
"epoch": 0.6471931343988663, |
|
"grad_norm": 70.32780445036671, |
|
"learning_rate": 8.552991371955072e-08, |
|
"logits": -2.296104907989502, |
|
"logps": -92.59764099121094, |
|
"loss": 0.0318, |
|
"objective": 0.034257274121046066, |
|
"ranking_idealized": 0.6833333373069763, |
|
"ranking_idealized_expo": 0.6166666746139526, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.03425610437989235, |
|
"step": 685 |
|
}, |
|
{ |
|
"dpo_loss": 0.692520797252655, |
|
"epoch": 0.6519171718762302, |
|
"grad_norm": 70.14652096802335, |
|
"learning_rate": 8.523847441627536e-08, |
|
"logits": -2.193286657333374, |
|
"logps": -94.4785385131836, |
|
"loss": 0.0326, |
|
"objective": 0.03835910186171532, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.4166666567325592, |
|
"ranking_simple": 0.4000000059604645, |
|
"regularize": 0.03835882246494293, |
|
"step": 690 |
|
}, |
|
{ |
|
"dpo_loss": 0.6892996430397034, |
|
"epoch": 0.6566412093535942, |
|
"grad_norm": 73.28164754238634, |
|
"learning_rate": 8.494463671915546e-08, |
|
"logits": -2.1629860401153564, |
|
"logps": -93.7652359008789, |
|
"loss": 0.0265, |
|
"objective": 0.026581525802612305, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.4166666567325592, |
|
"ranking_simple": 0.4000000059604645, |
|
"regularize": 0.026580767706036568, |
|
"step": 695 |
|
}, |
|
{ |
|
"dpo_loss": 0.6907955408096313, |
|
"epoch": 0.6613652468309582, |
|
"grad_norm": 66.01685046319234, |
|
"learning_rate": 8.464842062731234e-08, |
|
"logits": -2.2634246349334717, |
|
"logps": -91.12454986572266, |
|
"loss": 0.028, |
|
"objective": 0.028724508360028267, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.028722405433654785, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.6613652468309582, |
|
"eval_dpo_loss": 0.6929543018341064, |
|
"eval_logits": -1.9953092336654663, |
|
"eval_logps": -98.67767333984375, |
|
"eval_loss": 0.013822407461702824, |
|
"eval_objective": 0.014025083743035793, |
|
"eval_ranking_idealized": 0.6022099256515503, |
|
"eval_ranking_idealized_expo": 0.5207182168960571, |
|
"eval_ranking_simple": 0.519336998462677, |
|
"eval_regularize": 0.01402334589511156, |
|
"eval_runtime": 446.1644, |
|
"eval_samples_per_second": 12.977, |
|
"eval_steps_per_second": 3.245, |
|
"step": 700 |
|
}, |
|
{ |
|
"dpo_loss": 0.6904506683349609, |
|
"epoch": 0.6660892843083221, |
|
"grad_norm": 64.45584344371969, |
|
"learning_rate": 8.434984630174508e-08, |
|
"logits": -2.223440408706665, |
|
"logps": -94.05587005615234, |
|
"loss": 0.0302, |
|
"objective": 0.029637468978762627, |
|
"ranking_idealized": 0.6833333373069763, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.029636209830641747, |
|
"step": 705 |
|
}, |
|
{ |
|
"dpo_loss": 0.6879330277442932, |
|
"epoch": 0.6708133217856862, |
|
"grad_norm": 67.77900053910153, |
|
"learning_rate": 8.404893406395842e-08, |
|
"logits": -2.1772301197052, |
|
"logps": -93.94538879394531, |
|
"loss": 0.0323, |
|
"objective": 0.030688025057315826, |
|
"ranking_idealized": 0.5166666507720947, |
|
"ranking_idealized_expo": 0.4166666567325592, |
|
"ranking_simple": 0.4166666567325592, |
|
"regularize": 0.030687240883708, |
|
"step": 710 |
|
}, |
|
{ |
|
"dpo_loss": 0.6863754987716675, |
|
"epoch": 0.6755373592630501, |
|
"grad_norm": 78.62750388933773, |
|
"learning_rate": 8.37457043945796e-08, |
|
"logits": -2.1862614154815674, |
|
"logps": -88.71346282958984, |
|
"loss": 0.0319, |
|
"objective": 0.034725725650787354, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 0.03472534194588661, |
|
"step": 715 |
|
}, |
|
{ |
|
"dpo_loss": 0.6946022510528564, |
|
"epoch": 0.6802613967404142, |
|
"grad_norm": 63.01104309220956, |
|
"learning_rate": 8.344017793196442e-08, |
|
"logits": -2.1920392513275146, |
|
"logps": -90.14446258544922, |
|
"loss": 0.0265, |
|
"objective": 0.025683369487524033, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.025682510808110237, |
|
"step": 720 |
|
}, |
|
{ |
|
"dpo_loss": 0.6887207627296448, |
|
"epoch": 0.6849854342177781, |
|
"grad_norm": 75.04026894879733, |
|
"learning_rate": 8.313237547079252e-08, |
|
"logits": -2.10304594039917, |
|
"logps": -90.62553405761719, |
|
"loss": 0.0292, |
|
"objective": 0.029727067798376083, |
|
"ranking_idealized": 0.6833333373069763, |
|
"ranking_idealized_expo": 0.6166666746139526, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.0297266636043787, |
|
"step": 725 |
|
}, |
|
{ |
|
"dpo_loss": 0.6926788687705994, |
|
"epoch": 0.6897094716951421, |
|
"grad_norm": 67.22966096550593, |
|
"learning_rate": 8.282231796065213e-08, |
|
"logits": -2.1637871265411377, |
|
"logps": -91.91923522949219, |
|
"loss": 0.0265, |
|
"objective": 0.02672416716814041, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.026723742485046387, |
|
"step": 730 |
|
}, |
|
{ |
|
"dpo_loss": 0.688262403011322, |
|
"epoch": 0.6944335091725061, |
|
"grad_norm": 64.56167756628024, |
|
"learning_rate": 8.251002650461411e-08, |
|
"logits": -2.1801397800445557, |
|
"logps": -93.63780212402344, |
|
"loss": 0.0294, |
|
"objective": 0.029570966958999634, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 0.029570437967777252, |
|
"step": 735 |
|
}, |
|
{ |
|
"dpo_loss": 0.6889380216598511, |
|
"epoch": 0.6991575466498701, |
|
"grad_norm": 76.20522666744934, |
|
"learning_rate": 8.219552235779577e-08, |
|
"logits": -2.1762733459472656, |
|
"logps": -93.22509765625, |
|
"loss": 0.0341, |
|
"objective": 0.03592396527528763, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.035923395305871964, |
|
"step": 740 |
|
}, |
|
{ |
|
"dpo_loss": 0.6904739141464233, |
|
"epoch": 0.7038815841272341, |
|
"grad_norm": 61.170974083082186, |
|
"learning_rate": 8.187882692591406e-08, |
|
"logits": -2.148138999938965, |
|
"logps": -91.92343139648438, |
|
"loss": 0.0298, |
|
"objective": 0.027687864378094673, |
|
"ranking_idealized": 0.5, |
|
"ranking_idealized_expo": 0.4333333373069763, |
|
"ranking_simple": 0.4166666567325592, |
|
"regularize": 0.027687139809131622, |
|
"step": 745 |
|
}, |
|
{ |
|
"dpo_loss": 0.6924771666526794, |
|
"epoch": 0.7086056216045981, |
|
"grad_norm": 86.99376248944333, |
|
"learning_rate": 8.155996176382873e-08, |
|
"logits": -2.2314558029174805, |
|
"logps": -92.25162506103516, |
|
"loss": 0.0319, |
|
"objective": 0.033008575439453125, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 0.03300632908940315, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.7086056216045981, |
|
"eval_dpo_loss": 0.6925805807113647, |
|
"eval_logits": -1.9951562881469727, |
|
"eval_logps": -98.77120208740234, |
|
"eval_loss": 0.014676159247756004, |
|
"eval_objective": 0.014523538760840893, |
|
"eval_ranking_idealized": 0.6022099256515503, |
|
"eval_ranking_idealized_expo": 0.5207182168960571, |
|
"eval_ranking_simple": 0.5179557800292969, |
|
"eval_regularize": 0.014521739445626736, |
|
"eval_runtime": 446.8328, |
|
"eval_samples_per_second": 12.958, |
|
"eval_steps_per_second": 3.241, |
|
"step": 750 |
|
}, |
|
{ |
|
"dpo_loss": 0.6896133422851562, |
|
"epoch": 0.713329659081962, |
|
"grad_norm": 65.62950261910362, |
|
"learning_rate": 8.123894857407532e-08, |
|
"logits": -2.175105571746826, |
|
"logps": -92.83119201660156, |
|
"loss": 0.0297, |
|
"objective": 0.02945883385837078, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 0.029456783086061478, |
|
"step": 755 |
|
}, |
|
{ |
|
"dpo_loss": 0.6876399517059326, |
|
"epoch": 0.718053696559326, |
|
"grad_norm": 80.69728652803693, |
|
"learning_rate": 8.091580920538789e-08, |
|
"logits": -2.2073442935943604, |
|
"logps": -90.69680786132812, |
|
"loss": 0.0284, |
|
"objective": 0.029233213514089584, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.4333333373069763, |
|
"ranking_simple": 0.4333333373069763, |
|
"regularize": 0.029232406988739967, |
|
"step": 760 |
|
}, |
|
{ |
|
"dpo_loss": 0.6934041380882263, |
|
"epoch": 0.72277773403669, |
|
"grad_norm": 59.09750353750027, |
|
"learning_rate": 8.059056565121216e-08, |
|
"logits": -2.2103536128997803, |
|
"logps": -91.05927276611328, |
|
"loss": 0.0275, |
|
"objective": 0.026471644639968872, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.38333332538604736, |
|
"ranking_simple": 0.38333332538604736, |
|
"regularize": 0.02647002600133419, |
|
"step": 765 |
|
}, |
|
{ |
|
"dpo_loss": 0.6914010643959045, |
|
"epoch": 0.727501771514054, |
|
"grad_norm": 80.2131293813357, |
|
"learning_rate": 8.026324004820844e-08, |
|
"logits": -2.1993424892425537, |
|
"logps": -91.33180236816406, |
|
"loss": 0.0329, |
|
"objective": 0.03441242873668671, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.44999998807907104, |
|
"ranking_simple": 0.44999998807907104, |
|
"regularize": 0.03441200777888298, |
|
"step": 770 |
|
}, |
|
{ |
|
"dpo_loss": 0.693133533000946, |
|
"epoch": 0.732225808991418, |
|
"grad_norm": 65.96085059361222, |
|
"learning_rate": 7.993385467474502e-08, |
|
"logits": -2.2453505992889404, |
|
"logps": -94.63382720947266, |
|
"loss": 0.0376, |
|
"objective": 0.032590463757514954, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 0.032589759677648544, |
|
"step": 775 |
|
}, |
|
{ |
|
"dpo_loss": 0.6881453394889832, |
|
"epoch": 0.736949846468782, |
|
"grad_norm": 66.36692143180971, |
|
"learning_rate": 7.960243194938191e-08, |
|
"logits": -2.1516549587249756, |
|
"logps": -94.29581451416016, |
|
"loss": 0.0322, |
|
"objective": 0.030854353681206703, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.44999998807907104, |
|
"ranking_simple": 0.44999998807907104, |
|
"regularize": 0.030853325501084328, |
|
"step": 780 |
|
}, |
|
{ |
|
"dpo_loss": 0.6893908381462097, |
|
"epoch": 0.741673883946146, |
|
"grad_norm": 73.24038753014604, |
|
"learning_rate": 7.926899442934488e-08, |
|
"logits": -2.1456098556518555, |
|
"logps": -93.58820343017578, |
|
"loss": 0.0301, |
|
"objective": 0.030529705807566643, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.030527813360095024, |
|
"step": 785 |
|
}, |
|
{ |
|
"dpo_loss": 0.6873824000358582, |
|
"epoch": 0.7463979214235099, |
|
"grad_norm": 61.386663619909456, |
|
"learning_rate": 7.893356480899029e-08, |
|
"logits": -2.202815055847168, |
|
"logps": -89.73310089111328, |
|
"loss": 0.0284, |
|
"objective": 0.026303457096219063, |
|
"ranking_idealized": 0.7166666388511658, |
|
"ranking_idealized_expo": 0.6333333253860474, |
|
"ranking_simple": 0.6333333253860474, |
|
"regularize": 0.02630232647061348, |
|
"step": 790 |
|
}, |
|
{ |
|
"dpo_loss": 0.6941797733306885, |
|
"epoch": 0.751121958900874, |
|
"grad_norm": 74.12319085244714, |
|
"learning_rate": 7.85961659182604e-08, |
|
"logits": -2.2534494400024414, |
|
"logps": -92.2616195678711, |
|
"loss": 0.0317, |
|
"objective": 0.028791796416044235, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.6166666746139526, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.028791317716240883, |
|
"step": 795 |
|
}, |
|
{ |
|
"dpo_loss": 0.6879761815071106, |
|
"epoch": 0.7558459963782379, |
|
"grad_norm": 66.76187019896828, |
|
"learning_rate": 7.825682072112959e-08, |
|
"logits": -2.152491807937622, |
|
"logps": -90.43921661376953, |
|
"loss": 0.0297, |
|
"objective": 0.031770989298820496, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.031770527362823486, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.7558459963782379, |
|
"eval_dpo_loss": 0.6929048299789429, |
|
"eval_logits": -1.9949605464935303, |
|
"eval_logps": -98.13481903076172, |
|
"eval_loss": 0.015697013586759567, |
|
"eval_objective": 0.016285618767142296, |
|
"eval_ranking_idealized": 0.6022099256515503, |
|
"eval_ranking_idealized_expo": 0.5207182168960571, |
|
"eval_ranking_simple": 0.5186464190483093, |
|
"eval_regularize": 0.01628427766263485, |
|
"eval_runtime": 446.756, |
|
"eval_samples_per_second": 12.96, |
|
"eval_steps_per_second": 3.241, |
|
"step": 800 |
|
}, |
|
{ |
|
"dpo_loss": 0.6914661526679993, |
|
"epoch": 0.760570033855602, |
|
"grad_norm": 61.85563229584601, |
|
"learning_rate": 7.79155523140413e-08, |
|
"logits": -2.1741960048675537, |
|
"logps": -94.34522247314453, |
|
"loss": 0.0341, |
|
"objective": 0.03409460559487343, |
|
"ranking_idealized": 0.46666666865348816, |
|
"ranking_idealized_expo": 0.38333332538604736, |
|
"ranking_simple": 0.38333332538604736, |
|
"regularize": 0.03409397229552269, |
|
"step": 805 |
|
}, |
|
{ |
|
"dpo_loss": 0.6910237669944763, |
|
"epoch": 0.7652940713329659, |
|
"grad_norm": 66.02922525617878, |
|
"learning_rate": 7.757238392433613e-08, |
|
"logits": -2.218034267425537, |
|
"logps": -91.0445327758789, |
|
"loss": 0.0309, |
|
"objective": 0.02644220180809498, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.026441721245646477, |
|
"step": 810 |
|
}, |
|
{ |
|
"dpo_loss": 0.6903732419013977, |
|
"epoch": 0.77001810881033, |
|
"grad_norm": 71.25069648841824, |
|
"learning_rate": 7.722733890867088e-08, |
|
"logits": -2.13299298286438, |
|
"logps": -94.09717559814453, |
|
"loss": 0.0306, |
|
"objective": 0.027522355318069458, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.027521483600139618, |
|
"step": 815 |
|
}, |
|
{ |
|
"dpo_loss": 0.692348062992096, |
|
"epoch": 0.7747421462876939, |
|
"grad_norm": 62.71232207694529, |
|
"learning_rate": 7.688044075142886e-08, |
|
"logits": -2.2638330459594727, |
|
"logps": -89.2739486694336, |
|
"loss": 0.0265, |
|
"objective": 0.024408848956227303, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.024408036842942238, |
|
"step": 820 |
|
}, |
|
{ |
|
"dpo_loss": 0.6927106976509094, |
|
"epoch": 0.7794661837650578, |
|
"grad_norm": 68.11094417791882, |
|
"learning_rate": 7.653171306312161e-08, |
|
"logits": -2.155310869216919, |
|
"logps": -92.2811508178711, |
|
"loss": 0.0314, |
|
"objective": 0.029093213379383087, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 0.029092345386743546, |
|
"step": 825 |
|
}, |
|
{ |
|
"dpo_loss": 0.6911001801490784, |
|
"epoch": 0.7841902212424219, |
|
"grad_norm": 66.75059983982347, |
|
"learning_rate": 7.618117957878178e-08, |
|
"logits": -2.236713409423828, |
|
"logps": -93.50963592529297, |
|
"loss": 0.0363, |
|
"objective": 0.033676620572805405, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 0.03367554768919945, |
|
"step": 830 |
|
}, |
|
{ |
|
"dpo_loss": 0.6890848278999329, |
|
"epoch": 0.7889142587197858, |
|
"grad_norm": 59.842310291910785, |
|
"learning_rate": 7.582886415634773e-08, |
|
"logits": -2.1434099674224854, |
|
"logps": -89.62670135498047, |
|
"loss": 0.0261, |
|
"objective": 0.032363876700401306, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.032363247126340866, |
|
"step": 835 |
|
}, |
|
{ |
|
"dpo_loss": 0.6889583468437195, |
|
"epoch": 0.7936382961971499, |
|
"grad_norm": 60.528801427412084, |
|
"learning_rate": 7.547479077503975e-08, |
|
"logits": -2.0931692123413086, |
|
"logps": -90.27711486816406, |
|
"loss": 0.0286, |
|
"objective": 0.032331857830286026, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.03233127295970917, |
|
"step": 840 |
|
}, |
|
{ |
|
"dpo_loss": 0.6943262815475464, |
|
"epoch": 0.7983623336745138, |
|
"grad_norm": 66.0119748602127, |
|
"learning_rate": 7.511898353372797e-08, |
|
"logits": -2.21136212348938, |
|
"logps": -91.64664459228516, |
|
"loss": 0.0279, |
|
"objective": 0.03553476184606552, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.03553430363535881, |
|
"step": 845 |
|
}, |
|
{ |
|
"dpo_loss": 0.6893811821937561, |
|
"epoch": 0.8030863711518778, |
|
"grad_norm": 64.38574308982342, |
|
"learning_rate": 7.476146664929213e-08, |
|
"logits": -2.2435154914855957, |
|
"logps": -92.36274719238281, |
|
"loss": 0.0286, |
|
"objective": 0.0326698012650013, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.44999998807907104, |
|
"ranking_simple": 0.46666666865348816, |
|
"regularize": 0.03266819566488266, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.8030863711518778, |
|
"eval_dpo_loss": 0.6928204894065857, |
|
"eval_logits": -1.9953876733779907, |
|
"eval_logps": -98.59400939941406, |
|
"eval_loss": 0.012405806221067905, |
|
"eval_objective": 0.012502364814281464, |
|
"eval_ranking_idealized": 0.6022099256515503, |
|
"eval_ranking_idealized_expo": 0.5207182168960571, |
|
"eval_ranking_simple": 0.5172652006149292, |
|
"eval_regularize": 0.012500518001616001, |
|
"eval_runtime": 453.0739, |
|
"eval_samples_per_second": 12.779, |
|
"eval_steps_per_second": 3.196, |
|
"step": 850 |
|
}, |
|
{ |
|
"dpo_loss": 0.6887614130973816, |
|
"epoch": 0.8078104086292418, |
|
"grad_norm": 68.55505260723746, |
|
"learning_rate": 7.440226445497333e-08, |
|
"logits": -2.201233386993408, |
|
"logps": -92.58832550048828, |
|
"loss": 0.0274, |
|
"objective": 0.026876337826251984, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.0268756952136755, |
|
"step": 855 |
|
}, |
|
{ |
|
"dpo_loss": 0.6904102563858032, |
|
"epoch": 0.8125344461066057, |
|
"grad_norm": 71.78389198214526, |
|
"learning_rate": 7.404140139871796e-08, |
|
"logits": -2.231065273284912, |
|
"logps": -95.48096466064453, |
|
"loss": 0.0317, |
|
"objective": 0.03271006420254707, |
|
"ranking_idealized": 0.6833333373069763, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.032709524035453796, |
|
"step": 860 |
|
}, |
|
{ |
|
"dpo_loss": 0.6954269409179688, |
|
"epoch": 0.8172584835839698, |
|
"grad_norm": 66.09070940831865, |
|
"learning_rate": 7.36789020415136e-08, |
|
"logits": -2.1372532844543457, |
|
"logps": -91.0411605834961, |
|
"loss": 0.027, |
|
"objective": 0.03049122728407383, |
|
"ranking_idealized": 0.7166666388511658, |
|
"ranking_idealized_expo": 0.6000000238418579, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.03049064427614212, |
|
"step": 865 |
|
}, |
|
{ |
|
"dpo_loss": 0.6953790187835693, |
|
"epoch": 0.8219825210613337, |
|
"grad_norm": 61.58311436161328, |
|
"learning_rate": 7.331479105571739e-08, |
|
"logits": -2.1517558097839355, |
|
"logps": -88.9326171875, |
|
"loss": 0.0281, |
|
"objective": 0.02535523846745491, |
|
"ranking_idealized": 0.5, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.025354215875267982, |
|
"step": 870 |
|
}, |
|
{ |
|
"dpo_loss": 0.6913577318191528, |
|
"epoch": 0.8267065585386978, |
|
"grad_norm": 61.45457019245544, |
|
"learning_rate": 7.294909322337688e-08, |
|
"logits": -2.0830719470977783, |
|
"logps": -95.3221664428711, |
|
"loss": 0.0267, |
|
"objective": 0.02792440913617611, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.6000000238418579, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.0279233455657959, |
|
"step": 875 |
|
}, |
|
{ |
|
"dpo_loss": 0.6916779279708862, |
|
"epoch": 0.8314305960160617, |
|
"grad_norm": 69.76363049930384, |
|
"learning_rate": 7.258183343454319e-08, |
|
"logits": -2.276218891143799, |
|
"logps": -91.77556610107422, |
|
"loss": 0.029, |
|
"objective": 0.02677006646990776, |
|
"ranking_idealized": 0.699999988079071, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.02676946483552456, |
|
"step": 880 |
|
}, |
|
{ |
|
"dpo_loss": 0.6923359036445618, |
|
"epoch": 0.8361546334934257, |
|
"grad_norm": 65.0950944876437, |
|
"learning_rate": 7.221303668557696e-08, |
|
"logits": -2.1599981784820557, |
|
"logps": -90.72624969482422, |
|
"loss": 0.025, |
|
"objective": 0.026336384937167168, |
|
"ranking_idealized": 0.5166666507720947, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.026335686445236206, |
|
"step": 885 |
|
}, |
|
{ |
|
"dpo_loss": 0.692168116569519, |
|
"epoch": 0.8408786709707897, |
|
"grad_norm": 63.609027029284135, |
|
"learning_rate": 7.184272807744725e-08, |
|
"logits": -2.1683857440948486, |
|
"logps": -92.93081665039062, |
|
"loss": 0.0278, |
|
"objective": 0.03211880847811699, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.03211786970496178, |
|
"step": 890 |
|
}, |
|
{ |
|
"dpo_loss": 0.6949544548988342, |
|
"epoch": 0.8456027084481537, |
|
"grad_norm": 71.19347753450685, |
|
"learning_rate": 7.147093281402281e-08, |
|
"logits": -2.2566373348236084, |
|
"logps": -91.25569915771484, |
|
"loss": 0.0287, |
|
"objective": 0.022946473211050034, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 0.022945420816540718, |
|
"step": 895 |
|
}, |
|
{ |
|
"dpo_loss": 0.6891117095947266, |
|
"epoch": 0.8503267459255177, |
|
"grad_norm": 73.13390434400947, |
|
"learning_rate": 7.109767620035688e-08, |
|
"logits": -2.1637258529663086, |
|
"logps": -95.48709869384766, |
|
"loss": 0.0285, |
|
"objective": 0.030570391565561295, |
|
"ranking_idealized": 0.7166666388511658, |
|
"ranking_idealized_expo": 0.6499999761581421, |
|
"ranking_simple": 0.6499999761581421, |
|
"regularize": 0.030569853261113167, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.8503267459255177, |
|
"eval_dpo_loss": 0.69291752576828, |
|
"eval_logits": -1.993115782737732, |
|
"eval_logps": -98.94220733642578, |
|
"eval_loss": 0.011713932268321514, |
|
"eval_objective": 0.011828156188130379, |
|
"eval_ranking_idealized": 0.6022099256515503, |
|
"eval_ranking_idealized_expo": 0.5207182168960571, |
|
"eval_ranking_simple": 0.5165745615959167, |
|
"eval_regularize": 0.01182604394853115, |
|
"eval_runtime": 445.945, |
|
"eval_samples_per_second": 12.984, |
|
"eval_steps_per_second": 3.247, |
|
"step": 900 |
|
}, |
|
{ |
|
"dpo_loss": 0.6910974383354187, |
|
"epoch": 0.8550507834028817, |
|
"grad_norm": 72.76821991685014, |
|
"learning_rate": 7.072298364096485e-08, |
|
"logits": -2.094447374343872, |
|
"logps": -89.45642852783203, |
|
"loss": 0.0266, |
|
"objective": 0.027411019429564476, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.02741014026105404, |
|
"step": 905 |
|
}, |
|
{ |
|
"dpo_loss": 0.6891763806343079, |
|
"epoch": 0.8597748208802457, |
|
"grad_norm": 71.26536161303537, |
|
"learning_rate": 7.034688063809511e-08, |
|
"logits": -2.1282496452331543, |
|
"logps": -91.80699157714844, |
|
"loss": 0.0298, |
|
"objective": 0.025737237185239792, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.025736594572663307, |
|
"step": 910 |
|
}, |
|
{ |
|
"dpo_loss": 0.6862377524375916, |
|
"epoch": 0.8644988583576096, |
|
"grad_norm": 69.04287293810181, |
|
"learning_rate": 6.996939278999337e-08, |
|
"logits": -2.152179479598999, |
|
"logps": -94.09297180175781, |
|
"loss": 0.0263, |
|
"objective": 0.027985723689198494, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 0.02798387221992016, |
|
"step": 915 |
|
}, |
|
{ |
|
"dpo_loss": 0.690563440322876, |
|
"epoch": 0.8692228958349736, |
|
"grad_norm": 60.9102612710031, |
|
"learning_rate": 6.959054578916042e-08, |
|
"logits": -2.1106715202331543, |
|
"logps": -90.7065658569336, |
|
"loss": 0.0281, |
|
"objective": 0.02792646363377571, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.02792549505829811, |
|
"step": 920 |
|
}, |
|
{ |
|
"dpo_loss": 0.6911750435829163, |
|
"epoch": 0.8739469333123376, |
|
"grad_norm": 62.2660390306647, |
|
"learning_rate": 6.921036542060343e-08, |
|
"logits": -1.9987537860870361, |
|
"logps": -89.90222930908203, |
|
"loss": 0.0251, |
|
"objective": 0.01876661367714405, |
|
"ranking_idealized": 0.6833333373069763, |
|
"ranking_idealized_expo": 0.6333333253860474, |
|
"ranking_simple": 0.6333333253860474, |
|
"regularize": 0.018765147775411606, |
|
"step": 925 |
|
}, |
|
{ |
|
"dpo_loss": 0.6897457242012024, |
|
"epoch": 0.8786709707897016, |
|
"grad_norm": 59.1678071537183, |
|
"learning_rate": 6.882887756008093e-08, |
|
"logits": -2.111668825149536, |
|
"logps": -87.85643768310547, |
|
"loss": 0.0257, |
|
"objective": 0.02605438232421875, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.026053914800286293, |
|
"step": 930 |
|
}, |
|
{ |
|
"dpo_loss": 0.6957460045814514, |
|
"epoch": 0.8833950082670656, |
|
"grad_norm": 64.76262904725678, |
|
"learning_rate": 6.844610817234172e-08, |
|
"logits": -2.093857765197754, |
|
"logps": -92.8622055053711, |
|
"loss": 0.0273, |
|
"objective": 0.027011338621377945, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.46666666865348816, |
|
"regularize": 0.027010958641767502, |
|
"step": 935 |
|
}, |
|
{ |
|
"dpo_loss": 0.6905789971351624, |
|
"epoch": 0.8881190457444296, |
|
"grad_norm": 70.7130564090627, |
|
"learning_rate": 6.806208330935765e-08, |
|
"logits": -2.2473738193511963, |
|
"logps": -91.84986114501953, |
|
"loss": 0.0268, |
|
"objective": 0.027645627036690712, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.02764386683702469, |
|
"step": 940 |
|
}, |
|
{ |
|
"dpo_loss": 0.6940723061561584, |
|
"epoch": 0.8928430832217935, |
|
"grad_norm": 60.88887279226543, |
|
"learning_rate": 6.767682910855045e-08, |
|
"logits": -2.287950038909912, |
|
"logps": -89.53514862060547, |
|
"loss": 0.0261, |
|
"objective": 0.028092078864574432, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.46666666865348816, |
|
"regularize": 0.0280916728079319, |
|
"step": 945 |
|
}, |
|
{ |
|
"dpo_loss": 0.694039523601532, |
|
"epoch": 0.8975671206991576, |
|
"grad_norm": 67.50050200668711, |
|
"learning_rate": 6.729037179101287e-08, |
|
"logits": -2.304736614227295, |
|
"logps": -93.9173812866211, |
|
"loss": 0.0248, |
|
"objective": 0.02201911062002182, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 0.02201777510344982, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.8975671206991576, |
|
"eval_dpo_loss": 0.6931836009025574, |
|
"eval_logits": -1.9902262687683105, |
|
"eval_logps": -98.64472198486328, |
|
"eval_loss": 0.015600171871483326, |
|
"eval_objective": 0.015454174019396305, |
|
"eval_ranking_idealized": 0.6022099256515503, |
|
"eval_ranking_idealized_expo": 0.5207182168960571, |
|
"eval_ranking_simple": 0.5172652006149292, |
|
"eval_regularize": 0.015452706255018711, |
|
"eval_runtime": 446.0966, |
|
"eval_samples_per_second": 12.979, |
|
"eval_steps_per_second": 3.246, |
|
"step": 950 |
|
}, |
|
{ |
|
"dpo_loss": 0.6894887089729309, |
|
"epoch": 0.9022911581765215, |
|
"grad_norm": 62.475301521026935, |
|
"learning_rate": 6.690273765972383e-08, |
|
"logits": -2.1381261348724365, |
|
"logps": -90.50852966308594, |
|
"loss": 0.0247, |
|
"objective": 0.029864691197872162, |
|
"ranking_idealized": 0.7166666388511658, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.02986370399594307, |
|
"step": 955 |
|
}, |
|
{ |
|
"dpo_loss": 0.6904736161231995, |
|
"epoch": 0.9070151956538856, |
|
"grad_norm": 71.36008054276485, |
|
"learning_rate": 6.651395309775836e-08, |
|
"logits": -2.161102294921875, |
|
"logps": -94.71805572509766, |
|
"loss": 0.0273, |
|
"objective": 0.03055974654853344, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.03055933117866516, |
|
"step": 960 |
|
}, |
|
{ |
|
"dpo_loss": 0.6911565661430359, |
|
"epoch": 0.9117392331312495, |
|
"grad_norm": 75.9289905899972, |
|
"learning_rate": 6.612404456649187e-08, |
|
"logits": -2.174187660217285, |
|
"logps": -90.80412292480469, |
|
"loss": 0.0255, |
|
"objective": 0.02420150302350521, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.024200933054089546, |
|
"step": 965 |
|
}, |
|
{ |
|
"dpo_loss": 0.6908090114593506, |
|
"epoch": 0.9164632706086135, |
|
"grad_norm": 66.41988248188461, |
|
"learning_rate": 6.573303860379914e-08, |
|
"logits": -2.258518695831299, |
|
"logps": -91.59303283691406, |
|
"loss": 0.0271, |
|
"objective": 0.02120455540716648, |
|
"ranking_idealized": 0.5, |
|
"ranking_idealized_expo": 0.44999998807907104, |
|
"ranking_simple": 0.4333333373069763, |
|
"regularize": 0.02120377868413925, |
|
"step": 970 |
|
}, |
|
{ |
|
"dpo_loss": 0.686578631401062, |
|
"epoch": 0.9211873080859775, |
|
"grad_norm": 68.57226169783617, |
|
"learning_rate": 6.534096182224808e-08, |
|
"logits": -2.0389044284820557, |
|
"logps": -94.76436614990234, |
|
"loss": 0.0299, |
|
"objective": 0.029232459142804146, |
|
"ranking_idealized": 0.4833333194255829, |
|
"ranking_idealized_expo": 0.4000000059604645, |
|
"ranking_simple": 0.4000000059604645, |
|
"regularize": 0.029231999069452286, |
|
"step": 975 |
|
}, |
|
{ |
|
"dpo_loss": 0.6919539570808411, |
|
"epoch": 0.9259113455633414, |
|
"grad_norm": 69.41679502492849, |
|
"learning_rate": 6.494784090728851e-08, |
|
"logits": -2.2500946521759033, |
|
"logps": -96.09117126464844, |
|
"loss": 0.0262, |
|
"objective": 0.026704249903559685, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.0267037320882082, |
|
"step": 980 |
|
}, |
|
{ |
|
"dpo_loss": 0.6925919055938721, |
|
"epoch": 0.9306353830407055, |
|
"grad_norm": 64.18653487464074, |
|
"learning_rate": 6.455370261543578e-08, |
|
"logits": -2.1756606101989746, |
|
"logps": -93.87403106689453, |
|
"loss": 0.0251, |
|
"objective": 0.025608109310269356, |
|
"ranking_idealized": 0.6833333373069763, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.025606883689761162, |
|
"step": 985 |
|
}, |
|
{ |
|
"dpo_loss": 0.6908634901046753, |
|
"epoch": 0.9353594205180694, |
|
"grad_norm": 61.06423988779827, |
|
"learning_rate": 6.415857377244979e-08, |
|
"logits": -2.1095356941223145, |
|
"logps": -88.83150482177734, |
|
"loss": 0.0251, |
|
"objective": 0.026077650487422943, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.02607729658484459, |
|
"step": 990 |
|
}, |
|
{ |
|
"dpo_loss": 0.689622700214386, |
|
"epoch": 0.9400834579954335, |
|
"grad_norm": 68.1910690637634, |
|
"learning_rate": 6.376248127150908e-08, |
|
"logits": -2.150278329849243, |
|
"logps": -91.8506088256836, |
|
"loss": 0.0269, |
|
"objective": 0.025722531601786613, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.025721782818436623, |
|
"step": 995 |
|
}, |
|
{ |
|
"dpo_loss": 0.6903151273727417, |
|
"epoch": 0.9448074954727974, |
|
"grad_norm": 64.10384868193276, |
|
"learning_rate": 6.33654520713805e-08, |
|
"logits": -2.1098899841308594, |
|
"logps": -93.50405883789062, |
|
"loss": 0.0272, |
|
"objective": 0.02512853965163231, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.0251275934278965, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.9448074954727974, |
|
"eval_dpo_loss": 0.6931213736534119, |
|
"eval_logits": -1.9906424283981323, |
|
"eval_logps": -98.12418365478516, |
|
"eval_loss": 0.01257664430886507, |
|
"eval_objective": 0.012785565108060837, |
|
"eval_ranking_idealized": 0.6022099256515503, |
|
"eval_ranking_idealized_expo": 0.5207182168960571, |
|
"eval_ranking_simple": 0.5179557800292969, |
|
"eval_regularize": 0.012783819809556007, |
|
"eval_runtime": 444.658, |
|
"eval_samples_per_second": 13.021, |
|
"eval_steps_per_second": 3.256, |
|
"step": 1000 |
|
}, |
|
{ |
|
"dpo_loss": 0.6847264170646667, |
|
"epoch": 0.9495315329501613, |
|
"grad_norm": 68.90168504337811, |
|
"learning_rate": 6.296751319458434e-08, |
|
"logits": -2.1259357929229736, |
|
"logps": -91.41114044189453, |
|
"loss": 0.0298, |
|
"objective": 0.03322311118245125, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.03322221338748932, |
|
"step": 1005 |
|
}, |
|
{ |
|
"dpo_loss": 0.6920087933540344, |
|
"epoch": 0.9542555704275254, |
|
"grad_norm": 64.89339434605618, |
|
"learning_rate": 6.256869172555513e-08, |
|
"logits": -2.1444926261901855, |
|
"logps": -91.69261932373047, |
|
"loss": 0.0264, |
|
"objective": 0.02593044377863407, |
|
"ranking_idealized": 0.4833333194255829, |
|
"ranking_idealized_expo": 0.44999998807907104, |
|
"ranking_simple": 0.4333333373069763, |
|
"regularize": 0.025929344817996025, |
|
"step": 1010 |
|
}, |
|
{ |
|
"dpo_loss": 0.6885382533073425, |
|
"epoch": 0.9589796079048893, |
|
"grad_norm": 71.3368687595913, |
|
"learning_rate": 6.216901480879819e-08, |
|
"logits": -2.0881664752960205, |
|
"logps": -90.8614501953125, |
|
"loss": 0.0236, |
|
"objective": 0.021669141948223114, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.021668575704097748, |
|
"step": 1015 |
|
}, |
|
{ |
|
"dpo_loss": 0.6919686794281006, |
|
"epoch": 0.9637036453822534, |
|
"grad_norm": 68.26105061311512, |
|
"learning_rate": 6.176850964704212e-08, |
|
"logits": -2.129828453063965, |
|
"logps": -95.18238830566406, |
|
"loss": 0.0226, |
|
"objective": 0.025728456676006317, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.02572786808013916, |
|
"step": 1020 |
|
}, |
|
{ |
|
"dpo_loss": 0.6866704225540161, |
|
"epoch": 0.9684276828596173, |
|
"grad_norm": 61.40903124064667, |
|
"learning_rate": 6.136720349938743e-08, |
|
"logits": -2.2576215267181396, |
|
"logps": -94.10470581054688, |
|
"loss": 0.0257, |
|
"objective": 0.022323768585920334, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.022323111072182655, |
|
"step": 1025 |
|
}, |
|
{ |
|
"dpo_loss": 0.69089674949646, |
|
"epoch": 0.9731517203369814, |
|
"grad_norm": 63.691297756435006, |
|
"learning_rate": 6.096512367945113e-08, |
|
"logits": -2.113276243209839, |
|
"logps": -90.31819152832031, |
|
"loss": 0.0244, |
|
"objective": 0.022346744313836098, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.02234587073326111, |
|
"step": 1030 |
|
}, |
|
{ |
|
"dpo_loss": 0.6910406351089478, |
|
"epoch": 0.9778757578143453, |
|
"grad_norm": 65.44632044043543, |
|
"learning_rate": 6.056229755350772e-08, |
|
"logits": -2.147958517074585, |
|
"logps": -93.92337036132812, |
|
"loss": 0.0234, |
|
"objective": 0.02298605814576149, |
|
"ranking_idealized": 0.6833333373069763, |
|
"ranking_idealized_expo": 0.6166666746139526, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.022985396906733513, |
|
"step": 1035 |
|
}, |
|
{ |
|
"dpo_loss": 0.68792724609375, |
|
"epoch": 0.9825997952917093, |
|
"grad_norm": 76.3390109961412, |
|
"learning_rate": 6.01587525386267e-08, |
|
"logits": -2.1341538429260254, |
|
"logps": -90.12808227539062, |
|
"loss": 0.0253, |
|
"objective": 0.02918338030576706, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.46666666865348816, |
|
"regularize": 0.029182856902480125, |
|
"step": 1040 |
|
}, |
|
{ |
|
"dpo_loss": 0.6883952021598816, |
|
"epoch": 0.9873238327690733, |
|
"grad_norm": 62.771275862582634, |
|
"learning_rate": 5.975451610080642e-08, |
|
"logits": -2.1016061305999756, |
|
"logps": -92.14013671875, |
|
"loss": 0.0243, |
|
"objective": 0.02377927675843239, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 0.023778444156050682, |
|
"step": 1045 |
|
}, |
|
{ |
|
"dpo_loss": 0.6908401846885681, |
|
"epoch": 0.9920478702464373, |
|
"grad_norm": 61.00213004445293, |
|
"learning_rate": 5.9349615753104655e-08, |
|
"logits": -2.1279587745666504, |
|
"logps": -97.24657440185547, |
|
"loss": 0.0215, |
|
"objective": 0.021991008892655373, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.02199002355337143, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.9920478702464373, |
|
"eval_dpo_loss": 0.6927458047866821, |
|
"eval_logits": -1.991100788116455, |
|
"eval_logps": -98.33568572998047, |
|
"eval_loss": 0.01325704250484705, |
|
"eval_objective": 0.013452271930873394, |
|
"eval_ranking_idealized": 0.6022099256515503, |
|
"eval_ranking_idealized_expo": 0.5207182168960571, |
|
"eval_ranking_simple": 0.5179557800292969, |
|
"eval_regularize": 0.01345061045140028, |
|
"eval_runtime": 451.6429, |
|
"eval_samples_per_second": 12.82, |
|
"eval_steps_per_second": 3.206, |
|
"step": 1050 |
|
}, |
|
{ |
|
"dpo_loss": 0.6905434131622314, |
|
"epoch": 0.9967719077238013, |
|
"grad_norm": 69.09205896680123, |
|
"learning_rate": 5.894407905376616e-08, |
|
"logits": -2.2125723361968994, |
|
"logps": -90.43359375, |
|
"loss": 0.0256, |
|
"objective": 0.022856025025248528, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.46666666865348816, |
|
"regularize": 0.022855514660477638, |
|
"step": 1055 |
|
}, |
|
{ |
|
"dpo_loss": 0.6896554231643677, |
|
"epoch": 1.0014959452011654, |
|
"grad_norm": 71.18743706384133, |
|
"learning_rate": 5.853793360434687e-08, |
|
"logits": -2.095319986343384, |
|
"logps": -92.04082489013672, |
|
"loss": 0.0261, |
|
"objective": 0.025890907272696495, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.025890160351991653, |
|
"step": 1060 |
|
}, |
|
{ |
|
"dpo_loss": 0.6926552057266235, |
|
"epoch": 1.0062199826785292, |
|
"grad_norm": 68.03463485699294, |
|
"learning_rate": 5.813120704783539e-08, |
|
"logits": -2.1974759101867676, |
|
"logps": -91.99174499511719, |
|
"loss": 0.0258, |
|
"objective": 0.026935292407870293, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.026934677734971046, |
|
"step": 1065 |
|
}, |
|
{ |
|
"dpo_loss": 0.6922659277915955, |
|
"epoch": 1.0109440201558932, |
|
"grad_norm": 84.22043191701646, |
|
"learning_rate": 5.772392706677148e-08, |
|
"logits": -2.0366082191467285, |
|
"logps": -93.32905578613281, |
|
"loss": 0.0257, |
|
"objective": 0.026561260223388672, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.026560688391327858, |
|
"step": 1070 |
|
}, |
|
{ |
|
"dpo_loss": 0.6917497515678406, |
|
"epoch": 1.0156680576332573, |
|
"grad_norm": 73.42469612086076, |
|
"learning_rate": 5.7316121381361984e-08, |
|
"logits": -2.204793691635132, |
|
"logps": -94.83844757080078, |
|
"loss": 0.0269, |
|
"objective": 0.029431190341711044, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.4333333373069763, |
|
"ranking_simple": 0.46666666865348816, |
|
"regularize": 0.029430601745843887, |
|
"step": 1075 |
|
}, |
|
{ |
|
"dpo_loss": 0.683849573135376, |
|
"epoch": 1.0203920951106211, |
|
"grad_norm": 69.22367476602035, |
|
"learning_rate": 5.690781774759411e-08, |
|
"logits": -2.2117373943328857, |
|
"logps": -94.56703186035156, |
|
"loss": 0.0276, |
|
"objective": 0.030149787664413452, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.44999998807907104, |
|
"ranking_simple": 0.46666666865348816, |
|
"regularize": 0.030148986726999283, |
|
"step": 1080 |
|
}, |
|
{ |
|
"dpo_loss": 0.6918656229972839, |
|
"epoch": 1.0251161325879852, |
|
"grad_norm": 67.88816058800367, |
|
"learning_rate": 5.649904395534636e-08, |
|
"logits": -2.1058478355407715, |
|
"logps": -94.81607055664062, |
|
"loss": 0.0249, |
|
"objective": 0.025079350918531418, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 0.025078853592276573, |
|
"step": 1085 |
|
}, |
|
{ |
|
"dpo_loss": 0.6931339502334595, |
|
"epoch": 1.0298401700653492, |
|
"grad_norm": 71.9727166820056, |
|
"learning_rate": 5.6089827826497026e-08, |
|
"logits": -2.2008562088012695, |
|
"logps": -93.15959930419922, |
|
"loss": 0.0233, |
|
"objective": 0.024377651512622833, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.4333333373069763, |
|
"ranking_simple": 0.44999998807907104, |
|
"regularize": 0.024377118796110153, |
|
"step": 1090 |
|
}, |
|
{ |
|
"dpo_loss": 0.6928921341896057, |
|
"epoch": 1.0345642075427133, |
|
"grad_norm": 64.563119648666, |
|
"learning_rate": 5.568019721303068e-08, |
|
"logits": -2.146667957305908, |
|
"logps": -95.26697540283203, |
|
"loss": 0.0232, |
|
"objective": 0.022780917584896088, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.022780440747737885, |
|
"step": 1095 |
|
}, |
|
{ |
|
"dpo_loss": 0.6914113759994507, |
|
"epoch": 1.039288245020077, |
|
"grad_norm": 70.34249089351219, |
|
"learning_rate": 5.527017999514239e-08, |
|
"logits": -2.1238293647766113, |
|
"logps": -90.81373596191406, |
|
"loss": 0.0242, |
|
"objective": 0.019299499690532684, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.019298112019896507, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.039288245020077, |
|
"eval_dpo_loss": 0.692744791507721, |
|
"eval_logits": -1.9881205558776855, |
|
"eval_logps": -98.5120849609375, |
|
"eval_loss": 0.012820076197385788, |
|
"eval_objective": 0.012685425579547882, |
|
"eval_ranking_idealized": 0.6022099256515503, |
|
"eval_ranking_idealized_expo": 0.5207182168960571, |
|
"eval_ranking_simple": 0.5179557800292969, |
|
"eval_regularize": 0.012683761306107044, |
|
"eval_runtime": 445.6772, |
|
"eval_samples_per_second": 12.991, |
|
"eval_steps_per_second": 3.249, |
|
"step": 1100 |
|
}, |
|
{ |
|
"dpo_loss": 0.6895564198493958, |
|
"epoch": 1.0440122824974412, |
|
"grad_norm": 69.83976607143053, |
|
"learning_rate": 5.4859804079340266e-08, |
|
"logits": -2.158614158630371, |
|
"logps": -88.3459243774414, |
|
"loss": 0.0234, |
|
"objective": 0.024073513224720955, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 0.024072829633951187, |
|
"step": 1105 |
|
}, |
|
{ |
|
"dpo_loss": 0.6888210773468018, |
|
"epoch": 1.0487363199748052, |
|
"grad_norm": 78.2561714772354, |
|
"learning_rate": 5.444909739654602e-08, |
|
"logits": -2.2234978675842285, |
|
"logps": -92.2721939086914, |
|
"loss": 0.025, |
|
"objective": 0.025087477639317513, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.4333333373069763, |
|
"ranking_simple": 0.44999998807907104, |
|
"regularize": 0.025086527690291405, |
|
"step": 1110 |
|
}, |
|
{ |
|
"dpo_loss": 0.6934700608253479, |
|
"epoch": 1.053460357452169, |
|
"grad_norm": 67.69947794878017, |
|
"learning_rate": 5.4038087900193974e-08, |
|
"logits": -2.0514726638793945, |
|
"logps": -92.03214263916016, |
|
"loss": 0.0243, |
|
"objective": 0.020962979644536972, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.44999998807907104, |
|
"ranking_simple": 0.44999998807907104, |
|
"regularize": 0.020961163565516472, |
|
"step": 1115 |
|
}, |
|
{ |
|
"dpo_loss": 0.6921118497848511, |
|
"epoch": 1.058184394929533, |
|
"grad_norm": 60.76759737021519, |
|
"learning_rate": 5.362680356432846e-08, |
|
"logits": -2.082772731781006, |
|
"logps": -91.51958465576172, |
|
"loss": 0.0226, |
|
"objective": 0.02336716279387474, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 0.023366322740912437, |
|
"step": 1120 |
|
}, |
|
{ |
|
"dpo_loss": 0.6900876760482788, |
|
"epoch": 1.0629084324068971, |
|
"grad_norm": 89.61755393855444, |
|
"learning_rate": 5.321527238169992e-08, |
|
"logits": -2.137908935546875, |
|
"logps": -94.91239166259766, |
|
"loss": 0.0258, |
|
"objective": 0.0195107851177454, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.46666666865348816, |
|
"regularize": 0.01950863003730774, |
|
"step": 1125 |
|
}, |
|
{ |
|
"dpo_loss": 0.6918936371803284, |
|
"epoch": 1.067632469884261, |
|
"grad_norm": 60.32020595039731, |
|
"learning_rate": 5.280352236185959e-08, |
|
"logits": -2.223163604736328, |
|
"logps": -94.0035629272461, |
|
"loss": 0.0198, |
|
"objective": 0.020925112068653107, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.020924491807818413, |
|
"step": 1130 |
|
}, |
|
{ |
|
"dpo_loss": 0.6936992406845093, |
|
"epoch": 1.072356507361625, |
|
"grad_norm": 68.85146603912719, |
|
"learning_rate": 5.239158152925319e-08, |
|
"logits": -2.089085102081299, |
|
"logps": -89.82282257080078, |
|
"loss": 0.0232, |
|
"objective": 0.02565601095557213, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.46666666865348816, |
|
"regularize": 0.025654161348938942, |
|
"step": 1135 |
|
}, |
|
{ |
|
"dpo_loss": 0.6891080737113953, |
|
"epoch": 1.077080544838989, |
|
"grad_norm": 63.283810030024625, |
|
"learning_rate": 5.197947792131348e-08, |
|
"logits": -2.201981782913208, |
|
"logps": -93.19425964355469, |
|
"loss": 0.0252, |
|
"objective": 0.02514718845486641, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.025146154686808586, |
|
"step": 1140 |
|
}, |
|
{ |
|
"dpo_loss": 0.6902083158493042, |
|
"epoch": 1.0818045823163531, |
|
"grad_norm": 65.2698459996647, |
|
"learning_rate": 5.1567239586552e-08, |
|
"logits": -2.185304880142212, |
|
"logps": -90.79157257080078, |
|
"loss": 0.0219, |
|
"objective": 0.021113820374011993, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.021113011986017227, |
|
"step": 1145 |
|
}, |
|
{ |
|
"dpo_loss": 0.6910417675971985, |
|
"epoch": 1.086528619793717, |
|
"grad_norm": 65.02233557080358, |
|
"learning_rate": 5.115489458265005e-08, |
|
"logits": -2.1189420223236084, |
|
"logps": -94.17777252197266, |
|
"loss": 0.0248, |
|
"objective": 0.023960812017321587, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.02395929954946041, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.086528619793717, |
|
"eval_dpo_loss": 0.6928678154945374, |
|
"eval_logits": -1.9900000095367432, |
|
"eval_logps": -98.37399291992188, |
|
"eval_loss": 0.01211391482502222, |
|
"eval_objective": 0.012414646334946156, |
|
"eval_ranking_idealized": 0.6022099256515503, |
|
"eval_ranking_idealized_expo": 0.5207182168960571, |
|
"eval_ranking_simple": 0.5179557800292969, |
|
"eval_regularize": 0.0124129019677639, |
|
"eval_runtime": 446.4802, |
|
"eval_samples_per_second": 12.968, |
|
"eval_steps_per_second": 3.243, |
|
"step": 1150 |
|
}, |
|
{ |
|
"dpo_loss": 0.6903362274169922, |
|
"epoch": 1.091252657271081, |
|
"grad_norm": 65.0696209993928, |
|
"learning_rate": 5.0742470974549036e-08, |
|
"logits": -2.100759744644165, |
|
"logps": -90.93352508544922, |
|
"loss": 0.0233, |
|
"objective": 0.020316295325756073, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.020315730944275856, |
|
"step": 1155 |
|
}, |
|
{ |
|
"dpo_loss": 0.6921348571777344, |
|
"epoch": 1.095976694748445, |
|
"grad_norm": 68.18545652779869, |
|
"learning_rate": 5.032999683254028e-08, |
|
"logits": -2.1389288902282715, |
|
"logps": -90.10498046875, |
|
"loss": 0.0247, |
|
"objective": 0.021737979725003242, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.021737171337008476, |
|
"step": 1160 |
|
}, |
|
{ |
|
"dpo_loss": 0.69456547498703, |
|
"epoch": 1.100700732225809, |
|
"grad_norm": 73.7471857905923, |
|
"learning_rate": 4.991750023035455e-08, |
|
"logits": -2.124562978744507, |
|
"logps": -90.93301391601562, |
|
"loss": 0.024, |
|
"objective": 0.02404080517590046, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.6000000238418579, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.02403992973268032, |
|
"step": 1165 |
|
}, |
|
{ |
|
"dpo_loss": 0.6940571069717407, |
|
"epoch": 1.105424769703173, |
|
"grad_norm": 67.32632108670151, |
|
"learning_rate": 4.950500924325127e-08, |
|
"logits": -2.2072455883026123, |
|
"logps": -94.81490325927734, |
|
"loss": 0.0255, |
|
"objective": 0.026444217190146446, |
|
"ranking_idealized": 0.4833333194255829, |
|
"ranking_idealized_expo": 0.4166666567325592, |
|
"ranking_simple": 0.4333333373069763, |
|
"regularize": 0.026443663984537125, |
|
"step": 1170 |
|
}, |
|
{ |
|
"dpo_loss": 0.692010223865509, |
|
"epoch": 1.110148807180537, |
|
"grad_norm": 60.40484735228916, |
|
"learning_rate": 4.909255194610773e-08, |
|
"logits": -2.1622235774993896, |
|
"logps": -90.50462341308594, |
|
"loss": 0.0217, |
|
"objective": 0.01786196231842041, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.01785971410572529, |
|
"step": 1175 |
|
}, |
|
{ |
|
"dpo_loss": 0.6931031942367554, |
|
"epoch": 1.114872844657901, |
|
"grad_norm": 59.38906623535502, |
|
"learning_rate": 4.8680156411508193e-08, |
|
"logits": -2.16975474357605, |
|
"logps": -89.3101806640625, |
|
"loss": 0.0243, |
|
"objective": 0.01995784044265747, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.44999998807907104, |
|
"ranking_simple": 0.44999998807907104, |
|
"regularize": 0.019954947754740715, |
|
"step": 1180 |
|
}, |
|
{ |
|
"dpo_loss": 0.690836489200592, |
|
"epoch": 1.1195968821352649, |
|
"grad_norm": 68.89482324651908, |
|
"learning_rate": 4.826785070783326e-08, |
|
"logits": -2.058103084564209, |
|
"logps": -91.24579620361328, |
|
"loss": 0.0233, |
|
"objective": 0.022483140230178833, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.6166666746139526, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.02248191460967064, |
|
"step": 1185 |
|
}, |
|
{ |
|
"dpo_loss": 0.6914324164390564, |
|
"epoch": 1.124320919612629, |
|
"grad_norm": 61.796191905639844, |
|
"learning_rate": 4.7855662897349464e-08, |
|
"logits": -2.1661124229431152, |
|
"logps": -91.51298522949219, |
|
"loss": 0.0226, |
|
"objective": 0.018483061343431473, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.018482128158211708, |
|
"step": 1190 |
|
}, |
|
{ |
|
"dpo_loss": 0.6916797161102295, |
|
"epoch": 1.129044957089993, |
|
"grad_norm": 63.413323128276396, |
|
"learning_rate": 4.744362103429933e-08, |
|
"logits": -2.204550266265869, |
|
"logps": -90.19840240478516, |
|
"loss": 0.0244, |
|
"objective": 0.025325793772935867, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.6000000238418579, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.02532271295785904, |
|
"step": 1195 |
|
}, |
|
{ |
|
"dpo_loss": 0.6876908540725708, |
|
"epoch": 1.1337689945673568, |
|
"grad_norm": 66.08019689679915, |
|
"learning_rate": 4.703175316299196e-08, |
|
"logits": -2.147653341293335, |
|
"logps": -94.6951904296875, |
|
"loss": 0.0238, |
|
"objective": 0.022918345406651497, |
|
"ranking_idealized": 0.699999988079071, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.022917365655303, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.1337689945673568, |
|
"eval_dpo_loss": 0.6931039094924927, |
|
"eval_logits": -1.988110899925232, |
|
"eval_logps": -98.65231323242188, |
|
"eval_loss": 0.013076459057629108, |
|
"eval_objective": 0.013204570859670639, |
|
"eval_ranking_idealized": 0.6022099256515503, |
|
"eval_ranking_idealized_expo": 0.5207182168960571, |
|
"eval_ranking_simple": 0.5186464190483093, |
|
"eval_regularize": 0.013202749192714691, |
|
"eval_runtime": 446.1352, |
|
"eval_samples_per_second": 12.978, |
|
"eval_steps_per_second": 3.246, |
|
"step": 1200 |
|
}, |
|
{ |
|
"dpo_loss": 0.6895859837532043, |
|
"epoch": 1.1384930320447209, |
|
"grad_norm": 59.82159099357435, |
|
"learning_rate": 4.662008731589424e-08, |
|
"logits": -2.2835893630981445, |
|
"logps": -93.41173553466797, |
|
"loss": 0.0238, |
|
"objective": 0.01953883096575737, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.019538111984729767, |
|
"step": 1205 |
|
}, |
|
{ |
|
"dpo_loss": 0.6878269910812378, |
|
"epoch": 1.143217069522085, |
|
"grad_norm": 64.13908923913202, |
|
"learning_rate": 4.6208651511722916e-08, |
|
"logits": -2.107128381729126, |
|
"logps": -95.02501678466797, |
|
"loss": 0.0225, |
|
"objective": 0.02028297260403633, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.44999998807907104, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 0.020282411947846413, |
|
"step": 1210 |
|
}, |
|
{ |
|
"dpo_loss": 0.6913109421730042, |
|
"epoch": 1.147941106999449, |
|
"grad_norm": 64.08267971583916, |
|
"learning_rate": 4.579747375353763e-08, |
|
"logits": -2.152080774307251, |
|
"logps": -93.3291244506836, |
|
"loss": 0.0225, |
|
"objective": 0.02415003441274166, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 0.024148976430296898, |
|
"step": 1215 |
|
}, |
|
{ |
|
"dpo_loss": 0.6908139586448669, |
|
"epoch": 1.1526651444768128, |
|
"grad_norm": 61.90111498957097, |
|
"learning_rate": 4.5386582026834904e-08, |
|
"logits": -2.2181687355041504, |
|
"logps": -91.48564147949219, |
|
"loss": 0.0211, |
|
"objective": 0.01728188991546631, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.44999998807907104, |
|
"ranking_simple": 0.44999998807907104, |
|
"regularize": 0.017280517145991325, |
|
"step": 1220 |
|
}, |
|
{ |
|
"dpo_loss": 0.6892186999320984, |
|
"epoch": 1.1573891819541768, |
|
"grad_norm": 67.90586520467559, |
|
"learning_rate": 4.497600429764349e-08, |
|
"logits": -2.0878336429595947, |
|
"logps": -92.51049041748047, |
|
"loss": 0.023, |
|
"objective": 0.021265888586640358, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.021264949813485146, |
|
"step": 1225 |
|
}, |
|
{ |
|
"dpo_loss": 0.6902192831039429, |
|
"epoch": 1.162113219431541, |
|
"grad_norm": 69.62062658592635, |
|
"learning_rate": 4.456576851062089e-08, |
|
"logits": -2.1754400730133057, |
|
"logps": -89.9488296508789, |
|
"loss": 0.0202, |
|
"objective": 0.015848658978939056, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.015847818925976753, |
|
"step": 1230 |
|
}, |
|
{ |
|
"dpo_loss": 0.6919657588005066, |
|
"epoch": 1.1668372569089047, |
|
"grad_norm": 76.5213820049961, |
|
"learning_rate": 4.4155902587151404e-08, |
|
"logits": -2.2707595825195312, |
|
"logps": -89.4555435180664, |
|
"loss": 0.0231, |
|
"objective": 0.02043619193136692, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.020434273406863213, |
|
"step": 1235 |
|
}, |
|
{ |
|
"dpo_loss": 0.6887553930282593, |
|
"epoch": 1.1715612943862688, |
|
"grad_norm": 68.85342051503754, |
|
"learning_rate": 4.374643442344576e-08, |
|
"logits": -2.1689367294311523, |
|
"logps": -91.3507080078125, |
|
"loss": 0.022, |
|
"objective": 0.02212885580956936, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.02212790958583355, |
|
"step": 1240 |
|
}, |
|
{ |
|
"dpo_loss": 0.6907090544700623, |
|
"epoch": 1.1762853318636328, |
|
"grad_norm": 63.9895529963574, |
|
"learning_rate": 4.333739188864243e-08, |
|
"logits": -2.116743564605713, |
|
"logps": -89.35691833496094, |
|
"loss": 0.0231, |
|
"objective": 0.02183300256729126, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.021832352504134178, |
|
"step": 1245 |
|
}, |
|
{ |
|
"dpo_loss": 0.6873170733451843, |
|
"epoch": 1.1810093693409969, |
|
"grad_norm": 62.000344928337206, |
|
"learning_rate": 4.292880282291083e-08, |
|
"logits": -2.0605881214141846, |
|
"logps": -89.00260925292969, |
|
"loss": 0.0213, |
|
"objective": 0.016554510220885277, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 0.016552967950701714, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.1810093693409969, |
|
"eval_dpo_loss": 0.6929011344909668, |
|
"eval_logits": -1.9892135858535767, |
|
"eval_logps": -98.38199615478516, |
|
"eval_loss": 0.011604293249547482, |
|
"eval_objective": 0.011785290203988552, |
|
"eval_ranking_idealized": 0.6022099256515503, |
|
"eval_ranking_idealized_expo": 0.5207182168960571, |
|
"eval_ranking_simple": 0.5186464190483093, |
|
"eval_regularize": 0.011783457361161709, |
|
"eval_runtime": 446.5262, |
|
"eval_samples_per_second": 12.967, |
|
"eval_steps_per_second": 3.243, |
|
"step": 1250 |
|
}, |
|
{ |
|
"dpo_loss": 0.6883438229560852, |
|
"epoch": 1.1857334068183607, |
|
"grad_norm": 76.14193268278439, |
|
"learning_rate": 4.2520695035556444e-08, |
|
"logits": -2.238811731338501, |
|
"logps": -93.17744445800781, |
|
"loss": 0.0227, |
|
"objective": 0.02147439494729042, |
|
"ranking_idealized": 0.4166666567325592, |
|
"ranking_idealized_expo": 0.4000000059604645, |
|
"ranking_simple": 0.4000000059604645, |
|
"regularize": 0.021473314613103867, |
|
"step": 1255 |
|
}, |
|
{ |
|
"dpo_loss": 0.6898403763771057, |
|
"epoch": 1.1904574442957248, |
|
"grad_norm": 72.14873570982253, |
|
"learning_rate": 4.211309630312812e-08, |
|
"logits": -2.186509847640991, |
|
"logps": -92.71757507324219, |
|
"loss": 0.0221, |
|
"objective": 0.024685295298695564, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.6166666746139526, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.02468358352780342, |
|
"step": 1260 |
|
}, |
|
{ |
|
"dpo_loss": 0.6934939026832581, |
|
"epoch": 1.1951814817730888, |
|
"grad_norm": 76.43192783166353, |
|
"learning_rate": 4.1706034367527484e-08, |
|
"logits": -2.2296221256256104, |
|
"logps": -90.43429565429688, |
|
"loss": 0.0216, |
|
"objective": 0.017879430204629898, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.017877496778964996, |
|
"step": 1265 |
|
}, |
|
{ |
|
"dpo_loss": 0.6907635927200317, |
|
"epoch": 1.1999055192504526, |
|
"grad_norm": 63.68697185614875, |
|
"learning_rate": 4.12995369341208e-08, |
|
"logits": -2.2198116779327393, |
|
"logps": -89.92237854003906, |
|
"loss": 0.0186, |
|
"objective": 0.019899163395166397, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.019898338243365288, |
|
"step": 1270 |
|
}, |
|
{ |
|
"dpo_loss": 0.6885548233985901, |
|
"epoch": 1.2046295567278167, |
|
"grad_norm": 67.30508547665502, |
|
"learning_rate": 4.0893631669853315e-08, |
|
"logits": -2.2070553302764893, |
|
"logps": -91.00017547607422, |
|
"loss": 0.0213, |
|
"objective": 0.0249098539352417, |
|
"ranking_idealized": 0.6833333373069763, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.024909144267439842, |
|
"step": 1275 |
|
}, |
|
{ |
|
"dpo_loss": 0.6864418387413025, |
|
"epoch": 1.2093535942051807, |
|
"grad_norm": 58.37239845591427, |
|
"learning_rate": 4.048834620136618e-08, |
|
"logits": -2.157111406326294, |
|
"logps": -90.54441833496094, |
|
"loss": 0.0216, |
|
"objective": 0.024475712329149246, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.02447471395134926, |
|
"step": 1280 |
|
}, |
|
{ |
|
"dpo_loss": 0.6923628449440002, |
|
"epoch": 1.2140776316825446, |
|
"grad_norm": 62.390419253758, |
|
"learning_rate": 4.0083708113116125e-08, |
|
"logits": -2.20066499710083, |
|
"logps": -91.0829849243164, |
|
"loss": 0.0194, |
|
"objective": 0.017484158277511597, |
|
"ranking_idealized": 0.699999988079071, |
|
"ranking_idealized_expo": 0.6499999761581421, |
|
"ranking_simple": 0.6499999761581421, |
|
"regularize": 0.017483441159129143, |
|
"step": 1285 |
|
}, |
|
{ |
|
"dpo_loss": 0.6917316317558289, |
|
"epoch": 1.2188016691599086, |
|
"grad_norm": 62.080679222088676, |
|
"learning_rate": 3.9679744945498026e-08, |
|
"logits": -2.0995683670043945, |
|
"logps": -89.5858154296875, |
|
"loss": 0.0215, |
|
"objective": 0.019274141639471054, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.01927088387310505, |
|
"step": 1290 |
|
}, |
|
{ |
|
"dpo_loss": 0.6907222867012024, |
|
"epoch": 1.2235257066372727, |
|
"grad_norm": 69.35505760627515, |
|
"learning_rate": 3.9276484192970427e-08, |
|
"logits": -2.0752005577087402, |
|
"logps": -88.9092788696289, |
|
"loss": 0.0202, |
|
"objective": 0.01790427789092064, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.017903130501508713, |
|
"step": 1295 |
|
}, |
|
{ |
|
"dpo_loss": 0.6888744831085205, |
|
"epoch": 1.2282497441146367, |
|
"grad_norm": 64.13387937854051, |
|
"learning_rate": 3.887395330218428e-08, |
|
"logits": -2.088010549545288, |
|
"logps": -93.63009643554688, |
|
"loss": 0.0213, |
|
"objective": 0.01880194991827011, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.018800783902406693, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.2282497441146367, |
|
"eval_dpo_loss": 0.6930280923843384, |
|
"eval_logits": -1.9901354312896729, |
|
"eval_logps": -98.35194396972656, |
|
"eval_loss": 0.010079173371195793, |
|
"eval_objective": 0.010304316878318787, |
|
"eval_ranking_idealized": 0.6022099256515503, |
|
"eval_ranking_idealized_expo": 0.5207182168960571, |
|
"eval_ranking_simple": 0.5179557800292969, |
|
"eval_regularize": 0.010302502661943436, |
|
"eval_runtime": 446.7461, |
|
"eval_samples_per_second": 12.96, |
|
"eval_steps_per_second": 3.241, |
|
"step": 1300 |
|
}, |
|
{ |
|
"dpo_loss": 0.6898637413978577, |
|
"epoch": 1.2329737815920006, |
|
"grad_norm": 64.4984732207436, |
|
"learning_rate": 3.847217967011481e-08, |
|
"logits": -2.107663154602051, |
|
"logps": -90.70755004882812, |
|
"loss": 0.0188, |
|
"objective": 0.020963182672858238, |
|
"ranking_idealized": 0.4333333373069763, |
|
"ranking_idealized_expo": 0.4000000059604645, |
|
"ranking_simple": 0.4000000059604645, |
|
"regularize": 0.020961280912160873, |
|
"step": 1305 |
|
}, |
|
{ |
|
"dpo_loss": 0.6896305084228516, |
|
"epoch": 1.2376978190693646, |
|
"grad_norm": 65.1483177905117, |
|
"learning_rate": 3.807119064219686e-08, |
|
"logits": -2.1721391677856445, |
|
"logps": -89.53897094726562, |
|
"loss": 0.0187, |
|
"objective": 0.022347215563058853, |
|
"ranking_idealized": 0.6833333373069763, |
|
"ranking_idealized_expo": 0.6000000238418579, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.02234494686126709, |
|
"step": 1310 |
|
}, |
|
{ |
|
"dpo_loss": 0.6885426044464111, |
|
"epoch": 1.2424218565467287, |
|
"grad_norm": 67.6302362675184, |
|
"learning_rate": 3.7671013510463685e-08, |
|
"logits": -2.1977858543395996, |
|
"logps": -90.79574584960938, |
|
"loss": 0.0254, |
|
"objective": 0.021330129355192184, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.02132764458656311, |
|
"step": 1315 |
|
}, |
|
{ |
|
"dpo_loss": 0.6933093667030334, |
|
"epoch": 1.2471458940240927, |
|
"grad_norm": 63.75946991656521, |
|
"learning_rate": 3.727167551168947e-08, |
|
"logits": -2.229327917098999, |
|
"logps": -91.51911163330078, |
|
"loss": 0.0225, |
|
"objective": 0.01876850612461567, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.46666666865348816, |
|
"regularize": 0.01876780204474926, |
|
"step": 1320 |
|
}, |
|
{ |
|
"dpo_loss": 0.6886643171310425, |
|
"epoch": 1.2518699315014565, |
|
"grad_norm": 61.93022907625324, |
|
"learning_rate": 3.687320382553547e-08, |
|
"logits": -2.1852173805236816, |
|
"logps": -94.50476837158203, |
|
"loss": 0.0213, |
|
"objective": 0.019077714532613754, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.01907687447965145, |
|
"step": 1325 |
|
}, |
|
{ |
|
"dpo_loss": 0.690542459487915, |
|
"epoch": 1.2565939689788206, |
|
"grad_norm": 61.862265225762556, |
|
"learning_rate": 3.6475625572700156e-08, |
|
"logits": -2.1126949787139893, |
|
"logps": -93.08950805664062, |
|
"loss": 0.0201, |
|
"objective": 0.020278314128518105, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.020277447998523712, |
|
"step": 1330 |
|
}, |
|
{ |
|
"dpo_loss": 0.6890642046928406, |
|
"epoch": 1.2613180064561846, |
|
"grad_norm": 65.89840804248666, |
|
"learning_rate": 3.607896781307333e-08, |
|
"logits": -2.0817463397979736, |
|
"logps": -93.81517791748047, |
|
"loss": 0.0186, |
|
"objective": 0.018369121477007866, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.44999998807907104, |
|
"ranking_simple": 0.44999998807907104, |
|
"regularize": 0.01836659200489521, |
|
"step": 1335 |
|
}, |
|
{ |
|
"dpo_loss": 0.6935243606567383, |
|
"epoch": 1.2660420439335485, |
|
"grad_norm": 61.73867224830495, |
|
"learning_rate": 3.5683257543894376e-08, |
|
"logits": -2.158568859100342, |
|
"logps": -91.2880859375, |
|
"loss": 0.0199, |
|
"objective": 0.02297687530517578, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.022976113483309746, |
|
"step": 1340 |
|
}, |
|
{ |
|
"dpo_loss": 0.6870158910751343, |
|
"epoch": 1.2707660814109125, |
|
"grad_norm": 55.58597973847245, |
|
"learning_rate": 3.528852169791474e-08, |
|
"logits": -2.130025625228882, |
|
"logps": -92.3035888671875, |
|
"loss": 0.0191, |
|
"objective": 0.018635360524058342, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.018634265288710594, |
|
"step": 1345 |
|
}, |
|
{ |
|
"dpo_loss": 0.6925967335700989, |
|
"epoch": 1.2754901188882766, |
|
"grad_norm": 60.169587280433916, |
|
"learning_rate": 3.489478714156493e-08, |
|
"logits": -2.1538307666778564, |
|
"logps": -90.29386901855469, |
|
"loss": 0.0191, |
|
"objective": 0.01989407278597355, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.01989228092133999, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.2754901188882766, |
|
"eval_dpo_loss": 0.6928868293762207, |
|
"eval_logits": -1.9894771575927734, |
|
"eval_logps": -98.17080688476562, |
|
"eval_loss": 0.010478594340384007, |
|
"eval_objective": 0.01073493529111147, |
|
"eval_ranking_idealized": 0.6022099256515503, |
|
"eval_ranking_idealized_expo": 0.5207182168960571, |
|
"eval_ranking_simple": 0.5186464190483093, |
|
"eval_regularize": 0.01073309313505888, |
|
"eval_runtime": 446.5009, |
|
"eval_samples_per_second": 12.967, |
|
"eval_steps_per_second": 3.243, |
|
"step": 1350 |
|
}, |
|
{ |
|
"dpo_loss": 0.6914752721786499, |
|
"epoch": 1.2802141563656404, |
|
"grad_norm": 63.02270194963686, |
|
"learning_rate": 3.450208067312586e-08, |
|
"logits": -2.1492412090301514, |
|
"logps": -89.29891967773438, |
|
"loss": 0.019, |
|
"objective": 0.017367955297231674, |
|
"ranking_idealized": 0.5, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.46666666865348816, |
|
"regularize": 0.01736696995794773, |
|
"step": 1355 |
|
}, |
|
{ |
|
"dpo_loss": 0.6908875703811646, |
|
"epoch": 1.2849381938430045, |
|
"grad_norm": 66.84237286020337, |
|
"learning_rate": 3.411042902090492e-08, |
|
"logits": -2.2156636714935303, |
|
"logps": -91.21939849853516, |
|
"loss": 0.0183, |
|
"objective": 0.016017399728298187, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.016014499589800835, |
|
"step": 1360 |
|
}, |
|
{ |
|
"dpo_loss": 0.6913639307022095, |
|
"epoch": 1.2896622313203685, |
|
"grad_norm": 76.89545336747237, |
|
"learning_rate": 3.3719858841416836e-08, |
|
"logits": -2.1557438373565674, |
|
"logps": -92.71784210205078, |
|
"loss": 0.0196, |
|
"objective": 0.019386129453778267, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.019384343177080154, |
|
"step": 1365 |
|
}, |
|
{ |
|
"dpo_loss": 0.6889938712120056, |
|
"epoch": 1.2943862687977323, |
|
"grad_norm": 56.519950070251724, |
|
"learning_rate": 3.333039671756934e-08, |
|
"logits": -2.055145502090454, |
|
"logps": -90.29429626464844, |
|
"loss": 0.0191, |
|
"objective": 0.019297009333968163, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.01929611526429653, |
|
"step": 1370 |
|
}, |
|
{ |
|
"dpo_loss": 0.689259946346283, |
|
"epoch": 1.2991103062750964, |
|
"grad_norm": 73.9021579793652, |
|
"learning_rate": 3.294206915685392e-08, |
|
"logits": -2.2613272666931152, |
|
"logps": -95.13359832763672, |
|
"loss": 0.0184, |
|
"objective": 0.015245441347360611, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.01524441223591566, |
|
"step": 1375 |
|
}, |
|
{ |
|
"dpo_loss": 0.6903609037399292, |
|
"epoch": 1.3038343437524604, |
|
"grad_norm": 63.39814646096018, |
|
"learning_rate": 3.2554902589541666e-08, |
|
"logits": -2.1530189514160156, |
|
"logps": -90.94368743896484, |
|
"loss": 0.0174, |
|
"objective": 0.017877008765935898, |
|
"ranking_idealized": 0.7166666388511658, |
|
"ranking_idealized_expo": 0.6499999761581421, |
|
"ranking_simple": 0.6499999761581421, |
|
"regularize": 0.017875246703624725, |
|
"step": 1380 |
|
}, |
|
{ |
|
"dpo_loss": 0.6925813555717468, |
|
"epoch": 1.3085583812298245, |
|
"grad_norm": 67.03876383480757, |
|
"learning_rate": 3.216892336688435e-08, |
|
"logits": -2.162677526473999, |
|
"logps": -91.54695892333984, |
|
"loss": 0.0196, |
|
"objective": 0.019883565604686737, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.019882572814822197, |
|
"step": 1385 |
|
}, |
|
{ |
|
"dpo_loss": 0.690301775932312, |
|
"epoch": 1.3132824187071885, |
|
"grad_norm": 65.28692296573145, |
|
"learning_rate": 3.1784157759320954e-08, |
|
"logits": -2.116351842880249, |
|
"logps": -91.4957504272461, |
|
"loss": 0.018, |
|
"objective": 0.015376557596027851, |
|
"ranking_idealized": 0.6833333373069763, |
|
"ranking_idealized_expo": 0.6333333253860474, |
|
"ranking_simple": 0.6333333253860474, |
|
"regularize": 0.015375560149550438, |
|
"step": 1390 |
|
}, |
|
{ |
|
"dpo_loss": 0.6893251538276672, |
|
"epoch": 1.3180064561845524, |
|
"grad_norm": 62.76399037621205, |
|
"learning_rate": 3.140063195468962e-08, |
|
"logits": -2.163536548614502, |
|
"logps": -92.03372192382812, |
|
"loss": 0.0177, |
|
"objective": 0.017055170610547066, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.017053287476301193, |
|
"step": 1395 |
|
}, |
|
{ |
|
"dpo_loss": 0.6904376745223999, |
|
"epoch": 1.3227304936619164, |
|
"grad_norm": 66.03868596298913, |
|
"learning_rate": 3.101837205644531e-08, |
|
"logits": -2.1526260375976562, |
|
"logps": -91.71768951416016, |
|
"loss": 0.0183, |
|
"objective": 0.017484767362475395, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.017483625560998917, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.3227304936619164, |
|
"eval_dpo_loss": 0.6928439736366272, |
|
"eval_logits": -1.9895795583724976, |
|
"eval_logps": -98.29888916015625, |
|
"eval_loss": 0.009779366664588451, |
|
"eval_objective": 0.009947007521986961, |
|
"eval_ranking_idealized": 0.6022099256515503, |
|
"eval_ranking_idealized_expo": 0.5207182168960571, |
|
"eval_ranking_simple": 0.5179557800292969, |
|
"eval_regularize": 0.009945098310709, |
|
"eval_runtime": 445.875, |
|
"eval_samples_per_second": 12.986, |
|
"eval_steps_per_second": 3.248, |
|
"step": 1400 |
|
}, |
|
{ |
|
"dpo_loss": 0.6910092234611511, |
|
"epoch": 1.3274545311392805, |
|
"grad_norm": 60.84233260746348, |
|
"learning_rate": 3.063740408188308e-08, |
|
"logits": -2.129271984100342, |
|
"logps": -88.37971496582031, |
|
"loss": 0.0189, |
|
"objective": 0.02033030241727829, |
|
"ranking_idealized": 0.6833333373069763, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.020329667255282402, |
|
"step": 1405 |
|
}, |
|
{ |
|
"dpo_loss": 0.68757563829422, |
|
"epoch": 1.3321785686166443, |
|
"grad_norm": 66.02099749274366, |
|
"learning_rate": 3.0257753960367374e-08, |
|
"logits": -2.15506911277771, |
|
"logps": -94.14618682861328, |
|
"loss": 0.0203, |
|
"objective": 0.022749019786715508, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.022747376933693886, |
|
"step": 1410 |
|
}, |
|
{ |
|
"dpo_loss": 0.6922497153282166, |
|
"epoch": 1.3369026060940084, |
|
"grad_norm": 69.66802628356685, |
|
"learning_rate": 2.987944753156717e-08, |
|
"logits": -2.111666440963745, |
|
"logps": -91.04405975341797, |
|
"loss": 0.0182, |
|
"objective": 0.0166956577450037, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.6499999761581421, |
|
"ranking_simple": 0.6499999761581421, |
|
"regularize": 0.016694890335202217, |
|
"step": 1415 |
|
}, |
|
{ |
|
"dpo_loss": 0.6890503168106079, |
|
"epoch": 1.3416266435713724, |
|
"grad_norm": 60.95975646160118, |
|
"learning_rate": 2.9502510543697322e-08, |
|
"logits": -2.153930902481079, |
|
"logps": -90.40367889404297, |
|
"loss": 0.0168, |
|
"objective": 0.01781788095831871, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 0.017816245555877686, |
|
"step": 1420 |
|
}, |
|
{ |
|
"dpo_loss": 0.6874597668647766, |
|
"epoch": 1.3463506810487362, |
|
"grad_norm": 59.58581498665142, |
|
"learning_rate": 2.912696865176607e-08, |
|
"logits": -2.243446111679077, |
|
"logps": -96.16178894042969, |
|
"loss": 0.0177, |
|
"objective": 0.023015646263957024, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.6166666746139526, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.02301453799009323, |
|
"step": 1425 |
|
}, |
|
{ |
|
"dpo_loss": 0.6897552013397217, |
|
"epoch": 1.3510747185261003, |
|
"grad_norm": 60.74669762947064, |
|
"learning_rate": 2.875284741582892e-08, |
|
"logits": -2.1864495277404785, |
|
"logps": -96.33065032958984, |
|
"loss": 0.017, |
|
"objective": 0.015690678730607033, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 0.015688156709074974, |
|
"step": 1430 |
|
}, |
|
{ |
|
"dpo_loss": 0.6917933821678162, |
|
"epoch": 1.3557987560034643, |
|
"grad_norm": 52.60951888043, |
|
"learning_rate": 2.838017229924894e-08, |
|
"logits": -2.142225742340088, |
|
"logps": -92.60111999511719, |
|
"loss": 0.0173, |
|
"objective": 0.01731746830046177, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.01731485314667225, |
|
"step": 1435 |
|
}, |
|
{ |
|
"dpo_loss": 0.6895142793655396, |
|
"epoch": 1.3605227934808282, |
|
"grad_norm": 72.2797304152481, |
|
"learning_rate": 2.8008968666963817e-08, |
|
"logits": -2.1638967990875244, |
|
"logps": -92.39698028564453, |
|
"loss": 0.0165, |
|
"objective": 0.017737431451678276, |
|
"ranking_idealized": 0.5166666507720947, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.46666666865348816, |
|
"regularize": 0.017736157402396202, |
|
"step": 1440 |
|
}, |
|
{ |
|
"dpo_loss": 0.689642608165741, |
|
"epoch": 1.3652468309581922, |
|
"grad_norm": 79.06644823842738, |
|
"learning_rate": 2.763926178375929e-08, |
|
"logits": -2.190371036529541, |
|
"logps": -91.85442352294922, |
|
"loss": 0.0148, |
|
"objective": 0.01207685936242342, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.012074961327016354, |
|
"step": 1445 |
|
}, |
|
{ |
|
"dpo_loss": 0.690252423286438, |
|
"epoch": 1.3699708684355563, |
|
"grad_norm": 61.578467113272715, |
|
"learning_rate": 2.7271076812549688e-08, |
|
"logits": -2.2324106693267822, |
|
"logps": -91.74723815917969, |
|
"loss": 0.0173, |
|
"objective": 0.020336376503109932, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.4333333373069763, |
|
"ranking_simple": 0.44999998807907104, |
|
"regularize": 0.02033485844731331, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.3699708684355563, |
|
"eval_dpo_loss": 0.6928586959838867, |
|
"eval_logits": -1.9887967109680176, |
|
"eval_logps": -98.44745635986328, |
|
"eval_loss": 0.01200713962316513, |
|
"eval_objective": 0.012023248709738255, |
|
"eval_ranking_idealized": 0.6022099256515503, |
|
"eval_ranking_idealized_expo": 0.5207182168960571, |
|
"eval_ranking_simple": 0.519336998462677, |
|
"eval_regularize": 0.012021254748106003, |
|
"eval_runtime": 446.5411, |
|
"eval_samples_per_second": 12.966, |
|
"eval_steps_per_second": 3.243, |
|
"step": 1450 |
|
}, |
|
{ |
|
"dpo_loss": 0.6905301809310913, |
|
"epoch": 1.3746949059129203, |
|
"grad_norm": 63.36787972947676, |
|
"learning_rate": 2.6904438812665275e-08, |
|
"logits": -2.1534087657928467, |
|
"logps": -90.40296936035156, |
|
"loss": 0.0161, |
|
"objective": 0.01465876679867506, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.01465718261897564, |
|
"step": 1455 |
|
}, |
|
{ |
|
"dpo_loss": 0.6913805603981018, |
|
"epoch": 1.3794189433902841, |
|
"grad_norm": 82.33504933507443, |
|
"learning_rate": 2.6539372738146693e-08, |
|
"logits": -2.2307941913604736, |
|
"logps": -95.2313003540039, |
|
"loss": 0.0175, |
|
"objective": 0.01627987250685692, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.01627855747938156, |
|
"step": 1460 |
|
}, |
|
{ |
|
"dpo_loss": 0.6937485933303833, |
|
"epoch": 1.3841429808676482, |
|
"grad_norm": 77.80619617469834, |
|
"learning_rate": 2.6175903436046474e-08, |
|
"logits": -2.127424716949463, |
|
"logps": -93.49677276611328, |
|
"loss": 0.019, |
|
"objective": 0.018087979406118393, |
|
"ranking_idealized": 0.7333333492279053, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.018087027594447136, |
|
"step": 1465 |
|
}, |
|
{ |
|
"dpo_loss": 0.6909429430961609, |
|
"epoch": 1.3888670183450122, |
|
"grad_norm": 65.449053741861, |
|
"learning_rate": 2.5814055644738007e-08, |
|
"logits": -2.13195538520813, |
|
"logps": -92.86261749267578, |
|
"loss": 0.018, |
|
"objective": 0.01916462555527687, |
|
"ranking_idealized": 0.6833333373069763, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.019163591787219048, |
|
"step": 1470 |
|
}, |
|
{ |
|
"dpo_loss": 0.6916597485542297, |
|
"epoch": 1.3935910558223763, |
|
"grad_norm": 64.31813956475592, |
|
"learning_rate": 2.545385399223171e-08, |
|
"logits": -2.1243183612823486, |
|
"logps": -91.4704818725586, |
|
"loss": 0.016, |
|
"objective": 0.01847274787724018, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.46666666865348816, |
|
"regularize": 0.01847095414996147, |
|
"step": 1475 |
|
}, |
|
{ |
|
"dpo_loss": 0.6896728277206421, |
|
"epoch": 1.3983150932997401, |
|
"grad_norm": 68.83830299489735, |
|
"learning_rate": 2.5095322994498846e-08, |
|
"logits": -2.1868510246276855, |
|
"logps": -88.3174819946289, |
|
"loss": 0.0148, |
|
"objective": 0.014686751179397106, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.014684156514704227, |
|
"step": 1480 |
|
}, |
|
{ |
|
"dpo_loss": 0.691864550113678, |
|
"epoch": 1.4030391307771042, |
|
"grad_norm": 64.34780184550137, |
|
"learning_rate": 2.4738487053802913e-08, |
|
"logits": -2.121894359588623, |
|
"logps": -90.51294708251953, |
|
"loss": 0.0178, |
|
"objective": 0.017730435356497765, |
|
"ranking_idealized": 0.699999988079071, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.017729543149471283, |
|
"step": 1485 |
|
}, |
|
{ |
|
"dpo_loss": 0.6895692944526672, |
|
"epoch": 1.4077631682544682, |
|
"grad_norm": 76.56054457201986, |
|
"learning_rate": 2.4383370457038788e-08, |
|
"logits": -2.287666082382202, |
|
"logps": -91.52227020263672, |
|
"loss": 0.0178, |
|
"objective": 0.020892778411507607, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.46666666865348816, |
|
"regularize": 0.020891882479190826, |
|
"step": 1490 |
|
}, |
|
{ |
|
"dpo_loss": 0.6884461641311646, |
|
"epoch": 1.412487205731832, |
|
"grad_norm": 66.46105530145252, |
|
"learning_rate": 2.4029997374079687e-08, |
|
"logits": -2.1489083766937256, |
|
"logps": -94.47865295410156, |
|
"loss": 0.0164, |
|
"objective": 0.017250513657927513, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.01724918559193611, |
|
"step": 1495 |
|
}, |
|
{ |
|
"dpo_loss": 0.6900500059127808, |
|
"epoch": 1.4172112432091961, |
|
"grad_norm": 65.6967093839243, |
|
"learning_rate": 2.3678391856132203e-08, |
|
"logits": -2.1476263999938965, |
|
"logps": -92.45713806152344, |
|
"loss": 0.0171, |
|
"objective": 0.017398254945874214, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.01739557646214962, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.4172112432091961, |
|
"eval_dpo_loss": 0.6928529143333435, |
|
"eval_logits": -1.989194393157959, |
|
"eval_logps": -98.49781036376953, |
|
"eval_loss": 0.009285102598369122, |
|
"eval_objective": 0.009339064359664917, |
|
"eval_ranking_idealized": 0.6022099256515503, |
|
"eval_ranking_idealized_expo": 0.5207182168960571, |
|
"eval_ranking_simple": 0.5186464190483093, |
|
"eval_regularize": 0.009336920455098152, |
|
"eval_runtime": 454.5105, |
|
"eval_samples_per_second": 12.739, |
|
"eval_steps_per_second": 3.186, |
|
"step": 1500 |
|
}, |
|
{ |
|
"dpo_loss": 0.6893900632858276, |
|
"epoch": 1.4219352806865602, |
|
"grad_norm": 66.7330764289215, |
|
"learning_rate": 2.3328577834099238e-08, |
|
"logits": -2.1472671031951904, |
|
"logps": -93.55850982666016, |
|
"loss": 0.0159, |
|
"objective": 0.01486156228929758, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.6000000238418579, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.014860122464597225, |
|
"step": 1505 |
|
}, |
|
{ |
|
"dpo_loss": 0.6900672912597656, |
|
"epoch": 1.426659318163924, |
|
"grad_norm": 68.69500961540548, |
|
"learning_rate": 2.2980579116951266e-08, |
|
"logits": -2.205268144607544, |
|
"logps": -94.00220489501953, |
|
"loss": 0.0165, |
|
"objective": 0.015583300963044167, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.015581325627863407, |
|
"step": 1510 |
|
}, |
|
{ |
|
"dpo_loss": 0.6928814053535461, |
|
"epoch": 1.431383355641288, |
|
"grad_norm": 66.06160630988346, |
|
"learning_rate": 2.263441939010586e-08, |
|
"logits": -2.205580234527588, |
|
"logps": -94.78508758544922, |
|
"loss": 0.0176, |
|
"objective": 0.020189667120575905, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.020186880603432655, |
|
"step": 1515 |
|
}, |
|
{ |
|
"dpo_loss": 0.6897247433662415, |
|
"epoch": 1.436107393118652, |
|
"grad_norm": 64.2991321127621, |
|
"learning_rate": 2.2290122213815603e-08, |
|
"logits": -2.1570937633514404, |
|
"logps": -93.27649688720703, |
|
"loss": 0.0151, |
|
"objective": 0.015933455899357796, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.01593177765607834, |
|
"step": 1520 |
|
}, |
|
{ |
|
"dpo_loss": 0.6920062899589539, |
|
"epoch": 1.440831430596016, |
|
"grad_norm": 82.55367392818718, |
|
"learning_rate": 2.194771102156456e-08, |
|
"logits": -2.079338550567627, |
|
"logps": -90.78583526611328, |
|
"loss": 0.0161, |
|
"objective": 0.012961748987436295, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.6000000238418579, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.012958657927811146, |
|
"step": 1525 |
|
}, |
|
{ |
|
"dpo_loss": 0.6941262483596802, |
|
"epoch": 1.44555546807338, |
|
"grad_norm": 64.16042224652975, |
|
"learning_rate": 2.1607209118473314e-08, |
|
"logits": -2.1198713779449463, |
|
"logps": -92.36068725585938, |
|
"loss": 0.0175, |
|
"objective": 0.013944637961685658, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.46666666865348816, |
|
"regularize": 0.013943412341177464, |
|
"step": 1530 |
|
}, |
|
{ |
|
"dpo_loss": 0.6908197999000549, |
|
"epoch": 1.450279505550744, |
|
"grad_norm": 73.78678695774653, |
|
"learning_rate": 2.1268639679712813e-08, |
|
"logits": -2.197909355163574, |
|
"logps": -94.17871856689453, |
|
"loss": 0.0175, |
|
"objective": 0.020089728757739067, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.020088987424969673, |
|
"step": 1535 |
|
}, |
|
{ |
|
"dpo_loss": 0.6900218725204468, |
|
"epoch": 1.455003543028108, |
|
"grad_norm": 60.95051574524286, |
|
"learning_rate": 2.0932025748927014e-08, |
|
"logits": -2.0492825508117676, |
|
"logps": -90.19515228271484, |
|
"loss": 0.0156, |
|
"objective": 0.01438401360064745, |
|
"ranking_idealized": 0.6833333373069763, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.014382914640009403, |
|
"step": 1540 |
|
}, |
|
{ |
|
"dpo_loss": 0.6942049264907837, |
|
"epoch": 1.4597275805054721, |
|
"grad_norm": 64.3944580161131, |
|
"learning_rate": 2.0597390236664474e-08, |
|
"logits": -2.1208300590515137, |
|
"logps": -92.43262481689453, |
|
"loss": 0.0188, |
|
"objective": 0.02223369851708412, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.4000000059604645, |
|
"ranking_simple": 0.4000000059604645, |
|
"regularize": 0.022232500836253166, |
|
"step": 1545 |
|
}, |
|
{ |
|
"dpo_loss": 0.6913332343101501, |
|
"epoch": 1.464451617982836, |
|
"grad_norm": 65.89315791512014, |
|
"learning_rate": 2.026475591881906e-08, |
|
"logits": -2.080505609512329, |
|
"logps": -91.72013092041016, |
|
"loss": 0.0164, |
|
"objective": 0.015918172895908356, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.01591550186276436, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.464451617982836, |
|
"eval_dpo_loss": 0.6928287148475647, |
|
"eval_logits": -1.9898428916931152, |
|
"eval_logps": -98.48872375488281, |
|
"eval_loss": 0.009991789236664772, |
|
"eval_objective": 0.010131197050213814, |
|
"eval_ranking_idealized": 0.6022099256515503, |
|
"eval_ranking_idealized_expo": 0.5207182168960571, |
|
"eval_ranking_simple": 0.5179557800292969, |
|
"eval_regularize": 0.010128886438906193, |
|
"eval_runtime": 446.2677, |
|
"eval_samples_per_second": 12.974, |
|
"eval_steps_per_second": 3.245, |
|
"step": 1550 |
|
}, |
|
{ |
|
"dpo_loss": 0.6896055936813354, |
|
"epoch": 1.4691756554602, |
|
"grad_norm": 72.03048326532462, |
|
"learning_rate": 1.9934145435079702e-08, |
|
"logits": -2.2838551998138428, |
|
"logps": -92.67180633544922, |
|
"loss": 0.0163, |
|
"objective": 0.01897108368575573, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.6000000238418579, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.018967188894748688, |
|
"step": 1555 |
|
}, |
|
{ |
|
"dpo_loss": 0.6906958222389221, |
|
"epoch": 1.473899692937564, |
|
"grad_norm": 73.82372228393551, |
|
"learning_rate": 1.9605581287389632e-08, |
|
"logits": -2.126072645187378, |
|
"logps": -91.18004608154297, |
|
"loss": 0.017, |
|
"objective": 0.01644645445048809, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.4166666567325592, |
|
"ranking_simple": 0.4166666567325592, |
|
"regularize": 0.01644515059888363, |
|
"step": 1560 |
|
}, |
|
{ |
|
"dpo_loss": 0.6889926791191101, |
|
"epoch": 1.478623730414928, |
|
"grad_norm": 64.09272915756699, |
|
"learning_rate": 1.92790858384147e-08, |
|
"logits": -2.13236927986145, |
|
"logps": -92.78182983398438, |
|
"loss": 0.0154, |
|
"objective": 0.016674092039465904, |
|
"ranking_idealized": 0.6833333373069763, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.01667255535721779, |
|
"step": 1565 |
|
}, |
|
{ |
|
"dpo_loss": 0.6913832426071167, |
|
"epoch": 1.483347767892292, |
|
"grad_norm": 66.43479163914742, |
|
"learning_rate": 1.895468131002143e-08, |
|
"logits": -2.0496039390563965, |
|
"logps": -92.02823638916016, |
|
"loss": 0.0135, |
|
"objective": 0.014378294348716736, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.014377345331013203, |
|
"step": 1570 |
|
}, |
|
{ |
|
"dpo_loss": 0.6909292340278625, |
|
"epoch": 1.488071805369656, |
|
"grad_norm": 74.75415511259118, |
|
"learning_rate": 1.863238978176455e-08, |
|
"logits": -2.2580831050872803, |
|
"logps": -91.48379516601562, |
|
"loss": 0.014, |
|
"objective": 0.014301776885986328, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.014300725422799587, |
|
"step": 1575 |
|
}, |
|
{ |
|
"dpo_loss": 0.6893575191497803, |
|
"epoch": 1.4927958428470198, |
|
"grad_norm": 58.43183435208023, |
|
"learning_rate": 1.831223318938419e-08, |
|
"logits": -2.16597843170166, |
|
"logps": -93.69886016845703, |
|
"loss": 0.0152, |
|
"objective": 0.014309495687484741, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.46666666865348816, |
|
"regularize": 0.014308045618236065, |
|
"step": 1580 |
|
}, |
|
{ |
|
"dpo_loss": 0.6888726949691772, |
|
"epoch": 1.4975198803243839, |
|
"grad_norm": 73.21921475273949, |
|
"learning_rate": 1.7994233323312913e-08, |
|
"logits": -2.2151682376861572, |
|
"logps": -89.77079772949219, |
|
"loss": 0.0151, |
|
"objective": 0.017258943989872932, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.01725638099014759, |
|
"step": 1585 |
|
}, |
|
{ |
|
"dpo_loss": 0.6907024383544922, |
|
"epoch": 1.502243917801748, |
|
"grad_norm": 63.078892059144785, |
|
"learning_rate": 1.767841182719262e-08, |
|
"logits": -2.1972243785858154, |
|
"logps": -95.78870391845703, |
|
"loss": 0.0132, |
|
"objective": 0.011403498239815235, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.011402006261050701, |
|
"step": 1590 |
|
}, |
|
{ |
|
"dpo_loss": 0.690780520439148, |
|
"epoch": 1.5069679552791118, |
|
"grad_norm": 72.75847192003988, |
|
"learning_rate": 1.7364790196401436e-08, |
|
"logits": -2.1997883319854736, |
|
"logps": -92.46896362304688, |
|
"loss": 0.0157, |
|
"objective": 0.017867466434836388, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.017865275964140892, |
|
"step": 1595 |
|
}, |
|
{ |
|
"dpo_loss": 0.6890024542808533, |
|
"epoch": 1.511691992756476, |
|
"grad_norm": 88.44042595685, |
|
"learning_rate": 1.705338977659071e-08, |
|
"logits": -2.1227810382843018, |
|
"logps": -93.08589172363281, |
|
"loss": 0.0165, |
|
"objective": 0.013732160441577435, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.013729160651564598, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.511691992756476, |
|
"eval_dpo_loss": 0.6928747296333313, |
|
"eval_logits": -1.9891741275787354, |
|
"eval_logps": -98.44180297851562, |
|
"eval_loss": 0.009673803113400936, |
|
"eval_objective": 0.009624399244785309, |
|
"eval_ranking_idealized": 0.6022099256515503, |
|
"eval_ranking_idealized_expo": 0.5207182168960571, |
|
"eval_ranking_simple": 0.5186464190483093, |
|
"eval_regularize": 0.009622092358767986, |
|
"eval_runtime": 451.0127, |
|
"eval_samples_per_second": 12.838, |
|
"eval_steps_per_second": 3.211, |
|
"step": 1600 |
|
}, |
|
{ |
|
"dpo_loss": 0.6902530193328857, |
|
"epoch": 1.5164160302338399, |
|
"grad_norm": 71.99972413373813, |
|
"learning_rate": 1.6744231762232176e-08, |
|
"logits": -2.190404176712036, |
|
"logps": -92.08161163330078, |
|
"loss": 0.0141, |
|
"objective": 0.013264096342027187, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.46666666865348816, |
|
"regularize": 0.013262915425002575, |
|
"step": 1605 |
|
}, |
|
{ |
|
"dpo_loss": 0.6898870468139648, |
|
"epoch": 1.5211400677112037, |
|
"grad_norm": 74.10537546046199, |
|
"learning_rate": 1.6437337195175428e-08, |
|
"logits": -2.19018816947937, |
|
"logps": -92.24858856201172, |
|
"loss": 0.0144, |
|
"objective": 0.014542266726493835, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.014540906064212322, |
|
"step": 1610 |
|
}, |
|
{ |
|
"dpo_loss": 0.6916466951370239, |
|
"epoch": 1.525864105188568, |
|
"grad_norm": 58.38920752518266, |
|
"learning_rate": 1.613272696321576e-08, |
|
"logits": -2.136929750442505, |
|
"logps": -92.16728973388672, |
|
"loss": 0.0139, |
|
"objective": 0.015932830050587654, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.6000000238418579, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.015931693837046623, |
|
"step": 1615 |
|
}, |
|
{ |
|
"dpo_loss": 0.6908102631568909, |
|
"epoch": 1.5305881426659318, |
|
"grad_norm": 63.00313504444051, |
|
"learning_rate": 1.5830421798672566e-08, |
|
"logits": -2.118234872817993, |
|
"logps": -90.81103515625, |
|
"loss": 0.0158, |
|
"objective": 0.013052166439592838, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.46666666865348816, |
|
"regularize": 0.013051275163888931, |
|
"step": 1620 |
|
}, |
|
{ |
|
"dpo_loss": 0.6895550489425659, |
|
"epoch": 1.5353121801432958, |
|
"grad_norm": 58.07839320165127, |
|
"learning_rate": 1.5530442276978155e-08, |
|
"logits": -2.139536142349243, |
|
"logps": -90.25867462158203, |
|
"loss": 0.0148, |
|
"objective": 0.014852885156869888, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.014851867221295834, |
|
"step": 1625 |
|
}, |
|
{ |
|
"dpo_loss": 0.6881382465362549, |
|
"epoch": 1.54003621762066, |
|
"grad_norm": 68.39570526618726, |
|
"learning_rate": 1.523280881527743e-08, |
|
"logits": -2.058195114135742, |
|
"logps": -92.33999633789062, |
|
"loss": 0.0153, |
|
"objective": 0.018310803920030594, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.018308250233530998, |
|
"step": 1630 |
|
}, |
|
{ |
|
"dpo_loss": 0.6918675303459167, |
|
"epoch": 1.5447602550980237, |
|
"grad_norm": 71.28198542563501, |
|
"learning_rate": 1.4937541671038245e-08, |
|
"logits": -2.0989344120025635, |
|
"logps": -94.7214584350586, |
|
"loss": 0.0141, |
|
"objective": 0.016751030460000038, |
|
"ranking_idealized": 0.699999988079071, |
|
"ranking_idealized_expo": 0.6333333253860474, |
|
"ranking_simple": 0.6333333253860474, |
|
"regularize": 0.016748782247304916, |
|
"step": 1635 |
|
}, |
|
{ |
|
"dpo_loss": 0.6916669011116028, |
|
"epoch": 1.5494842925753878, |
|
"grad_norm": 55.738628042601434, |
|
"learning_rate": 1.4644660940672625e-08, |
|
"logits": -2.154735803604126, |
|
"logps": -93.92298126220703, |
|
"loss": 0.0156, |
|
"objective": 0.012084761634469032, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.46666666865348816, |
|
"regularize": 0.01208337489515543, |
|
"step": 1640 |
|
}, |
|
{ |
|
"dpo_loss": 0.6894612908363342, |
|
"epoch": 1.5542083300527518, |
|
"grad_norm": 62.99549337373731, |
|
"learning_rate": 1.435418655816899e-08, |
|
"logits": -2.169052839279175, |
|
"logps": -91.41073608398438, |
|
"loss": 0.0143, |
|
"objective": 0.014856117777526379, |
|
"ranking_idealized": 0.4833333194255829, |
|
"ranking_idealized_expo": 0.44999998807907104, |
|
"ranking_simple": 0.44999998807907104, |
|
"regularize": 0.014855272136628628, |
|
"step": 1645 |
|
}, |
|
{ |
|
"dpo_loss": 0.6909356713294983, |
|
"epoch": 1.5589323675301157, |
|
"grad_norm": 67.0305140958766, |
|
"learning_rate": 1.4066138293735408e-08, |
|
"logits": -2.098741292953491, |
|
"logps": -92.3939208984375, |
|
"loss": 0.0128, |
|
"objective": 0.012405160814523697, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.012403340078890324, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.5589323675301157, |
|
"eval_dpo_loss": 0.6927416324615479, |
|
"eval_logits": -1.9889006614685059, |
|
"eval_logps": -98.3604736328125, |
|
"eval_loss": 0.010028412565588951, |
|
"eval_objective": 0.010088582523167133, |
|
"eval_ranking_idealized": 0.6022099256515503, |
|
"eval_ranking_idealized_expo": 0.5207182168960571, |
|
"eval_ranking_simple": 0.5179557800292969, |
|
"eval_regularize": 0.010086326859891415, |
|
"eval_runtime": 446.4522, |
|
"eval_samples_per_second": 12.969, |
|
"eval_steps_per_second": 3.243, |
|
"step": 1650 |
|
}, |
|
{ |
|
"dpo_loss": 0.6893053650856018, |
|
"epoch": 1.5636564050074797, |
|
"grad_norm": 63.18957665484938, |
|
"learning_rate": 1.3780535752453976e-08, |
|
"logits": -2.115309000015259, |
|
"logps": -92.01270294189453, |
|
"loss": 0.016, |
|
"objective": 0.01488524954766035, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.014881560578942299, |
|
"step": 1655 |
|
}, |
|
{ |
|
"dpo_loss": 0.6926845908164978, |
|
"epoch": 1.5683804424848438, |
|
"grad_norm": 58.824443635483924, |
|
"learning_rate": 1.34973983729465e-08, |
|
"logits": -2.0248100757598877, |
|
"logps": -91.5145263671875, |
|
"loss": 0.0155, |
|
"objective": 0.016196589916944504, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 0.01619427278637886, |
|
"step": 1660 |
|
}, |
|
{ |
|
"dpo_loss": 0.6904213428497314, |
|
"epoch": 1.5731044799622076, |
|
"grad_norm": 62.30456762761477, |
|
"learning_rate": 1.3216745426051451e-08, |
|
"logits": -2.12634015083313, |
|
"logps": -93.39350891113281, |
|
"loss": 0.0136, |
|
"objective": 0.012430812232196331, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.012427231296896935, |
|
"step": 1665 |
|
}, |
|
{ |
|
"dpo_loss": 0.6912954449653625, |
|
"epoch": 1.5778285174395716, |
|
"grad_norm": 69.75551627833396, |
|
"learning_rate": 1.293859601351232e-08, |
|
"logits": -2.253553867340088, |
|
"logps": -94.33558654785156, |
|
"loss": 0.0154, |
|
"objective": 0.015208045952022076, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.015205765143036842, |
|
"step": 1670 |
|
}, |
|
{ |
|
"dpo_loss": 0.6908196806907654, |
|
"epoch": 1.5825525549169357, |
|
"grad_norm": 66.72338681583422, |
|
"learning_rate": 1.266296906667762e-08, |
|
"logits": -2.232628107070923, |
|
"logps": -92.84732818603516, |
|
"loss": 0.0154, |
|
"objective": 0.014128237962722778, |
|
"ranking_idealized": 0.4166666567325592, |
|
"ranking_idealized_expo": 0.4166666567325592, |
|
"ranking_simple": 0.4166666567325592, |
|
"regularize": 0.014126955531537533, |
|
"step": 1675 |
|
}, |
|
{ |
|
"dpo_loss": 0.6899545192718506, |
|
"epoch": 1.5872765923942995, |
|
"grad_norm": 63.2891144846116, |
|
"learning_rate": 1.238988334521226e-08, |
|
"logits": -2.1890323162078857, |
|
"logps": -95.2237777709961, |
|
"loss": 0.0152, |
|
"objective": 0.014830714091658592, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 0.014829723164439201, |
|
"step": 1680 |
|
}, |
|
{ |
|
"dpo_loss": 0.6891339421272278, |
|
"epoch": 1.5920006298716638, |
|
"grad_norm": 59.6793164031309, |
|
"learning_rate": 1.2119357435820816e-08, |
|
"logits": -2.1683380603790283, |
|
"logps": -89.5069580078125, |
|
"loss": 0.0118, |
|
"objective": 0.01023172028362751, |
|
"ranking_idealized": 0.46666666865348816, |
|
"ranking_idealized_expo": 0.3166666626930237, |
|
"ranking_simple": 0.3166666626930237, |
|
"regularize": 0.010230082087218761, |
|
"step": 1685 |
|
}, |
|
{ |
|
"dpo_loss": 0.6914657354354858, |
|
"epoch": 1.5967246673490276, |
|
"grad_norm": 61.16970720231615, |
|
"learning_rate": 1.1851409750982438e-08, |
|
"logits": -2.128115653991699, |
|
"logps": -92.26956176757812, |
|
"loss": 0.013, |
|
"objective": 0.013722400180995464, |
|
"ranking_idealized": 0.699999988079071, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.01371851097792387, |
|
"step": 1690 |
|
}, |
|
{ |
|
"dpo_loss": 0.6910730600357056, |
|
"epoch": 1.6014487048263917, |
|
"grad_norm": 70.19949003863668, |
|
"learning_rate": 1.1586058527697707e-08, |
|
"logits": -2.1930956840515137, |
|
"logps": -91.90154266357422, |
|
"loss": 0.0136, |
|
"objective": 0.01538047008216381, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.6000000238418579, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.015379803255200386, |
|
"step": 1695 |
|
}, |
|
{ |
|
"dpo_loss": 0.6901703476905823, |
|
"epoch": 1.6061727423037557, |
|
"grad_norm": 68.99239991969064, |
|
"learning_rate": 1.1323321826247346e-08, |
|
"logits": -2.1464996337890625, |
|
"logps": -93.59435272216797, |
|
"loss": 0.0132, |
|
"objective": 0.014362351037561893, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.014360878616571426, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.6061727423037557, |
|
"eval_dpo_loss": 0.6927831768989563, |
|
"eval_logits": -1.9890520572662354, |
|
"eval_logps": -98.40552520751953, |
|
"eval_loss": 0.008960912004113197, |
|
"eval_objective": 0.008905092254281044, |
|
"eval_ranking_idealized": 0.6022099256515503, |
|
"eval_ranking_idealized_expo": 0.5207182168960571, |
|
"eval_ranking_simple": 0.5179557800292969, |
|
"eval_regularize": 0.008902397006750107, |
|
"eval_runtime": 446.3386, |
|
"eval_samples_per_second": 12.972, |
|
"eval_steps_per_second": 3.244, |
|
"step": 1700 |
|
}, |
|
{ |
|
"dpo_loss": 0.6905626654624939, |
|
"epoch": 1.6108967797811196, |
|
"grad_norm": 61.071862990680025, |
|
"learning_rate": 1.1063217528963042e-08, |
|
"logits": -2.1819908618927, |
|
"logps": -95.61785125732422, |
|
"loss": 0.0138, |
|
"objective": 0.014233703725039959, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.014231013134121895, |
|
"step": 1705 |
|
}, |
|
{ |
|
"dpo_loss": 0.6933135986328125, |
|
"epoch": 1.6156208172584836, |
|
"grad_norm": 65.29758946742726, |
|
"learning_rate": 1.0805763339010326e-08, |
|
"logits": -2.13582444190979, |
|
"logps": -92.55400848388672, |
|
"loss": 0.0135, |
|
"objective": 0.014589487574994564, |
|
"ranking_idealized": 0.6833333373069763, |
|
"ranking_idealized_expo": 0.6499999761581421, |
|
"ranking_simple": 0.6499999761581421, |
|
"regularize": 0.014586606994271278, |
|
"step": 1710 |
|
}, |
|
{ |
|
"dpo_loss": 0.6906417608261108, |
|
"epoch": 1.6203448547358477, |
|
"grad_norm": 65.78620291334798, |
|
"learning_rate": 1.0550976779183651e-08, |
|
"logits": -2.1200928688049316, |
|
"logps": -95.07630920410156, |
|
"loss": 0.0136, |
|
"objective": 0.014620447531342506, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.014618877321481705, |
|
"step": 1715 |
|
}, |
|
{ |
|
"dpo_loss": 0.6903366446495056, |
|
"epoch": 1.6250688922132115, |
|
"grad_norm": 61.420985867018985, |
|
"learning_rate": 1.02988751907138e-08, |
|
"logits": -2.196997880935669, |
|
"logps": -89.81111145019531, |
|
"loss": 0.0124, |
|
"objective": 0.01165497861802578, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.011652585119009018, |
|
"step": 1720 |
|
}, |
|
{ |
|
"dpo_loss": 0.6910706758499146, |
|
"epoch": 1.6297929296905755, |
|
"grad_norm": 69.65328151335441, |
|
"learning_rate": 1.0049475732087559e-08, |
|
"logits": -2.1551551818847656, |
|
"logps": -92.8335952758789, |
|
"loss": 0.0137, |
|
"objective": 0.011478899046778679, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.0114761833101511, |
|
"step": 1725 |
|
}, |
|
{ |
|
"dpo_loss": 0.6897426247596741, |
|
"epoch": 1.6345169671679396, |
|
"grad_norm": 70.43018567128607, |
|
"learning_rate": 9.802795377879903e-09, |
|
"logits": -2.243594169616699, |
|
"logps": -91.26370239257812, |
|
"loss": 0.0128, |
|
"objective": 0.012597540393471718, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.012595159001648426, |
|
"step": 1730 |
|
}, |
|
{ |
|
"dpo_loss": 0.6892535090446472, |
|
"epoch": 1.6392410046453034, |
|
"grad_norm": 63.82988388608345, |
|
"learning_rate": 9.558850917598716e-09, |
|
"logits": -2.070333957672119, |
|
"logps": -92.88044738769531, |
|
"loss": 0.0123, |
|
"objective": 0.011261907406151295, |
|
"ranking_idealized": 0.5166666507720947, |
|
"ranking_idealized_expo": 0.4333333373069763, |
|
"ranking_simple": 0.44999998807907104, |
|
"regularize": 0.011259406805038452, |
|
"step": 1735 |
|
}, |
|
{ |
|
"dpo_loss": 0.6904967427253723, |
|
"epoch": 1.6439650421226675, |
|
"grad_norm": 57.01783294970417, |
|
"learning_rate": 9.31765895454199e-09, |
|
"logits": -2.178701877593994, |
|
"logps": -92.82861328125, |
|
"loss": 0.0128, |
|
"objective": 0.013563827611505985, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.013562088832259178, |
|
"step": 1740 |
|
}, |
|
{ |
|
"dpo_loss": 0.6911302208900452, |
|
"epoch": 1.6486890796000315, |
|
"grad_norm": 67.14492805040307, |
|
"learning_rate": 9.079235904667825e-09, |
|
"logits": -2.160048484802246, |
|
"logps": -97.07341766357422, |
|
"loss": 0.0132, |
|
"objective": 0.011855502612888813, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.011852098628878593, |
|
"step": 1745 |
|
}, |
|
{ |
|
"dpo_loss": 0.6899723410606384, |
|
"epoch": 1.6534131170773954, |
|
"grad_norm": 61.803443023637456, |
|
"learning_rate": 8.84359799547712e-09, |
|
"logits": -2.1978349685668945, |
|
"logps": -92.97909545898438, |
|
"loss": 0.0133, |
|
"objective": 0.01489060465246439, |
|
"ranking_idealized": 0.5166666507720947, |
|
"ranking_idealized_expo": 0.44999998807907104, |
|
"ranking_simple": 0.44999998807907104, |
|
"regularize": 0.014889102429151535, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.6534131170773954, |
|
"eval_dpo_loss": 0.6928050518035889, |
|
"eval_logits": -1.988478183746338, |
|
"eval_logps": -98.41736602783203, |
|
"eval_loss": 0.009351465851068497, |
|
"eval_objective": 0.009363526478409767, |
|
"eval_ranking_idealized": 0.6022099256515503, |
|
"eval_ranking_idealized_expo": 0.5207182168960571, |
|
"eval_ranking_simple": 0.5179557800292969, |
|
"eval_regularize": 0.009361328557133675, |
|
"eval_runtime": 445.0354, |
|
"eval_samples_per_second": 13.01, |
|
"eval_steps_per_second": 3.254, |
|
"step": 1750 |
|
}, |
|
{ |
|
"dpo_loss": 0.6889625787734985, |
|
"epoch": 1.6581371545547596, |
|
"grad_norm": 69.19823709150691, |
|
"learning_rate": 8.6107612649091e-09, |
|
"logits": -2.17798113822937, |
|
"logps": -90.93826293945312, |
|
"loss": 0.0136, |
|
"objective": 0.014197876676917076, |
|
"ranking_idealized": 0.5166666507720947, |
|
"ranking_idealized_expo": 0.44999998807907104, |
|
"ranking_simple": 0.4333333373069763, |
|
"regularize": 0.014196816831827164, |
|
"step": 1755 |
|
}, |
|
{ |
|
"dpo_loss": 0.6894813776016235, |
|
"epoch": 1.6628611920321235, |
|
"grad_norm": 70.33904785688647, |
|
"learning_rate": 8.380741560249726e-09, |
|
"logits": -2.2228808403015137, |
|
"logps": -91.38529205322266, |
|
"loss": 0.0136, |
|
"objective": 0.0157302338629961, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.015728596597909927, |
|
"step": 1760 |
|
}, |
|
{ |
|
"dpo_loss": 0.6904619336128235, |
|
"epoch": 1.6675852295094873, |
|
"grad_norm": 66.62367568306153, |
|
"learning_rate": 8.153554537053149e-09, |
|
"logits": -2.1843998432159424, |
|
"logps": -91.0223159790039, |
|
"loss": 0.0144, |
|
"objective": 0.012193134985864162, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.46666666865348816, |
|
"regularize": 0.01219152845442295, |
|
"step": 1765 |
|
}, |
|
{ |
|
"dpo_loss": 0.691467821598053, |
|
"epoch": 1.6723092669868516, |
|
"grad_norm": 63.63343895864985, |
|
"learning_rate": 7.929215658076093e-09, |
|
"logits": -2.2059359550476074, |
|
"logps": -92.13400268554688, |
|
"loss": 0.0114, |
|
"objective": 0.01266545057296753, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.012664331123232841, |
|
"step": 1770 |
|
}, |
|
{ |
|
"dpo_loss": 0.6895557045936584, |
|
"epoch": 1.6770333044642154, |
|
"grad_norm": 61.16013319033806, |
|
"learning_rate": 7.707740192225515e-09, |
|
"logits": -2.257197380065918, |
|
"logps": -93.42992401123047, |
|
"loss": 0.0128, |
|
"objective": 0.014360646717250347, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.6666666865348816, |
|
"ranking_simple": 0.6666666865348816, |
|
"regularize": 0.014359161257743835, |
|
"step": 1775 |
|
}, |
|
{ |
|
"dpo_loss": 0.691416323184967, |
|
"epoch": 1.6817573419415794, |
|
"grad_norm": 68.36063699136817, |
|
"learning_rate": 7.4891432135193e-09, |
|
"logits": -2.251347303390503, |
|
"logps": -91.27902221679688, |
|
"loss": 0.0141, |
|
"objective": 0.013033194467425346, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.013032082468271255, |
|
"step": 1780 |
|
}, |
|
{ |
|
"dpo_loss": 0.6915625929832458, |
|
"epoch": 1.6864813794189435, |
|
"grad_norm": 64.60918615534963, |
|
"learning_rate": 7.273439600060344e-09, |
|
"logits": -2.165839195251465, |
|
"logps": -93.43399810791016, |
|
"loss": 0.0158, |
|
"objective": 0.016496647149324417, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.016495179384946823, |
|
"step": 1785 |
|
}, |
|
{ |
|
"dpo_loss": 0.6900497078895569, |
|
"epoch": 1.6912054168963073, |
|
"grad_norm": 59.4845336366133, |
|
"learning_rate": 7.060644033023894e-09, |
|
"logits": -2.1421496868133545, |
|
"logps": -89.32416534423828, |
|
"loss": 0.0132, |
|
"objective": 0.011856761761009693, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.011854317970573902, |
|
"step": 1790 |
|
}, |
|
{ |
|
"dpo_loss": 0.6918495297431946, |
|
"epoch": 1.6959294543736714, |
|
"grad_norm": 62.10968860121513, |
|
"learning_rate": 6.850770995658372e-09, |
|
"logits": -2.145258665084839, |
|
"logps": -91.16167449951172, |
|
"loss": 0.0135, |
|
"objective": 0.011840968392789364, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.011839687824249268, |
|
"step": 1795 |
|
}, |
|
{ |
|
"dpo_loss": 0.6910142302513123, |
|
"epoch": 1.7006534918510354, |
|
"grad_norm": 63.84635783884053, |
|
"learning_rate": 6.6438347722995445e-09, |
|
"logits": -2.2468461990356445, |
|
"logps": -91.39185333251953, |
|
"loss": 0.0138, |
|
"objective": 0.010580122470855713, |
|
"ranking_idealized": 0.5, |
|
"ranking_idealized_expo": 0.44999998807907104, |
|
"ranking_simple": 0.44999998807907104, |
|
"regularize": 0.010576292872428894, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.7006534918510354, |
|
"eval_dpo_loss": 0.6928184628486633, |
|
"eval_logits": -1.9885612726211548, |
|
"eval_logps": -98.35978698730469, |
|
"eval_loss": 0.009589685127139091, |
|
"eval_objective": 0.009668215177953243, |
|
"eval_ranking_idealized": 0.6022099256515503, |
|
"eval_ranking_idealized_expo": 0.5207182168960571, |
|
"eval_ranking_simple": 0.5179557800292969, |
|
"eval_regularize": 0.00966621097177267, |
|
"eval_runtime": 448.9142, |
|
"eval_samples_per_second": 12.898, |
|
"eval_steps_per_second": 3.226, |
|
"step": 1800 |
|
}, |
|
{ |
|
"dpo_loss": 0.6905153393745422, |
|
"epoch": 1.7053775293283993, |
|
"grad_norm": 69.0367318972444, |
|
"learning_rate": 6.43984944739836e-09, |
|
"logits": -2.206124782562256, |
|
"logps": -93.37840270996094, |
|
"loss": 0.0133, |
|
"objective": 0.013736736960709095, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 0.013735477812588215, |
|
"step": 1805 |
|
}, |
|
{ |
|
"dpo_loss": 0.6886168718338013, |
|
"epoch": 1.7101015668057633, |
|
"grad_norm": 60.061121059879774, |
|
"learning_rate": 6.238828904562315e-09, |
|
"logits": -2.238112449645996, |
|
"logps": -93.0470199584961, |
|
"loss": 0.0146, |
|
"objective": 0.015197351574897766, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.6666666865348816, |
|
"ranking_simple": 0.6666666865348816, |
|
"regularize": 0.015195462852716446, |
|
"step": 1810 |
|
}, |
|
{ |
|
"dpo_loss": 0.6895704865455627, |
|
"epoch": 1.7148256042831274, |
|
"grad_norm": 62.370319050099795, |
|
"learning_rate": 6.040786825610517e-09, |
|
"logits": -2.18115234375, |
|
"logps": -92.52540588378906, |
|
"loss": 0.0127, |
|
"objective": 0.015414653345942497, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.015413584187626839, |
|
"step": 1815 |
|
}, |
|
{ |
|
"dpo_loss": 0.6913415193557739, |
|
"epoch": 1.7195496417604912, |
|
"grad_norm": 68.7151840413099, |
|
"learning_rate": 5.845736689642472e-09, |
|
"logits": -2.2472267150878906, |
|
"logps": -93.7915267944336, |
|
"loss": 0.015, |
|
"objective": 0.01595698669552803, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.01595553196966648, |
|
"step": 1820 |
|
}, |
|
{ |
|
"dpo_loss": 0.6893234848976135, |
|
"epoch": 1.7242736792378552, |
|
"grad_norm": 61.62237599260527, |
|
"learning_rate": 5.653691772120672e-09, |
|
"logits": -2.2202160358428955, |
|
"logps": -91.71192932128906, |
|
"loss": 0.0125, |
|
"objective": 0.013099589385092258, |
|
"ranking_idealized": 0.699999988079071, |
|
"ranking_idealized_expo": 0.6333333253860474, |
|
"ranking_simple": 0.6333333253860474, |
|
"regularize": 0.013098122552037239, |
|
"step": 1825 |
|
}, |
|
{ |
|
"dpo_loss": 0.6918686032295227, |
|
"epoch": 1.7289977167152193, |
|
"grad_norm": 73.26853232568251, |
|
"learning_rate": 5.464665143967051e-09, |
|
"logits": -2.095928907394409, |
|
"logps": -92.31403350830078, |
|
"loss": 0.0132, |
|
"objective": 0.014795850031077862, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.46666666865348816, |
|
"regularize": 0.01479416061192751, |
|
"step": 1830 |
|
}, |
|
{ |
|
"dpo_loss": 0.6898643374443054, |
|
"epoch": 1.7337217541925831, |
|
"grad_norm": 58.513985312428574, |
|
"learning_rate": 5.278669670673347e-09, |
|
"logits": -2.1115658283233643, |
|
"logps": -91.03611755371094, |
|
"loss": 0.0117, |
|
"objective": 0.0112331947311759, |
|
"ranking_idealized": 0.699999988079071, |
|
"ranking_idealized_expo": 0.6000000238418579, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.011231665499508381, |
|
"step": 1835 |
|
}, |
|
{ |
|
"dpo_loss": 0.6913903951644897, |
|
"epoch": 1.7384457916699474, |
|
"grad_norm": 63.11876863535166, |
|
"learning_rate": 5.095718011425454e-09, |
|
"logits": -2.168307304382324, |
|
"logps": -91.80027770996094, |
|
"loss": 0.0135, |
|
"objective": 0.014516009949147701, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.01451488770544529, |
|
"step": 1840 |
|
}, |
|
{ |
|
"dpo_loss": 0.689866304397583, |
|
"epoch": 1.7431698291473112, |
|
"grad_norm": 72.43548684342062, |
|
"learning_rate": 4.9158226182418104e-09, |
|
"logits": -2.1879196166992188, |
|
"logps": -88.85045623779297, |
|
"loss": 0.0151, |
|
"objective": 0.014930271543562412, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 0.014927973970770836, |
|
"step": 1845 |
|
}, |
|
{ |
|
"dpo_loss": 0.689470648765564, |
|
"epoch": 1.7478938666246753, |
|
"grad_norm": 65.9250612110063, |
|
"learning_rate": 4.738995735125894e-09, |
|
"logits": -2.074735164642334, |
|
"logps": -94.42037963867188, |
|
"loss": 0.0122, |
|
"objective": 0.011366092599928379, |
|
"ranking_idealized": 0.6833333373069763, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.011363506317138672, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.7478938666246753, |
|
"eval_dpo_loss": 0.6928740739822388, |
|
"eval_logits": -1.9887943267822266, |
|
"eval_logps": -98.4156723022461, |
|
"eval_loss": 0.009023972786962986, |
|
"eval_objective": 0.00906344410032034, |
|
"eval_ranking_idealized": 0.6022099256515503, |
|
"eval_ranking_idealized_expo": 0.5207182168960571, |
|
"eval_ranking_simple": 0.5179557800292969, |
|
"eval_regularize": 0.009061108343303204, |
|
"eval_runtime": 445.128, |
|
"eval_samples_per_second": 13.007, |
|
"eval_steps_per_second": 3.253, |
|
"step": 1850 |
|
}, |
|
{ |
|
"dpo_loss": 0.6925438046455383, |
|
"epoch": 1.7526179041020393, |
|
"grad_norm": 60.51205917887876, |
|
"learning_rate": 4.565249397232923e-09, |
|
"logits": -2.1302504539489746, |
|
"logps": -89.6479263305664, |
|
"loss": 0.0119, |
|
"objective": 0.012942561879754066, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.6333333253860474, |
|
"ranking_simple": 0.6333333253860474, |
|
"regularize": 0.012941225431859493, |
|
"step": 1855 |
|
}, |
|
{ |
|
"dpo_loss": 0.6919233798980713, |
|
"epoch": 1.7573419415794032, |
|
"grad_norm": 73.74409349566294, |
|
"learning_rate": 4.394595430050613e-09, |
|
"logits": -2.1312899589538574, |
|
"logps": -92.23778533935547, |
|
"loss": 0.0134, |
|
"objective": 0.013376330956816673, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.013374886475503445, |
|
"step": 1860 |
|
}, |
|
{ |
|
"dpo_loss": 0.6894384026527405, |
|
"epoch": 1.7620659790567672, |
|
"grad_norm": 62.88647078101833, |
|
"learning_rate": 4.2270454485944125e-09, |
|
"logits": -2.0558435916900635, |
|
"logps": -91.38388061523438, |
|
"loss": 0.0107, |
|
"objective": 0.010976298712193966, |
|
"ranking_idealized": 0.7666666507720947, |
|
"ranking_idealized_expo": 0.6666666865348816, |
|
"ranking_simple": 0.6666666865348816, |
|
"regularize": 0.010971426963806152, |
|
"step": 1865 |
|
}, |
|
{ |
|
"dpo_loss": 0.6887921094894409, |
|
"epoch": 1.7667900165341313, |
|
"grad_norm": 87.932062525531, |
|
"learning_rate": 4.062610856616922e-09, |
|
"logits": -2.1452383995056152, |
|
"logps": -92.77478790283203, |
|
"loss": 0.013, |
|
"objective": 0.017829876393079758, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.44999998807907104, |
|
"ranking_simple": 0.44999998807907104, |
|
"regularize": 0.017827244475483894, |
|
"step": 1870 |
|
}, |
|
{ |
|
"dpo_loss": 0.6901695132255554, |
|
"epoch": 1.771514054011495, |
|
"grad_norm": 59.56034569340847, |
|
"learning_rate": 3.901302845831728e-09, |
|
"logits": -2.1918883323669434, |
|
"logps": -91.66542053222656, |
|
"loss": 0.0103, |
|
"objective": 0.00952277984470129, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.009518155828118324, |
|
"step": 1875 |
|
}, |
|
{ |
|
"dpo_loss": 0.6916844844818115, |
|
"epoch": 1.7762380914888591, |
|
"grad_norm": 59.31587765757665, |
|
"learning_rate": 3.743132395151705e-09, |
|
"logits": -2.1958460807800293, |
|
"logps": -92.2247085571289, |
|
"loss": 0.0132, |
|
"objective": 0.014002182520925999, |
|
"ranking_idealized": 0.5166666507720947, |
|
"ranking_idealized_expo": 0.44999998807907104, |
|
"ranking_simple": 0.44999998807907104, |
|
"regularize": 0.014000624418258667, |
|
"step": 1880 |
|
}, |
|
{ |
|
"dpo_loss": 0.6910093426704407, |
|
"epoch": 1.7809621289662232, |
|
"grad_norm": 65.01445310326804, |
|
"learning_rate": 3.5881102699417463e-09, |
|
"logits": -2.15521240234375, |
|
"logps": -92.9655990600586, |
|
"loss": 0.0122, |
|
"objective": 0.013613136485219002, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.013612000271677971, |
|
"step": 1885 |
|
}, |
|
{ |
|
"dpo_loss": 0.688242495059967, |
|
"epoch": 1.785686166443587, |
|
"grad_norm": 68.21241149596634, |
|
"learning_rate": 3.4362470212860483e-09, |
|
"logits": -2.2397677898406982, |
|
"logps": -92.33454132080078, |
|
"loss": 0.0101, |
|
"objective": 0.011641601100564003, |
|
"ranking_idealized": 0.6833333373069763, |
|
"ranking_idealized_expo": 0.6000000238418579, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.011638239957392216, |
|
"step": 1890 |
|
}, |
|
{ |
|
"dpo_loss": 0.691038966178894, |
|
"epoch": 1.790410203920951, |
|
"grad_norm": 68.20823651098704, |
|
"learning_rate": 3.2875529852700148e-09, |
|
"logits": -2.2314653396606445, |
|
"logps": -86.68601989746094, |
|
"loss": 0.0131, |
|
"objective": 0.01244689617305994, |
|
"ranking_idealized": 0.5, |
|
"ranking_idealized_expo": 0.4333333373069763, |
|
"ranking_simple": 0.4333333373069763, |
|
"regularize": 0.012444679625332355, |
|
"step": 1895 |
|
}, |
|
{ |
|
"dpo_loss": 0.6882398128509521, |
|
"epoch": 1.7951342413983151, |
|
"grad_norm": 74.00426423645611, |
|
"learning_rate": 3.142038282276732e-09, |
|
"logits": -2.1504063606262207, |
|
"logps": -92.22313690185547, |
|
"loss": 0.0128, |
|
"objective": 0.013669600710272789, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.013666595332324505, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.7951342413983151, |
|
"eval_dpo_loss": 0.6928610801696777, |
|
"eval_logits": -1.9891064167022705, |
|
"eval_logps": -98.42906951904297, |
|
"eval_loss": 0.008886425755918026, |
|
"eval_objective": 0.0089500043541193, |
|
"eval_ranking_idealized": 0.6022099256515503, |
|
"eval_ranking_idealized_expo": 0.5207182168960571, |
|
"eval_ranking_simple": 0.5179557800292969, |
|
"eval_regularize": 0.008947666734457016, |
|
"eval_runtime": 445.1268, |
|
"eval_samples_per_second": 13.008, |
|
"eval_steps_per_second": 3.253, |
|
"step": 1900 |
|
}, |
|
{ |
|
"dpo_loss": 0.6886274218559265, |
|
"epoch": 1.799858278875679, |
|
"grad_norm": 70.48280427657417, |
|
"learning_rate": 2.9997128162981835e-09, |
|
"logits": -2.061340808868408, |
|
"logps": -92.88438415527344, |
|
"loss": 0.0124, |
|
"objective": 0.011484592221677303, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.01148195844143629, |
|
"step": 1905 |
|
}, |
|
{ |
|
"dpo_loss": 0.6910132169723511, |
|
"epoch": 1.8045823163530432, |
|
"grad_norm": 69.16766112778924, |
|
"learning_rate": 2.8605862742611453e-09, |
|
"logits": -2.2064709663391113, |
|
"logps": -93.21188354492188, |
|
"loss": 0.0131, |
|
"objective": 0.013887956738471985, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.013886776752769947, |
|
"step": 1910 |
|
}, |
|
{ |
|
"dpo_loss": 0.6885823011398315, |
|
"epoch": 1.809306353830407, |
|
"grad_norm": 60.392467741599056, |
|
"learning_rate": 2.724668125367896e-09, |
|
"logits": -2.063930034637451, |
|
"logps": -93.89399719238281, |
|
"loss": 0.0118, |
|
"objective": 0.012090322561562061, |
|
"ranking_idealized": 0.5, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.46666666865348816, |
|
"regularize": 0.012088621035218239, |
|
"step": 1915 |
|
}, |
|
{ |
|
"dpo_loss": 0.691336452960968, |
|
"epoch": 1.8140303913077709, |
|
"grad_norm": 64.14689736170588, |
|
"learning_rate": 2.591967620451707e-09, |
|
"logits": -2.2195615768432617, |
|
"logps": -95.23002624511719, |
|
"loss": 0.0126, |
|
"objective": 0.011388556100428104, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.011385311372578144, |
|
"step": 1920 |
|
}, |
|
{ |
|
"dpo_loss": 0.6888495683670044, |
|
"epoch": 1.8187544287851352, |
|
"grad_norm": 61.363679552810055, |
|
"learning_rate": 2.462493791347231e-09, |
|
"logits": -2.1906003952026367, |
|
"logps": -93.10894012451172, |
|
"loss": 0.0109, |
|
"objective": 0.012360634282231331, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.012358280830085278, |
|
"step": 1925 |
|
}, |
|
{ |
|
"dpo_loss": 0.6920725107192993, |
|
"epoch": 1.823478466262499, |
|
"grad_norm": 62.3361060804096, |
|
"learning_rate": 2.3362554502757536e-09, |
|
"logits": -2.174255132675171, |
|
"logps": -91.97770690917969, |
|
"loss": 0.0133, |
|
"objective": 0.0109206298366189, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.010918223299086094, |
|
"step": 1930 |
|
}, |
|
{ |
|
"dpo_loss": 0.6925593614578247, |
|
"epoch": 1.828202503739863, |
|
"grad_norm": 62.11529994850697, |
|
"learning_rate": 2.213261189245458e-09, |
|
"logits": -2.0892937183380127, |
|
"logps": -95.28164672851562, |
|
"loss": 0.0128, |
|
"objective": 0.0141153484582901, |
|
"ranking_idealized": 0.7166666388511658, |
|
"ranking_idealized_expo": 0.6166666746139526, |
|
"ranking_simple": 0.6333333253860474, |
|
"regularize": 0.01411362923681736, |
|
"step": 1935 |
|
}, |
|
{ |
|
"dpo_loss": 0.6907196044921875, |
|
"epoch": 1.832926541217227, |
|
"grad_norm": 75.31073605655762, |
|
"learning_rate": 2.093519379466602e-09, |
|
"logits": -2.219423770904541, |
|
"logps": -93.87139129638672, |
|
"loss": 0.0136, |
|
"objective": 0.012250066734850407, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.6333333253860474, |
|
"ranking_simple": 0.6333333253860474, |
|
"regularize": 0.01224894542247057, |
|
"step": 1940 |
|
}, |
|
{ |
|
"dpo_loss": 0.6914030909538269, |
|
"epoch": 1.837650578694591, |
|
"grad_norm": 59.36851585608135, |
|
"learning_rate": 1.9770381707817696e-09, |
|
"logits": -2.181976318359375, |
|
"logps": -89.93838500976562, |
|
"loss": 0.014, |
|
"objective": 0.01064326148480177, |
|
"ranking_idealized": 0.7333333492279053, |
|
"ranking_idealized_expo": 0.6000000238418579, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.010639534331858158, |
|
"step": 1945 |
|
}, |
|
{ |
|
"dpo_loss": 0.6927531361579895, |
|
"epoch": 1.842374616171955, |
|
"grad_norm": 62.813994604552214, |
|
"learning_rate": 1.8638254911111816e-09, |
|
"logits": -2.1365416049957275, |
|
"logps": -93.81517028808594, |
|
"loss": 0.0133, |
|
"objective": 0.016254087910056114, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.01625274494290352, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.842374616171955, |
|
"eval_dpo_loss": 0.6928887367248535, |
|
"eval_logits": -1.9892430305480957, |
|
"eval_logps": -98.45298767089844, |
|
"eval_loss": 0.008893881924450397, |
|
"eval_objective": 0.008952551521360874, |
|
"eval_ranking_idealized": 0.6022099256515503, |
|
"eval_ranking_idealized_expo": 0.5207182168960571, |
|
"eval_ranking_simple": 0.5179557800292969, |
|
"eval_regularize": 0.008950230665504932, |
|
"eval_runtime": 445.189, |
|
"eval_samples_per_second": 13.006, |
|
"eval_steps_per_second": 3.253, |
|
"step": 1950 |
|
}, |
|
{ |
|
"dpo_loss": 0.6905800104141235, |
|
"epoch": 1.847098653649319, |
|
"grad_norm": 62.33667060485866, |
|
"learning_rate": 1.7538890459131094e-09, |
|
"logits": -2.0535969734191895, |
|
"logps": -92.17353057861328, |
|
"loss": 0.0125, |
|
"objective": 0.0103539377450943, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.010349558666348457, |
|
"step": 1955 |
|
}, |
|
{ |
|
"dpo_loss": 0.6896222233772278, |
|
"epoch": 1.8518226911266829, |
|
"grad_norm": 63.19798951059373, |
|
"learning_rate": 1.647236317659423e-09, |
|
"logits": -2.209256172180176, |
|
"logps": -90.65841674804688, |
|
"loss": 0.0117, |
|
"objective": 0.012652536854147911, |
|
"ranking_idealized": 0.699999988079071, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.012651127763092518, |
|
"step": 1960 |
|
}, |
|
{ |
|
"dpo_loss": 0.6895377039909363, |
|
"epoch": 1.856546728604047, |
|
"grad_norm": 72.48024831454246, |
|
"learning_rate": 1.5438745653263086e-09, |
|
"logits": -2.0985636711120605, |
|
"logps": -91.15563201904297, |
|
"loss": 0.0112, |
|
"objective": 0.009469871409237385, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.009467961266636848, |
|
"step": 1965 |
|
}, |
|
{ |
|
"dpo_loss": 0.6904885172843933, |
|
"epoch": 1.861270766081411, |
|
"grad_norm": 58.60723650611681, |
|
"learning_rate": 1.4438108239002322e-09, |
|
"logits": -2.138218641281128, |
|
"logps": -93.22808074951172, |
|
"loss": 0.013, |
|
"objective": 0.013735007494688034, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.01373241189867258, |
|
"step": 1970 |
|
}, |
|
{ |
|
"dpo_loss": 0.6905199885368347, |
|
"epoch": 1.8659948035587748, |
|
"grad_norm": 75.78083265362716, |
|
"learning_rate": 1.3470519038991268e-09, |
|
"logits": -2.1427910327911377, |
|
"logps": -94.4820556640625, |
|
"loss": 0.0133, |
|
"objective": 0.016799870878458023, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.016797440126538277, |
|
"step": 1975 |
|
}, |
|
{ |
|
"dpo_loss": 0.692724347114563, |
|
"epoch": 1.8707188410361388, |
|
"grad_norm": 67.62639102220639, |
|
"learning_rate": 1.253604390908819e-09, |
|
"logits": -2.068427562713623, |
|
"logps": -91.92679595947266, |
|
"loss": 0.0118, |
|
"objective": 0.012069400399923325, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.012066869996488094, |
|
"step": 1980 |
|
}, |
|
{ |
|
"dpo_loss": 0.6905953288078308, |
|
"epoch": 1.8754428785135029, |
|
"grad_norm": 67.47097990851285, |
|
"learning_rate": 1.1634746451348487e-09, |
|
"logits": -2.1265487670898438, |
|
"logps": -91.03251647949219, |
|
"loss": 0.0117, |
|
"objective": 0.007730665151029825, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 0.007728379685431719, |
|
"step": 1985 |
|
}, |
|
{ |
|
"dpo_loss": 0.6893682479858398, |
|
"epoch": 1.8801669159908667, |
|
"grad_norm": 66.59499294449671, |
|
"learning_rate": 1.0766688009695545e-09, |
|
"logits": -2.200402021408081, |
|
"logps": -91.0616226196289, |
|
"loss": 0.0121, |
|
"objective": 0.011565645225346088, |
|
"ranking_idealized": 0.699999988079071, |
|
"ranking_idealized_expo": 0.6499999761581421, |
|
"ranking_simple": 0.6499999761581421, |
|
"regularize": 0.01156419888138771, |
|
"step": 1990 |
|
}, |
|
{ |
|
"dpo_loss": 0.6916558742523193, |
|
"epoch": 1.884890953468231, |
|
"grad_norm": 59.74243706815504, |
|
"learning_rate": 9.931927665745521e-10, |
|
"logits": -2.1524574756622314, |
|
"logps": -90.13037109375, |
|
"loss": 0.0111, |
|
"objective": 0.012568699195981026, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.4000000059604645, |
|
"ranking_simple": 0.4000000059604645, |
|
"regularize": 0.012566999532282352, |
|
"step": 1995 |
|
}, |
|
{ |
|
"dpo_loss": 0.6858018636703491, |
|
"epoch": 1.8896149909455948, |
|
"grad_norm": 67.18519308744443, |
|
"learning_rate": 9.130522234786498e-10, |
|
"logits": -2.1586594581604004, |
|
"logps": -87.96702575683594, |
|
"loss": 0.012, |
|
"objective": 0.01719100959599018, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.017189564183354378, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.8896149909455948, |
|
"eval_dpo_loss": 0.6928969025611877, |
|
"eval_logits": -1.9894063472747803, |
|
"eval_logps": -98.4583511352539, |
|
"eval_loss": 0.008732160553336143, |
|
"eval_objective": 0.008810975588858128, |
|
"eval_ranking_idealized": 0.6022099256515503, |
|
"eval_ranking_idealized_expo": 0.5207182168960571, |
|
"eval_ranking_simple": 0.5179557800292969, |
|
"eval_regularize": 0.008808653801679611, |
|
"eval_runtime": 446.6812, |
|
"eval_samples_per_second": 12.962, |
|
"eval_steps_per_second": 3.242, |
|
"step": 2000 |
|
}, |
|
{ |
|
"dpo_loss": 0.6894116997718811, |
|
"epoch": 1.8943390284229589, |
|
"grad_norm": 63.88750093236265, |
|
"learning_rate": 8.36252626191103e-10, |
|
"logits": -2.1512064933776855, |
|
"logps": -90.55884552001953, |
|
"loss": 0.0112, |
|
"objective": 0.011560056358575821, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.4333333373069763, |
|
"ranking_simple": 0.4333333373069763, |
|
"regularize": 0.011556769721210003, |
|
"step": 2005 |
|
}, |
|
{ |
|
"dpo_loss": 0.691102147102356, |
|
"epoch": 1.899063065900323, |
|
"grad_norm": 66.23415561126623, |
|
"learning_rate": 7.627992018304163e-10, |
|
"logits": -2.1225428581237793, |
|
"logps": -90.3371810913086, |
|
"loss": 0.012, |
|
"objective": 0.009777167811989784, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.38333332538604736, |
|
"ranking_simple": 0.38333332538604736, |
|
"regularize": 0.009775066748261452, |
|
"step": 2010 |
|
}, |
|
{ |
|
"dpo_loss": 0.6881377100944519, |
|
"epoch": 1.9037871033776868, |
|
"grad_norm": 69.19769212044956, |
|
"learning_rate": 6.926969497685397e-10, |
|
"logits": -2.1484780311584473, |
|
"logps": -90.53960418701172, |
|
"loss": 0.0128, |
|
"objective": 0.011675039306282997, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.01167262066155672, |
|
"step": 2015 |
|
}, |
|
{ |
|
"dpo_loss": 0.6911852955818176, |
|
"epoch": 1.9085111408550508, |
|
"grad_norm": 54.24867081294276, |
|
"learning_rate": 6.259506412906401e-10, |
|
"logits": -2.036067008972168, |
|
"logps": -94.92063903808594, |
|
"loss": 0.0114, |
|
"objective": 0.011328586377203465, |
|
"ranking_idealized": 0.4833333194255829, |
|
"ranking_idealized_expo": 0.44999998807907104, |
|
"ranking_simple": 0.44999998807907104, |
|
"regularize": 0.011327385902404785, |
|
"step": 2020 |
|
}, |
|
{ |
|
"dpo_loss": 0.6897438168525696, |
|
"epoch": 1.9132351783324149, |
|
"grad_norm": 66.21589409339711, |
|
"learning_rate": 5.625648192703114e-10, |
|
"logits": -2.100722074508667, |
|
"logps": -88.59317779541016, |
|
"loss": 0.0111, |
|
"objective": 0.01007751189172268, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.010075613856315613, |
|
"step": 2025 |
|
}, |
|
{ |
|
"dpo_loss": 0.6879320740699768, |
|
"epoch": 1.9179592158097787, |
|
"grad_norm": 97.2021444500617, |
|
"learning_rate": 5.025437978604219e-10, |
|
"logits": -2.174182653427124, |
|
"logps": -98.01433563232422, |
|
"loss": 0.0128, |
|
"objective": 0.015440816059708595, |
|
"ranking_idealized": 0.699999988079071, |
|
"ranking_idealized_expo": 0.6333333253860474, |
|
"ranking_simple": 0.6333333253860474, |
|
"regularize": 0.01543727982789278, |
|
"step": 2030 |
|
}, |
|
{ |
|
"dpo_loss": 0.6889265775680542, |
|
"epoch": 1.9226832532871427, |
|
"grad_norm": 60.36698105450304, |
|
"learning_rate": 4.458916621994713e-10, |
|
"logits": -2.1146934032440186, |
|
"logps": -96.32176208496094, |
|
"loss": 0.013, |
|
"objective": 0.012396620586514473, |
|
"ranking_idealized": 0.7833333611488342, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.012394459918141365, |
|
"step": 2035 |
|
}, |
|
{ |
|
"dpo_loss": 0.6921891570091248, |
|
"epoch": 1.9274072907645068, |
|
"grad_norm": 62.70952327603329, |
|
"learning_rate": 3.9261226813353533e-10, |
|
"logits": -2.2713499069213867, |
|
"logps": -94.95305633544922, |
|
"loss": 0.0111, |
|
"objective": 0.012650835327804089, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.012648210860788822, |
|
"step": 2040 |
|
}, |
|
{ |
|
"dpo_loss": 0.6918179988861084, |
|
"epoch": 1.9321313282418706, |
|
"grad_norm": 60.71087366165103, |
|
"learning_rate": 3.4270924195384246e-10, |
|
"logits": -2.072065830230713, |
|
"logps": -93.6068344116211, |
|
"loss": 0.0112, |
|
"objective": 0.010315236635506153, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.46666666865348816, |
|
"regularize": 0.010312527418136597, |
|
"step": 2045 |
|
}, |
|
{ |
|
"dpo_loss": 0.6912323236465454, |
|
"epoch": 1.9368553657192347, |
|
"grad_norm": 66.0225333978305, |
|
"learning_rate": 2.9618598014997107e-10, |
|
"logits": -2.1574909687042236, |
|
"logps": -93.39547729492188, |
|
"loss": 0.0119, |
|
"objective": 0.012261408381164074, |
|
"ranking_idealized": 0.5, |
|
"ranking_idealized_expo": 0.4166666567325592, |
|
"ranking_simple": 0.4166666567325592, |
|
"regularize": 0.012259239330887794, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.9368553657192347, |
|
"eval_dpo_loss": 0.6928916573524475, |
|
"eval_logits": -1.989404320716858, |
|
"eval_logps": -98.45708465576172, |
|
"eval_loss": 0.008758697658777237, |
|
"eval_objective": 0.008833469823002815, |
|
"eval_ranking_idealized": 0.6022099256515503, |
|
"eval_ranking_idealized_expo": 0.5207182168960571, |
|
"eval_ranking_simple": 0.5179557800292969, |
|
"eval_regularize": 0.008830932900309563, |
|
"eval_runtime": 446.0684, |
|
"eval_samples_per_second": 12.98, |
|
"eval_steps_per_second": 3.246, |
|
"step": 2050 |
|
}, |
|
{ |
|
"dpo_loss": 0.6905261874198914, |
|
"epoch": 1.9415794031965987, |
|
"grad_norm": 72.43113470913532, |
|
"learning_rate": 2.5304564917865145e-10, |
|
"logits": -2.20497465133667, |
|
"logps": -93.12974548339844, |
|
"loss": 0.0106, |
|
"objective": 0.011251457966864109, |
|
"ranking_idealized": 0.7333333492279053, |
|
"ranking_idealized_expo": 0.6833333373069763, |
|
"ranking_simple": 0.6833333373069763, |
|
"regularize": 0.011249854229390621, |
|
"step": 2055 |
|
}, |
|
{ |
|
"dpo_loss": 0.6920349597930908, |
|
"epoch": 1.9463034406739625, |
|
"grad_norm": 63.18535597705442, |
|
"learning_rate": 2.132911852482766e-10, |
|
"logits": -2.2412173748016357, |
|
"logps": -90.34603881835938, |
|
"loss": 0.0125, |
|
"objective": 0.013630078174173832, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.013628335669636726, |
|
"step": 2060 |
|
}, |
|
{ |
|
"dpo_loss": 0.691752016544342, |
|
"epoch": 1.9510274781513268, |
|
"grad_norm": 69.39489329530257, |
|
"learning_rate": 1.7692529411904578e-10, |
|
"logits": -2.248879909515381, |
|
"logps": -96.89351654052734, |
|
"loss": 0.012, |
|
"objective": 0.011724698357284069, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.011722984723746777, |
|
"step": 2065 |
|
}, |
|
{ |
|
"dpo_loss": 0.6898305416107178, |
|
"epoch": 1.9557515156286907, |
|
"grad_norm": 64.01320477510419, |
|
"learning_rate": 1.4395045091880608e-10, |
|
"logits": -2.133453845977783, |
|
"logps": -89.6153564453125, |
|
"loss": 0.0122, |
|
"objective": 0.013158326968550682, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.013156161643564701, |
|
"step": 2070 |
|
}, |
|
{ |
|
"dpo_loss": 0.6894416213035583, |
|
"epoch": 1.9604755531060545, |
|
"grad_norm": 70.45537055239048, |
|
"learning_rate": 1.1436889997460397e-10, |
|
"logits": -2.1818015575408936, |
|
"logps": -87.75623321533203, |
|
"loss": 0.0122, |
|
"objective": 0.01147166732698679, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.0114695830270648, |
|
"step": 2075 |
|
}, |
|
{ |
|
"dpo_loss": 0.689843475818634, |
|
"epoch": 1.9651995905834188, |
|
"grad_norm": 72.42995706576724, |
|
"learning_rate": 8.818265465991292e-11, |
|
"logits": -2.1221158504486084, |
|
"logps": -92.03446960449219, |
|
"loss": 0.0116, |
|
"objective": 0.01287260465323925, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.6000000238418579, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.012871033512055874, |
|
"step": 2080 |
|
}, |
|
{ |
|
"dpo_loss": 0.6901150345802307, |
|
"epoch": 1.9699236280607826, |
|
"grad_norm": 67.99462621349231, |
|
"learning_rate": 6.539349725760423e-11, |
|
"logits": -2.2134578227996826, |
|
"logps": -94.01133728027344, |
|
"loss": 0.0138, |
|
"objective": 0.014971112832427025, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.014969981275498867, |
|
"step": 2085 |
|
}, |
|
{ |
|
"dpo_loss": 0.6904923915863037, |
|
"epoch": 1.9746476655381466, |
|
"grad_norm": 64.88891143293061, |
|
"learning_rate": 4.600297883866067e-11, |
|
"logits": -2.1357476711273193, |
|
"logps": -88.28942108154297, |
|
"loss": 0.0103, |
|
"objective": 0.008000458590686321, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.00799502618610859, |
|
"step": 2090 |
|
}, |
|
{ |
|
"dpo_loss": 0.691422700881958, |
|
"epoch": 1.9793717030155107, |
|
"grad_norm": 59.76257934662463, |
|
"learning_rate": 3.0012419156572044e-11, |
|
"logits": -2.106537103652954, |
|
"logps": -92.17442321777344, |
|
"loss": 0.0128, |
|
"objective": 0.012720795348286629, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.0127179604023695, |
|
"step": 2095 |
|
}, |
|
{ |
|
"dpo_loss": 0.6881771087646484, |
|
"epoch": 1.9840957404928745, |
|
"grad_norm": 65.98055833646369, |
|
"learning_rate": 1.7422906557557073e-11, |
|
"logits": -2.096862316131592, |
|
"logps": -96.3370132446289, |
|
"loss": 0.0116, |
|
"objective": 0.01271964143961668, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 0.01271754689514637, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.9840957404928745, |
|
"eval_dpo_loss": 0.6928920149803162, |
|
"eval_logits": -1.9894039630889893, |
|
"eval_logps": -98.45726013183594, |
|
"eval_loss": 0.008759252727031708, |
|
"eval_objective": 0.008833796717226505, |
|
"eval_ranking_idealized": 0.6022099256515503, |
|
"eval_ranking_idealized_expo": 0.5207182168960571, |
|
"eval_ranking_simple": 0.5179557800292969, |
|
"eval_regularize": 0.008831293322145939, |
|
"eval_runtime": 446.6316, |
|
"eval_samples_per_second": 12.964, |
|
"eval_steps_per_second": 3.242, |
|
"step": 2100 |
|
}, |
|
{ |
|
"dpo_loss": 0.69146329164505, |
|
"epoch": 1.9888197779702386, |
|
"grad_norm": 64.49594040581191, |
|
"learning_rate": 8.235297906444837e-12, |
|
"logits": -2.149221181869507, |
|
"logps": -88.66574096679688, |
|
"loss": 0.0128, |
|
"objective": 0.01396742183715105, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.013964297249913216, |
|
"step": 2105 |
|
}, |
|
{ |
|
"dpo_loss": 0.6907679438591003, |
|
"epoch": 1.9935438154476026, |
|
"grad_norm": 70.66784622308231, |
|
"learning_rate": 2.450218528377013e-12, |
|
"logits": -2.137131452560425, |
|
"logps": -93.39989471435547, |
|
"loss": 0.0112, |
|
"objective": 0.010235412046313286, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.46666666865348816, |
|
"regularize": 0.010233612731099129, |
|
"step": 2110 |
|
}, |
|
{ |
|
"dpo_loss": 0.6910449266433716, |
|
"epoch": 1.9982678529249664, |
|
"grad_norm": 60.167547577945186, |
|
"learning_rate": 6.806216624188899e-14, |
|
"logits": -2.124387502670288, |
|
"logps": -93.0544204711914, |
|
"loss": 0.014, |
|
"objective": 0.009751829318702221, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 0.009749443270266056, |
|
"step": 2115 |
|
}, |
|
{ |
|
"epoch": 1.9992126604204392, |
|
"step": 2116, |
|
"total_flos": 0.0, |
|
"train_loss": 0.020640970417914562, |
|
"train_runtime": 38688.7685, |
|
"train_samples_per_second": 2.626, |
|
"train_steps_per_second": 0.055 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 2116, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|