|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9936102236421727, |
|
"eval_steps": 10000, |
|
"global_step": 312, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.06389776357827476, |
|
"grad_norm": 0.341777060295131, |
|
"learning_rate": 9.375e-07, |
|
"log_odds_chosen": 0.05486620217561722, |
|
"log_odds_ratio": -0.6975381374359131, |
|
"logits/chosen": 33.646114349365234, |
|
"logits/rejected": 33.55038833618164, |
|
"logps/chosen": -0.9772504568099976, |
|
"logps/rejected": -1.0204073190689087, |
|
"loss": 1.6735, |
|
"nll_loss": 1.5879063606262207, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.09772505611181259, |
|
"rewards/margins": 0.004315672442317009, |
|
"rewards/rejected": -0.10204073041677475, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.12779552715654952, |
|
"grad_norm": 0.357474094120966, |
|
"learning_rate": 1.875e-06, |
|
"log_odds_chosen": 0.054913729429244995, |
|
"log_odds_ratio": -0.6985839009284973, |
|
"logits/chosen": 34.386383056640625, |
|
"logits/rejected": 33.98283386230469, |
|
"logps/chosen": -0.9762754440307617, |
|
"logps/rejected": -1.0154238939285278, |
|
"loss": 1.668, |
|
"nll_loss": 1.6016145944595337, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": -0.09762755781412125, |
|
"rewards/margins": 0.003914830274879932, |
|
"rewards/rejected": -0.10154237598180771, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.19169329073482427, |
|
"grad_norm": 0.3765211315354108, |
|
"learning_rate": 2.8125e-06, |
|
"log_odds_chosen": 0.06909728795289993, |
|
"log_odds_ratio": -0.6911064386367798, |
|
"logits/chosen": 33.32851028442383, |
|
"logits/rejected": 32.749427795410156, |
|
"logps/chosen": -0.9813788533210754, |
|
"logps/rejected": -1.0349732637405396, |
|
"loss": 1.6757, |
|
"nll_loss": 1.6085166931152344, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.09813789278268814, |
|
"rewards/margins": 0.005359448026865721, |
|
"rewards/rejected": -0.10349734127521515, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.25559105431309903, |
|
"grad_norm": 0.39833625263223615, |
|
"learning_rate": 2.993961440992859e-06, |
|
"log_odds_chosen": 0.11841567605733871, |
|
"log_odds_ratio": -0.6757725477218628, |
|
"logits/chosen": 33.839942932128906, |
|
"logits/rejected": 33.162986755371094, |
|
"logps/chosen": -0.9702426791191101, |
|
"logps/rejected": -1.0581519603729248, |
|
"loss": 1.664, |
|
"nll_loss": 1.6022984981536865, |
|
"rewards/accuracies": 0.6031249761581421, |
|
"rewards/chosen": -0.09702426195144653, |
|
"rewards/margins": 0.008790932595729828, |
|
"rewards/rejected": -0.10581519454717636, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.3194888178913738, |
|
"grad_norm": 0.45952260963075464, |
|
"learning_rate": 2.9695130976348534e-06, |
|
"log_odds_chosen": 0.09267185628414154, |
|
"log_odds_ratio": -0.684535801410675, |
|
"logits/chosen": 34.052207946777344, |
|
"logits/rejected": 32.802547454833984, |
|
"logps/chosen": -0.9801093935966492, |
|
"logps/rejected": -1.0446739196777344, |
|
"loss": 1.6582, |
|
"nll_loss": 1.5952296257019043, |
|
"rewards/accuracies": 0.565625011920929, |
|
"rewards/chosen": -0.09801094233989716, |
|
"rewards/margins": 0.0064564356580376625, |
|
"rewards/rejected": -0.10446737706661224, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.38338658146964855, |
|
"grad_norm": 0.49504068932785444, |
|
"learning_rate": 2.9265847744427307e-06, |
|
"log_odds_chosen": 0.06481704860925674, |
|
"log_odds_ratio": -0.698478102684021, |
|
"logits/chosen": 32.32544708251953, |
|
"logits/rejected": 32.51976776123047, |
|
"logps/chosen": -0.9770153164863586, |
|
"logps/rejected": -1.0272716283798218, |
|
"loss": 1.6387, |
|
"nll_loss": 1.572589635848999, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.0977015346288681, |
|
"rewards/margins": 0.0050256275571882725, |
|
"rewards/rejected": -0.10272715240716934, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.4472843450479233, |
|
"grad_norm": 0.5262433549192981, |
|
"learning_rate": 2.865716319988224e-06, |
|
"log_odds_chosen": 0.030134931206703186, |
|
"log_odds_ratio": -0.7101849317550659, |
|
"logits/chosen": 33.02894973754883, |
|
"logits/rejected": 33.309715270996094, |
|
"logps/chosen": -0.9551456570625305, |
|
"logps/rejected": -0.9776851534843445, |
|
"loss": 1.6289, |
|
"nll_loss": 1.5476250648498535, |
|
"rewards/accuracies": 0.5531250238418579, |
|
"rewards/chosen": -0.09551456570625305, |
|
"rewards/margins": 0.0022539461497217417, |
|
"rewards/rejected": -0.09776850789785385, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.5111821086261981, |
|
"grad_norm": 0.5188886669640671, |
|
"learning_rate": 2.7876731904027993e-06, |
|
"log_odds_chosen": 0.08591620624065399, |
|
"log_odds_ratio": -0.6787055730819702, |
|
"logits/chosen": 34.61581039428711, |
|
"logits/rejected": 34.63213348388672, |
|
"logps/chosen": -0.9702480435371399, |
|
"logps/rejected": -1.0235538482666016, |
|
"loss": 1.5921, |
|
"nll_loss": 1.5600519180297852, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.0970248132944107, |
|
"rewards/margins": 0.005330582614988089, |
|
"rewards/rejected": -0.10235539823770523, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.5750798722044729, |
|
"grad_norm": 0.5939864943475555, |
|
"learning_rate": 2.6934368233226715e-06, |
|
"log_odds_chosen": 0.0763852447271347, |
|
"log_odds_ratio": -0.6891772150993347, |
|
"logits/chosen": 34.269248962402344, |
|
"logits/rejected": 34.00069046020508, |
|
"logps/chosen": -0.9529930949211121, |
|
"logps/rejected": -1.0018848180770874, |
|
"loss": 1.586, |
|
"nll_loss": 1.5087509155273438, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.09529931843280792, |
|
"rewards/margins": 0.004889167379587889, |
|
"rewards/rejected": -0.1001884788274765, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.6389776357827476, |
|
"grad_norm": 0.6337044866684244, |
|
"learning_rate": 2.584192295741087e-06, |
|
"log_odds_chosen": 0.057517312467098236, |
|
"log_odds_ratio": -0.7008931040763855, |
|
"logits/chosen": 32.44559860229492, |
|
"logits/rejected": 32.7852783203125, |
|
"logps/chosen": -0.9386932253837585, |
|
"logps/rejected": -0.9813023805618286, |
|
"loss": 1.5499, |
|
"nll_loss": 1.4884004592895508, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.09386932104825974, |
|
"rewards/margins": 0.004260920453816652, |
|
"rewards/rejected": -0.0981302410364151, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.7028753993610224, |
|
"grad_norm": 0.658610355052368, |
|
"learning_rate": 2.461313420977536e-06, |
|
"log_odds_chosen": 0.06078052520751953, |
|
"log_odds_ratio": -0.694057822227478, |
|
"logits/chosen": 34.87937927246094, |
|
"logits/rejected": 34.10810470581055, |
|
"logps/chosen": -0.9328826665878296, |
|
"logps/rejected": -0.9768407940864563, |
|
"loss": 1.5083, |
|
"nll_loss": 1.4501432180404663, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.09328827261924744, |
|
"rewards/margins": 0.004395808558911085, |
|
"rewards/rejected": -0.09768407791852951, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.7667731629392971, |
|
"grad_norm": 0.7021451570051785, |
|
"learning_rate": 2.3263454721781537e-06, |
|
"log_odds_chosen": 0.012948527932167053, |
|
"log_odds_ratio": -0.7222181558609009, |
|
"logits/chosen": 32.433719635009766, |
|
"logits/rejected": 32.53590774536133, |
|
"logps/chosen": -0.9092488288879395, |
|
"logps/rejected": -0.9225506782531738, |
|
"loss": 1.4673, |
|
"nll_loss": 1.4067564010620117, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.09092487394809723, |
|
"rewards/margins": 0.0013302009319886565, |
|
"rewards/rejected": -0.09225507825613022, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.8306709265175719, |
|
"grad_norm": 0.6227085414802311, |
|
"learning_rate": 2.18098574960932e-06, |
|
"log_odds_chosen": 0.08671535551548004, |
|
"log_odds_ratio": -0.6804493069648743, |
|
"logits/chosen": 33.88345718383789, |
|
"logits/rejected": 33.001914978027344, |
|
"logps/chosen": -0.8342811465263367, |
|
"logps/rejected": -0.8885458707809448, |
|
"loss": 1.3999, |
|
"nll_loss": 1.3158425092697144, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.08342811465263367, |
|
"rewards/margins": 0.005426469258964062, |
|
"rewards/rejected": -0.0888545885682106, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.8945686900958466, |
|
"grad_norm": 0.6264864351444569, |
|
"learning_rate": 2.027062236122014e-06, |
|
"log_odds_chosen": 0.04714164510369301, |
|
"log_odds_ratio": -0.6999929547309875, |
|
"logits/chosen": 31.832406997680664, |
|
"logits/rejected": 31.350921630859375, |
|
"logps/chosen": -0.8387459516525269, |
|
"logps/rejected": -0.876442551612854, |
|
"loss": 1.3491, |
|
"nll_loss": 1.2940082550048828, |
|
"rewards/accuracies": 0.534375011920929, |
|
"rewards/chosen": -0.08387459814548492, |
|
"rewards/margins": 0.0037696503568440676, |
|
"rewards/rejected": -0.08764425665140152, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.9584664536741214, |
|
"grad_norm": 0.5655314348380543, |
|
"learning_rate": 1.866510609206841e-06, |
|
"log_odds_chosen": 0.044148243963718414, |
|
"log_odds_ratio": -0.7019084692001343, |
|
"logits/chosen": 33.6451416015625, |
|
"logits/rejected": 33.04177474975586, |
|
"logps/chosen": -0.8360700607299805, |
|
"logps/rejected": -0.8655373454093933, |
|
"loss": 1.3242, |
|
"nll_loss": 1.2609388828277588, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.08360700309276581, |
|
"rewards/margins": 0.0029467367567121983, |
|
"rewards/rejected": -0.08655373752117157, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.0223642172523961, |
|
"grad_norm": 0.5571454673635674, |
|
"learning_rate": 1.7013498987264833e-06, |
|
"log_odds_chosen": 0.06723789870738983, |
|
"log_odds_ratio": -0.6888397932052612, |
|
"logits/chosen": 33.27342987060547, |
|
"logits/rejected": 32.30434036254883, |
|
"logps/chosen": -0.7994817495346069, |
|
"logps/rejected": -0.8369095921516418, |
|
"loss": 1.2899, |
|
"nll_loss": 1.211586594581604, |
|
"rewards/accuracies": 0.565625011920929, |
|
"rewards/chosen": -0.07994817197322845, |
|
"rewards/margins": 0.0037427886854857206, |
|
"rewards/rejected": -0.08369095623493195, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.0862619808306708, |
|
"grad_norm": 0.5333966997862959, |
|
"learning_rate": 1.5336570964437077e-06, |
|
"log_odds_chosen": 0.038995109498500824, |
|
"log_odds_ratio": -0.6969125866889954, |
|
"logits/chosen": 32.00867462158203, |
|
"logits/rejected": 31.789836883544922, |
|
"logps/chosen": -0.8085753321647644, |
|
"logps/rejected": -0.8331602215766907, |
|
"loss": 1.2863, |
|
"nll_loss": 1.2334020137786865, |
|
"rewards/accuracies": 0.503125011920929, |
|
"rewards/chosen": -0.0808575376868248, |
|
"rewards/margins": 0.0024584876373410225, |
|
"rewards/rejected": -0.08331602811813354, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.1501597444089458, |
|
"grad_norm": 0.5317464882215207, |
|
"learning_rate": 1.3655410366448499e-06, |
|
"log_odds_chosen": 0.10975570976734161, |
|
"log_odds_ratio": -0.6656275987625122, |
|
"logits/chosen": 32.76184844970703, |
|
"logits/rejected": 32.081581115722656, |
|
"logps/chosen": -0.7589792013168335, |
|
"logps/rejected": -0.8160526156425476, |
|
"loss": 1.2451, |
|
"nll_loss": 1.1476625204086304, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.0758979320526123, |
|
"rewards/margins": 0.0057073310017585754, |
|
"rewards/rejected": -0.08160526305437088, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.2140575079872205, |
|
"grad_norm": 0.5186911330716555, |
|
"learning_rate": 1.199115876325091e-06, |
|
"log_odds_chosen": 0.04539128392934799, |
|
"log_odds_ratio": -0.6990563869476318, |
|
"logits/chosen": 33.35085678100586, |
|
"logits/rejected": 33.3908805847168, |
|
"logps/chosen": -0.7675826549530029, |
|
"logps/rejected": -0.7923057675361633, |
|
"loss": 1.2217, |
|
"nll_loss": 1.161102294921875, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.07675826549530029, |
|
"rewards/margins": 0.0024723131209611893, |
|
"rewards/rejected": -0.07923058420419693, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.2779552715654952, |
|
"grad_norm": 0.5411315465201, |
|
"learning_rate": 1.036474508437579e-06, |
|
"log_odds_chosen": 0.07649532705545425, |
|
"log_odds_ratio": -0.6858269572257996, |
|
"logits/chosen": 30.826059341430664, |
|
"logits/rejected": 30.824636459350586, |
|
"logps/chosen": -0.786870002746582, |
|
"logps/rejected": -0.8327391743659973, |
|
"loss": 1.215, |
|
"nll_loss": 1.165637493133545, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -0.07868699729442596, |
|
"rewards/margins": 0.004586914554238319, |
|
"rewards/rejected": -0.08327391743659973, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.34185303514377, |
|
"grad_norm": 0.5587119516125117, |
|
"learning_rate": 8.796622425502193e-07, |
|
"log_odds_chosen": 0.04922567307949066, |
|
"log_odds_ratio": -0.6928130984306335, |
|
"logits/chosen": 32.06999969482422, |
|
"logits/rejected": 31.100805282592773, |
|
"logps/chosen": -0.760046124458313, |
|
"logps/rejected": -0.7879078388214111, |
|
"loss": 1.2018, |
|
"nll_loss": 1.1223350763320923, |
|
"rewards/accuracies": 0.5218750238418579, |
|
"rewards/chosen": -0.07600460201501846, |
|
"rewards/margins": 0.00278617930598557, |
|
"rewards/rejected": -0.07879078388214111, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.4057507987220448, |
|
"grad_norm": 0.5532006875711314, |
|
"learning_rate": 7.30651083891141e-07, |
|
"log_odds_chosen": 0.09409201890230179, |
|
"log_odds_ratio": -0.6719040870666504, |
|
"logits/chosen": 31.775470733642578, |
|
"logits/rejected": 31.210159301757812, |
|
"logps/chosen": -0.7408252954483032, |
|
"logps/rejected": -0.7944933772087097, |
|
"loss": 1.1848, |
|
"nll_loss": 1.1082121133804321, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.07408253848552704, |
|
"rewards/margins": 0.005366811528801918, |
|
"rewards/rejected": -0.07944934070110321, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.4696485623003195, |
|
"grad_norm": 0.5478964489714558, |
|
"learning_rate": 5.913149342387704e-07, |
|
"log_odds_chosen": 0.022008871659636497, |
|
"log_odds_ratio": -0.7079219818115234, |
|
"logits/chosen": 30.458974838256836, |
|
"logits/rejected": 30.637874603271484, |
|
"logps/chosen": -0.7661860585212708, |
|
"logps/rejected": -0.783053994178772, |
|
"loss": 1.1753, |
|
"nll_loss": 1.1201671361923218, |
|
"rewards/accuracies": 0.5406249761581421, |
|
"rewards/chosen": -0.07661859691143036, |
|
"rewards/margins": 0.00168679840862751, |
|
"rewards/rejected": -0.07830540090799332, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.5335463258785942, |
|
"grad_norm": 0.5614636317534148, |
|
"learning_rate": 4.63406026519703e-07, |
|
"log_odds_chosen": 0.12717078626155853, |
|
"log_odds_ratio": -0.6581142544746399, |
|
"logits/chosen": 31.024799346923828, |
|
"logits/rejected": 30.477502822875977, |
|
"logps/chosen": -0.7121531367301941, |
|
"logps/rejected": -0.7798753380775452, |
|
"loss": 1.1701, |
|
"nll_loss": 1.073432207107544, |
|
"rewards/accuracies": 0.596875011920929, |
|
"rewards/chosen": -0.07121531665325165, |
|
"rewards/margins": 0.006772211752831936, |
|
"rewards/rejected": -0.07798753678798676, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.5974440894568689, |
|
"grad_norm": 0.5532326330060124, |
|
"learning_rate": 3.4853288946298335e-07, |
|
"log_odds_chosen": 0.031235849484801292, |
|
"log_odds_ratio": -0.7052245140075684, |
|
"logits/chosen": 31.759140014648438, |
|
"logits/rejected": 31.2039737701416, |
|
"logps/chosen": -0.7514272332191467, |
|
"logps/rejected": -0.7712705135345459, |
|
"loss": 1.1628, |
|
"nll_loss": 1.0986436605453491, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.07514272630214691, |
|
"rewards/margins": 0.0019843343179672956, |
|
"rewards/rejected": -0.07712705433368683, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.6613418530351438, |
|
"grad_norm": 0.5579681347407102, |
|
"learning_rate": 2.48140119418046e-07, |
|
"log_odds_chosen": 0.09361070394515991, |
|
"log_odds_ratio": -0.6759673953056335, |
|
"logits/chosen": 30.127365112304688, |
|
"logits/rejected": 29.955150604248047, |
|
"logps/chosen": -0.7377598881721497, |
|
"logps/rejected": -0.789117157459259, |
|
"loss": 1.1595, |
|
"nll_loss": 1.0882426500320435, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.0737759917974472, |
|
"rewards/margins": 0.0051357327029109, |
|
"rewards/rejected": -0.07891170680522919, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.7252396166134185, |
|
"grad_norm": 0.5388500509178578, |
|
"learning_rate": 1.634902137174483e-07, |
|
"log_odds_chosen": 0.06861741840839386, |
|
"log_odds_ratio": -0.6897167563438416, |
|
"logits/chosen": 31.801860809326172, |
|
"logits/rejected": 30.827133178710938, |
|
"logps/chosen": -0.7297223210334778, |
|
"logps/rejected": -0.7675324082374573, |
|
"loss": 1.15, |
|
"nll_loss": 1.091435194015503, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.07297223806381226, |
|
"rewards/margins": 0.003781010629609227, |
|
"rewards/rejected": -0.07675323635339737, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.7891373801916934, |
|
"grad_norm": 0.5597110218019701, |
|
"learning_rate": 9.564769404039419e-08, |
|
"log_odds_chosen": 0.10912013053894043, |
|
"log_odds_ratio": -0.6706396341323853, |
|
"logits/chosen": 30.64389419555664, |
|
"logits/rejected": 30.014019012451172, |
|
"logps/chosen": -0.7340375185012817, |
|
"logps/rejected": -0.7948423624038696, |
|
"loss": 1.1518, |
|
"nll_loss": 1.0911478996276855, |
|
"rewards/accuracies": 0.590624988079071, |
|
"rewards/chosen": -0.0734037533402443, |
|
"rewards/margins": 0.00608047703281045, |
|
"rewards/rejected": -0.0794842392206192, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.8530351437699681, |
|
"grad_norm": 0.5432963139659418, |
|
"learning_rate": 4.546571943496969e-08, |
|
"log_odds_chosen": 0.071634940803051, |
|
"log_odds_ratio": -0.6880494356155396, |
|
"logits/chosen": 31.523670196533203, |
|
"logits/rejected": 31.051345825195312, |
|
"logps/chosen": -0.7210476398468018, |
|
"logps/rejected": -0.7614242434501648, |
|
"loss": 1.1457, |
|
"nll_loss": 1.078147292137146, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.07210476696491241, |
|
"rewards/margins": 0.0040376619435846806, |
|
"rewards/rejected": -0.07614242285490036, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.9169329073482428, |
|
"grad_norm": 0.5550233900085839, |
|
"learning_rate": 1.357535734809795e-08, |
|
"log_odds_chosen": 0.08968226611614227, |
|
"log_odds_ratio": -0.6760362386703491, |
|
"logits/chosen": 30.686954498291016, |
|
"logits/rejected": 29.28194236755371, |
|
"logps/chosen": -0.7248884439468384, |
|
"logps/rejected": -0.7753577828407288, |
|
"loss": 1.1474, |
|
"nll_loss": 1.0696710348129272, |
|
"rewards/accuracies": 0.5531250238418579, |
|
"rewards/chosen": -0.07248884439468384, |
|
"rewards/margins": 0.005046932026743889, |
|
"rewards/rejected": -0.07753578573465347, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.9808306709265175, |
|
"grad_norm": 0.49758643306678957, |
|
"learning_rate": 3.77647586240204e-10, |
|
"log_odds_chosen": 0.048125725239515305, |
|
"log_odds_ratio": -0.6949858665466309, |
|
"logits/chosen": 30.8554744720459, |
|
"logits/rejected": 30.65484046936035, |
|
"logps/chosen": -0.7389064431190491, |
|
"logps/rejected": -0.7689411640167236, |
|
"loss": 1.1435, |
|
"nll_loss": 1.0751014947891235, |
|
"rewards/accuracies": 0.534375011920929, |
|
"rewards/chosen": -0.07389064878225327, |
|
"rewards/margins": 0.0030034759547561407, |
|
"rewards/rejected": -0.0768941268324852, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.9936102236421727, |
|
"step": 312, |
|
"total_flos": 0.0, |
|
"train_loss": 1.3676127867820935, |
|
"train_runtime": 5524.9246, |
|
"train_samples_per_second": 7.24, |
|
"train_steps_per_second": 0.056 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 312, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|