|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.998691442030882, |
|
"eval_steps": 500, |
|
"global_step": 477, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.010468463752944255, |
|
"grad_norm": 33.21111681571131, |
|
"learning_rate": 2.0833333333333334e-06, |
|
"logits/chosen": -0.4980102479457855, |
|
"logits/rejected": -0.5135027170181274, |
|
"logps/chosen": -1.1746745109558105, |
|
"logps/rejected": -1.3606590032577515, |
|
"loss": 2.1734, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.1746745109558105, |
|
"rewards/margins": 0.1859845519065857, |
|
"rewards/rejected": -1.3606590032577515, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.02093692750588851, |
|
"grad_norm": 21.16742924169967, |
|
"learning_rate": 4.166666666666667e-06, |
|
"logits/chosen": -0.5296765565872192, |
|
"logits/rejected": -0.5027884244918823, |
|
"logps/chosen": -1.1314122676849365, |
|
"logps/rejected": -1.2633330821990967, |
|
"loss": 2.1306, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -1.1314122676849365, |
|
"rewards/margins": 0.13192060589790344, |
|
"rewards/rejected": -1.2633330821990967, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.031405391258832765, |
|
"grad_norm": 18.622273155389507, |
|
"learning_rate": 6.25e-06, |
|
"logits/chosen": -0.45581430196762085, |
|
"logits/rejected": -0.42932063341140747, |
|
"logps/chosen": -1.1560032367706299, |
|
"logps/rejected": -1.4923290014266968, |
|
"loss": 2.0523, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.1560032367706299, |
|
"rewards/margins": 0.3363257944583893, |
|
"rewards/rejected": -1.4923290014266968, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.04187385501177702, |
|
"grad_norm": 25.84825355543498, |
|
"learning_rate": 8.333333333333334e-06, |
|
"logits/chosen": -0.6032270789146423, |
|
"logits/rejected": -0.5604568719863892, |
|
"logps/chosen": -1.2145692110061646, |
|
"logps/rejected": -1.5209157466888428, |
|
"loss": 2.101, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.2145692110061646, |
|
"rewards/margins": 0.30634641647338867, |
|
"rewards/rejected": -1.5209157466888428, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05234231876472128, |
|
"grad_norm": 10.051572353875851, |
|
"learning_rate": 1.0416666666666668e-05, |
|
"logits/chosen": -0.7330023646354675, |
|
"logits/rejected": -0.6652411222457886, |
|
"logps/chosen": -1.3188468217849731, |
|
"logps/rejected": -1.643450140953064, |
|
"loss": 2.0473, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.3188468217849731, |
|
"rewards/margins": 0.32460346817970276, |
|
"rewards/rejected": -1.643450140953064, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.06281078251766553, |
|
"grad_norm": 12.836119680084701, |
|
"learning_rate": 1.25e-05, |
|
"logits/chosen": -0.7389785051345825, |
|
"logits/rejected": -0.7157658338546753, |
|
"logps/chosen": -1.2610353231430054, |
|
"logps/rejected": -1.5368638038635254, |
|
"loss": 2.1476, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.2610353231430054, |
|
"rewards/margins": 0.27582842111587524, |
|
"rewards/rejected": -1.5368638038635254, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07327924627060979, |
|
"grad_norm": 10.916144118237353, |
|
"learning_rate": 1.4583333333333333e-05, |
|
"logits/chosen": -0.6624680757522583, |
|
"logits/rejected": -0.5841827392578125, |
|
"logps/chosen": -1.3438886404037476, |
|
"logps/rejected": -1.5585218667984009, |
|
"loss": 2.1252, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.3438886404037476, |
|
"rewards/margins": 0.2146332710981369, |
|
"rewards/rejected": -1.5585218667984009, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.08374771002355404, |
|
"grad_norm": 7.904056592473059, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"logits/chosen": -0.8896552920341492, |
|
"logits/rejected": -0.7669180631637573, |
|
"logps/chosen": -1.3083586692810059, |
|
"logps/rejected": -1.7862266302108765, |
|
"loss": 2.0664, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.3083586692810059, |
|
"rewards/margins": 0.47786790132522583, |
|
"rewards/rejected": -1.7862266302108765, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0942161737764983, |
|
"grad_norm": 13.316362762434997, |
|
"learning_rate": 1.8750000000000002e-05, |
|
"logits/chosen": -0.7929601669311523, |
|
"logits/rejected": -0.752467930316925, |
|
"logps/chosen": -1.2723389863967896, |
|
"logps/rejected": -1.6567331552505493, |
|
"loss": 2.0997, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.2723389863967896, |
|
"rewards/margins": 0.38439422845840454, |
|
"rewards/rejected": -1.6567331552505493, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.10468463752944256, |
|
"grad_norm": 16.98219621825263, |
|
"learning_rate": 1.9998927475076107e-05, |
|
"logits/chosen": -0.3519185483455658, |
|
"logits/rejected": -0.30840247869491577, |
|
"logps/chosen": -1.275742769241333, |
|
"logps/rejected": -1.7419742345809937, |
|
"loss": 2.1089, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -1.275742769241333, |
|
"rewards/margins": 0.4662315845489502, |
|
"rewards/rejected": -1.7419742345809937, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.11515310128238682, |
|
"grad_norm": 7.79923910311647, |
|
"learning_rate": 1.998686421164407e-05, |
|
"logits/chosen": -0.13412383198738098, |
|
"logits/rejected": -0.06430118530988693, |
|
"logps/chosen": -1.3077303171157837, |
|
"logps/rejected": -1.7474453449249268, |
|
"loss": 2.0751, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.3077303171157837, |
|
"rewards/margins": 0.43971508741378784, |
|
"rewards/rejected": -1.7474453449249268, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.12562156503533106, |
|
"grad_norm": 7.005978339387122, |
|
"learning_rate": 1.9961413253717214e-05, |
|
"logits/chosen": -0.4779301583766937, |
|
"logits/rejected": -0.4137405455112457, |
|
"logps/chosen": -1.3986326456069946, |
|
"logps/rejected": -1.6036043167114258, |
|
"loss": 2.1009, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.3986326456069946, |
|
"rewards/margins": 0.20497193932533264, |
|
"rewards/rejected": -1.6036043167114258, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.1360900287882753, |
|
"grad_norm": 6.982350763810334, |
|
"learning_rate": 1.9922608719076874e-05, |
|
"logits/chosen": -0.267805278301239, |
|
"logits/rejected": -0.1766107976436615, |
|
"logps/chosen": -1.2244327068328857, |
|
"logps/rejected": -2.0722804069519043, |
|
"loss": 2.0512, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.2244327068328857, |
|
"rewards/margins": 0.8478477597236633, |
|
"rewards/rejected": -2.0722804069519043, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.14655849254121958, |
|
"grad_norm": 10.132344121190984, |
|
"learning_rate": 1.9870502626379127e-05, |
|
"logits/chosen": -0.35906368494033813, |
|
"logits/rejected": -0.33368802070617676, |
|
"logps/chosen": -1.450307846069336, |
|
"logps/rejected": -1.7906442880630493, |
|
"loss": 2.1396, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -1.450307846069336, |
|
"rewards/margins": 0.34033653140068054, |
|
"rewards/rejected": -1.7906442880630493, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.15702695629416383, |
|
"grad_norm": 13.919238096447465, |
|
"learning_rate": 1.980516482542224e-05, |
|
"logits/chosen": -0.6731249094009399, |
|
"logits/rejected": -0.6837888956069946, |
|
"logps/chosen": -1.2502187490463257, |
|
"logps/rejected": -1.7363303899765015, |
|
"loss": 2.078, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.2502187490463257, |
|
"rewards/margins": 0.4861116409301758, |
|
"rewards/rejected": -1.7363303899765015, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.16749542004710807, |
|
"grad_norm": 8.714846434947708, |
|
"learning_rate": 1.972668290351084e-05, |
|
"logits/chosen": -0.8093172311782837, |
|
"logits/rejected": -0.8910678029060364, |
|
"logps/chosen": -1.3465213775634766, |
|
"logps/rejected": -1.8265445232391357, |
|
"loss": 2.1277, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.3465213775634766, |
|
"rewards/margins": 0.480023056268692, |
|
"rewards/rejected": -1.8265445232391357, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.17796388380005235, |
|
"grad_norm": 34.11378786664511, |
|
"learning_rate": 1.9635162068042547e-05, |
|
"logits/chosen": -0.6499379873275757, |
|
"logits/rejected": -0.6738103628158569, |
|
"logps/chosen": -1.2838003635406494, |
|
"logps/rejected": -1.6559721231460571, |
|
"loss": 2.1205, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.2838003635406494, |
|
"rewards/margins": 0.37217170000076294, |
|
"rewards/rejected": -1.6559721231460571, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.1884323475529966, |
|
"grad_norm": 6.6135900227176, |
|
"learning_rate": 1.9530725005474195e-05, |
|
"logits/chosen": -0.1341579109430313, |
|
"logits/rejected": -0.1497870236635208, |
|
"logps/chosen": -1.3539865016937256, |
|
"logps/rejected": -1.7489871978759766, |
|
"loss": 2.0639, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.3539865016937256, |
|
"rewards/margins": 0.39500072598457336, |
|
"rewards/rejected": -1.7489871978759766, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.19890081130594087, |
|
"grad_norm": 7.360183689725487, |
|
"learning_rate": 1.9413511716856973e-05, |
|
"logits/chosen": -0.12092798948287964, |
|
"logits/rejected": -0.07471726834774017, |
|
"logps/chosen": -1.3030513525009155, |
|
"logps/rejected": -1.8159534931182861, |
|
"loss": 2.0725, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.3030513525009155, |
|
"rewards/margins": 0.512902021408081, |
|
"rewards/rejected": -1.8159534931182861, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.2093692750588851, |
|
"grad_norm": 7.942085572142913, |
|
"learning_rate": 1.9283679330160726e-05, |
|
"logits/chosen": 0.026644444093108177, |
|
"logits/rejected": 0.05488858371973038, |
|
"logps/chosen": -1.3510897159576416, |
|
"logps/rejected": -1.8336998224258423, |
|
"loss": 2.0911, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.3510897159576416, |
|
"rewards/margins": 0.482610285282135, |
|
"rewards/rejected": -1.8336998224258423, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.21983773881182936, |
|
"grad_norm": 10.38696834889441, |
|
"learning_rate": 1.9141401889639167e-05, |
|
"logits/chosen": 0.12454743683338165, |
|
"logits/rejected": 0.1521395593881607, |
|
"logps/chosen": -1.308062195777893, |
|
"logps/rejected": -1.873884916305542, |
|
"loss": 2.029, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.308062195777893, |
|
"rewards/margins": 0.5658227205276489, |
|
"rewards/rejected": -1.873884916305542, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.23030620256477363, |
|
"grad_norm": 14.10578954487523, |
|
"learning_rate": 1.898687012251826e-05, |
|
"logits/chosen": -0.14296935498714447, |
|
"logits/rejected": -0.08335347473621368, |
|
"logps/chosen": -1.3113409280776978, |
|
"logps/rejected": -1.7755804061889648, |
|
"loss": 2.0509, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.3113409280776978, |
|
"rewards/margins": 0.4642394483089447, |
|
"rewards/rejected": -1.7755804061889648, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.24077466631771788, |
|
"grad_norm": 7.096253964138347, |
|
"learning_rate": 1.8820291183320602e-05, |
|
"logits/chosen": -0.20576635003089905, |
|
"logits/rejected": -0.1285274177789688, |
|
"logps/chosen": -1.2730509042739868, |
|
"logps/rejected": -1.8806079626083374, |
|
"loss": 2.0506, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.2730509042739868, |
|
"rewards/margins": 0.6075571179389954, |
|
"rewards/rejected": -1.8806079626083374, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.2512431300706621, |
|
"grad_norm": 8.976667698418499, |
|
"learning_rate": 1.8641888376168483e-05, |
|
"logits/chosen": -0.10974551737308502, |
|
"logits/rejected": -0.07689039409160614, |
|
"logps/chosen": -1.442338466644287, |
|
"logps/rejected": -1.90249764919281, |
|
"loss": 2.1387, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.442338466644287, |
|
"rewards/margins": 0.46015921235084534, |
|
"rewards/rejected": -1.90249764919281, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.26171159382360637, |
|
"grad_norm": 11.84902364416852, |
|
"learning_rate": 1.845190085543795e-05, |
|
"logits/chosen": 0.1279464215040207, |
|
"logits/rejected": 0.1599569022655487, |
|
"logps/chosen": -1.29521906375885, |
|
"logps/rejected": -1.5512622594833374, |
|
"loss": 2.0874, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.29521906375885, |
|
"rewards/margins": 0.2560431957244873, |
|
"rewards/rejected": -1.5512622594833374, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.2721800575765506, |
|
"grad_norm": 12.665702263586864, |
|
"learning_rate": 1.8250583305165098e-05, |
|
"logits/chosen": 0.10071973502635956, |
|
"logits/rejected": 0.114678755402565, |
|
"logps/chosen": -1.3293492794036865, |
|
"logps/rejected": -1.6235164403915405, |
|
"loss": 2.105, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.3293492794036865, |
|
"rewards/margins": 0.29416733980178833, |
|
"rewards/rejected": -1.6235164403915405, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.2826485213294949, |
|
"grad_norm": 11.168921888078422, |
|
"learning_rate": 1.8038205597634392e-05, |
|
"logits/chosen": -0.2312246859073639, |
|
"logits/rejected": -0.13947580754756927, |
|
"logps/chosen": -1.3103423118591309, |
|
"logps/rejected": -1.973184585571289, |
|
"loss": 2.0983, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.3103423118591309, |
|
"rewards/margins": 0.662842333316803, |
|
"rewards/rejected": -1.973184585571289, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.29311698508243916, |
|
"grad_norm": 15.647952191048011, |
|
"learning_rate": 1.7815052431606702e-05, |
|
"logits/chosen": -0.27144142985343933, |
|
"logits/rejected": -0.2118106335401535, |
|
"logps/chosen": -1.3751564025878906, |
|
"logps/rejected": -2.03005051612854, |
|
"loss": 2.0429, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.3751564025878906, |
|
"rewards/margins": 0.6548939943313599, |
|
"rewards/rejected": -2.03005051612854, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.3035854488353834, |
|
"grad_norm": 7.242840211884775, |
|
"learning_rate": 1.7581422950671942e-05, |
|
"logits/chosen": -0.19757069647312164, |
|
"logits/rejected": -0.1668623834848404, |
|
"logps/chosen": -1.3345425128936768, |
|
"logps/rejected": -1.8127644062042236, |
|
"loss": 2.0876, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.3345425128936768, |
|
"rewards/margins": 0.4782216548919678, |
|
"rewards/rejected": -1.8127644062042236, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.31405391258832765, |
|
"grad_norm": 8.615419856166682, |
|
"learning_rate": 1.733763034223804e-05, |
|
"logits/chosen": -0.21767687797546387, |
|
"logits/rejected": -0.21838533878326416, |
|
"logps/chosen": -1.2229845523834229, |
|
"logps/rejected": -1.660559058189392, |
|
"loss": 2.0294, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -1.2229845523834229, |
|
"rewards/margins": 0.43757471442222595, |
|
"rewards/rejected": -1.660559058189392, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3245223763412719, |
|
"grad_norm": 11.4467193943517, |
|
"learning_rate": 1.7084001417693702e-05, |
|
"logits/chosen": -0.17819705605506897, |
|
"logits/rejected": -0.1267833411693573, |
|
"logps/chosen": -1.389460563659668, |
|
"logps/rejected": -1.8192943334579468, |
|
"loss": 2.084, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.389460563659668, |
|
"rewards/margins": 0.4298337399959564, |
|
"rewards/rejected": -1.8192943334579468, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.33499084009421615, |
|
"grad_norm": 8.12988482306829, |
|
"learning_rate": 1.682087617430782e-05, |
|
"logits/chosen": -0.12651406228542328, |
|
"logits/rejected": -0.04694231227040291, |
|
"logps/chosen": -1.318313479423523, |
|
"logps/rejected": -1.8056846857070923, |
|
"loss": 2.0818, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.318313479423523, |
|
"rewards/margins": 0.4873710572719574, |
|
"rewards/rejected": -1.8056846857070923, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.34545930384716045, |
|
"grad_norm": 6.762373951814964, |
|
"learning_rate": 1.6548607339452853e-05, |
|
"logits/chosen": -0.10803677886724472, |
|
"logits/rejected": -0.048906028270721436, |
|
"logps/chosen": -1.25996994972229, |
|
"logps/rejected": -1.8231735229492188, |
|
"loss": 2.0354, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.25996994972229, |
|
"rewards/margins": 0.5632035732269287, |
|
"rewards/rejected": -1.8231735229492188, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.3559277676001047, |
|
"grad_norm": 11.050433248891123, |
|
"learning_rate": 1.626755989776303e-05, |
|
"logits/chosen": -0.1651381254196167, |
|
"logits/rejected": -0.04633602499961853, |
|
"logps/chosen": -1.4237867593765259, |
|
"logps/rejected": -2.101548671722412, |
|
"loss": 2.0616, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.4237867593765259, |
|
"rewards/margins": 0.6777619123458862, |
|
"rewards/rejected": -2.101548671722412, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.36639623135304894, |
|
"grad_norm": 7.0268134591588245, |
|
"learning_rate": 1.5978110601861408e-05, |
|
"logits/chosen": -0.12373347580432892, |
|
"logits/rejected": -0.0877654105424881, |
|
"logps/chosen": -1.3757156133651733, |
|
"logps/rejected": -1.7569023370742798, |
|
"loss": 2.072, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.3757156133651733, |
|
"rewards/margins": 0.3811867833137512, |
|
"rewards/rejected": -1.7569023370742798, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.3768646951059932, |
|
"grad_norm": 12.761833101713918, |
|
"learning_rate": 1.568064746731156e-05, |
|
"logits/chosen": -0.11597935855388641, |
|
"logits/rejected": -0.1455441117286682, |
|
"logps/chosen": -1.374710202217102, |
|
"logps/rejected": -1.7882392406463623, |
|
"loss": 2.0783, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.374710202217102, |
|
"rewards/margins": 0.4135288596153259, |
|
"rewards/rejected": -1.7882392406463623, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.38733315885893743, |
|
"grad_norm": 7.06376095255915, |
|
"learning_rate": 1.5375569252470897e-05, |
|
"logits/chosen": -0.16596433520317078, |
|
"logits/rejected": -0.026778871193528175, |
|
"logps/chosen": -1.3514513969421387, |
|
"logps/rejected": -2.0585455894470215, |
|
"loss": 2.0175, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.3514513969421387, |
|
"rewards/margins": 0.7070940732955933, |
|
"rewards/rejected": -2.0585455894470215, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.39780162261188173, |
|
"grad_norm": 8.565174444883986, |
|
"learning_rate": 1.506328492394303e-05, |
|
"logits/chosen": -0.1680208444595337, |
|
"logits/rejected": -0.10585353523492813, |
|
"logps/chosen": -1.3384554386138916, |
|
"logps/rejected": -1.7696669101715088, |
|
"loss": 2.1269, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -1.3384554386138916, |
|
"rewards/margins": 0.43121138215065, |
|
"rewards/rejected": -1.7696669101715088, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.408270086364826, |
|
"grad_norm": 8.66730844602624, |
|
"learning_rate": 1.4744213108345605e-05, |
|
"logits/chosen": -0.18466773629188538, |
|
"logits/rejected": -0.03730706498026848, |
|
"logps/chosen": -1.3560011386871338, |
|
"logps/rejected": -1.740012764930725, |
|
"loss": 2.0877, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.3560011386871338, |
|
"rewards/margins": 0.3840116560459137, |
|
"rewards/rejected": -1.740012764930725, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.4187385501177702, |
|
"grad_norm": 7.228970778278195, |
|
"learning_rate": 1.4418781531128636e-05, |
|
"logits/chosen": -0.0062202452681958675, |
|
"logits/rejected": 0.13902577757835388, |
|
"logps/chosen": -1.3838578462600708, |
|
"logps/rejected": -1.9292205572128296, |
|
"loss": 2.0565, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.3838578462600708, |
|
"rewards/margins": 0.5453627705574036, |
|
"rewards/rejected": -1.9292205572128296, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.42920701387071447, |
|
"grad_norm": 9.000136464867888, |
|
"learning_rate": 1.4087426443195549e-05, |
|
"logits/chosen": 0.13322147727012634, |
|
"logits/rejected": 0.31764692068099976, |
|
"logps/chosen": -1.2240257263183594, |
|
"logps/rejected": -1.729107141494751, |
|
"loss": 2.0412, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.2240257263183594, |
|
"rewards/margins": 0.5050811171531677, |
|
"rewards/rejected": -1.729107141494751, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.4396754776236587, |
|
"grad_norm": 9.154719134241198, |
|
"learning_rate": 1.375059203609562e-05, |
|
"logits/chosen": 0.16319788992404938, |
|
"logits/rejected": 0.3346864581108093, |
|
"logps/chosen": -1.4042994976043701, |
|
"logps/rejected": -1.8507611751556396, |
|
"loss": 2.1446, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.4042994976043701, |
|
"rewards/margins": 0.44646158814430237, |
|
"rewards/rejected": -1.8507611751556396, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.45014394137660296, |
|
"grad_norm": 6.0840861971289115, |
|
"learning_rate": 1.3408729846571716e-05, |
|
"logits/chosen": 0.09617350250482559, |
|
"logits/rejected": 0.3308163285255432, |
|
"logps/chosen": -1.284582495689392, |
|
"logps/rejected": -1.9165366888046265, |
|
"loss": 2.0609, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.284582495689392, |
|
"rewards/margins": 0.6319543123245239, |
|
"rewards/rejected": -1.9165366888046265, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.46061240512954726, |
|
"grad_norm": 8.00813078143392, |
|
"learning_rate": 1.3062298151261592e-05, |
|
"logits/chosen": 0.044529713690280914, |
|
"logits/rejected": 0.3042605519294739, |
|
"logps/chosen": -1.358139157295227, |
|
"logps/rejected": -1.929091215133667, |
|
"loss": 2.0609, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.358139157295227, |
|
"rewards/margins": 0.5709521770477295, |
|
"rewards/rejected": -1.929091215133667, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.4710808688824915, |
|
"grad_norm": 7.246210126735172, |
|
"learning_rate": 1.2711761352364172e-05, |
|
"logits/chosen": 0.03733745217323303, |
|
"logits/rejected": 0.2569599449634552, |
|
"logps/chosen": -1.2875401973724365, |
|
"logps/rejected": -2.0033631324768066, |
|
"loss": 1.9734, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.2875401973724365, |
|
"rewards/margins": 0.7158228754997253, |
|
"rewards/rejected": -2.0033631324768066, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.48154933263543576, |
|
"grad_norm": 9.113546871891296, |
|
"learning_rate": 1.2357589355094275e-05, |
|
"logits/chosen": 0.014212149195373058, |
|
"logits/rejected": 0.32842034101486206, |
|
"logps/chosen": -1.3232357501983643, |
|
"logps/rejected": -2.133357524871826, |
|
"loss": 2.0056, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.3232357501983643, |
|
"rewards/margins": 0.8101218342781067, |
|
"rewards/rejected": -2.133357524871826, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.49201779638838, |
|
"grad_norm": 8.416270817600122, |
|
"learning_rate": 1.2000256937760446e-05, |
|
"logits/chosen": 0.1557755172252655, |
|
"logits/rejected": 0.4363393187522888, |
|
"logps/chosen": -1.2627068758010864, |
|
"logps/rejected": -1.9027000665664673, |
|
"loss": 2.0341, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.2627068758010864, |
|
"rewards/margins": 0.6399933099746704, |
|
"rewards/rejected": -1.9027000665664673, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.5024862601413242, |
|
"grad_norm": 8.509607776673544, |
|
"learning_rate": 1.1640243115310219e-05, |
|
"logits/chosen": 0.1399160474538803, |
|
"logits/rejected": 0.41544660925865173, |
|
"logps/chosen": -1.2238709926605225, |
|
"logps/rejected": -1.8552753925323486, |
|
"loss": 2.043, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.2238709926605225, |
|
"rewards/margins": 0.6314042210578918, |
|
"rewards/rejected": -1.8552753925323486, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5129547238942685, |
|
"grad_norm": 7.675820182938368, |
|
"learning_rate": 1.127803049719605e-05, |
|
"logits/chosen": 0.1161346435546875, |
|
"logits/rejected": 0.3014758825302124, |
|
"logps/chosen": -1.3739269971847534, |
|
"logps/rejected": -1.896535873413086, |
|
"loss": 2.0516, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.3739269971847534, |
|
"rewards/margins": 0.5226086378097534, |
|
"rewards/rejected": -1.896535873413086, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.5234231876472127, |
|
"grad_norm": 8.105295156815169, |
|
"learning_rate": 1.091410464042268e-05, |
|
"logits/chosen": 0.13467064499855042, |
|
"logits/rejected": 0.20409516990184784, |
|
"logps/chosen": -1.283080816268921, |
|
"logps/rejected": -1.9848954677581787, |
|
"loss": 2.0297, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.283080816268921, |
|
"rewards/margins": 0.701814591884613, |
|
"rewards/rejected": -1.9848954677581787, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.533891651400157, |
|
"grad_norm": 8.059542189088054, |
|
"learning_rate": 1.0548953398643276e-05, |
|
"logits/chosen": 0.16154329478740692, |
|
"logits/rejected": 0.32910025119781494, |
|
"logps/chosen": -1.3794437646865845, |
|
"logps/rejected": -2.083132743835449, |
|
"loss": 2.0164, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.3794437646865845, |
|
"rewards/margins": 0.70368891954422, |
|
"rewards/rejected": -2.083132743835449, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.5443601151531012, |
|
"grad_norm": 6.4999384094249955, |
|
"learning_rate": 1.0183066268176775e-05, |
|
"logits/chosen": 0.6510103940963745, |
|
"logits/rejected": 1.0612311363220215, |
|
"logps/chosen": -1.3178019523620605, |
|
"logps/rejected": -2.1374964714050293, |
|
"loss": 2.0503, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.3178019523620605, |
|
"rewards/margins": 0.8196946978569031, |
|
"rewards/rejected": -2.1374964714050293, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5548285789060455, |
|
"grad_norm": 7.313206322781419, |
|
"learning_rate": 9.81693373182323e-06, |
|
"logits/chosen": 0.484092652797699, |
|
"logits/rejected": 0.6402750015258789, |
|
"logps/chosen": -1.3769603967666626, |
|
"logps/rejected": -1.7423893213272095, |
|
"loss": 2.0132, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.3769603967666626, |
|
"rewards/margins": 0.365428626537323, |
|
"rewards/rejected": -1.7423893213272095, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.5652970426589898, |
|
"grad_norm": 8.756069730601565, |
|
"learning_rate": 9.451046601356725e-06, |
|
"logits/chosen": 0.13520203530788422, |
|
"logits/rejected": 0.3411861062049866, |
|
"logps/chosen": -1.3921834230422974, |
|
"logps/rejected": -1.8440046310424805, |
|
"loss": 2.0618, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.3921834230422974, |
|
"rewards/margins": 0.4518211781978607, |
|
"rewards/rejected": -1.8440046310424805, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.575765506411934, |
|
"grad_norm": 6.597441645495227, |
|
"learning_rate": 9.085895359577324e-06, |
|
"logits/chosen": -0.17431692779064178, |
|
"logits/rejected": 0.03041163645684719, |
|
"logps/chosen": -1.317625641822815, |
|
"logps/rejected": -2.10213041305542, |
|
"loss": 2.1302, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.317625641822815, |
|
"rewards/margins": 0.7845045328140259, |
|
"rewards/rejected": -2.10213041305542, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.5862339701648783, |
|
"grad_norm": 7.014450023153696, |
|
"learning_rate": 8.721969502803954e-06, |
|
"logits/chosen": -0.2185564786195755, |
|
"logits/rejected": -0.061094462871551514, |
|
"logps/chosen": -1.2835057973861694, |
|
"logps/rejected": -1.7011182308197021, |
|
"loss": 2.0526, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.2835057973861694, |
|
"rewards/margins": 0.41761231422424316, |
|
"rewards/rejected": -1.7011182308197021, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.5967024339178225, |
|
"grad_norm": 8.176491560294703, |
|
"learning_rate": 8.359756884689785e-06, |
|
"logits/chosen": -0.24773511290550232, |
|
"logits/rejected": -0.19381779432296753, |
|
"logps/chosen": -1.4067161083221436, |
|
"logps/rejected": -2.0575575828552246, |
|
"loss": 2.0134, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.4067161083221436, |
|
"rewards/margins": 0.6508415341377258, |
|
"rewards/rejected": -2.0575575828552246, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.6071708976707668, |
|
"grad_norm": 7.876329412436082, |
|
"learning_rate": 7.999743062239557e-06, |
|
"logits/chosen": -0.4009264409542084, |
|
"logits/rejected": -0.22962765395641327, |
|
"logps/chosen": -1.3704339265823364, |
|
"logps/rejected": -2.099834680557251, |
|
"loss": 2.0007, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.3704339265823364, |
|
"rewards/margins": 0.7294005155563354, |
|
"rewards/rejected": -2.099834680557251, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6176393614237111, |
|
"grad_norm": 7.662219802711994, |
|
"learning_rate": 7.642410644905726e-06, |
|
"logits/chosen": -0.4107929766178131, |
|
"logits/rejected": -0.22412636876106262, |
|
"logps/chosen": -1.3308082818984985, |
|
"logps/rejected": -2.054591655731201, |
|
"loss": 2.0538, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.3308082818984985, |
|
"rewards/margins": 0.7237831354141235, |
|
"rewards/rejected": -2.054591655731201, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.6281078251766553, |
|
"grad_norm": 8.408469077357054, |
|
"learning_rate": 7.2882386476358304e-06, |
|
"logits/chosen": -0.4748775064945221, |
|
"logits/rejected": -0.3870747983455658, |
|
"logps/chosen": -1.3256374597549438, |
|
"logps/rejected": -1.8613688945770264, |
|
"loss": 2.0223, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.3256374597549438, |
|
"rewards/margins": 0.5357314348220825, |
|
"rewards/rejected": -1.8613688945770264, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6385762889295996, |
|
"grad_norm": 9.20767722080512, |
|
"learning_rate": 6.937701848738407e-06, |
|
"logits/chosen": -0.5532702803611755, |
|
"logits/rejected": -0.517833411693573, |
|
"logps/chosen": -1.3089849948883057, |
|
"logps/rejected": -1.9914848804473877, |
|
"loss": 2.0097, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.3089849948883057, |
|
"rewards/margins": 0.6825000047683716, |
|
"rewards/rejected": -1.9914848804473877, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.6490447526825438, |
|
"grad_norm": 9.168807609346384, |
|
"learning_rate": 6.591270153428288e-06, |
|
"logits/chosen": -0.5921626687049866, |
|
"logits/rejected": -0.5593982934951782, |
|
"logps/chosen": -1.233320951461792, |
|
"logps/rejected": -1.824541449546814, |
|
"loss": 2.0114, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.233320951461792, |
|
"rewards/margins": 0.5912207365036011, |
|
"rewards/rejected": -1.824541449546814, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.6595132164354881, |
|
"grad_norm": 11.74928248778955, |
|
"learning_rate": 6.249407963904381e-06, |
|
"logits/chosen": -0.5820972323417664, |
|
"logits/rejected": -0.37617072463035583, |
|
"logps/chosen": -1.308586835861206, |
|
"logps/rejected": -2.1290369033813477, |
|
"loss": 2.0377, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.308586835861206, |
|
"rewards/margins": 0.8204501271247864, |
|
"rewards/rejected": -2.1290369033813477, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.6699816801884323, |
|
"grad_norm": 7.446497900716036, |
|
"learning_rate": 5.912573556804453e-06, |
|
"logits/chosen": -0.4366278648376465, |
|
"logits/rejected": -0.34356969594955444, |
|
"logps/chosen": -1.3265436887741089, |
|
"logps/rejected": -1.9333693981170654, |
|
"loss": 1.9563, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.3265436887741089, |
|
"rewards/margins": 0.6068258285522461, |
|
"rewards/rejected": -1.9333693981170654, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.6804501439413766, |
|
"grad_norm": 7.834867022615392, |
|
"learning_rate": 5.581218468871365e-06, |
|
"logits/chosen": -0.42373937368392944, |
|
"logits/rejected": -0.1534721851348877, |
|
"logps/chosen": -1.178399682044983, |
|
"logps/rejected": -1.9840328693389893, |
|
"loss": 1.9479, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.178399682044983, |
|
"rewards/margins": 0.8056330680847168, |
|
"rewards/rejected": -1.9840328693389893, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.6909186076943209, |
|
"grad_norm": 7.323036569812089, |
|
"learning_rate": 5.2557868916543996e-06, |
|
"logits/chosen": -0.24924680590629578, |
|
"logits/rejected": 0.03496779128909111, |
|
"logps/chosen": -1.226240873336792, |
|
"logps/rejected": -1.8618109226226807, |
|
"loss": 1.9899, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.226240873336792, |
|
"rewards/margins": 0.6355697512626648, |
|
"rewards/rejected": -1.8618109226226807, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7013870714472651, |
|
"grad_norm": 7.704436038290053, |
|
"learning_rate": 4.9367150760569746e-06, |
|
"logits/chosen": -0.289539635181427, |
|
"logits/rejected": 0.08722052723169327, |
|
"logps/chosen": -1.2469284534454346, |
|
"logps/rejected": -2.0381367206573486, |
|
"loss": 1.9837, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.2469284534454346, |
|
"rewards/margins": 0.7912081480026245, |
|
"rewards/rejected": -2.0381367206573486, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.7118555352002094, |
|
"grad_norm": 8.947529662445875, |
|
"learning_rate": 4.6244307475291025e-06, |
|
"logits/chosen": -0.18107546865940094, |
|
"logits/rejected": 0.22904996573925018, |
|
"logps/chosen": -1.446345329284668, |
|
"logps/rejected": -2.1773736476898193, |
|
"loss": 2.0338, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.446345329284668, |
|
"rewards/margins": 0.7310282588005066, |
|
"rewards/rejected": -2.1773736476898193, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.7223239989531536, |
|
"grad_norm": 9.583549007245818, |
|
"learning_rate": 4.319352532688444e-06, |
|
"logits/chosen": -0.29274436831474304, |
|
"logits/rejected": 0.00033287107362411916, |
|
"logps/chosen": -1.2861610651016235, |
|
"logps/rejected": -1.9985120296478271, |
|
"loss": 2.0307, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.2861610651016235, |
|
"rewards/margins": 0.7123511433601379, |
|
"rewards/rejected": -1.9985120296478271, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.7327924627060979, |
|
"grad_norm": 6.266721042983619, |
|
"learning_rate": 4.0218893981385935e-06, |
|
"logits/chosen": -0.28006237745285034, |
|
"logits/rejected": -0.14952346682548523, |
|
"logps/chosen": -1.2466567754745483, |
|
"logps/rejected": -1.7666349411010742, |
|
"loss": 2.0448, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.2466567754745483, |
|
"rewards/margins": 0.5199781656265259, |
|
"rewards/rejected": -1.7666349411010742, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7432609264590422, |
|
"grad_norm": 8.529189010191509, |
|
"learning_rate": 3.732440102236975e-06, |
|
"logits/chosen": -0.38612329959869385, |
|
"logits/rejected": -0.1318252980709076, |
|
"logps/chosen": -1.1369296312332153, |
|
"logps/rejected": -1.9028323888778687, |
|
"loss": 1.9434, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.1369296312332153, |
|
"rewards/margins": 0.7659028172492981, |
|
"rewards/rejected": -1.9028323888778687, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.7537293902119864, |
|
"grad_norm": 8.98691100719935, |
|
"learning_rate": 3.4513926605471504e-06, |
|
"logits/chosen": -0.2708672881126404, |
|
"logits/rejected": -0.002604148583486676, |
|
"logps/chosen": -1.2128788232803345, |
|
"logps/rejected": -1.881731629371643, |
|
"loss": 1.9188, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.2128788232803345, |
|
"rewards/margins": 0.6688528060913086, |
|
"rewards/rejected": -1.881731629371643, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.7641978539649307, |
|
"grad_norm": 8.427363605011601, |
|
"learning_rate": 3.1791238256921785e-06, |
|
"logits/chosen": -0.2245834320783615, |
|
"logits/rejected": 0.03710466995835304, |
|
"logps/chosen": -1.3998098373413086, |
|
"logps/rejected": -2.1002440452575684, |
|
"loss": 2.0448, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.3998098373413086, |
|
"rewards/margins": 0.7004340887069702, |
|
"rewards/rejected": -2.1002440452575684, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.7746663177178749, |
|
"grad_norm": 8.84465057372313, |
|
"learning_rate": 2.9159985823062997e-06, |
|
"logits/chosen": -0.35639292001724243, |
|
"logits/rejected": -0.17423222959041595, |
|
"logps/chosen": -1.3097021579742432, |
|
"logps/rejected": -2.0959980487823486, |
|
"loss": 1.9799, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.3097021579742432, |
|
"rewards/margins": 0.7862957715988159, |
|
"rewards/rejected": -2.0959980487823486, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.7851347814708192, |
|
"grad_norm": 7.000990509721392, |
|
"learning_rate": 2.662369657761963e-06, |
|
"logits/chosen": -0.32897457480430603, |
|
"logits/rejected": -0.3297235369682312, |
|
"logps/chosen": -1.2792903184890747, |
|
"logps/rejected": -1.8416297435760498, |
|
"loss": 2.0065, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.2792903184890747, |
|
"rewards/margins": 0.5623396635055542, |
|
"rewards/rejected": -1.8416297435760498, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.7956032452237635, |
|
"grad_norm": 8.146523706243913, |
|
"learning_rate": 2.418577049328058e-06, |
|
"logits/chosen": -0.3611428439617157, |
|
"logits/rejected": -0.2454133927822113, |
|
"logps/chosen": -1.266564130783081, |
|
"logps/rejected": -1.7843055725097656, |
|
"loss": 1.9381, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.266564130783081, |
|
"rewards/margins": 0.517741322517395, |
|
"rewards/rejected": -1.7843055725097656, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8060717089767077, |
|
"grad_norm": 8.167524524758202, |
|
"learning_rate": 2.1849475683932996e-06, |
|
"logits/chosen": -0.3831802010536194, |
|
"logits/rejected": -0.2638740539550781, |
|
"logps/chosen": -1.2620943784713745, |
|
"logps/rejected": -1.9156465530395508, |
|
"loss": 1.9611, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.2620943784713745, |
|
"rewards/margins": 0.6535523533821106, |
|
"rewards/rejected": -1.9156465530395508, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.816540172729652, |
|
"grad_norm": 8.30770659075244, |
|
"learning_rate": 1.961794402365611e-06, |
|
"logits/chosen": -0.3085532486438751, |
|
"logits/rejected": -0.14951160550117493, |
|
"logps/chosen": -1.2462884187698364, |
|
"logps/rejected": -2.1334927082061768, |
|
"loss": 1.9555, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.2462884187698364, |
|
"rewards/margins": 0.8872040510177612, |
|
"rewards/rejected": -2.1334927082061768, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.8270086364825961, |
|
"grad_norm": 7.7825890292689905, |
|
"learning_rate": 1.7494166948349057e-06, |
|
"logits/chosen": -0.267643541097641, |
|
"logits/rejected": 0.02370324358344078, |
|
"logps/chosen": -1.3041934967041016, |
|
"logps/rejected": -2.0585289001464844, |
|
"loss": 1.9722, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.3041934967041016, |
|
"rewards/margins": 0.7543356418609619, |
|
"rewards/rejected": -2.0585289001464844, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.8374771002355405, |
|
"grad_norm": 11.47946040263076, |
|
"learning_rate": 1.5480991445620541e-06, |
|
"logits/chosen": -0.2226092368364334, |
|
"logits/rejected": 0.03486952185630798, |
|
"logps/chosen": -1.237866759300232, |
|
"logps/rejected": -1.9466907978057861, |
|
"loss": 1.9589, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.237866759300232, |
|
"rewards/margins": 0.7088239192962646, |
|
"rewards/rejected": -1.9466907978057861, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8479455639884846, |
|
"grad_norm": 8.663615848996892, |
|
"learning_rate": 1.3581116238315194e-06, |
|
"logits/chosen": -0.2045535147190094, |
|
"logits/rejected": 0.006712320260703564, |
|
"logps/chosen": -1.3987605571746826, |
|
"logps/rejected": -2.1295289993286133, |
|
"loss": 2.0021, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -1.3987605571746826, |
|
"rewards/margins": 0.7307685017585754, |
|
"rewards/rejected": -2.1295289993286133, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.8584140277414289, |
|
"grad_norm": 8.186995321334237, |
|
"learning_rate": 1.1797088166794002e-06, |
|
"logits/chosen": -0.20332176983356476, |
|
"logits/rejected": 0.029474016278982162, |
|
"logps/chosen": -1.172918677330017, |
|
"logps/rejected": -1.7614654302597046, |
|
"loss": 1.9558, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.172918677330017, |
|
"rewards/margins": 0.5885466933250427, |
|
"rewards/rejected": -1.7614654302597046, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.8688824914943732, |
|
"grad_norm": 6.525083767762586, |
|
"learning_rate": 1.013129877481741e-06, |
|
"logits/chosen": -0.2240767925977707, |
|
"logits/rejected": 0.07820748537778854, |
|
"logps/chosen": -1.20893132686615, |
|
"logps/rejected": -1.91985285282135, |
|
"loss": 1.9579, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.20893132686615, |
|
"rewards/margins": 0.7109212875366211, |
|
"rewards/rejected": -1.91985285282135, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.8793509552473174, |
|
"grad_norm": 9.083137976581597, |
|
"learning_rate": 8.585981103608343e-07, |
|
"logits/chosen": -0.11082730442285538, |
|
"logits/rejected": 0.09507735818624496, |
|
"logps/chosen": -1.1996517181396484, |
|
"logps/rejected": -1.8896563053131104, |
|
"loss": 2.0042, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.1996517181396484, |
|
"rewards/margins": 0.6900044679641724, |
|
"rewards/rejected": -1.8896563053131104, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.8898194190002617, |
|
"grad_norm": 6.873945114472838, |
|
"learning_rate": 7.163206698392744e-07, |
|
"logits/chosen": -0.10862954705953598, |
|
"logits/rejected": 0.1946602761745453, |
|
"logps/chosen": -1.3608647584915161, |
|
"logps/rejected": -1.970487356185913, |
|
"loss": 1.9911, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -1.3608647584915161, |
|
"rewards/margins": 0.6096227169036865, |
|
"rewards/rejected": -1.970487356185913, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.9002878827532059, |
|
"grad_norm": 7.542928376234922, |
|
"learning_rate": 5.864882831430274e-07, |
|
"logits/chosen": -0.16213567554950714, |
|
"logits/rejected": 0.21448484063148499, |
|
"logps/chosen": -1.3169506788253784, |
|
"logps/rejected": -2.048978328704834, |
|
"loss": 1.956, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.3169506788253784, |
|
"rewards/margins": 0.7320275902748108, |
|
"rewards/rejected": -2.048978328704834, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9107563465061502, |
|
"grad_norm": 10.100813855786358, |
|
"learning_rate": 4.6927499452580574e-07, |
|
"logits/chosen": -0.12442419677972794, |
|
"logits/rejected": 0.06503897905349731, |
|
"logps/chosen": -1.288496732711792, |
|
"logps/rejected": -2.066117286682129, |
|
"loss": 1.9804, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.288496732711792, |
|
"rewards/margins": 0.777620792388916, |
|
"rewards/rejected": -2.066117286682129, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.9212248102590945, |
|
"grad_norm": 10.273579675957365, |
|
"learning_rate": 3.6483793195745686e-07, |
|
"logits/chosen": -0.04678087681531906, |
|
"logits/rejected": 0.3355256915092468, |
|
"logps/chosen": -1.2764372825622559, |
|
"logps/rejected": -2.03460955619812, |
|
"loss": 1.9929, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.2764372825622559, |
|
"rewards/margins": 0.7581723928451538, |
|
"rewards/rejected": -2.03460955619812, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.9316932740120387, |
|
"grad_norm": 8.126486085847516, |
|
"learning_rate": 2.733170964891607e-07, |
|
"logits/chosen": -0.17456679046154022, |
|
"logits/rejected": 0.09745622426271439, |
|
"logps/chosen": -1.2472385168075562, |
|
"logps/rejected": -1.892249345779419, |
|
"loss": 1.9962, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.2472385168075562, |
|
"rewards/margins": 0.6450108289718628, |
|
"rewards/rejected": -1.892249345779419, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.942161737764983, |
|
"grad_norm": 8.866488191256463, |
|
"learning_rate": 1.9483517457776436e-07, |
|
"logits/chosen": -0.05351231247186661, |
|
"logits/rejected": 0.1783636510372162, |
|
"logps/chosen": -1.2550338506698608, |
|
"logps/rejected": -1.8777573108673096, |
|
"loss": 1.9585, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.2550338506698608, |
|
"rewards/margins": 0.6227231621742249, |
|
"rewards/rejected": -1.8777573108673096, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.9526302015179272, |
|
"grad_norm": 8.43900399314022, |
|
"learning_rate": 1.2949737362087156e-07, |
|
"logits/chosen": -0.09251005947589874, |
|
"logits/rejected": 0.2792736291885376, |
|
"logps/chosen": -1.381317377090454, |
|
"logps/rejected": -1.868950605392456, |
|
"loss": 1.9791, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.381317377090454, |
|
"rewards/margins": 0.48763322830200195, |
|
"rewards/rejected": -1.868950605392456, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.9630986652708715, |
|
"grad_norm": 7.3135362806868365, |
|
"learning_rate": 7.73912809231292e-08, |
|
"logits/chosen": -0.15108491480350494, |
|
"logits/rejected": 0.16171926259994507, |
|
"logps/chosen": -1.2007992267608643, |
|
"logps/rejected": -2.045020341873169, |
|
"loss": 1.9082, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.2007992267608643, |
|
"rewards/margins": 0.8442209362983704, |
|
"rewards/rejected": -2.045020341873169, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.9735671290238157, |
|
"grad_norm": 8.496120629902867, |
|
"learning_rate": 3.858674628278825e-08, |
|
"logits/chosen": -0.16963747143745422, |
|
"logits/rejected": 0.33284881711006165, |
|
"logps/chosen": -1.3178845643997192, |
|
"logps/rejected": -2.077279806137085, |
|
"loss": 1.9421, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.3178845643997192, |
|
"rewards/margins": 0.7593953013420105, |
|
"rewards/rejected": -2.077279806137085, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.98403559277676, |
|
"grad_norm": 8.29474797261466, |
|
"learning_rate": 1.3135788355934652e-08, |
|
"logits/chosen": -0.18520286679267883, |
|
"logits/rejected": 0.14646300673484802, |
|
"logps/chosen": -1.2585632801055908, |
|
"logps/rejected": -1.9155442714691162, |
|
"loss": 1.9995, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.2585632801055908, |
|
"rewards/margins": 0.6569809317588806, |
|
"rewards/rejected": -1.9155442714691162, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.9945040565297043, |
|
"grad_norm": 8.421060788269113, |
|
"learning_rate": 1.0725249238940916e-09, |
|
"logits/chosen": -0.2087993174791336, |
|
"logits/rejected": 0.3205938935279846, |
|
"logps/chosen": -1.2470612525939941, |
|
"logps/rejected": -1.9542922973632812, |
|
"loss": 1.985, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.2470612525939941, |
|
"rewards/margins": 0.7072311639785767, |
|
"rewards/rejected": -1.9542922973632812, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.998691442030882, |
|
"step": 477, |
|
"total_flos": 0.0, |
|
"train_loss": 0.0, |
|
"train_runtime": 4.0963, |
|
"train_samples_per_second": 14924.323, |
|
"train_steps_per_second": 116.446 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 477, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |