{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9984431759211209,
  "eval_steps": 1000,
  "global_step": 481,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0020757654385054488,
      "grad_norm": 7.453312579539728,
      "learning_rate": 1.020408163265306e-08,
      "logits/chosen": -2.730942726135254,
      "logits/rejected": -2.654609203338623,
      "logps/chosen": -350.489990234375,
      "logps/rejected": -325.546875,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.02075765438505449,
      "grad_norm": 7.878888995232912,
      "learning_rate": 1.0204081632653061e-07,
      "logits/chosen": -2.7330236434936523,
      "logits/rejected": -2.735116720199585,
      "logps/chosen": -366.51531982421875,
      "logps/rejected": -412.2677001953125,
      "loss": 0.6931,
      "rewards/accuracies": 0.4270833432674408,
      "rewards/chosen": -0.000205132644623518,
      "rewards/margins": 4.354613702162169e-05,
      "rewards/rejected": -0.00024867875617928803,
      "step": 10
    },
    {
      "epoch": 0.04151530877010898,
      "grad_norm": 7.53294584904676,
      "learning_rate": 2.0408163265306121e-07,
      "logits/chosen": -2.7173304557800293,
      "logits/rejected": -2.693912982940674,
      "logps/chosen": -378.73748779296875,
      "logps/rejected": -404.47003173828125,
      "loss": 0.6892,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": 0.0025507560931146145,
      "rewards/margins": 0.008084132336080074,
      "rewards/rejected": -0.005533376708626747,
      "step": 20
    },
    {
      "epoch": 0.062272963155163466,
      "grad_norm": 8.007471678418003,
      "learning_rate": 3.0612244897959183e-07,
      "logits/chosen": -2.716646194458008,
      "logits/rejected": -2.700786590576172,
      "logps/chosen": -363.6639709472656,
      "logps/rejected": -390.54083251953125,
      "loss": 0.6692,
      "rewards/accuracies": 0.903124988079071,
      "rewards/chosen": 0.02478734776377678,
      "rewards/margins": 0.051134396344423294,
      "rewards/rejected": -0.026347041130065918,
      "step": 30
    },
    {
      "epoch": 0.08303061754021795,
      "grad_norm": 9.3622528637074,
      "learning_rate": 4.0816326530612243e-07,
      "logits/chosen": -2.7087109088897705,
      "logits/rejected": -2.669712543487549,
      "logps/chosen": -347.83538818359375,
      "logps/rejected": -376.85260009765625,
      "loss": 0.6044,
      "rewards/accuracies": 0.875,
      "rewards/chosen": 0.05488457530736923,
      "rewards/margins": 0.19498120248317719,
      "rewards/rejected": -0.14009663462638855,
      "step": 40
    },
    {
      "epoch": 0.10378827192527244,
      "grad_norm": 11.969966849217528,
      "learning_rate": 4.999933894080444e-07,
      "logits/chosen": -2.7135281562805176,
      "logits/rejected": -2.6938090324401855,
      "logps/chosen": -403.2617492675781,
      "logps/rejected": -495.21270751953125,
      "loss": 0.4674,
      "rewards/accuracies": 0.871874988079071,
      "rewards/chosen": -0.37011662125587463,
      "rewards/margins": 0.6785963177680969,
      "rewards/rejected": -1.048712968826294,
      "step": 50
    },
    {
      "epoch": 0.12454592631032693,
      "grad_norm": 19.176201042712012,
      "learning_rate": 4.992005413014143e-07,
      "logits/chosen": -2.7302985191345215,
      "logits/rejected": -2.7273764610290527,
      "logps/chosen": -528.5646362304688,
      "logps/rejected": -741.4615478515625,
      "loss": 0.3523,
      "rewards/accuracies": 0.84375,
      "rewards/chosen": -1.8138965368270874,
      "rewards/margins": 1.7118844985961914,
      "rewards/rejected": -3.5257811546325684,
      "step": 60
    },
    {
      "epoch": 0.14530358069538143,
      "grad_norm": 20.310776567548704,
      "learning_rate": 4.970903776169402e-07,
      "logits/chosen": -2.7460341453552246,
      "logits/rejected": -2.7275753021240234,
      "logps/chosen": -634.8268432617188,
      "logps/rejected": -865.6439208984375,
      "loss": 0.3052,
      "rewards/accuracies": 0.84375,
      "rewards/chosen": -2.382263422012329,
      "rewards/margins": 2.2324626445770264,
      "rewards/rejected": -4.6147260665893555,
      "step": 70
    },
    {
      "epoch": 0.1660612350804359,
      "grad_norm": 19.886871444649117,
      "learning_rate": 4.936740530314087e-07,
      "logits/chosen": -2.3413853645324707,
      "logits/rejected": -2.102804660797119,
      "logps/chosen": -591.3840942382812,
      "logps/rejected": -896.90625,
      "loss": 0.25,
      "rewards/accuracies": 0.903124988079071,
      "rewards/chosen": -2.003624200820923,
      "rewards/margins": 2.942108631134033,
      "rewards/rejected": -4.945733070373535,
      "step": 80
    },
    {
      "epoch": 0.1868188894654904,
      "grad_norm": 16.07417524586734,
      "learning_rate": 4.889696268057348e-07,
      "logits/chosen": -1.8468377590179443,
      "logits/rejected": -1.1863611936569214,
      "logps/chosen": -567.8921508789062,
      "logps/rejected": -934.1871337890625,
      "loss": 0.2254,
      "rewards/accuracies": 0.893750011920929,
      "rewards/chosen": -2.0263195037841797,
      "rewards/margins": 3.4611504077911377,
      "rewards/rejected": -5.487469673156738,
      "step": 90
    },
    {
      "epoch": 0.2075765438505449,
      "grad_norm": 15.952464662940557,
      "learning_rate": 4.830019673206996e-07,
      "logits/chosen": -1.3128455877304077,
      "logits/rejected": -0.37191733717918396,
      "logps/chosen": -637.7520751953125,
      "logps/rejected": -1123.6968994140625,
      "loss": 0.2034,
      "rewards/accuracies": 0.925000011920929,
      "rewards/chosen": -2.622469425201416,
      "rewards/margins": 4.605704307556152,
      "rewards/rejected": -7.22817325592041,
      "step": 100
    },
    {
      "epoch": 0.2283341982355994,
      "grad_norm": 20.61911089038355,
      "learning_rate": 4.7580262061854606e-07,
      "logits/chosen": -0.8984780311584473,
      "logits/rejected": 0.00587611785158515,
      "logps/chosen": -629.09521484375,
      "logps/rejected": -1123.9976806640625,
      "loss": 0.2043,
      "rewards/accuracies": 0.9312499761581421,
      "rewards/chosen": -2.5760746002197266,
      "rewards/margins": 4.712790012359619,
      "rewards/rejected": -7.288865089416504,
      "step": 110
    },
    {
      "epoch": 0.24909185262065386,
      "grad_norm": 13.519087980837655,
      "learning_rate": 4.674096436453447e-07,
      "logits/chosen": -0.8746647834777832,
      "logits/rejected": 0.03333142027258873,
      "logps/chosen": -662.9947509765625,
      "logps/rejected": -1107.119140625,
      "loss": 0.1947,
      "rewards/accuracies": 0.90625,
      "rewards/chosen": -2.6802916526794434,
      "rewards/margins": 4.257796287536621,
      "rewards/rejected": -6.938088417053223,
      "step": 120
    },
    {
      "epoch": 0.26984950700570837,
      "grad_norm": 15.179032922942401,
      "learning_rate": 4.578674030756363e-07,
      "logits/chosen": -0.5216516256332397,
      "logits/rejected": 0.797188401222229,
      "logps/chosen": -672.4591064453125,
      "logps/rejected": -1201.9649658203125,
      "loss": 0.1766,
      "rewards/accuracies": 0.90625,
      "rewards/chosen": -2.92866849899292,
      "rewards/margins": 5.020692348480225,
      "rewards/rejected": -7.949362277984619,
      "step": 130
    },
    {
      "epoch": 0.29060716139076287,
      "grad_norm": 19.394840025974254,
      "learning_rate": 4.4722634078279865e-07,
      "logits/chosen": 0.05672640725970268,
      "logits/rejected": 1.2894923686981201,
      "logps/chosen": -632.3123779296875,
      "logps/rejected": -1134.365478515625,
      "loss": 0.1989,
      "rewards/accuracies": 0.8843749761581421,
      "rewards/chosen": -2.6184778213500977,
      "rewards/margins": 4.792341709136963,
      "rewards/rejected": -7.410820007324219,
      "step": 140
    },
    {
      "epoch": 0.3113648157758173,
      "grad_norm": 15.545837682982413,
      "learning_rate": 4.355427071949004e-07,
      "logits/chosen": -0.034926723688840866,
      "logits/rejected": 1.3178081512451172,
      "logps/chosen": -625.6254272460938,
      "logps/rejected": -1133.8699951171875,
      "loss": 0.1657,
      "rewards/accuracies": 0.925000011920929,
      "rewards/chosen": -2.654139757156372,
      "rewards/margins": 4.885876655578613,
      "rewards/rejected": -7.540017127990723,
      "step": 150
    },
    {
      "epoch": 0.3321224701608718,
      "grad_norm": 21.099719863638594,
      "learning_rate": 4.228782639455674e-07,
      "logits/chosen": -0.2264009416103363,
      "logits/rejected": 1.3679448366165161,
      "logps/chosen": -684.0534057617188,
      "logps/rejected": -1281.2177734375,
      "loss": 0.1738,
      "rewards/accuracies": 0.934374988079071,
      "rewards/chosen": -3.0014939308166504,
      "rewards/margins": 5.716488838195801,
      "rewards/rejected": -8.717982292175293,
      "step": 160
    },
    {
      "epoch": 0.3528801245459263,
      "grad_norm": 14.4755316075222,
      "learning_rate": 4.092999573916971e-07,
      "logits/chosen": 0.14696760475635529,
      "logits/rejected": 1.6602694988250732,
      "logps/chosen": -664.4630126953125,
      "logps/rejected": -1225.433837890625,
      "loss": 0.1804,
      "rewards/accuracies": 0.8999999761581421,
      "rewards/chosen": -3.056241273880005,
      "rewards/margins": 5.417787551879883,
      "rewards/rejected": -8.474028587341309,
      "step": 170
    },
    {
      "epoch": 0.3736377789309808,
      "grad_norm": 15.213191213812825,
      "learning_rate": 3.948795647238637e-07,
      "logits/chosen": -0.7323606014251709,
      "logits/rejected": 1.1011723279953003,
      "logps/chosen": -630.0668334960938,
      "logps/rejected": -1234.099609375,
      "loss": 0.1783,
      "rewards/accuracies": 0.921875,
      "rewards/chosen": -2.6342709064483643,
      "rewards/margins": 5.844083786010742,
      "rewards/rejected": -8.478353500366211,
      "step": 180
    },
    {
      "epoch": 0.39439543331603527,
      "grad_norm": 16.709219865079046,
      "learning_rate": 3.796933145401304e-07,
      "logits/chosen": -0.12861236929893494,
      "logits/rejected": 1.5260117053985596,
      "logps/chosen": -731.4463500976562,
      "logps/rejected": -1386.452880859375,
      "loss": 0.1646,
      "rewards/accuracies": 0.9375,
      "rewards/chosen": -3.487692356109619,
      "rewards/margins": 6.185595989227295,
      "rewards/rejected": -9.673288345336914,
      "step": 190
    },
    {
      "epoch": 0.4151530877010898,
      "grad_norm": 16.46330948268556,
      "learning_rate": 3.638214838889801e-07,
      "logits/chosen": 0.014425823464989662,
      "logits/rejected": 1.5777640342712402,
      "logps/chosen": -647.3234252929688,
      "logps/rejected": -1219.0548095703125,
      "loss": 0.1669,
      "rewards/accuracies": 0.909375011920929,
      "rewards/chosen": -2.797513484954834,
      "rewards/margins": 5.429152011871338,
      "rewards/rejected": -8.226665496826172,
      "step": 200
    },
    {
      "epoch": 0.4359107420861443,
      "grad_norm": 19.39279809291309,
      "learning_rate": 3.4734797391146383e-07,
      "logits/chosen": -0.12084762752056122,
      "logits/rejected": 1.6122562885284424,
      "logps/chosen": -656.5711059570312,
      "logps/rejected": -1250.8482666015625,
      "loss": 0.1594,
      "rewards/accuracies": 0.9281250238418579,
      "rewards/chosen": -2.874763011932373,
      "rewards/margins": 5.74980354309082,
      "rewards/rejected": -8.624567031860352,
      "step": 210
    },
    {
      "epoch": 0.4566683964711988,
      "grad_norm": 23.517592451341034,
      "learning_rate": 3.3035986632579036e-07,
      "logits/chosen": -1.0772771835327148,
      "logits/rejected": 0.6209205389022827,
      "logps/chosen": -622.2130126953125,
      "logps/rejected": -1293.0042724609375,
      "loss": 0.1678,
      "rewards/accuracies": 0.921875,
      "rewards/chosen": -2.5699634552001953,
      "rewards/margins": 6.527965545654297,
      "rewards/rejected": -9.097929000854492,
      "step": 220
    },
    {
      "epoch": 0.4774260508562532,
      "grad_norm": 20.580951166094668,
      "learning_rate": 3.1294696309885716e-07,
      "logits/chosen": -1.1179264783859253,
      "logits/rejected": 0.7691652178764343,
      "logps/chosen": -666.9544677734375,
      "logps/rejected": -1416.962158203125,
      "loss": 0.1673,
      "rewards/accuracies": 0.925000011920929,
      "rewards/chosen": -3.058974027633667,
      "rewards/margins": 7.343924522399902,
      "rewards/rejected": -10.402898788452148,
      "step": 230
    },
    {
      "epoch": 0.49818370524130773,
      "grad_norm": 14.600883148928759,
      "learning_rate": 2.952013117380913e-07,
      "logits/chosen": -0.9207614660263062,
      "logits/rejected": 1.1953575611114502,
      "logps/chosen": -662.4710693359375,
      "logps/rejected": -1418.23291015625,
      "loss": 0.1614,
      "rewards/accuracies": 0.949999988079071,
      "rewards/chosen": -3.0431346893310547,
      "rewards/margins": 7.319464206695557,
      "rewards/rejected": -10.362597465515137,
      "step": 240
    },
    {
      "epoch": 0.5189413596263622,
      "grad_norm": 11.446046359523642,
      "learning_rate": 2.7721671871299114e-07,
      "logits/chosen": -0.7357327938079834,
      "logits/rejected": 1.461576223373413,
      "logps/chosen": -672.9193725585938,
      "logps/rejected": -1338.949462890625,
      "loss": 0.1602,
      "rewards/accuracies": 0.940625011920929,
      "rewards/chosen": -2.9312338829040527,
      "rewards/margins": 6.449375152587891,
      "rewards/rejected": -9.380608558654785,
      "step": 250
    },
    {
      "epoch": 0.5396990140114167,
      "grad_norm": 14.757287503572078,
      "learning_rate": 2.5908825357849993e-07,
      "logits/chosen": -0.8231679797172546,
      "logits/rejected": 1.1155385971069336,
      "logps/chosen": -656.1690063476562,
      "logps/rejected": -1271.033447265625,
      "loss": 0.1622,
      "rewards/accuracies": 0.9125000238418579,
      "rewards/chosen": -2.8299801349639893,
      "rewards/margins": 5.925788402557373,
      "rewards/rejected": -8.755769729614258,
      "step": 260
    },
    {
      "epoch": 0.5604566683964712,
      "grad_norm": 17.896435569322648,
      "learning_rate": 2.409117464215001e-07,
      "logits/chosen": -0.4632663130760193,
      "logits/rejected": 1.819011926651001,
      "logps/chosen": -664.986572265625,
      "logps/rejected": -1455.3441162109375,
      "loss": 0.1471,
      "rewards/accuracies": 0.940625011920929,
      "rewards/chosen": -3.113548517227173,
      "rewards/margins": 7.589502811431885,
      "rewards/rejected": -10.70305061340332,
      "step": 270
    },
    {
      "epoch": 0.5812143227815257,
      "grad_norm": 18.08668716790038,
      "learning_rate": 2.227832812870089e-07,
      "logits/chosen": -0.542155385017395,
      "logits/rejected": 1.9537347555160522,
      "logps/chosen": -686.6569213867188,
      "logps/rejected": -1460.84619140625,
      "loss": 0.1557,
      "rewards/accuracies": 0.9312499761581421,
      "rewards/chosen": -3.106727123260498,
      "rewards/margins": 7.513753414154053,
      "rewards/rejected": -10.620479583740234,
      "step": 280
    },
    {
      "epoch": 0.6019719771665801,
      "grad_norm": 20.414881165009998,
      "learning_rate": 2.0479868826190871e-07,
      "logits/chosen": -0.437448650598526,
      "logits/rejected": 1.7888593673706055,
      "logps/chosen": -709.882568359375,
      "logps/rejected": -1392.702392578125,
      "loss": 0.1617,
      "rewards/accuracies": 0.918749988079071,
      "rewards/chosen": -3.35121488571167,
      "rewards/margins": 6.753846645355225,
      "rewards/rejected": -10.105062484741211,
      "step": 290
    },
    {
      "epoch": 0.6227296315516346,
      "grad_norm": 16.484968508125828,
      "learning_rate": 1.8705303690114287e-07,
      "logits/chosen": -0.2719939947128296,
      "logits/rejected": 1.811428427696228,
      "logps/chosen": -710.2088012695312,
      "logps/rejected": -1419.313720703125,
      "loss": 0.1478,
      "rewards/accuracies": 0.9468749761581421,
      "rewards/chosen": -3.3416149616241455,
      "rewards/margins": 6.840612888336182,
      "rewards/rejected": -10.182229042053223,
      "step": 300
    },
    {
      "epoch": 0.6434872859366891,
      "grad_norm": 15.309368142287557,
      "learning_rate": 1.6964013367420965e-07,
      "logits/chosen": -0.341867595911026,
      "logits/rejected": 1.7422988414764404,
      "logps/chosen": -686.5745849609375,
      "logps/rejected": -1366.7637939453125,
      "loss": 0.1534,
      "rewards/accuracies": 0.918749988079071,
      "rewards/chosen": -3.167562961578369,
      "rewards/margins": 6.726889133453369,
      "rewards/rejected": -9.894452095031738,
      "step": 310
    },
    {
      "epoch": 0.6642449403217436,
      "grad_norm": 11.4821386278306,
      "learning_rate": 1.5265202608853628e-07,
      "logits/chosen": -0.17296895384788513,
      "logits/rejected": 1.9412486553192139,
      "logps/chosen": -665.36376953125,
      "logps/rejected": -1430.9146728515625,
      "loss": 0.148,
      "rewards/accuracies": 0.934374988079071,
      "rewards/chosen": -3.0024094581604004,
      "rewards/margins": 7.437635898590088,
      "rewards/rejected": -10.440046310424805,
      "step": 320
    },
    {
      "epoch": 0.6850025947067981,
      "grad_norm": 10.552793459289758,
      "learning_rate": 1.3617851611101993e-07,
      "logits/chosen": -0.5047305226325989,
      "logits/rejected": 1.5836106538772583,
      "logps/chosen": -686.6402587890625,
      "logps/rejected": -1443.398193359375,
      "loss": 0.1519,
      "rewards/accuracies": 0.8968750238418579,
      "rewards/chosen": -3.1413583755493164,
      "rewards/margins": 7.2722015380859375,
      "rewards/rejected": -10.413559913635254,
      "step": 330
    },
    {
      "epoch": 0.7057602490918526,
      "grad_norm": 17.65296980495948,
      "learning_rate": 1.2030668545986958e-07,
      "logits/chosen": -0.569928765296936,
      "logits/rejected": 1.6948425769805908,
      "logps/chosen": -716.9683837890625,
      "logps/rejected": -1518.8372802734375,
      "loss": 0.1463,
      "rewards/accuracies": 0.921875,
      "rewards/chosen": -3.4350712299346924,
      "rewards/margins": 7.720976829528809,
      "rewards/rejected": -11.156047821044922,
      "step": 340
    },
    {
      "epoch": 0.7265179034769071,
      "grad_norm": 14.715250965523067,
      "learning_rate": 1.0512043527613623e-07,
      "logits/chosen": -0.7549006342887878,
      "logits/rejected": 1.3407833576202393,
      "logps/chosen": -695.8525390625,
      "logps/rejected": -1478.35546875,
      "loss": 0.1559,
      "rewards/accuracies": 0.903124988079071,
      "rewards/chosen": -3.347827911376953,
      "rewards/margins": 7.52248477935791,
      "rewards/rejected": -10.870311737060547,
      "step": 350
    },
    {
      "epoch": 0.7472755578619616,
      "grad_norm": 18.014896980938854,
      "learning_rate": 9.070004260830294e-08,
      "logits/chosen": -0.9223737716674805,
      "logits/rejected": 1.4507310390472412,
      "logps/chosen": -692.382080078125,
      "logps/rejected": -1453.919921875,
      "loss": 0.1437,
      "rewards/accuracies": 0.925000011920929,
      "rewards/chosen": -3.2790751457214355,
      "rewards/margins": 7.526673316955566,
      "rewards/rejected": -10.80574893951416,
      "step": 360
    },
    {
      "epoch": 0.768033212247016,
      "grad_norm": 14.401106731580096,
      "learning_rate": 7.712173605443267e-08,
      "logits/chosen": -0.9376351237297058,
      "logits/rejected": 1.583579421043396,
      "logps/chosen": -700.8660888671875,
      "logps/rejected": -1505.467529296875,
      "loss": 0.1494,
      "rewards/accuracies": 0.90625,
      "rewards/chosen": -3.340554714202881,
      "rewards/margins": 7.916815280914307,
      "rewards/rejected": -11.257369995117188,
      "step": 370
    },
    {
      "epoch": 0.7887908666320705,
      "grad_norm": 11.438125899827915,
      "learning_rate": 6.445729280509957e-08,
      "logits/chosen": -0.8953694105148315,
      "logits/rejected": 1.4685465097427368,
      "logps/chosen": -690.8338623046875,
      "logps/rejected": -1588.8245849609375,
      "loss": 0.1468,
      "rewards/accuracies": 0.921875,
      "rewards/chosen": -3.3841090202331543,
      "rewards/margins": 8.392255783081055,
      "rewards/rejected": -11.77636432647705,
      "step": 380
    },
    {
      "epoch": 0.809548521017125,
      "grad_norm": 13.207322793003062,
      "learning_rate": 5.2773659217201364e-08,
      "logits/chosen": -0.9152933359146118,
      "logits/rejected": 1.6746854782104492,
      "logps/chosen": -707.5098266601562,
      "logps/rejected": -1445.887939453125,
      "loss": 0.1555,
      "rewards/accuracies": 0.934374988079071,
      "rewards/chosen": -3.3184616565704346,
      "rewards/margins": 7.17882776260376,
      "rewards/rejected": -10.497289657592773,
      "step": 390
    },
    {
      "epoch": 0.8303061754021795,
      "grad_norm": 11.811171239173802,
      "learning_rate": 4.213259692436366e-08,
      "logits/chosen": -0.807452380657196,
      "logits/rejected": 1.5794246196746826,
      "logps/chosen": -692.3187255859375,
      "logps/rejected": -1477.223388671875,
      "loss": 0.1455,
      "rewards/accuracies": 0.934374988079071,
      "rewards/chosen": -3.2713589668273926,
      "rewards/margins": 7.557108402252197,
      "rewards/rejected": -10.82846736907959,
      "step": 400
    },
    {
      "epoch": 0.851063829787234,
      "grad_norm": 13.900287224699019,
      "learning_rate": 3.259035635465529e-08,
      "logits/chosen": -0.6530941128730774,
      "logits/rejected": 1.565045714378357,
      "logps/chosen": -704.8355712890625,
      "logps/rejected": -1375.3914794921875,
      "loss": 0.1474,
      "rewards/accuracies": 0.9125000238418579,
      "rewards/chosen": -3.393775463104248,
      "rewards/margins": 6.558309078216553,
      "rewards/rejected": -9.952085494995117,
      "step": 410
    },
    {
      "epoch": 0.8718214841722886,
      "grad_norm": 25.604242461790278,
      "learning_rate": 2.4197379381453942e-08,
      "logits/chosen": -0.6059257388114929,
      "logits/rejected": 1.747667670249939,
      "logps/chosen": -710.49365234375,
      "logps/rejected": -1475.5013427734375,
      "loss": 0.1551,
      "rewards/accuracies": 0.918749988079071,
      "rewards/chosen": -3.3643269538879395,
      "rewards/margins": 7.329138278961182,
      "rewards/rejected": -10.693464279174805,
      "step": 420
    },
    {
      "epoch": 0.892579138557343,
      "grad_norm": 17.02837493916288,
      "learning_rate": 1.699803267930039e-08,
      "logits/chosen": -0.6381738781929016,
      "logits/rejected": 1.581946611404419,
      "logps/chosen": -700.8256225585938,
      "logps/rejected": -1424.5767822265625,
      "loss": 0.14,
      "rewards/accuracies": 0.918749988079071,
      "rewards/chosen": -3.278334140777588,
      "rewards/margins": 7.011561393737793,
      "rewards/rejected": -10.289896965026855,
      "step": 430
    },
    {
      "epoch": 0.9133367929423976,
      "grad_norm": 12.115208573188879,
      "learning_rate": 1.1030373194265114e-08,
      "logits/chosen": -0.6104884743690491,
      "logits/rejected": 1.8680555820465088,
      "logps/chosen": -696.623046875,
      "logps/rejected": -1484.712646484375,
      "loss": 0.1555,
      "rewards/accuracies": 0.925000011920929,
      "rewards/chosen": -3.214094638824463,
      "rewards/margins": 7.734494686126709,
      "rewards/rejected": -10.948590278625488,
      "step": 440
    },
    {
      "epoch": 0.934094447327452,
      "grad_norm": 16.53240755993655,
      "learning_rate": 6.325946968591317e-09,
      "logits/chosen": -0.5625468492507935,
      "logits/rejected": 2.0248143672943115,
      "logps/chosen": -701.9017333984375,
      "logps/rejected": -1457.185791015625,
      "loss": 0.1506,
      "rewards/accuracies": 0.9437500238418579,
      "rewards/chosen": -3.3476765155792236,
      "rewards/margins": 7.459791660308838,
      "rewards/rejected": -10.807466506958008,
      "step": 450
    },
    {
      "epoch": 0.9548521017125065,
      "grad_norm": 13.032337936414088,
      "learning_rate": 2.909622383059834e-09,
      "logits/chosen": -0.5611749291419983,
      "logits/rejected": 1.8944803476333618,
      "logps/chosen": -692.7264404296875,
      "logps/rejected": -1460.9708251953125,
      "loss": 0.1466,
      "rewards/accuracies": 0.9375,
      "rewards/chosen": -3.228968858718872,
      "rewards/margins": 7.5441155433654785,
      "rewards/rejected": -10.77308464050293,
      "step": 460
    },
    {
      "epoch": 0.975609756097561,
      "grad_norm": 17.252869051917436,
      "learning_rate": 7.994586985856089e-10,
      "logits/chosen": -0.6213638782501221,
      "logits/rejected": 1.7617113590240479,
      "logps/chosen": -707.3765869140625,
      "logps/rejected": -1481.257080078125,
      "loss": 0.1374,
      "rewards/accuracies": 0.918749988079071,
      "rewards/chosen": -3.3621768951416016,
      "rewards/margins": 7.499251365661621,
      "rewards/rejected": -10.861429214477539,
      "step": 470
    },
    {
      "epoch": 0.9963674104826155,
      "grad_norm": 12.988129191271089,
      "learning_rate": 6.610591955641398e-12,
      "logits/chosen": -0.5171535015106201,
      "logits/rejected": 1.7204986810684204,
      "logps/chosen": -687.4800415039062,
      "logps/rejected": -1466.64501953125,
      "loss": 0.1487,
      "rewards/accuracies": 0.9375,
      "rewards/chosen": -3.355544328689575,
      "rewards/margins": 7.49337911605835,
      "rewards/rejected": -10.848923683166504,
      "step": 480
    },
    {
      "epoch": 0.9984431759211209,
      "step": 481,
      "total_flos": 0.0,
      "train_loss": 0.21933725711709495,
      "train_runtime": 13808.8776,
      "train_samples_per_second": 8.93,
      "train_steps_per_second": 0.035
    }
  ],
  "logging_steps": 10,
  "max_steps": 481,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}