|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 10.0, |
|
"eval_steps": 1000, |
|
"global_step": 1250, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.008, |
|
"grad_norm": 0.5664508606614915, |
|
"learning_rate": 8e-08, |
|
"logits/chosen": -1.7671998739242554, |
|
"logits/rejected": -2.2639822959899902, |
|
"logps/chosen": -46.430763244628906, |
|
"logps/rejected": -102.85381317138672, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.5939624426575738, |
|
"learning_rate": 8.000000000000001e-07, |
|
"logits/chosen": -1.6349234580993652, |
|
"logits/rejected": -2.0293447971343994, |
|
"logps/chosen": -50.39720916748047, |
|
"logps/rejected": -98.8401870727539, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.5069444179534912, |
|
"rewards/chosen": -0.00029719059239141643, |
|
"rewards/margins": 0.0004301935259718448, |
|
"rewards/rejected": -0.0007273841183632612, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.5700892501542819, |
|
"learning_rate": 1.6000000000000001e-06, |
|
"logits/chosen": -1.6655193567276, |
|
"logits/rejected": -2.1240811347961426, |
|
"logps/chosen": -47.0224609375, |
|
"logps/rejected": -105.32938385009766, |
|
"loss": 0.6875, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": 0.0002471635234542191, |
|
"rewards/margins": 0.012360258027911186, |
|
"rewards/rejected": -0.01211309339851141, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.7554778741427768, |
|
"learning_rate": 2.4000000000000003e-06, |
|
"logits/chosen": -1.7440729141235352, |
|
"logits/rejected": -2.1188106536865234, |
|
"logps/chosen": -50.84648513793945, |
|
"logps/rejected": -106.61978912353516, |
|
"loss": 0.6579, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.0015375657239928842, |
|
"rewards/margins": 0.07141353189945221, |
|
"rewards/rejected": -0.07295109331607819, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.7532828556442333, |
|
"learning_rate": 3.2000000000000003e-06, |
|
"logits/chosen": -1.6423908472061157, |
|
"logits/rejected": -2.07899808883667, |
|
"logps/chosen": -48.13755416870117, |
|
"logps/rejected": -131.5819854736328, |
|
"loss": 0.5614, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.005950761027634144, |
|
"rewards/margins": 0.3043977618217468, |
|
"rewards/rejected": -0.29844698309898376, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.49124925197921077, |
|
"learning_rate": 4.000000000000001e-06, |
|
"logits/chosen": -1.577450156211853, |
|
"logits/rejected": -2.0352962017059326, |
|
"logps/chosen": -47.25825119018555, |
|
"logps/rejected": -175.77737426757812, |
|
"loss": 0.4101, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.014603497460484505, |
|
"rewards/margins": 0.7516024708747864, |
|
"rewards/rejected": -0.7369989156723022, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.3421632301388367, |
|
"learning_rate": 4.800000000000001e-06, |
|
"logits/chosen": -1.54917311668396, |
|
"logits/rejected": -1.9336481094360352, |
|
"logps/chosen": -54.58576583862305, |
|
"logps/rejected": -217.2119598388672, |
|
"loss": 0.2991, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -0.034352801740169525, |
|
"rewards/margins": 1.1476414203643799, |
|
"rewards/rejected": -1.1819943189620972, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.33380968035486847, |
|
"learning_rate": 5.600000000000001e-06, |
|
"logits/chosen": -1.4622433185577393, |
|
"logits/rejected": -1.9104700088500977, |
|
"logps/chosen": -53.6714973449707, |
|
"logps/rejected": -297.1726379394531, |
|
"loss": 0.2231, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -0.03121398389339447, |
|
"rewards/margins": 1.926568627357483, |
|
"rewards/rejected": -1.957782506942749, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.14294152601696442, |
|
"learning_rate": 6.4000000000000006e-06, |
|
"logits/chosen": -1.3913681507110596, |
|
"logits/rejected": -1.818612813949585, |
|
"logps/chosen": -54.882850646972656, |
|
"logps/rejected": -364.594970703125, |
|
"loss": 0.2104, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.04058960825204849, |
|
"rewards/margins": 2.5959861278533936, |
|
"rewards/rejected": -2.636575937271118, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.2043412163104557, |
|
"learning_rate": 7.2000000000000005e-06, |
|
"logits/chosen": -1.3484817743301392, |
|
"logits/rejected": -1.7438443899154663, |
|
"logps/chosen": -57.729034423828125, |
|
"logps/rejected": -391.08892822265625, |
|
"loss": 0.194, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -0.0659557357430458, |
|
"rewards/margins": 2.8331382274627686, |
|
"rewards/rejected": -2.8990941047668457, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.45349092259459883, |
|
"learning_rate": 8.000000000000001e-06, |
|
"logits/chosen": -1.3855174779891968, |
|
"logits/rejected": -1.819236159324646, |
|
"logps/chosen": -49.61981964111328, |
|
"logps/rejected": -360.9615173339844, |
|
"loss": 0.1907, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 0.004681958816945553, |
|
"rewards/margins": 2.5954396724700928, |
|
"rewards/rejected": -2.5907576084136963, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.2724569498424518, |
|
"learning_rate": 8.8e-06, |
|
"logits/chosen": -1.3040708303451538, |
|
"logits/rejected": -1.7432258129119873, |
|
"logps/chosen": -52.180030822753906, |
|
"logps/rejected": -379.49664306640625, |
|
"loss": 0.1868, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.02268405631184578, |
|
"rewards/margins": 2.7577483654022217, |
|
"rewards/rejected": -2.78043270111084, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.17941572717033036, |
|
"learning_rate": 9.600000000000001e-06, |
|
"logits/chosen": -1.2530890703201294, |
|
"logits/rejected": -1.7477638721466064, |
|
"logps/chosen": -38.981895446777344, |
|
"logps/rejected": -443.25347900390625, |
|
"loss": 0.1556, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": 0.07770782709121704, |
|
"rewards/margins": 3.4688849449157715, |
|
"rewards/rejected": -3.3911774158477783, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 0.3303028590674529, |
|
"learning_rate": 9.999512620046523e-06, |
|
"logits/chosen": -1.346740484237671, |
|
"logits/rejected": -1.805371880531311, |
|
"logps/chosen": -55.48865509033203, |
|
"logps/rejected": -389.91607666015625, |
|
"loss": 0.1713, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -0.04284561052918434, |
|
"rewards/margins": 2.8560733795166016, |
|
"rewards/rejected": -2.898919105529785, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 0.9056804720582001, |
|
"learning_rate": 9.995614150494293e-06, |
|
"logits/chosen": -1.234359622001648, |
|
"logits/rejected": -1.6866785287857056, |
|
"logps/chosen": -48.799949645996094, |
|
"logps/rejected": -423.95452880859375, |
|
"loss": 0.164, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.011531209573149681, |
|
"rewards/margins": 3.2391915321350098, |
|
"rewards/rejected": -3.2276599407196045, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 0.5254416965749492, |
|
"learning_rate": 9.987820251299121e-06, |
|
"logits/chosen": -1.0958689451217651, |
|
"logits/rejected": -1.4965614080429077, |
|
"logps/chosen": -52.782798767089844, |
|
"logps/rejected": -455.8980407714844, |
|
"loss": 0.1648, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -0.012227678671479225, |
|
"rewards/margins": 3.540536880493164, |
|
"rewards/rejected": -3.552764415740967, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 0.5108498363771904, |
|
"learning_rate": 9.976136999909156e-06, |
|
"logits/chosen": -1.1107490062713623, |
|
"logits/rejected": -1.5378646850585938, |
|
"logps/chosen": -47.4257698059082, |
|
"logps/rejected": -470.36163330078125, |
|
"loss": 0.11, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 0.026664480566978455, |
|
"rewards/margins": 3.716306209564209, |
|
"rewards/rejected": -3.6896419525146484, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.3599999999999999, |
|
"grad_norm": 1.2383483480266742, |
|
"learning_rate": 9.960573506572391e-06, |
|
"logits/chosen": -1.028438687324524, |
|
"logits/rejected": -1.5239307880401611, |
|
"logps/chosen": -42.13895034790039, |
|
"logps/rejected": -517.1678466796875, |
|
"loss": 0.1026, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 0.05908266454935074, |
|
"rewards/margins": 4.210160255432129, |
|
"rewards/rejected": -4.151078224182129, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 0.9166087354544606, |
|
"learning_rate": 9.941141907232766e-06, |
|
"logits/chosen": -0.9610630869865417, |
|
"logits/rejected": -1.4120290279388428, |
|
"logps/chosen": -46.765838623046875, |
|
"logps/rejected": -542.84716796875, |
|
"loss": 0.109, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.03420247882604599, |
|
"rewards/margins": 4.456036567687988, |
|
"rewards/rejected": -4.421834468841553, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"grad_norm": 1.6771630345432116, |
|
"learning_rate": 9.91785735406693e-06, |
|
"logits/chosen": -0.8817203640937805, |
|
"logits/rejected": -1.3373805284500122, |
|
"logps/chosen": -43.31850051879883, |
|
"logps/rejected": -525.7540283203125, |
|
"loss": 0.0982, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.06416326761245728, |
|
"rewards/margins": 4.315359115600586, |
|
"rewards/rejected": -4.251195907592773, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 1.2635622789324346, |
|
"learning_rate": 9.890738003669029e-06, |
|
"logits/chosen": -0.9383857846260071, |
|
"logits/rejected": -1.3598968982696533, |
|
"logps/chosen": -40.324180603027344, |
|
"logps/rejected": -580.6539916992188, |
|
"loss": 0.0748, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 0.09767557680606842, |
|
"rewards/margins": 4.886355876922607, |
|
"rewards/rejected": -4.788680553436279, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.6800000000000002, |
|
"grad_norm": 2.051002862784939, |
|
"learning_rate": 9.859805002892733e-06, |
|
"logits/chosen": -0.747630774974823, |
|
"logits/rejected": -1.189408302307129, |
|
"logps/chosen": -38.997947692871094, |
|
"logps/rejected": -600.232421875, |
|
"loss": 0.0693, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": 0.10605404525995255, |
|
"rewards/margins": 5.094006538391113, |
|
"rewards/rejected": -4.98795223236084, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"grad_norm": 0.5973861965703521, |
|
"learning_rate": 9.825082472361558e-06, |
|
"logits/chosen": -0.769231915473938, |
|
"logits/rejected": -1.2444443702697754, |
|
"logps/chosen": -37.80399703979492, |
|
"logps/rejected": -661.2396850585938, |
|
"loss": 0.0355, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": 0.1041645035147667, |
|
"rewards/margins": 5.686351299285889, |
|
"rewards/rejected": -5.582187652587891, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.8399999999999999, |
|
"grad_norm": 0.36668201108212406, |
|
"learning_rate": 9.786597487660336e-06, |
|
"logits/chosen": -0.7400678396224976, |
|
"logits/rejected": -1.185605764389038, |
|
"logps/chosen": -38.35655975341797, |
|
"logps/rejected": -675.3729248046875, |
|
"loss": 0.0604, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": 0.10024236142635345, |
|
"rewards/margins": 5.824278831481934, |
|
"rewards/rejected": -5.72403621673584, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 0.18097106498610982, |
|
"learning_rate": 9.744380058222483e-06, |
|
"logits/chosen": -0.8332279324531555, |
|
"logits/rejected": -1.2189085483551025, |
|
"logps/chosen": -39.80487060546875, |
|
"logps/rejected": -665.7376708984375, |
|
"loss": 0.0445, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.1086835116147995, |
|
"rewards/margins": 5.7541303634643555, |
|
"rewards/rejected": -5.64544677734375, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.5482510873677592, |
|
"learning_rate": 9.698463103929542e-06, |
|
"logits/chosen": -0.8593254089355469, |
|
"logits/rejected": -1.219299077987671, |
|
"logps/chosen": -40.509761810302734, |
|
"logps/rejected": -678.4737548828125, |
|
"loss": 0.0333, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.10330041497945786, |
|
"rewards/margins": 5.88486385345459, |
|
"rewards/rejected": -5.781562805175781, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 0.06311428811879578, |
|
"learning_rate": 9.648882429441258e-06, |
|
"logits/chosen": -0.721664309501648, |
|
"logits/rejected": -1.1366941928863525, |
|
"logps/chosen": -43.40575408935547, |
|
"logps/rejected": -718.126953125, |
|
"loss": 0.043, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": 0.06619155406951904, |
|
"rewards/margins": 6.222557067871094, |
|
"rewards/rejected": -6.156365394592285, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"grad_norm": 0.10420712161857161, |
|
"learning_rate": 9.595676696276173e-06, |
|
"logits/chosen": -0.8397006988525391, |
|
"logits/rejected": -1.1919224262237549, |
|
"logps/chosen": -45.371559143066406, |
|
"logps/rejected": -671.6702880859375, |
|
"loss": 0.0399, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.05775808170437813, |
|
"rewards/margins": 5.775918006896973, |
|
"rewards/rejected": -5.718161106109619, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"grad_norm": 0.048219144314199394, |
|
"learning_rate": 9.538887392664544e-06, |
|
"logits/chosen": -0.932695746421814, |
|
"logits/rejected": -1.2857800722122192, |
|
"logps/chosen": -49.043556213378906, |
|
"logps/rejected": -700.9938354492188, |
|
"loss": 0.0302, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.01686009392142296, |
|
"rewards/margins": 6.021510601043701, |
|
"rewards/rejected": -6.004650592803955, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"grad_norm": 0.052699483559969486, |
|
"learning_rate": 9.478558801197065e-06, |
|
"logits/chosen": -1.0952008962631226, |
|
"logits/rejected": -1.419945478439331, |
|
"logps/chosen": -50.7900276184082, |
|
"logps/rejected": -746.294921875, |
|
"loss": 0.0287, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.011282497085630894, |
|
"rewards/margins": 6.445050239562988, |
|
"rewards/rejected": -6.456332206726074, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"grad_norm": 0.39282718133559014, |
|
"learning_rate": 9.414737964294636e-06, |
|
"logits/chosen": -1.276970386505127, |
|
"logits/rejected": -1.5558645725250244, |
|
"logps/chosen": -57.718162536621094, |
|
"logps/rejected": -758.6500244140625, |
|
"loss": 0.0284, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.07060392200946808, |
|
"rewards/margins": 6.502586364746094, |
|
"rewards/rejected": -6.573190212249756, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"grad_norm": 0.12204254875967184, |
|
"learning_rate": 9.347474647526095e-06, |
|
"logits/chosen": -1.4385970830917358, |
|
"logits/rejected": -1.6844545602798462, |
|
"logps/chosen": -59.007118225097656, |
|
"logps/rejected": -749.748046875, |
|
"loss": 0.0353, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.08172997832298279, |
|
"rewards/margins": 6.407279014587402, |
|
"rewards/rejected": -6.489008903503418, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"grad_norm": 0.05045051166529495, |
|
"learning_rate": 9.276821300802535e-06, |
|
"logits/chosen": -1.682716965675354, |
|
"logits/rejected": -1.9395768642425537, |
|
"logps/chosen": -69.35375213623047, |
|
"logps/rejected": -762.8756103515625, |
|
"loss": 0.0149, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.20327362418174744, |
|
"rewards/margins": 6.410320281982422, |
|
"rewards/rejected": -6.613594055175781, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"grad_norm": 0.041644876319752826, |
|
"learning_rate": 9.202833017478421e-06, |
|
"logits/chosen": -1.9520126581192017, |
|
"logits/rejected": -2.184046745300293, |
|
"logps/chosen": -74.17578125, |
|
"logps/rejected": -782.9901123046875, |
|
"loss": 0.0139, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.25414201617240906, |
|
"rewards/margins": 6.548297882080078, |
|
"rewards/rejected": -6.802439212799072, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 2.7199999999999998, |
|
"grad_norm": 0.10894239404290727, |
|
"learning_rate": 9.125567491391476e-06, |
|
"logits/chosen": -2.3633341789245605, |
|
"logits/rejected": -2.5258421897888184, |
|
"logps/chosen": -95.89537811279297, |
|
"logps/rejected": -769.3350830078125, |
|
"loss": 0.026, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4556370675563812, |
|
"rewards/margins": 6.227226734161377, |
|
"rewards/rejected": -6.682864189147949, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"grad_norm": 0.09090196291657318, |
|
"learning_rate": 9.045084971874738e-06, |
|
"logits/chosen": -2.9310622215270996, |
|
"logits/rejected": -3.0340473651885986, |
|
"logps/chosen": -181.8387451171875, |
|
"logps/rejected": -789.5302734375, |
|
"loss": 0.0108, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3074408769607544, |
|
"rewards/margins": 5.575064182281494, |
|
"rewards/rejected": -6.882504463195801, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"grad_norm": 0.12031593041735512, |
|
"learning_rate": 8.961448216775955e-06, |
|
"logits/chosen": -2.900047779083252, |
|
"logits/rejected": -3.0111327171325684, |
|
"logps/chosen": -186.22052001953125, |
|
"logps/rejected": -826.2166137695312, |
|
"loss": 0.0242, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3595960140228271, |
|
"rewards/margins": 5.881885051727295, |
|
"rewards/rejected": -7.241480350494385, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"grad_norm": 0.055136260032898635, |
|
"learning_rate": 8.874722443520898e-06, |
|
"logits/chosen": -2.855480432510376, |
|
"logits/rejected": -2.9430692195892334, |
|
"logps/chosen": -199.16647338867188, |
|
"logps/rejected": -827.880859375, |
|
"loss": 0.0346, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.4962244033813477, |
|
"rewards/margins": 5.776437282562256, |
|
"rewards/rejected": -7.272662162780762, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"grad_norm": 0.05072076339049116, |
|
"learning_rate": 8.784975278258783e-06, |
|
"logits/chosen": -2.6045117378234863, |
|
"logits/rejected": -2.729827404022217, |
|
"logps/chosen": -133.8080596923828, |
|
"logps/rejected": -809.306640625, |
|
"loss": 0.0092, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8419109582901001, |
|
"rewards/margins": 6.24552059173584, |
|
"rewards/rejected": -7.08743143081665, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"grad_norm": 0.7450627702438166, |
|
"learning_rate": 8.692276703129421e-06, |
|
"logits/chosen": -2.6937222480773926, |
|
"logits/rejected": -2.7929458618164062, |
|
"logps/chosen": -168.631103515625, |
|
"logps/rejected": -842.021484375, |
|
"loss": 0.0196, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.18534255027771, |
|
"rewards/margins": 6.214483737945557, |
|
"rewards/rejected": -7.3998260498046875, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"grad_norm": 0.05027116077254764, |
|
"learning_rate": 8.596699001693257e-06, |
|
"logits/chosen": -2.852487087249756, |
|
"logits/rejected": -2.908648729324341, |
|
"logps/chosen": -187.98727416992188, |
|
"logps/rejected": -834.2393798828125, |
|
"loss": 0.016, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -1.361481785774231, |
|
"rewards/margins": 5.982842922210693, |
|
"rewards/rejected": -7.344325065612793, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.2800000000000002, |
|
"grad_norm": 0.042611669944768574, |
|
"learning_rate": 8.498316702566828e-06, |
|
"logits/chosen": -2.6405489444732666, |
|
"logits/rejected": -2.737513303756714, |
|
"logps/chosen": -163.23654174804688, |
|
"logps/rejected": -875.9517822265625, |
|
"loss": 0.0099, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -1.1402196884155273, |
|
"rewards/margins": 6.602635383605957, |
|
"rewards/rejected": -7.742854118347168, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"grad_norm": 0.08908259328663255, |
|
"learning_rate": 8.397206521307584e-06, |
|
"logits/chosen": -2.797304630279541, |
|
"logits/rejected": -2.8749709129333496, |
|
"logps/chosen": -200.6483917236328, |
|
"logps/rejected": -878.53466796875, |
|
"loss": 0.0056, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.5122627019882202, |
|
"rewards/margins": 6.271607398986816, |
|
"rewards/rejected": -7.783869743347168, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"grad_norm": 0.11190864225013811, |
|
"learning_rate": 8.293447300593402e-06, |
|
"logits/chosen": -2.712634563446045, |
|
"logits/rejected": -2.790599822998047, |
|
"logps/chosen": -156.58209228515625, |
|
"logps/rejected": -865.5999755859375, |
|
"loss": 0.0061, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.0528513193130493, |
|
"rewards/margins": 6.598712921142578, |
|
"rewards/rejected": -7.651564598083496, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"grad_norm": 0.06216733964642332, |
|
"learning_rate": 8.18711994874345e-06, |
|
"logits/chosen": -2.7393593788146973, |
|
"logits/rejected": -2.810574531555176, |
|
"logps/chosen": -159.99278259277344, |
|
"logps/rejected": -852.8875732421875, |
|
"loss": 0.0102, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -1.0680657625198364, |
|
"rewards/margins": 6.475743770599365, |
|
"rewards/rejected": -7.543809413909912, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"grad_norm": 1.6020556191748092, |
|
"learning_rate": 8.078307376628292e-06, |
|
"logits/chosen": -2.8640763759613037, |
|
"logits/rejected": -2.9100239276885986, |
|
"logps/chosen": -194.4068603515625, |
|
"logps/rejected": -889.5808715820312, |
|
"loss": 0.0241, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.4508755207061768, |
|
"rewards/margins": 6.437768459320068, |
|
"rewards/rejected": -7.888643741607666, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"grad_norm": 0.08068344292732496, |
|
"learning_rate": 7.967094433018508e-06, |
|
"logits/chosen": -2.863485813140869, |
|
"logits/rejected": -2.9134774208068848, |
|
"logps/chosen": -192.51461791992188, |
|
"logps/rejected": -875.9539184570312, |
|
"loss": 0.0173, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -1.4307234287261963, |
|
"rewards/margins": 6.317349433898926, |
|
"rewards/rejected": -7.748072624206543, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"grad_norm": 0.03890160957476439, |
|
"learning_rate": 7.85356783842216e-06, |
|
"logits/chosen": -2.780956983566284, |
|
"logits/rejected": -2.8286776542663574, |
|
"logps/chosen": -156.67994689941406, |
|
"logps/rejected": -865.4143676757812, |
|
"loss": 0.0076, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.0601476430892944, |
|
"rewards/margins": 6.580509185791016, |
|
"rewards/rejected": -7.6406569480896, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"grad_norm": 0.2797627859203357, |
|
"learning_rate": 7.737816117462752e-06, |
|
"logits/chosen": -2.872148036956787, |
|
"logits/rejected": -2.9261298179626465, |
|
"logps/chosen": -162.9639434814453, |
|
"logps/rejected": -861.81201171875, |
|
"loss": 0.0189, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -1.1223922967910767, |
|
"rewards/margins": 6.500160217285156, |
|
"rewards/rejected": -7.62255334854126, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"grad_norm": 0.03202298420625548, |
|
"learning_rate": 7.619929529850397e-06, |
|
"logits/chosen": -3.1152751445770264, |
|
"logits/rejected": -3.124176263809204, |
|
"logps/chosen": -212.0701446533203, |
|
"logps/rejected": -899.0244140625, |
|
"loss": 0.0053, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.6227067708969116, |
|
"rewards/margins": 6.344549655914307, |
|
"rewards/rejected": -7.96725606918335, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.056671500571648784, |
|
"learning_rate": 7.500000000000001e-06, |
|
"logits/chosen": -3.027770757675171, |
|
"logits/rejected": -3.058871030807495, |
|
"logps/chosen": -178.46084594726562, |
|
"logps/rejected": -894.8692626953125, |
|
"loss": 0.0051, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.2816778421401978, |
|
"rewards/margins": 6.661429405212402, |
|
"rewards/rejected": -7.943107604980469, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"grad_norm": 0.08007457084674505, |
|
"learning_rate": 7.378121045351378e-06, |
|
"logits/chosen": -3.0769410133361816, |
|
"logits/rejected": -3.099307060241699, |
|
"logps/chosen": -194.36056518554688, |
|
"logps/rejected": -881.5306396484375, |
|
"loss": 0.0097, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.4257417917251587, |
|
"rewards/margins": 6.408936977386475, |
|
"rewards/rejected": -7.834680080413818, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"grad_norm": 2.4134748485667603, |
|
"learning_rate": 7.254387703447154e-06, |
|
"logits/chosen": -3.0368213653564453, |
|
"logits/rejected": -3.0519778728485107, |
|
"logps/chosen": -203.78298950195312, |
|
"logps/rejected": -914.9696044921875, |
|
"loss": 0.0071, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.5372952222824097, |
|
"rewards/margins": 6.609500885009766, |
|
"rewards/rejected": -8.146797180175781, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"grad_norm": 0.029510070347368533, |
|
"learning_rate": 7.128896457825364e-06, |
|
"logits/chosen": -2.9700584411621094, |
|
"logits/rejected": -3.0200304985046387, |
|
"logps/chosen": -185.78460693359375, |
|
"logps/rejected": -919.3395385742188, |
|
"loss": 0.0036, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3613429069519043, |
|
"rewards/margins": 6.823419094085693, |
|
"rewards/rejected": -8.184762001037598, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"grad_norm": 1.2493776168986983, |
|
"learning_rate": 7.0017451627844765e-06, |
|
"logits/chosen": -2.917212724685669, |
|
"logits/rejected": -2.9564435482025146, |
|
"logps/chosen": -197.5432891845703, |
|
"logps/rejected": -924.7901611328125, |
|
"loss": 0.0201, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.476697325706482, |
|
"rewards/margins": 6.7479658126831055, |
|
"rewards/rejected": -8.224664688110352, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"grad_norm": 0.2763373802778404, |
|
"learning_rate": 6.873032967079562e-06, |
|
"logits/chosen": -2.7449865341186523, |
|
"logits/rejected": -2.7941806316375732, |
|
"logps/chosen": -146.8518829345703, |
|
"logps/rejected": -870.0069580078125, |
|
"loss": 0.0121, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9301493763923645, |
|
"rewards/margins": 6.802959442138672, |
|
"rewards/rejected": -7.733109474182129, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"grad_norm": 0.03334067080540203, |
|
"learning_rate": 6.7428602366090764e-06, |
|
"logits/chosen": -2.7536513805389404, |
|
"logits/rejected": -2.8025503158569336, |
|
"logps/chosen": -160.67921447753906, |
|
"logps/rejected": -924.1072998046875, |
|
"loss": 0.0208, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.0948050022125244, |
|
"rewards/margins": 7.13351583480835, |
|
"rewards/rejected": -8.228321075439453, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 4.5600000000000005, |
|
"grad_norm": 0.10014923752683715, |
|
"learning_rate": 6.611328476152557e-06, |
|
"logits/chosen": -2.943023204803467, |
|
"logits/rejected": -2.971611261367798, |
|
"logps/chosen": -181.0494384765625, |
|
"logps/rejected": -916.7213134765625, |
|
"loss": 0.0064, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.306639552116394, |
|
"rewards/margins": 6.8469133377075195, |
|
"rewards/rejected": -8.153553009033203, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"grad_norm": 0.5303544380996533, |
|
"learning_rate": 6.4785402502202345e-06, |
|
"logits/chosen": -3.0545575618743896, |
|
"logits/rejected": -3.05729079246521, |
|
"logps/chosen": -207.95034790039062, |
|
"logps/rejected": -939.8880615234375, |
|
"loss": 0.0076, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.5920554399490356, |
|
"rewards/margins": 6.775687217712402, |
|
"rewards/rejected": -8.367743492126465, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"grad_norm": 0.04084615849116197, |
|
"learning_rate": 6.344599103076329e-06, |
|
"logits/chosen": -2.9924559593200684, |
|
"logits/rejected": -2.992640972137451, |
|
"logps/chosen": -198.8438720703125, |
|
"logps/rejected": -943.1241455078125, |
|
"loss": 0.0041, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.5001078844070435, |
|
"rewards/margins": 6.916898250579834, |
|
"rewards/rejected": -8.417006492614746, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"grad_norm": 1.0475565210531403, |
|
"learning_rate": 6.209609477998339e-06, |
|
"logits/chosen": -3.0877511501312256, |
|
"logits/rejected": -3.0805959701538086, |
|
"logps/chosen": -226.8428955078125, |
|
"logps/rejected": -941.5935668945312, |
|
"loss": 0.0178, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.7735874652862549, |
|
"rewards/margins": 6.6292877197265625, |
|
"rewards/rejected": -8.402875900268555, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"grad_norm": 0.0998487112139878, |
|
"learning_rate": 6.073676635835317e-06, |
|
"logits/chosen": -3.117692232131958, |
|
"logits/rejected": -3.0967986583709717, |
|
"logps/chosen": -220.5649871826172, |
|
"logps/rejected": -916.0166015625, |
|
"loss": 0.01, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.7153180837631226, |
|
"rewards/margins": 6.428615570068359, |
|
"rewards/rejected": -8.143933296203613, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"grad_norm": 0.04577054636911131, |
|
"learning_rate": 5.936906572928625e-06, |
|
"logits/chosen": -2.8556697368621826, |
|
"logits/rejected": -2.8885579109191895, |
|
"logps/chosen": -171.10992431640625, |
|
"logps/rejected": -943.6531982421875, |
|
"loss": 0.0063, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.2215626239776611, |
|
"rewards/margins": 7.193364143371582, |
|
"rewards/rejected": -8.414926528930664, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"grad_norm": 0.1261248214008726, |
|
"learning_rate": 5.799405938459175e-06, |
|
"logits/chosen": -2.886103868484497, |
|
"logits/rejected": -2.930838108062744, |
|
"logps/chosen": -162.7739715576172, |
|
"logps/rejected": -950.7653198242188, |
|
"loss": 0.0046, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.1451432704925537, |
|
"rewards/margins": 7.34460973739624, |
|
"rewards/rejected": -8.489753723144531, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"grad_norm": 0.1505621604680798, |
|
"learning_rate": 5.661281951285613e-06, |
|
"logits/chosen": -3.0620782375335693, |
|
"logits/rejected": -3.0735926628112793, |
|
"logps/chosen": -191.69161987304688, |
|
"logps/rejected": -940.0349731445312, |
|
"loss": 0.0038, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.417407751083374, |
|
"rewards/margins": 6.979997158050537, |
|
"rewards/rejected": -8.397405624389648, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"grad_norm": 0.019057548002213837, |
|
"learning_rate": 5.522642316338268e-06, |
|
"logits/chosen": -3.170454502105713, |
|
"logits/rejected": -3.1733736991882324, |
|
"logps/chosen": -206.1622314453125, |
|
"logps/rejected": -938.7789306640625, |
|
"loss": 0.0038, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.5485303401947021, |
|
"rewards/margins": 6.836629390716553, |
|
"rewards/rejected": -8.385160446166992, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"grad_norm": 0.04553800143055307, |
|
"learning_rate": 5.383595140634093e-06, |
|
"logits/chosen": -3.111215114593506, |
|
"logits/rejected": -3.113774061203003, |
|
"logps/chosen": -197.33230590820312, |
|
"logps/rejected": -965.2951049804688, |
|
"loss": 0.0032, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.4725955724716187, |
|
"rewards/margins": 7.165165901184082, |
|
"rewards/rejected": -8.637761116027832, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 5.36, |
|
"grad_norm": 0.023672933333312794, |
|
"learning_rate": 5.244248848978067e-06, |
|
"logits/chosen": -3.087512493133545, |
|
"logits/rejected": -3.0911624431610107, |
|
"logps/chosen": -215.00796508789062, |
|
"logps/rejected": -978.7565307617188, |
|
"loss": 0.0064, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.669549584388733, |
|
"rewards/margins": 7.078073978424072, |
|
"rewards/rejected": -8.747623443603516, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 5.44, |
|
"grad_norm": 0.027237164042707367, |
|
"learning_rate": 5.1047120994167855e-06, |
|
"logits/chosen": -3.069511890411377, |
|
"logits/rejected": -3.0692381858825684, |
|
"logps/chosen": -202.83555603027344, |
|
"logps/rejected": -972.17578125, |
|
"loss": 0.003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.5378749370574951, |
|
"rewards/margins": 7.15402889251709, |
|
"rewards/rejected": -8.691904067993164, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 5.52, |
|
"grad_norm": 0.0238313232064652, |
|
"learning_rate": 4.965093698510192e-06, |
|
"logits/chosen": -3.045116424560547, |
|
"logits/rejected": -3.070358991622925, |
|
"logps/chosen": -189.43704223632812, |
|
"logps/rejected": -947.7220458984375, |
|
"loss": 0.0066, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3802012205123901, |
|
"rewards/margins": 7.0903000831604, |
|
"rewards/rejected": -8.470501899719238, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"grad_norm": 0.06266588262969701, |
|
"learning_rate": 4.825502516487497e-06, |
|
"logits/chosen": -2.9892053604125977, |
|
"logits/rejected": -3.0090491771698, |
|
"logps/chosen": -190.52420043945312, |
|
"logps/rejected": -957.5784301757812, |
|
"loss": 0.0041, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3977526426315308, |
|
"rewards/margins": 7.167298793792725, |
|
"rewards/rejected": -8.565052032470703, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 5.68, |
|
"grad_norm": 0.21683933666142513, |
|
"learning_rate": 4.686047402353433e-06, |
|
"logits/chosen": -3.0948545932769775, |
|
"logits/rejected": -3.0949206352233887, |
|
"logps/chosen": -215.0717315673828, |
|
"logps/rejected": -974.2849731445312, |
|
"loss": 0.0048, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.6559123992919922, |
|
"rewards/margins": 7.0673394203186035, |
|
"rewards/rejected": -8.723250389099121, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"grad_norm": 0.029076731867175645, |
|
"learning_rate": 4.546837099011101e-06, |
|
"logits/chosen": -3.138347864151001, |
|
"logits/rejected": -3.1147701740264893, |
|
"logps/chosen": -221.6696014404297, |
|
"logps/rejected": -976.2689208984375, |
|
"loss": 0.0034, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.7190300226211548, |
|
"rewards/margins": 7.019604682922363, |
|
"rewards/rejected": -8.738635063171387, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"grad_norm": 0.036662851444810934, |
|
"learning_rate": 4.4079801584674955e-06, |
|
"logits/chosen": -3.0819013118743896, |
|
"logits/rejected": -3.0665931701660156, |
|
"logps/chosen": -216.7307891845703, |
|
"logps/rejected": -979.8884887695312, |
|
"loss": 0.0056, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -1.6721961498260498, |
|
"rewards/margins": 7.096380710601807, |
|
"rewards/rejected": -8.768576622009277, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 5.92, |
|
"grad_norm": 0.03922484581534493, |
|
"learning_rate": 4.269584857187942e-06, |
|
"logits/chosen": -3.033648729324341, |
|
"logits/rejected": -3.03180193901062, |
|
"logps/chosen": -203.57614135742188, |
|
"logps/rejected": -953.6720581054688, |
|
"loss": 0.0031, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.5226513147354126, |
|
"rewards/margins": 7.014796257019043, |
|
"rewards/rejected": -8.537446975708008, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 0.031567842692926854, |
|
"learning_rate": 4.131759111665349e-06, |
|
"logits/chosen": -3.0094892978668213, |
|
"logits/rejected": -3.0100715160369873, |
|
"logps/chosen": -222.1414794921875, |
|
"logps/rejected": -981.8389892578125, |
|
"loss": 0.0031, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.7311455011367798, |
|
"rewards/margins": 7.06912899017334, |
|
"rewards/rejected": -8.800275802612305, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"grad_norm": 0.02449878094582341, |
|
"learning_rate": 3.994610394270178e-06, |
|
"logits/chosen": -2.970534563064575, |
|
"logits/rejected": -2.9691994190216064, |
|
"logps/chosen": -207.33847045898438, |
|
"logps/rejected": -994.8795166015625, |
|
"loss": 0.0032, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.5827735662460327, |
|
"rewards/margins": 7.3385796546936035, |
|
"rewards/rejected": -8.921354293823242, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 6.16, |
|
"grad_norm": 0.02981087291288187, |
|
"learning_rate": 3.8582456494467214e-06, |
|
"logits/chosen": -2.9473607540130615, |
|
"logits/rejected": -2.956864833831787, |
|
"logps/chosen": -203.9072265625, |
|
"logps/rejected": -984.5435791015625, |
|
"loss": 0.0028, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.5347377061843872, |
|
"rewards/margins": 7.284613132476807, |
|
"rewards/rejected": -8.819352149963379, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 6.24, |
|
"grad_norm": 0.021043064932021155, |
|
"learning_rate": 3.7227712103210485e-06, |
|
"logits/chosen": -2.973010778427124, |
|
"logits/rejected": -2.9938042163848877, |
|
"logps/chosen": -190.59471130371094, |
|
"logps/rejected": -966.3521728515625, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3837454319000244, |
|
"rewards/margins": 7.294589996337891, |
|
"rewards/rejected": -8.678335189819336, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 6.32, |
|
"grad_norm": 0.039673872651556516, |
|
"learning_rate": 3.5882927157856175e-06, |
|
"logits/chosen": -2.9964919090270996, |
|
"logits/rejected": -3.003882884979248, |
|
"logps/chosen": -211.8938446044922, |
|
"logps/rejected": -996.83544921875, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.6143487691879272, |
|
"rewards/margins": 7.334100246429443, |
|
"rewards/rejected": -8.948450088500977, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"grad_norm": 0.04339817998984244, |
|
"learning_rate": 3.4549150281252635e-06, |
|
"logits/chosen": -2.9923062324523926, |
|
"logits/rejected": -3.008997917175293, |
|
"logps/chosen": -199.03875732421875, |
|
"logps/rejected": -962.5731201171875, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.4706881046295166, |
|
"rewards/margins": 7.157661437988281, |
|
"rewards/rejected": -8.628351211547852, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 6.48, |
|
"grad_norm": 0.03256715178755916, |
|
"learning_rate": 3.322742151248726e-06, |
|
"logits/chosen": -2.947535514831543, |
|
"logits/rejected": -2.9602136611938477, |
|
"logps/chosen": -194.71202087402344, |
|
"logps/rejected": -992.3277587890625, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.4545636177062988, |
|
"rewards/margins": 7.46028995513916, |
|
"rewards/rejected": -8.914854049682617, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 6.5600000000000005, |
|
"grad_norm": 0.03103799595540118, |
|
"learning_rate": 3.1918771495895395e-06, |
|
"logits/chosen": -2.986626625061035, |
|
"logits/rejected": -3.0086371898651123, |
|
"logps/chosen": -199.30032348632812, |
|
"logps/rejected": -983.8270263671875, |
|
"loss": 0.0051, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -1.479447603225708, |
|
"rewards/margins": 7.346575736999512, |
|
"rewards/rejected": -8.826024055480957, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 6.64, |
|
"grad_norm": 0.012923263616360448, |
|
"learning_rate": 3.0624220677394854e-06, |
|
"logits/chosen": -2.980841636657715, |
|
"logits/rejected": -2.9940218925476074, |
|
"logps/chosen": -201.14207458496094, |
|
"logps/rejected": -989.4503784179688, |
|
"loss": 0.002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.5033818483352661, |
|
"rewards/margins": 7.376742362976074, |
|
"rewards/rejected": -8.880125045776367, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"grad_norm": 0.4008776832999778, |
|
"learning_rate": 2.934477850877292e-06, |
|
"logits/chosen": -2.8989338874816895, |
|
"logits/rejected": -2.904329299926758, |
|
"logps/chosen": -175.6615447998047, |
|
"logps/rejected": -993.4110107421875, |
|
"loss": 0.0059, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.2583926916122437, |
|
"rewards/margins": 7.667746543884277, |
|
"rewards/rejected": -8.926138877868652, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"grad_norm": 0.03242501786191666, |
|
"learning_rate": 2.8081442660546126e-06, |
|
"logits/chosen": -2.9362475872039795, |
|
"logits/rejected": -2.942796230316162, |
|
"logps/chosen": -200.99786376953125, |
|
"logps/rejected": -999.9691162109375, |
|
"loss": 0.0026, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.530400037765503, |
|
"rewards/margins": 7.44219970703125, |
|
"rewards/rejected": -8.9725980758667, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 6.88, |
|
"grad_norm": 0.04701366652822717, |
|
"learning_rate": 2.683519824400693e-06, |
|
"logits/chosen": -2.9499258995056152, |
|
"logits/rejected": -2.9498021602630615, |
|
"logps/chosen": -200.02708435058594, |
|
"logps/rejected": -1003.5309448242188, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.5131416320800781, |
|
"rewards/margins": 7.508401393890381, |
|
"rewards/rejected": -9.021543502807617, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"grad_norm": 0.06965442317131018, |
|
"learning_rate": 2.560701704306336e-06, |
|
"logits/chosen": -2.992658853530884, |
|
"logits/rejected": -3.0063395500183105, |
|
"logps/chosen": -196.2028045654297, |
|
"logps/rejected": -988.9591064453125, |
|
"loss": 0.0028, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.4468988180160522, |
|
"rewards/margins": 7.434958457946777, |
|
"rewards/rejected": -8.881857872009277, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"grad_norm": 0.04943704962243662, |
|
"learning_rate": 2.4397856756471435e-06, |
|
"logits/chosen": -2.967582941055298, |
|
"logits/rejected": -2.969144344329834, |
|
"logps/chosen": -201.17965698242188, |
|
"logps/rejected": -1002.7322387695312, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.5157372951507568, |
|
"rewards/margins": 7.4925055503845215, |
|
"rewards/rejected": -9.0082426071167, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"grad_norm": 0.026898832512229635, |
|
"learning_rate": 2.320866025105016e-06, |
|
"logits/chosen": -3.0048301219940186, |
|
"logits/rejected": -3.0222504138946533, |
|
"logps/chosen": -212.1355438232422, |
|
"logps/rejected": -974.49072265625, |
|
"loss": 0.0028, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.6027580499649048, |
|
"rewards/margins": 7.149576663970947, |
|
"rewards/rejected": -8.752334594726562, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"grad_norm": 0.018773487381868556, |
|
"learning_rate": 2.204035482646267e-06, |
|
"logits/chosen": -3.004668712615967, |
|
"logits/rejected": -3.02616024017334, |
|
"logps/chosen": -204.20449829101562, |
|
"logps/rejected": -988.2781372070312, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.5249879360198975, |
|
"rewards/margins": 7.354971408843994, |
|
"rewards/rejected": -8.879959106445312, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"grad_norm": 0.010977198093830244, |
|
"learning_rate": 2.0893851492135536e-06, |
|
"logits/chosen": -3.026869773864746, |
|
"logits/rejected": -3.0388169288635254, |
|
"logps/chosen": -211.68359375, |
|
"logps/rejected": -987.3611450195312, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.6020605564117432, |
|
"rewards/margins": 7.269850254058838, |
|
"rewards/rejected": -8.871912002563477, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 7.36, |
|
"grad_norm": 0.29173762576971574, |
|
"learning_rate": 1.977004425688126e-06, |
|
"logits/chosen": -3.0339274406433105, |
|
"logits/rejected": -3.066455602645874, |
|
"logps/chosen": -212.5125732421875, |
|
"logps/rejected": -1007.0792846679688, |
|
"loss": 0.005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.6347990036010742, |
|
"rewards/margins": 7.410192966461182, |
|
"rewards/rejected": -9.044992446899414, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"grad_norm": 0.021055806854853823, |
|
"learning_rate": 1.8669809431776991e-06, |
|
"logits/chosen": -2.9736626148223877, |
|
"logits/rejected": -2.9851651191711426, |
|
"logps/chosen": -196.3205108642578, |
|
"logps/rejected": -990.7384643554688, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.459997296333313, |
|
"rewards/margins": 7.45266580581665, |
|
"rewards/rejected": -8.912662506103516, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 7.52, |
|
"grad_norm": 0.18348617533249506, |
|
"learning_rate": 1.7594004946843458e-06, |
|
"logits/chosen": -2.942028522491455, |
|
"logits/rejected": -2.9542624950408936, |
|
"logps/chosen": -199.4943389892578, |
|
"logps/rejected": -1008.6310424804688, |
|
"loss": 0.003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.5059641599655151, |
|
"rewards/margins": 7.565948486328125, |
|
"rewards/rejected": -9.07191276550293, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"grad_norm": 0.02961600216806713, |
|
"learning_rate": 1.6543469682057105e-06, |
|
"logits/chosen": -2.9790632724761963, |
|
"logits/rejected": -2.981333017349243, |
|
"logps/chosen": -225.8614959716797, |
|
"logps/rejected": -1018.3231201171875, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.773397445678711, |
|
"rewards/margins": 7.389155387878418, |
|
"rewards/rejected": -9.162553787231445, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"grad_norm": 0.033303351274835385, |
|
"learning_rate": 1.551902281321651e-06, |
|
"logits/chosen": -3.0063486099243164, |
|
"logits/rejected": -3.0057759284973145, |
|
"logps/chosen": -210.657470703125, |
|
"logps/rejected": -1013.4367065429688, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.6198650598526, |
|
"rewards/margins": 7.4931464195251465, |
|
"rewards/rejected": -9.113012313842773, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 7.76, |
|
"grad_norm": 0.02991133657233578, |
|
"learning_rate": 1.4521463173173966e-06, |
|
"logits/chosen": -2.9552433490753174, |
|
"logits/rejected": -2.986539363861084, |
|
"logps/chosen": -185.34751892089844, |
|
"logps/rejected": -995.6087646484375, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3463085889816284, |
|
"rewards/margins": 7.610987186431885, |
|
"rewards/rejected": -8.957294464111328, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 7.84, |
|
"grad_norm": 0.03956036447686216, |
|
"learning_rate": 1.3551568628929434e-06, |
|
"logits/chosen": -2.987436532974243, |
|
"logits/rejected": -2.99739146232605, |
|
"logps/chosen": -202.7963409423828, |
|
"logps/rejected": -1010.6976318359375, |
|
"loss": 0.0028, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.5336072444915771, |
|
"rewards/margins": 7.547433376312256, |
|
"rewards/rejected": -9.081039428710938, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 7.92, |
|
"grad_norm": 0.0380908854419757, |
|
"learning_rate": 1.2610095475073415e-06, |
|
"logits/chosen": -2.966024160385132, |
|
"logits/rejected": -2.9880573749542236, |
|
"logps/chosen": -188.83627319335938, |
|
"logps/rejected": -1017.0061645507812, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.397072196006775, |
|
"rewards/margins": 7.754624366760254, |
|
"rewards/rejected": -9.15169620513916, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.056813053850928295, |
|
"learning_rate": 1.1697777844051105e-06, |
|
"logits/chosen": -3.0522654056549072, |
|
"logits/rejected": -3.037459135055542, |
|
"logps/chosen": -224.0611114501953, |
|
"logps/rejected": -1007.8203125, |
|
"loss": 0.0028, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.7456802129745483, |
|
"rewards/margins": 7.323793888092041, |
|
"rewards/rejected": -9.069474220275879, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"grad_norm": 0.04428932232427895, |
|
"learning_rate": 1.0815327133708015e-06, |
|
"logits/chosen": -2.974698305130005, |
|
"logits/rejected": -2.989356756210327, |
|
"logps/chosen": -199.12106323242188, |
|
"logps/rejected": -999.2364501953125, |
|
"loss": 0.0048, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -1.4827978610992432, |
|
"rewards/margins": 7.491432189941406, |
|
"rewards/rejected": -8.97422981262207, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 8.16, |
|
"grad_norm": 0.03094089083601851, |
|
"learning_rate": 9.963431452563331e-07, |
|
"logits/chosen": -2.9914603233337402, |
|
"logits/rejected": -2.9979825019836426, |
|
"logps/chosen": -202.41212463378906, |
|
"logps/rejected": -983.4681396484375, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.5089391469955444, |
|
"rewards/margins": 7.3303656578063965, |
|
"rewards/rejected": -8.839305877685547, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 8.24, |
|
"grad_norm": 0.027522596865270775, |
|
"learning_rate": 9.142755083243577e-07, |
|
"logits/chosen": -3.0142979621887207, |
|
"logits/rejected": -3.0246782302856445, |
|
"logps/chosen": -206.26199340820312, |
|
"logps/rejected": -1013.3017578125, |
|
"loss": 0.0028, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.5663325786590576, |
|
"rewards/margins": 7.545605659484863, |
|
"rewards/rejected": -9.111937522888184, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 8.32, |
|
"grad_norm": 0.02636838723753889, |
|
"learning_rate": 8.353937964495029e-07, |
|
"logits/chosen": -2.9712257385253906, |
|
"logits/rejected": -2.994713544845581, |
|
"logps/chosen": -199.3070526123047, |
|
"logps/rejected": -992.4100341796875, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.476993441581726, |
|
"rewards/margins": 7.447300910949707, |
|
"rewards/rejected": -8.924293518066406, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"grad_norm": 0.021646075748453193, |
|
"learning_rate": 7.597595192178702e-07, |
|
"logits/chosen": -2.9999868869781494, |
|
"logits/rejected": -3.0006492137908936, |
|
"logps/chosen": -215.9517364501953, |
|
"logps/rejected": -997.6002197265625, |
|
"loss": 0.0026, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.6561391353607178, |
|
"rewards/margins": 7.31753396987915, |
|
"rewards/rejected": -8.973673820495605, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 8.48, |
|
"grad_norm": 0.030961558657159674, |
|
"learning_rate": 6.874316539637127e-07, |
|
"logits/chosen": -2.9921913146972656, |
|
"logits/rejected": -2.9953560829162598, |
|
"logps/chosen": -215.5575408935547, |
|
"logps/rejected": -1014.0217895507812, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.659396767616272, |
|
"rewards/margins": 7.469141960144043, |
|
"rewards/rejected": -9.128539085388184, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 8.56, |
|
"grad_norm": 0.021967484279154154, |
|
"learning_rate": 6.184665997806832e-07, |
|
"logits/chosen": -3.006669282913208, |
|
"logits/rejected": -3.005312442779541, |
|
"logps/chosen": -213.9212188720703, |
|
"logps/rejected": -990.9972534179688, |
|
"loss": 0.0026, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.6195297241210938, |
|
"rewards/margins": 7.294030666351318, |
|
"rewards/rejected": -8.91356086730957, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 8.64, |
|
"grad_norm": 0.042081431226620426, |
|
"learning_rate": 5.529181335435124e-07, |
|
"logits/chosen": -3.008005142211914, |
|
"logits/rejected": -3.0087175369262695, |
|
"logps/chosen": -218.51760864257812, |
|
"logps/rejected": -1024.052978515625, |
|
"loss": 0.002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.690387487411499, |
|
"rewards/margins": 7.524999141693115, |
|
"rewards/rejected": -9.215387344360352, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 8.72, |
|
"grad_norm": 0.043812953916419525, |
|
"learning_rate": 4.908373679744316e-07, |
|
"logits/chosen": -3.0348081588745117, |
|
"logits/rejected": -3.0385255813598633, |
|
"logps/chosen": -226.70779418945312, |
|
"logps/rejected": -1010.8932495117188, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.7693846225738525, |
|
"rewards/margins": 7.316775321960449, |
|
"rewards/rejected": -9.086159706115723, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"grad_norm": 0.02641646477682858, |
|
"learning_rate": 4.322727117869951e-07, |
|
"logits/chosen": -3.014754056930542, |
|
"logits/rejected": -3.0084919929504395, |
|
"logps/chosen": -217.29788208007812, |
|
"logps/rejected": -1013.4778442382812, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.679574728012085, |
|
"rewards/margins": 7.428310394287109, |
|
"rewards/rejected": -9.107885360717773, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 8.88, |
|
"grad_norm": 0.027581407278890403, |
|
"learning_rate": 3.772698319384349e-07, |
|
"logits/chosen": -2.9870715141296387, |
|
"logits/rejected": -2.974972724914551, |
|
"logps/chosen": -213.857177734375, |
|
"logps/rejected": -1029.269287109375, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.6536537408828735, |
|
"rewards/margins": 7.60448694229126, |
|
"rewards/rejected": -9.258139610290527, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"grad_norm": 0.018398802557307123, |
|
"learning_rate": 3.258716180199278e-07, |
|
"logits/chosen": -2.969782590866089, |
|
"logits/rejected": -2.9666781425476074, |
|
"logps/chosen": -201.89608764648438, |
|
"logps/rejected": -1012.4627075195312, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.521287441253662, |
|
"rewards/margins": 7.587810516357422, |
|
"rewards/rejected": -9.109098434448242, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"grad_norm": 0.02530896697144264, |
|
"learning_rate": 2.7811814881259503e-07, |
|
"logits/chosen": -2.967005491256714, |
|
"logits/rejected": -2.9650299549102783, |
|
"logps/chosen": -211.93923950195312, |
|
"logps/rejected": -1015.2527465820312, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.6247079372406006, |
|
"rewards/margins": 7.509281158447266, |
|
"rewards/rejected": -9.133989334106445, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 9.12, |
|
"grad_norm": 0.021443293328022403, |
|
"learning_rate": 2.3404666103526542e-07, |
|
"logits/chosen": -2.951347827911377, |
|
"logits/rejected": -2.967103958129883, |
|
"logps/chosen": -192.74740600585938, |
|
"logps/rejected": -1004.8844604492188, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.4152947664260864, |
|
"rewards/margins": 7.6158270835876465, |
|
"rewards/rejected": -9.031121253967285, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"grad_norm": 0.028269129055998828, |
|
"learning_rate": 1.9369152030840553e-07, |
|
"logits/chosen": -2.968433141708374, |
|
"logits/rejected": -2.975369930267334, |
|
"logps/chosen": -208.6031036376953, |
|
"logps/rejected": -1004.6807861328125, |
|
"loss": 0.002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.5802977085113525, |
|
"rewards/margins": 7.448237419128418, |
|
"rewards/rejected": -9.028534889221191, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 9.28, |
|
"grad_norm": 0.012741391504073269, |
|
"learning_rate": 1.5708419435684463e-07, |
|
"logits/chosen": -2.979626417160034, |
|
"logits/rejected": -3.0008533000946045, |
|
"logps/chosen": -203.78268432617188, |
|
"logps/rejected": -998.7214965820312, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.53437077999115, |
|
"rewards/margins": 7.443058967590332, |
|
"rewards/rejected": -8.97743034362793, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 9.36, |
|
"grad_norm": 0.018928993356126772, |
|
"learning_rate": 1.2425322847218368e-07, |
|
"logits/chosen": -2.9456331729888916, |
|
"logits/rejected": -2.947418689727783, |
|
"logps/chosen": -208.3197784423828, |
|
"logps/rejected": -1002.4910888671875, |
|
"loss": 0.0024, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.5662791728973389, |
|
"rewards/margins": 7.459691524505615, |
|
"rewards/rejected": -9.025972366333008, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 9.44, |
|
"grad_norm": 0.11171748274756513, |
|
"learning_rate": 9.522422325404234e-08, |
|
"logits/chosen": -2.972970485687256, |
|
"logits/rejected": -2.9851505756378174, |
|
"logps/chosen": -196.48709106445312, |
|
"logps/rejected": -1000.6891479492188, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.4484045505523682, |
|
"rewards/margins": 7.5574445724487305, |
|
"rewards/rejected": -9.005849838256836, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 9.52, |
|
"grad_norm": 0.014643254346465179, |
|
"learning_rate": 7.001981464747565e-08, |
|
"logits/chosen": -3.006410598754883, |
|
"logits/rejected": -3.001279354095459, |
|
"logps/chosen": -239.8344268798828, |
|
"logps/rejected": -1013.0524291992188, |
|
"loss": 0.0044, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.9079118967056274, |
|
"rewards/margins": 7.20407247543335, |
|
"rewards/rejected": -9.111984252929688, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"grad_norm": 0.023445989539047585, |
|
"learning_rate": 4.865965629214819e-08, |
|
"logits/chosen": -2.9561855792999268, |
|
"logits/rejected": -2.9647486209869385, |
|
"logps/chosen": -201.7306671142578, |
|
"logps/rejected": -982.6613159179688, |
|
"loss": 0.0026, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.502721905708313, |
|
"rewards/margins": 7.332386016845703, |
|
"rewards/rejected": -8.835107803344727, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 9.68, |
|
"grad_norm": 0.0197958747361798, |
|
"learning_rate": 3.1160404197018155e-08, |
|
"logits/chosen": -2.9367284774780273, |
|
"logits/rejected": -2.94865083694458, |
|
"logps/chosen": -199.8970947265625, |
|
"logps/rejected": -1011.3294677734375, |
|
"loss": 0.0027, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.5013561248779297, |
|
"rewards/margins": 7.6023712158203125, |
|
"rewards/rejected": -9.103727340698242, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 9.76, |
|
"grad_norm": 0.07635646097593517, |
|
"learning_rate": 1.753570375247815e-08, |
|
"logits/chosen": -3.0195131301879883, |
|
"logits/rejected": -3.0240790843963623, |
|
"logps/chosen": -220.675048828125, |
|
"logps/rejected": -1000.5875854492188, |
|
"loss": 0.0024, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.7074737548828125, |
|
"rewards/margins": 7.296438694000244, |
|
"rewards/rejected": -9.003911972045898, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 9.84, |
|
"grad_norm": 0.011307093391428234, |
|
"learning_rate": 7.796179090094891e-09, |
|
"logits/chosen": -2.937976837158203, |
|
"logits/rejected": -2.9619266986846924, |
|
"logps/chosen": -190.8544921875, |
|
"logps/rejected": -1021.5460815429688, |
|
"loss": 0.0018, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.419277548789978, |
|
"rewards/margins": 7.7647881507873535, |
|
"rewards/rejected": -9.184064865112305, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 9.92, |
|
"grad_norm": 0.017541852350439674, |
|
"learning_rate": 1.9494247982282386e-09, |
|
"logits/chosen": -2.976008653640747, |
|
"logits/rejected": -2.9958183765411377, |
|
"logps/chosen": -201.06101989746094, |
|
"logps/rejected": -1008.154296875, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.5034410953521729, |
|
"rewards/margins": 7.560837745666504, |
|
"rewards/rejected": -9.064278602600098, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 0.01886261936533329, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -2.998544454574585, |
|
"logits/rejected": -3.019519329071045, |
|
"logps/chosen": -217.5911102294922, |
|
"logps/rejected": -1009.5062255859375, |
|
"loss": 0.002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.6805696487426758, |
|
"rewards/margins": 7.409621238708496, |
|
"rewards/rejected": -9.090190887451172, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 1250, |
|
"total_flos": 0.0, |
|
"train_loss": 0.05206389323771, |
|
"train_runtime": 14277.3883, |
|
"train_samples_per_second": 5.603, |
|
"train_steps_per_second": 0.088 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1250, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|