{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9999375663357682,
  "eval_steps": 1000,
  "global_step": 4004,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00024973465692701506,
      "grad_norm": 0.609375,
      "learning_rate": 1.2468827930174565e-08,
      "logits/chosen": -0.33114343881607056,
      "logits/rejected": -0.24089118838310242,
      "logps/chosen": -44.38773727416992,
      "logps/rejected": -68.85894775390625,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.0024973465692701507,
      "grad_norm": 0.609375,
      "learning_rate": 1.2468827930174566e-07,
      "logits/chosen": -0.4296959638595581,
      "logits/rejected": -0.34308701753616333,
      "logps/chosen": -43.235145568847656,
      "logps/rejected": -80.90267944335938,
      "loss": 0.6931,
      "rewards/accuracies": 0.4444444477558136,
      "rewards/chosen": 0.0005310365813784301,
      "rewards/margins": 9.072302782442421e-05,
      "rewards/rejected": 0.0004403134807944298,
      "step": 10
    },
    {
      "epoch": 0.004994693138540301,
      "grad_norm": 0.7890625,
      "learning_rate": 2.493765586034913e-07,
      "logits/chosen": -0.4125714898109436,
      "logits/rejected": -0.3169251084327698,
      "logps/chosen": -42.952693939208984,
      "logps/rejected": -78.09742736816406,
      "loss": 0.6928,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": -0.000491735409013927,
      "rewards/margins": 0.0007636584923602641,
      "rewards/rejected": -0.0012553940759971738,
      "step": 20
    },
    {
      "epoch": 0.0074920397078104516,
      "grad_norm": 0.50390625,
      "learning_rate": 3.7406483790523695e-07,
      "logits/chosen": -0.4181899130344391,
      "logits/rejected": -0.3332025110721588,
      "logps/chosen": -44.16044235229492,
      "logps/rejected": -71.77767181396484,
      "loss": 0.6931,
      "rewards/accuracies": 0.550000011920929,
      "rewards/chosen": -0.00046690466115251184,
      "rewards/margins": 0.0001037311740219593,
      "rewards/rejected": -0.0005706357769668102,
      "step": 30
    },
    {
      "epoch": 0.009989386277080603,
      "grad_norm": 0.66796875,
      "learning_rate": 4.987531172069826e-07,
      "logits/chosen": -0.43510785698890686,
      "logits/rejected": -0.34268879890441895,
      "logps/chosen": -43.815826416015625,
      "logps/rejected": -80.65787506103516,
      "loss": 0.6923,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": 0.0002724333025980741,
      "rewards/margins": 0.0016190242022275925,
      "rewards/rejected": -0.0013465910451486707,
      "step": 40
    },
    {
      "epoch": 0.012486732846350752,
      "grad_norm": 0.96484375,
      "learning_rate": 6.234413965087283e-07,
      "logits/chosen": -0.4381956160068512,
      "logits/rejected": -0.32796674966812134,
      "logps/chosen": -43.25028610229492,
      "logps/rejected": -77.0926742553711,
      "loss": 0.6927,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": 0.0003348322061356157,
      "rewards/margins": 0.0009277343633584678,
      "rewards/rejected": -0.0005929021863266826,
      "step": 50
    },
    {
      "epoch": 0.014984079415620903,
      "grad_norm": 0.76171875,
      "learning_rate": 7.481296758104739e-07,
      "logits/chosen": -0.4058023989200592,
      "logits/rejected": -0.31743547320365906,
      "logps/chosen": -43.332618713378906,
      "logps/rejected": -81.47147369384766,
      "loss": 0.6923,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": 0.0009701108792796731,
      "rewards/margins": 0.0017434615874662995,
      "rewards/rejected": -0.0007733507081866264,
      "step": 60
    },
    {
      "epoch": 0.017481425984891052,
      "grad_norm": 0.419921875,
      "learning_rate": 8.728179551122195e-07,
      "logits/chosen": -0.40699687600135803,
      "logits/rejected": -0.33324044942855835,
      "logps/chosen": -42.81806182861328,
      "logps/rejected": -69.8255844116211,
      "loss": 0.6918,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": 0.0005619878647848964,
      "rewards/margins": 0.0026690722443163395,
      "rewards/rejected": -0.0021070842631161213,
      "step": 70
    },
    {
      "epoch": 0.019978772554161205,
      "grad_norm": 1.1171875,
      "learning_rate": 9.975062344139653e-07,
      "logits/chosen": -0.4060027003288269,
      "logits/rejected": -0.31257936358451843,
      "logps/chosen": -43.606048583984375,
      "logps/rejected": -74.920654296875,
      "loss": 0.691,
      "rewards/accuracies": 0.8999999761581421,
      "rewards/chosen": 0.0015056885313242674,
      "rewards/margins": 0.004372184630483389,
      "rewards/rejected": -0.0028664960991591215,
      "step": 80
    },
    {
      "epoch": 0.022476119123431355,
      "grad_norm": 0.515625,
      "learning_rate": 1.1221945137157108e-06,
      "logits/chosen": -0.39966338872909546,
      "logits/rejected": -0.3299568295478821,
      "logps/chosen": -42.9066047668457,
      "logps/rejected": -67.96953582763672,
      "loss": 0.6899,
      "rewards/accuracies": 0.925000011920929,
      "rewards/chosen": 0.0013856906443834305,
      "rewards/margins": 0.00656374916434288,
      "rewards/rejected": -0.00517805851995945,
      "step": 90
    },
    {
      "epoch": 0.024973465692701504,
      "grad_norm": 0.73046875,
      "learning_rate": 1.2468827930174565e-06,
      "logits/chosen": -0.4090539515018463,
      "logits/rejected": -0.31136512756347656,
      "logps/chosen": -44.48310089111328,
      "logps/rejected": -84.36518096923828,
      "loss": 0.688,
      "rewards/accuracies": 0.9750000238418579,
      "rewards/chosen": 0.0026647746562957764,
      "rewards/margins": 0.010295169427990913,
      "rewards/rejected": -0.00763039430603385,
      "step": 100
    },
    {
      "epoch": 0.027470812261971653,
      "grad_norm": 0.6484375,
      "learning_rate": 1.3715710723192023e-06,
      "logits/chosen": -0.40336164832115173,
      "logits/rejected": -0.3107188642024994,
      "logps/chosen": -43.532264709472656,
      "logps/rejected": -78.85313415527344,
      "loss": 0.6869,
      "rewards/accuracies": 0.9750000238418579,
      "rewards/chosen": 0.0021588439121842384,
      "rewards/margins": 0.012583871372044086,
      "rewards/rejected": -0.010425028391182423,
      "step": 110
    },
    {
      "epoch": 0.029968158831241806,
      "grad_norm": 0.62109375,
      "learning_rate": 1.4962593516209478e-06,
      "logits/chosen": -0.44534754753112793,
      "logits/rejected": -0.3525586724281311,
      "logps/chosen": -43.2638053894043,
      "logps/rejected": -73.06291961669922,
      "loss": 0.6844,
      "rewards/accuracies": 0.9750000238418579,
      "rewards/chosen": 0.0026553391944617033,
      "rewards/margins": 0.017566664144396782,
      "rewards/rejected": -0.014911326579749584,
      "step": 120
    },
    {
      "epoch": 0.032465505400511956,
      "grad_norm": 0.734375,
      "learning_rate": 1.6209476309226935e-06,
      "logits/chosen": -0.4450170397758484,
      "logits/rejected": -0.356881707906723,
      "logps/chosen": -43.020606994628906,
      "logps/rejected": -74.61629486083984,
      "loss": 0.6783,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.004791858606040478,
      "rewards/margins": 0.030012447386980057,
      "rewards/rejected": -0.025220584124326706,
      "step": 130
    },
    {
      "epoch": 0.034962851969782105,
      "grad_norm": 0.52734375,
      "learning_rate": 1.745635910224439e-06,
      "logits/chosen": -0.4518131613731384,
      "logits/rejected": -0.35589107871055603,
      "logps/chosen": -44.260887145996094,
      "logps/rejected": -89.10530853271484,
      "loss": 0.6769,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.0043911924585700035,
      "rewards/margins": 0.03277025744318962,
      "rewards/rejected": -0.02837906777858734,
      "step": 140
    },
    {
      "epoch": 0.037460198539052254,
      "grad_norm": 0.470703125,
      "learning_rate": 1.8703241895261848e-06,
      "logits/chosen": -0.39934635162353516,
      "logits/rejected": -0.32321810722351074,
      "logps/chosen": -41.51782989501953,
      "logps/rejected": -74.04359436035156,
      "loss": 0.67,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.009082725271582603,
      "rewards/margins": 0.04699797183275223,
      "rewards/rejected": -0.037915244698524475,
      "step": 150
    },
    {
      "epoch": 0.03995754510832241,
      "grad_norm": 0.703125,
      "learning_rate": 1.9950124688279305e-06,
      "logits/chosen": -0.40700763463974,
      "logits/rejected": -0.33096417784690857,
      "logps/chosen": -42.31119918823242,
      "logps/rejected": -82.06526947021484,
      "loss": 0.6641,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.010724621824920177,
      "rewards/margins": 0.059142522513866425,
      "rewards/rejected": -0.04841790720820427,
      "step": 160
    },
    {
      "epoch": 0.04245489167759256,
      "grad_norm": 0.443359375,
      "learning_rate": 2.119700748129676e-06,
      "logits/chosen": -0.4063618779182434,
      "logits/rejected": -0.31147629022598267,
      "logps/chosen": -43.24675750732422,
      "logps/rejected": -74.64862060546875,
      "loss": 0.6545,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.009759762324392796,
      "rewards/margins": 0.07911679148674011,
      "rewards/rejected": -0.0693570226430893,
      "step": 170
    },
    {
      "epoch": 0.04495223824686271,
      "grad_norm": 0.369140625,
      "learning_rate": 2.2443890274314216e-06,
      "logits/chosen": -0.3992343842983246,
      "logits/rejected": -0.30265265703201294,
      "logps/chosen": -42.563804626464844,
      "logps/rejected": -89.08007049560547,
      "loss": 0.6496,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.016897384077310562,
      "rewards/margins": 0.08930385112762451,
      "rewards/rejected": -0.07240646332502365,
      "step": 180
    },
    {
      "epoch": 0.04744958481613286,
      "grad_norm": 0.388671875,
      "learning_rate": 2.3690773067331675e-06,
      "logits/chosen": -0.393463671207428,
      "logits/rejected": -0.29836633801460266,
      "logps/chosen": -42.574581146240234,
      "logps/rejected": -81.52467346191406,
      "loss": 0.6448,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.01014000829309225,
      "rewards/margins": 0.09943968802690506,
      "rewards/rejected": -0.08929967135190964,
      "step": 190
    },
    {
      "epoch": 0.04994693138540301,
      "grad_norm": 0.423828125,
      "learning_rate": 2.493765586034913e-06,
      "logits/chosen": -0.3705541491508484,
      "logits/rejected": -0.2776363492012024,
      "logps/chosen": -43.70671844482422,
      "logps/rejected": -79.51457214355469,
      "loss": 0.635,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.011818965896964073,
      "rewards/margins": 0.12041501700878143,
      "rewards/rejected": -0.10859604924917221,
      "step": 200
    },
    {
      "epoch": 0.05244427795467316,
      "grad_norm": 0.400390625,
      "learning_rate": 2.6184538653366586e-06,
      "logits/chosen": -0.3588668704032898,
      "logits/rejected": -0.27049878239631653,
      "logps/chosen": -41.42917251586914,
      "logps/rejected": -80.90901947021484,
      "loss": 0.6249,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.012408060021698475,
      "rewards/margins": 0.14213527739048004,
      "rewards/rejected": -0.12972721457481384,
      "step": 210
    },
    {
      "epoch": 0.05494162452394331,
      "grad_norm": 0.376953125,
      "learning_rate": 2.7431421446384045e-06,
      "logits/chosen": -0.32904312014579773,
      "logits/rejected": -0.24068386852741241,
      "logps/chosen": -42.47250747680664,
      "logps/rejected": -89.7323226928711,
      "loss": 0.6272,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.011426225304603577,
      "rewards/margins": 0.13695809245109558,
      "rewards/rejected": -0.125531867146492,
      "step": 220
    },
    {
      "epoch": 0.05743897109321346,
      "grad_norm": 0.4921875,
      "learning_rate": 2.86783042394015e-06,
      "logits/chosen": -0.3509990870952606,
      "logits/rejected": -0.26932188868522644,
      "logps/chosen": -41.524696350097656,
      "logps/rejected": -84.92623901367188,
      "loss": 0.6128,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.016151348128914833,
      "rewards/margins": 0.16833417117595673,
      "rewards/rejected": -0.15218281745910645,
      "step": 230
    },
    {
      "epoch": 0.05993631766248361,
      "grad_norm": 0.494140625,
      "learning_rate": 2.9925187032418956e-06,
      "logits/chosen": -0.3635261356830597,
      "logits/rejected": -0.26545146107673645,
      "logps/chosen": -43.126625061035156,
      "logps/rejected": -89.85209655761719,
      "loss": 0.6002,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.004656613804399967,
      "rewards/margins": 0.19621676206588745,
      "rewards/rejected": -0.19156016409397125,
      "step": 240
    },
    {
      "epoch": 0.06243366423175376,
      "grad_norm": 0.435546875,
      "learning_rate": 3.117206982543641e-06,
      "logits/chosen": -0.3245137929916382,
      "logits/rejected": -0.22121305763721466,
      "logps/chosen": -42.189552307128906,
      "logps/rejected": -91.37117767333984,
      "loss": 0.5891,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.009633781388401985,
      "rewards/margins": 0.22138457000255585,
      "rewards/rejected": -0.2117508202791214,
      "step": 250
    },
    {
      "epoch": 0.06493101080102391,
      "grad_norm": 0.76171875,
      "learning_rate": 3.241895261845387e-06,
      "logits/chosen": -0.3298066258430481,
      "logits/rejected": -0.2243480682373047,
      "logps/chosen": -41.658103942871094,
      "logps/rejected": -97.98243713378906,
      "loss": 0.5608,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.014573690481483936,
      "rewards/margins": 0.28632912039756775,
      "rewards/rejected": -0.2717553973197937,
      "step": 260
    },
    {
      "epoch": 0.06742835737029407,
      "grad_norm": 0.84765625,
      "learning_rate": 3.3665835411471326e-06,
      "logits/chosen": -0.274959921836853,
      "logits/rejected": -0.15127086639404297,
      "logps/chosen": -42.591773986816406,
      "logps/rejected": -109.750732421875,
      "loss": 0.5226,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.010403521358966827,
      "rewards/margins": 0.3793814182281494,
      "rewards/rejected": -0.36897793412208557,
      "step": 270
    },
    {
      "epoch": 0.06992570393956421,
      "grad_norm": 1.265625,
      "learning_rate": 3.491271820448878e-06,
      "logits/chosen": -0.26599782705307007,
      "logits/rejected": -0.14541617035865784,
      "logps/chosen": -43.68675994873047,
      "logps/rejected": -131.44851684570312,
      "loss": 0.4865,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.009956231340765953,
      "rewards/margins": 0.4785284399986267,
      "rewards/rejected": -0.4685722291469574,
      "step": 280
    },
    {
      "epoch": 0.07242305050883437,
      "grad_norm": 1.8828125,
      "learning_rate": 3.615960099750624e-06,
      "logits/chosen": -0.2336564064025879,
      "logits/rejected": -0.09945651143789291,
      "logps/chosen": -42.83462142944336,
      "logps/rejected": -155.51699829101562,
      "loss": 0.3811,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.010509507730603218,
      "rewards/margins": 0.8152663111686707,
      "rewards/rejected": -0.804756760597229,
      "step": 290
    },
    {
      "epoch": 0.07492039707810451,
      "grad_norm": 1.484375,
      "learning_rate": 3.7406483790523696e-06,
      "logits/chosen": -0.187991201877594,
      "logits/rejected": -0.015538264997303486,
      "logps/chosen": -53.440765380859375,
      "logps/rejected": -225.8020477294922,
      "loss": 0.2419,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.09434106200933456,
      "rewards/margins": 1.4534434080123901,
      "rewards/rejected": -1.5477845668792725,
      "step": 300
    },
    {
      "epoch": 0.07741774364737466,
      "grad_norm": 0.94140625,
      "learning_rate": 3.8653366583541155e-06,
      "logits/chosen": -0.10128624737262726,
      "logits/rejected": 0.10228855907917023,
      "logps/chosen": -66.81230163574219,
      "logps/rejected": -350.84722900390625,
      "loss": 0.1818,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.22738003730773926,
      "rewards/margins": 2.590130567550659,
      "rewards/rejected": -2.8175110816955566,
      "step": 310
    },
    {
      "epoch": 0.07991509021664482,
      "grad_norm": 0.66015625,
      "learning_rate": 3.990024937655861e-06,
      "logits/chosen": -0.06341538578271866,
      "logits/rejected": 0.19574430584907532,
      "logps/chosen": -63.102256774902344,
      "logps/rejected": -506.8706970214844,
      "loss": 0.1113,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.19701920449733734,
      "rewards/margins": 4.0467329025268555,
      "rewards/rejected": -4.2437520027160645,
      "step": 320
    },
    {
      "epoch": 0.08241243678591496,
      "grad_norm": 0.703125,
      "learning_rate": 4.114713216957607e-06,
      "logits/chosen": 0.03162340074777603,
      "logits/rejected": 0.2972305417060852,
      "logps/chosen": -55.010414123535156,
      "logps/rejected": -444.8564453125,
      "loss": 0.0957,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.1186632290482521,
      "rewards/margins": 3.6549084186553955,
      "rewards/rejected": -3.773571729660034,
      "step": 330
    },
    {
      "epoch": 0.08490978335518512,
      "grad_norm": 0.48046875,
      "learning_rate": 4.239401496259352e-06,
      "logits/chosen": 0.05119480937719345,
      "logits/rejected": 0.35537463426589966,
      "logps/chosen": -52.077064514160156,
      "logps/rejected": -547.7008056640625,
      "loss": 0.0761,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.08894447982311249,
      "rewards/margins": 4.584813117980957,
      "rewards/rejected": -4.673757076263428,
      "step": 340
    },
    {
      "epoch": 0.08740712992445526,
      "grad_norm": 0.1796875,
      "learning_rate": 4.364089775561098e-06,
      "logits/chosen": 0.17311367392539978,
      "logits/rejected": 0.49543648958206177,
      "logps/chosen": -59.5765380859375,
      "logps/rejected": -548.73876953125,
      "loss": 0.0769,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.1556529700756073,
      "rewards/margins": 4.651310443878174,
      "rewards/rejected": -4.806963920593262,
      "step": 350
    },
    {
      "epoch": 0.08990447649372542,
      "grad_norm": 0.216796875,
      "learning_rate": 4.488778054862843e-06,
      "logits/chosen": 0.13120940327644348,
      "logits/rejected": 0.5331201553344727,
      "logps/chosen": -59.65636444091797,
      "logps/rejected": -660.1800537109375,
      "loss": 0.0462,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.15369465947151184,
      "rewards/margins": 5.723788738250732,
      "rewards/rejected": -5.877484321594238,
      "step": 360
    },
    {
      "epoch": 0.09240182306299556,
      "grad_norm": 0.90625,
      "learning_rate": 4.6134663341645895e-06,
      "logits/chosen": 0.25265592336654663,
      "logits/rejected": 0.6620725989341736,
      "logps/chosen": -61.8662109375,
      "logps/rejected": -688.4534912109375,
      "loss": 0.0476,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.18499073386192322,
      "rewards/margins": 5.994035720825195,
      "rewards/rejected": -6.179026126861572,
      "step": 370
    },
    {
      "epoch": 0.09489916963226572,
      "grad_norm": 1.0703125,
      "learning_rate": 4.738154613466335e-06,
      "logits/chosen": 0.21470895409584045,
      "logits/rejected": 0.7127342224121094,
      "logps/chosen": -76.92805480957031,
      "logps/rejected": -1027.8746337890625,
      "loss": 0.0318,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.3377203345298767,
      "rewards/margins": 9.173759460449219,
      "rewards/rejected": -9.511480331420898,
      "step": 380
    },
    {
      "epoch": 0.09739651620153587,
      "grad_norm": 0.013671875,
      "learning_rate": 4.862842892768081e-06,
      "logits/chosen": 0.2577429413795471,
      "logits/rejected": 0.7398630380630493,
      "logps/chosen": -87.82744598388672,
      "logps/rejected": -828.4269409179688,
      "loss": 0.0229,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.44494834542274475,
      "rewards/margins": 7.14224100112915,
      "rewards/rejected": -7.5871901512146,
      "step": 390
    },
    {
      "epoch": 0.09989386277080602,
      "grad_norm": 0.326171875,
      "learning_rate": 4.987531172069826e-06,
      "logits/chosen": 0.312338262796402,
      "logits/rejected": 0.8188611268997192,
      "logps/chosen": -83.17652130126953,
      "logps/rejected": -929.5695190429688,
      "loss": 0.037,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.39534783363342285,
      "rewards/margins": 8.192750930786133,
      "rewards/rejected": -8.588098526000977,
      "step": 400
    },
    {
      "epoch": 0.10239120934007617,
      "grad_norm": 0.2412109375,
      "learning_rate": 4.999923022460671e-06,
      "logits/chosen": 0.2771604657173157,
      "logits/rejected": 0.875481903553009,
      "logps/chosen": -71.91412353515625,
      "logps/rejected": -1142.7008056640625,
      "loss": 0.0166,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.29563969373703003,
      "rewards/margins": 10.368195533752441,
      "rewards/rejected": -10.663835525512695,
      "step": 410
    },
    {
      "epoch": 0.10488855590934631,
      "grad_norm": 0.361328125,
      "learning_rate": 4.999656933348981e-06,
      "logits/chosen": 0.3595578372478485,
      "logits/rejected": 0.8383792638778687,
      "logps/chosen": -77.22844696044922,
      "logps/rejected": -818.2454833984375,
      "loss": 0.0347,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.3375477194786072,
      "rewards/margins": 7.169804573059082,
      "rewards/rejected": -7.50735330581665,
      "step": 420
    },
    {
      "epoch": 0.10738590247861647,
      "grad_norm": 0.006805419921875,
      "learning_rate": 4.99920080255011e-06,
      "logits/chosen": 0.3072226047515869,
      "logits/rejected": 0.9365525245666504,
      "logps/chosen": -69.96758270263672,
      "logps/rejected": -1107.283447265625,
      "loss": 0.011,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.24378785490989685,
      "rewards/margins": 9.968598365783691,
      "rewards/rejected": -10.212385177612305,
      "step": 430
    },
    {
      "epoch": 0.10988324904788661,
      "grad_norm": 0.9375,
      "learning_rate": 4.998554664742362e-06,
      "logits/chosen": 0.386096328496933,
      "logits/rejected": 0.9298421740531921,
      "logps/chosen": -77.93641662597656,
      "logps/rejected": -925.2971801757812,
      "loss": 0.0152,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.3427829146385193,
      "rewards/margins": 8.217004776000977,
      "rewards/rejected": -8.559788703918457,
      "step": 440
    },
    {
      "epoch": 0.11238059561715677,
      "grad_norm": 0.162109375,
      "learning_rate": 4.997718569049726e-06,
      "logits/chosen": 0.38062307238578796,
      "logits/rejected": 0.9510132670402527,
      "logps/chosen": -62.76348876953125,
      "logps/rejected": -1039.5853271484375,
      "loss": 0.0178,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.19411073625087738,
      "rewards/margins": 9.47815227508545,
      "rewards/rejected": -9.672263145446777,
      "step": 450
    },
    {
      "epoch": 0.11487794218642693,
      "grad_norm": 0.0625,
      "learning_rate": 4.9966925790381404e-06,
      "logits/chosen": 0.4757159352302551,
      "logits/rejected": 1.018425703048706,
      "logps/chosen": -81.46342468261719,
      "logps/rejected": -907.7916870117188,
      "loss": 0.0209,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.38752180337905884,
      "rewards/margins": 7.994225978851318,
      "rewards/rejected": -8.381747245788574,
      "step": 460
    },
    {
      "epoch": 0.11737528875569707,
      "grad_norm": 0.39453125,
      "learning_rate": 4.995476772710657e-06,
      "logits/chosen": 0.40233319997787476,
      "logits/rejected": 1.0515995025634766,
      "logps/chosen": -82.72390747070312,
      "logps/rejected": -1207.831787109375,
      "loss": 0.0199,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.37652480602264404,
      "rewards/margins": 10.886858940124512,
      "rewards/rejected": -11.263383865356445,
      "step": 470
    },
    {
      "epoch": 0.11987263532496722,
      "grad_norm": 0.109375,
      "learning_rate": 4.994071242501516e-06,
      "logits/chosen": 0.4317776560783386,
      "logits/rejected": 1.0796253681182861,
      "logps/chosen": -62.16728591918945,
      "logps/rejected": -1022.6038208007812,
      "loss": 0.0134,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.18862931430339813,
      "rewards/margins": 9.357450485229492,
      "rewards/rejected": -9.546079635620117,
      "step": 480
    },
    {
      "epoch": 0.12236998189423737,
      "grad_norm": 3.3527612686157227e-06,
      "learning_rate": 4.992476095269112e-06,
      "logits/chosen": 0.4001534581184387,
      "logits/rejected": 0.9869491457939148,
      "logps/chosen": -64.50323486328125,
      "logps/rejected": -1058.342041015625,
      "loss": 0.0186,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.21038322150707245,
      "rewards/margins": 9.569865226745605,
      "rewards/rejected": -9.780248641967773,
      "step": 490
    },
    {
      "epoch": 0.12486732846350752,
      "grad_norm": 0.166015625,
      "learning_rate": 4.990691452287877e-06,
      "logits/chosen": 0.513416051864624,
      "logits/rejected": 1.122924566268921,
      "logps/chosen": -86.93208312988281,
      "logps/rejected": -1010.1483154296875,
      "loss": 0.0175,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.43185362219810486,
      "rewards/margins": 8.921982765197754,
      "rewards/rejected": -9.353837966918945,
      "step": 500
    },
    {
      "epoch": 0.12736467503277768,
      "grad_norm": 0.1806640625,
      "learning_rate": 4.988717449239056e-06,
      "logits/chosen": 0.5288435220718384,
      "logits/rejected": 1.184326410293579,
      "logps/chosen": -75.24764251708984,
      "logps/rejected": -1083.439697265625,
      "loss": 0.0205,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.31985238194465637,
      "rewards/margins": 9.819514274597168,
      "rewards/rejected": -10.139368057250977,
      "step": 510
    },
    {
      "epoch": 0.12986202160204782,
      "grad_norm": 0.05126953125,
      "learning_rate": 4.98655423620039e-06,
      "logits/chosen": 0.45140591263771057,
      "logits/rejected": 1.135999083518982,
      "logps/chosen": -66.84233093261719,
      "logps/rejected": -1121.6982421875,
      "loss": 0.0088,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.23821225762367249,
      "rewards/margins": 10.265534400939941,
      "rewards/rejected": -10.503746032714844,
      "step": 520
    },
    {
      "epoch": 0.13235936817131796,
      "grad_norm": 0.0130615234375,
      "learning_rate": 4.984201977634711e-06,
      "logits/chosen": 0.44299745559692383,
      "logits/rejected": 1.2149170637130737,
      "logps/chosen": -74.1891860961914,
      "logps/rejected": -1348.673095703125,
      "loss": 0.0081,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.30132222175598145,
      "rewards/margins": 12.441837310791016,
      "rewards/rejected": -12.74316120147705,
      "step": 530
    },
    {
      "epoch": 0.13485671474058814,
      "grad_norm": 0.1982421875,
      "learning_rate": 4.9816608523774345e-06,
      "logits/chosen": 0.4906342625617981,
      "logits/rejected": 1.1866085529327393,
      "logps/chosen": -61.67924880981445,
      "logps/rejected": -1052.905029296875,
      "loss": 0.013,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.1749764382839203,
      "rewards/margins": 9.563148498535156,
      "rewards/rejected": -9.73812484741211,
      "step": 540
    },
    {
      "epoch": 0.13735406130985828,
      "grad_norm": 0.02099609375,
      "learning_rate": 4.978931053622964e-06,
      "logits/chosen": 0.5177958607673645,
      "logits/rejected": 1.2569612264633179,
      "logps/chosen": -70.76200866699219,
      "logps/rejected": -1278.626708984375,
      "loss": 0.0133,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.27674826979637146,
      "rewards/margins": 11.792952537536621,
      "rewards/rejected": -12.069701194763184,
      "step": 550
    },
    {
      "epoch": 0.13985140787912842,
      "grad_norm": 0.0003528594970703125,
      "learning_rate": 4.9760127889100044e-06,
      "logits/chosen": 0.5248929262161255,
      "logits/rejected": 1.2501459121704102,
      "logps/chosen": -81.47541809082031,
      "logps/rejected": -1154.671630859375,
      "loss": 0.0119,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.3913446366786957,
      "rewards/margins": 10.44621467590332,
      "rewards/rejected": -10.837559700012207,
      "step": 560
    },
    {
      "epoch": 0.1423487544483986,
      "grad_norm": 0.09423828125,
      "learning_rate": 4.972906280105781e-06,
      "logits/chosen": 0.5316249132156372,
      "logits/rejected": 1.3082139492034912,
      "logps/chosen": -88.09521484375,
      "logps/rejected": -1197.23486328125,
      "loss": 0.0096,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.4347292482852936,
      "rewards/margins": 10.762666702270508,
      "rewards/rejected": -11.197395324707031,
      "step": 570
    },
    {
      "epoch": 0.14484610101766873,
      "grad_norm": 0.0106201171875,
      "learning_rate": 4.969611763389175e-06,
      "logits/chosen": 0.5327505469322205,
      "logits/rejected": 1.3031514883041382,
      "logps/chosen": -73.19583129882812,
      "logps/rejected": -1114.89990234375,
      "loss": 0.0085,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.30240899324417114,
      "rewards/margins": 10.148442268371582,
      "rewards/rejected": -10.450851440429688,
      "step": 580
    },
    {
      "epoch": 0.14734344758693887,
      "grad_norm": 0.0751953125,
      "learning_rate": 4.966129489232762e-06,
      "logits/chosen": 0.47109970450401306,
      "logits/rejected": 1.3012760877609253,
      "logps/chosen": -71.85210418701172,
      "logps/rejected": -1336.1424560546875,
      "loss": 0.0071,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.277540385723114,
      "rewards/margins": 12.241964340209961,
      "rewards/rejected": -12.51950454711914,
      "step": 590
    },
    {
      "epoch": 0.14984079415620902,
      "grad_norm": 0.193359375,
      "learning_rate": 4.962459722383775e-06,
      "logits/chosen": 0.4269895553588867,
      "logits/rejected": 1.1828067302703857,
      "logps/chosen": -71.79056549072266,
      "logps/rejected": -1337.454345703125,
      "loss": 0.0111,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.2726261019706726,
      "rewards/margins": 12.248571395874023,
      "rewards/rejected": -12.521197319030762,
      "step": 600
    },
    {
      "epoch": 0.1523381407254792,
      "grad_norm": 0.09619140625,
      "learning_rate": 4.958602741843975e-06,
      "logits/chosen": 0.4595261216163635,
      "logits/rejected": 1.3015968799591064,
      "logps/chosen": -78.28643035888672,
      "logps/rejected": -1252.5814208984375,
      "loss": 0.0054,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.3374534249305725,
      "rewards/margins": 11.378314018249512,
      "rewards/rejected": -11.715767860412598,
      "step": 610
    },
    {
      "epoch": 0.15483548729474933,
      "grad_norm": 0.048583984375,
      "learning_rate": 4.954558840848437e-06,
      "logits/chosen": 0.5825181007385254,
      "logits/rejected": 1.3565789461135864,
      "logps/chosen": -76.19590759277344,
      "logps/rejected": -1119.151611328125,
      "loss": 0.0109,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.3366764783859253,
      "rewards/margins": 10.178030967712402,
      "rewards/rejected": -10.514707565307617,
      "step": 620
    },
    {
      "epoch": 0.15733283386401947,
      "grad_norm": 0.1572265625,
      "learning_rate": 4.950328326843258e-06,
      "logits/chosen": 0.5459114909172058,
      "logits/rejected": 1.3613156080245972,
      "logps/chosen": -82.85084533691406,
      "logps/rejected": -1277.3514404296875,
      "loss": 0.0104,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.3968349099159241,
      "rewards/margins": 11.52591323852539,
      "rewards/rejected": -11.922747611999512,
      "step": 630
    },
    {
      "epoch": 0.15983018043328964,
      "grad_norm": 0.005096435546875,
      "learning_rate": 4.945911521462182e-06,
      "logits/chosen": 0.5720694065093994,
      "logits/rejected": 1.411368727684021,
      "logps/chosen": -80.45169067382812,
      "logps/rejected": -1358.6873779296875,
      "loss": 0.008,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.3696327209472656,
      "rewards/margins": 12.431825637817383,
      "rewards/rejected": -12.801457405090332,
      "step": 640
    },
    {
      "epoch": 0.16232752700255978,
      "grad_norm": 0.1708984375,
      "learning_rate": 4.941308760502149e-06,
      "logits/chosen": 0.5064912438392639,
      "logits/rejected": 1.2029752731323242,
      "logps/chosen": -67.90926361083984,
      "logps/rejected": -1026.6124267578125,
      "loss": 0.0125,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.2300504744052887,
      "rewards/margins": 9.079760551452637,
      "rewards/rejected": -9.30981159210205,
      "step": 650
    },
    {
      "epoch": 0.16482487357182993,
      "grad_norm": 0.1787109375,
      "learning_rate": 4.936520393897762e-06,
      "logits/chosen": 0.4909030497074127,
      "logits/rejected": 1.2922523021697998,
      "logps/chosen": -69.4020004272461,
      "logps/rejected": -1298.668212890625,
      "loss": 0.0096,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.25019845366477966,
      "rewards/margins": 11.917684555053711,
      "rewards/rejected": -12.167882919311523,
      "step": 660
    },
    {
      "epoch": 0.16732222014110007,
      "grad_norm": 0.00469970703125,
      "learning_rate": 4.931546785694684e-06,
      "logits/chosen": 0.5053218007087708,
      "logits/rejected": 1.4669103622436523,
      "logps/chosen": -86.32283782958984,
      "logps/rejected": -1483.82763671875,
      "loss": 0.0045,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.42769041657447815,
      "rewards/margins": 13.680854797363281,
      "rewards/rejected": -14.108546257019043,
      "step": 670
    },
    {
      "epoch": 0.16981956671037024,
      "grad_norm": 0.0238037109375,
      "learning_rate": 4.926388314021964e-06,
      "logits/chosen": 0.6257452368736267,
      "logits/rejected": 1.5271151065826416,
      "logps/chosen": -92.75479888916016,
      "logps/rejected": -1237.61474609375,
      "loss": 0.0038,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.4862573742866516,
      "rewards/margins": 11.188620567321777,
      "rewards/rejected": -11.67487907409668,
      "step": 680
    },
    {
      "epoch": 0.17231691327964038,
      "grad_norm": 0.000385284423828125,
      "learning_rate": 4.921045371063283e-06,
      "logits/chosen": 0.584161102771759,
      "logits/rejected": 1.478125810623169,
      "logps/chosen": -89.634033203125,
      "logps/rejected": -1360.25537109375,
      "loss": 0.0066,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.46992653608322144,
      "rewards/margins": 12.42024040222168,
      "rewards/rejected": -12.890167236328125,
      "step": 690
    },
    {
      "epoch": 0.17481425984891052,
      "grad_norm": 0.1318359375,
      "learning_rate": 4.915518363027142e-06,
      "logits/chosen": 0.5938104391098022,
      "logits/rejected": 1.4910205602645874,
      "logps/chosen": -73.86201477050781,
      "logps/rejected": -1182.0716552734375,
      "loss": 0.0059,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.3045103847980499,
      "rewards/margins": 10.839627265930176,
      "rewards/rejected": -11.144137382507324,
      "step": 700
    },
    {
      "epoch": 0.1773116064181807,
      "grad_norm": 0.08740234375,
      "learning_rate": 4.909807710115977e-06,
      "logits/chosen": 0.543526828289032,
      "logits/rejected": 1.467707633972168,
      "logps/chosen": -81.62191009521484,
      "logps/rejected": -1380.295166015625,
      "loss": 0.0066,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.38441941142082214,
      "rewards/margins": 12.690566062927246,
      "rewards/rejected": -13.074984550476074,
      "step": 710
    },
    {
      "epoch": 0.17980895298745084,
      "grad_norm": 0.025146484375,
      "learning_rate": 4.903913846494211e-06,
      "logits/chosen": 0.4768219590187073,
      "logits/rejected": 1.4783326387405396,
      "logps/chosen": -79.43184661865234,
      "logps/rejected": -1673.644287109375,
      "loss": 0.0023,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.3560616374015808,
      "rewards/margins": 15.518526077270508,
      "rewards/rejected": -15.87458610534668,
      "step": 720
    },
    {
      "epoch": 0.18230629955672098,
      "grad_norm": 0.08154296875,
      "learning_rate": 4.897837220255251e-06,
      "logits/chosen": 0.5687042474746704,
      "logits/rejected": 1.4331896305084229,
      "logps/chosen": -86.38923645019531,
      "logps/rejected": -1329.374755859375,
      "loss": 0.008,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.4307560920715332,
      "rewards/margins": 12.039878845214844,
      "rewards/rejected": -12.470634460449219,
      "step": 730
    },
    {
      "epoch": 0.18480364612599112,
      "grad_norm": 0.0101318359375,
      "learning_rate": 4.891578293387413e-06,
      "logits/chosen": 0.604946494102478,
      "logits/rejected": 1.5590946674346924,
      "logps/chosen": -80.6954345703125,
      "logps/rejected": -1429.257080078125,
      "loss": 0.0056,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.37448957562446594,
      "rewards/margins": 13.214704513549805,
      "rewards/rejected": -13.589195251464844,
      "step": 740
    },
    {
      "epoch": 0.1873009926952613,
      "grad_norm": 0.07177734375,
      "learning_rate": 4.885137541738808e-06,
      "logits/chosen": 0.5679504871368408,
      "logits/rejected": 1.4625308513641357,
      "logps/chosen": -74.07333374023438,
      "logps/rejected": -1173.9857177734375,
      "loss": 0.0055,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.3022187054157257,
      "rewards/margins": 10.615083694458008,
      "rewards/rejected": -10.917302131652832,
      "step": 750
    },
    {
      "epoch": 0.18979833926453143,
      "grad_norm": 0.000659942626953125,
      "learning_rate": 4.878515454981153e-06,
      "logits/chosen": 0.5600544214248657,
      "logits/rejected": 1.5554611682891846,
      "logps/chosen": -95.30448913574219,
      "logps/rejected": -1539.63427734375,
      "loss": 0.0029,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.5141724348068237,
      "rewards/margins": 14.041679382324219,
      "rewards/rejected": -14.555851936340332,
      "step": 760
    },
    {
      "epoch": 0.19229568583380158,
      "grad_norm": 0.154296875,
      "learning_rate": 4.8717125365725545e-06,
      "logits/chosen": 0.6704256534576416,
      "logits/rejected": 1.511325716972351,
      "logps/chosen": -78.95833587646484,
      "logps/rejected": -1096.5010986328125,
      "loss": 0.0109,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.35167983174324036,
      "rewards/margins": 9.9315185546875,
      "rewards/rejected": -10.283197402954102,
      "step": 770
    },
    {
      "epoch": 0.19479303240307175,
      "grad_norm": 0.04443359375,
      "learning_rate": 4.864729303719221e-06,
      "logits/chosen": 0.49029749631881714,
      "logits/rejected": 1.4827202558517456,
      "logps/chosen": -78.9704360961914,
      "logps/rejected": -1541.809326171875,
      "loss": 0.0031,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.34706807136535645,
      "rewards/margins": 14.254959106445312,
      "rewards/rejected": -14.602025985717773,
      "step": 780
    },
    {
      "epoch": 0.1972903789723419,
      "grad_norm": 0.11328125,
      "learning_rate": 4.857566287336152e-06,
      "logits/chosen": 0.5910658836364746,
      "logits/rejected": 1.5483187437057495,
      "logps/chosen": -99.37945556640625,
      "logps/rejected": -1432.4755859375,
      "loss": 0.0086,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.5464431047439575,
      "rewards/margins": 12.95887565612793,
      "rewards/rejected": -13.505319595336914,
      "step": 790
    },
    {
      "epoch": 0.19978772554161203,
      "grad_norm": 0.0011444091796875,
      "learning_rate": 4.850224032006765e-06,
      "logits/chosen": 0.6179195642471313,
      "logits/rejected": 1.5901352167129517,
      "logps/chosen": -81.73147583007812,
      "logps/rejected": -1412.5142822265625,
      "loss": 0.0054,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.38581180572509766,
      "rewards/margins": 13.030293464660645,
      "rewards/rejected": -13.416107177734375,
      "step": 800
    },
    {
      "epoch": 0.20228507211088217,
      "grad_norm": 0.1357421875,
      "learning_rate": 4.8427030959414984e-06,
      "logits/chosen": 0.5971755385398865,
      "logits/rejected": 1.6486015319824219,
      "logps/chosen": -74.70821380615234,
      "logps/rejected": -1532.8193359375,
      "loss": 0.0063,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.3184259831905365,
      "rewards/margins": 14.300427436828613,
      "rewards/rejected": -14.618852615356445,
      "step": 810
    },
    {
      "epoch": 0.20478241868015234,
      "grad_norm": 0.1689453125,
      "learning_rate": 4.835004050935369e-06,
      "logits/chosen": 0.6013739705085754,
      "logits/rejected": 1.4875710010528564,
      "logps/chosen": -74.9230728149414,
      "logps/rejected": -1462.062255859375,
      "loss": 0.0107,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.31746476888656616,
      "rewards/margins": 13.55724811553955,
      "rewards/rejected": -13.874712944030762,
      "step": 820
    },
    {
      "epoch": 0.2072797652494225,
      "grad_norm": 0.0279541015625,
      "learning_rate": 4.8271274823245e-06,
      "logits/chosen": 0.6184748411178589,
      "logits/rejected": 1.5838812589645386,
      "logps/chosen": -74.03543853759766,
      "logps/rejected": -1436.349853515625,
      "loss": 0.0048,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.29689908027648926,
      "rewards/margins": 13.336532592773438,
      "rewards/rejected": -13.63343334197998,
      "step": 830
    },
    {
      "epoch": 0.20977711181869263,
      "grad_norm": 0.0888671875,
      "learning_rate": 4.8190739889416264e-06,
      "logits/chosen": 0.6181553602218628,
      "logits/rejected": 1.6880038976669312,
      "logps/chosen": -73.8195571899414,
      "logps/rejected": -1627.819091796875,
      "loss": 0.003,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.3135436177253723,
      "rewards/margins": 15.254382133483887,
      "rewards/rejected": -15.567927360534668,
      "step": 840
    },
    {
      "epoch": 0.2122744583879628,
      "grad_norm": 0.0703125,
      "learning_rate": 4.810844183070553e-06,
      "logits/chosen": 0.5540085434913635,
      "logits/rejected": 1.6211318969726562,
      "logps/chosen": -72.0704116821289,
      "logps/rejected": -1341.570068359375,
      "loss": 0.0033,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.29530271887779236,
      "rewards/margins": 12.359551429748535,
      "rewards/rejected": -12.654852867126465,
      "step": 850
    },
    {
      "epoch": 0.21477180495723294,
      "grad_norm": 0.1435546875,
      "learning_rate": 4.802438690399622e-06,
      "logits/chosen": 0.600739598274231,
      "logits/rejected": 1.643431305885315,
      "logps/chosen": -70.41534423828125,
      "logps/rejected": -1476.7939453125,
      "loss": 0.0068,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.26624736189842224,
      "rewards/margins": 13.719133377075195,
      "rewards/rejected": -13.985379219055176,
      "step": 860
    },
    {
      "epoch": 0.21726915152650308,
      "grad_norm": 0.000701904296875,
      "learning_rate": 4.793858149974129e-06,
      "logits/chosen": 0.6058120727539062,
      "logits/rejected": 1.721599817276001,
      "logps/chosen": -79.14794921875,
      "logps/rejected": -1739.1048583984375,
      "loss": 0.0036,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.3524496853351593,
      "rewards/margins": 16.286325454711914,
      "rewards/rejected": -16.638776779174805,
      "step": 870
    },
    {
      "epoch": 0.21976649809577323,
      "grad_norm": 0.047119140625,
      "learning_rate": 4.785103214147747e-06,
      "logits/chosen": 0.6141168475151062,
      "logits/rejected": 1.7174230813980103,
      "logps/chosen": -77.48551940917969,
      "logps/rejected": -1538.275634765625,
      "loss": 0.004,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.34762924909591675,
      "rewards/margins": 14.33294677734375,
      "rewards/rejected": -14.680575370788574,
      "step": 880
    },
    {
      "epoch": 0.2222638446650434,
      "grad_norm": 0.000667572021484375,
      "learning_rate": 4.776174548532926e-06,
      "logits/chosen": 0.6287072896957397,
      "logits/rejected": 1.6851770877838135,
      "logps/chosen": -77.02873229980469,
      "logps/rejected": -1560.05810546875,
      "loss": 0.003,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.3427741825580597,
      "rewards/margins": 14.53984260559082,
      "rewards/rejected": -14.882616996765137,
      "step": 890
    },
    {
      "epoch": 0.22476119123431354,
      "grad_norm": 0.07080078125,
      "learning_rate": 4.767072831950288e-06,
      "logits/chosen": 0.617357611656189,
      "logits/rejected": 1.7079490423202515,
      "logps/chosen": -77.236572265625,
      "logps/rejected": -1560.383544921875,
      "loss": 0.002,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.3377893567085266,
      "rewards/margins": 14.535722732543945,
      "rewards/rejected": -14.873510360717773,
      "step": 900
    },
    {
      "epoch": 0.22725853780358368,
      "grad_norm": 0.01129150390625,
      "learning_rate": 4.7577987563770226e-06,
      "logits/chosen": 0.6263229250907898,
      "logits/rejected": 1.674384355545044,
      "logps/chosen": -81.606201171875,
      "logps/rejected": -1556.177490234375,
      "loss": 0.0057,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.37518563866615295,
      "rewards/margins": 14.350992202758789,
      "rewards/rejected": -14.726178169250488,
      "step": 910
    },
    {
      "epoch": 0.22975588437285385,
      "grad_norm": 0.00042724609375,
      "learning_rate": 4.748353026894273e-06,
      "logits/chosen": 0.63312166929245,
      "logits/rejected": 1.7262632846832275,
      "logps/chosen": -73.91940307617188,
      "logps/rejected": -1473.6871337890625,
      "loss": 0.0037,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.29600244760513306,
      "rewards/margins": 13.644391059875488,
      "rewards/rejected": -13.940393447875977,
      "step": 920
    },
    {
      "epoch": 0.232253230942124,
      "grad_norm": 0.00836181640625,
      "learning_rate": 4.738736361633532e-06,
      "logits/chosen": 0.6512018442153931,
      "logits/rejected": 1.6386451721191406,
      "logps/chosen": -75.5640869140625,
      "logps/rejected": -1397.0894775390625,
      "loss": 0.0042,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.3240968585014343,
      "rewards/margins": 12.940801620483398,
      "rewards/rejected": -13.264900207519531,
      "step": 930
    },
    {
      "epoch": 0.23475057751139414,
      "grad_norm": 0.00250244140625,
      "learning_rate": 4.728949491722046e-06,
      "logits/chosen": 0.6666821837425232,
      "logits/rejected": 1.6732994318008423,
      "logps/chosen": -76.89160919189453,
      "logps/rejected": -1383.048095703125,
      "loss": 0.0052,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.3491043150424957,
      "rewards/margins": 12.806585311889648,
      "rewards/rejected": -13.155691146850586,
      "step": 940
    },
    {
      "epoch": 0.2372479240806643,
      "grad_norm": 0.07421875,
      "learning_rate": 4.718993161227231e-06,
      "logits/chosen": 0.5883976221084595,
      "logits/rejected": 1.7654300928115845,
      "logps/chosen": -84.6438217163086,
      "logps/rejected": -1749.6275634765625,
      "loss": 0.0031,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.3954206109046936,
      "rewards/margins": 16.34560775756836,
      "rewards/rejected": -16.74102783203125,
      "step": 950
    },
    {
      "epoch": 0.23974527064993445,
      "grad_norm": 0.006500244140625,
      "learning_rate": 4.708868127100098e-06,
      "logits/chosen": 0.666793167591095,
      "logits/rejected": 1.6790577173233032,
      "logps/chosen": -75.14250183105469,
      "logps/rejected": -1408.4847412109375,
      "loss": 0.0034,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.3262697458267212,
      "rewards/margins": 13.068046569824219,
      "rewards/rejected": -13.394315719604492,
      "step": 960
    },
    {
      "epoch": 0.2422426172192046,
      "grad_norm": 0.0001659393310546875,
      "learning_rate": 4.6985751591177075e-06,
      "logits/chosen": 0.62273108959198,
      "logits/rejected": 1.7520809173583984,
      "logps/chosen": -87.58243560791016,
      "logps/rejected": -1661.821533203125,
      "loss": 0.0028,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.4351579546928406,
      "rewards/margins": 15.40393352508545,
      "rewards/rejected": -15.839093208312988,
      "step": 970
    },
    {
      "epoch": 0.24473996378847473,
      "grad_norm": 0.00885009765625,
      "learning_rate": 4.688115039824648e-06,
      "logits/chosen": 0.6803555488586426,
      "logits/rejected": 1.7338272333145142,
      "logps/chosen": -88.861328125,
      "logps/rejected": -1561.404541015625,
      "loss": 0.0032,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.4565040171146393,
      "rewards/margins": 14.439603805541992,
      "rewards/rejected": -14.896108627319336,
      "step": 980
    },
    {
      "epoch": 0.2472373103577449,
      "grad_norm": 5.424022674560547e-06,
      "learning_rate": 4.677488564473535e-06,
      "logits/chosen": 0.6470680832862854,
      "logits/rejected": 1.818428635597229,
      "logps/chosen": -92.59998321533203,
      "logps/rejected": -1680.185791015625,
      "loss": 0.0027,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.4968494772911072,
      "rewards/margins": 15.579751968383789,
      "rewards/rejected": -16.076602935791016,
      "step": 990
    },
    {
      "epoch": 0.24973465692701505,
      "grad_norm": 0.03173828125,
      "learning_rate": 4.666696540964556e-06,
      "logits/chosen": 0.7138900756835938,
      "logits/rejected": 1.7771879434585571,
      "logps/chosen": -99.16166687011719,
      "logps/rejected": -1503.458984375,
      "loss": 0.0033,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.551328182220459,
      "rewards/margins": 13.802647590637207,
      "rewards/rejected": -14.353976249694824,
      "step": 1000
    },
    {
      "epoch": 0.24973465692701505,
      "eval_logits/chosen": 0.7451997995376587,
      "eval_logits/rejected": 1.5489047765731812,
      "eval_logps/chosen": -80.45184326171875,
      "eval_logps/rejected": -830.6234130859375,
      "eval_loss": 0.006477854214608669,
      "eval_rewards/accuracies": 1.0,
      "eval_rewards/chosen": -0.36095961928367615,
      "eval_rewards/margins": 7.334376335144043,
      "eval_rewards/rejected": -7.695336818695068,
      "eval_runtime": 0.619,
      "eval_samples_per_second": 8.077,
      "eval_steps_per_second": 8.077,
      "step": 1000
    },
    {
      "epoch": 0.2522320034962852,
      "grad_norm": 0.0003871917724609375,
      "learning_rate": 4.6557397897840454e-06,
      "logits/chosen": 0.693498969078064,
      "logits/rejected": 1.753259301185608,
      "logps/chosen": -102.29890441894531,
      "logps/rejected": -1543.0269775390625,
      "loss": 0.005,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.583904504776001,
      "rewards/margins": 14.051568984985352,
      "rewards/rejected": -14.635473251342773,
      "step": 1010
    },
    {
      "epoch": 0.25472935006555536,
      "grad_norm": 0.0003032684326171875,
      "learning_rate": 4.644619143942108e-06,
      "logits/chosen": 0.5707821249961853,
      "logits/rejected": 1.7276995182037354,
      "logps/chosen": -88.48922729492188,
      "logps/rejected": -1698.140625,
      "loss": 0.0011,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.44621315598487854,
      "rewards/margins": 15.652883529663086,
      "rewards/rejected": -16.099096298217773,
      "step": 1020
    },
    {
      "epoch": 0.2572266966348255,
      "grad_norm": 0.076171875,
      "learning_rate": 4.633335448909284e-06,
      "logits/chosen": 0.658003032207489,
      "logits/rejected": 1.7504956722259521,
      "logps/chosen": -89.60042572021484,
      "logps/rejected": -1624.2664794921875,
      "loss": 0.0019,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.4533475339412689,
      "rewards/margins": 14.999029159545898,
      "rewards/rejected": -15.452377319335938,
      "step": 1030
    },
    {
      "epoch": 0.25972404320409564,
      "grad_norm": 0.0308837890625,
      "learning_rate": 4.621889562552272e-06,
      "logits/chosen": 0.6270695924758911,
      "logits/rejected": 1.8431812524795532,
      "logps/chosen": -105.17597961425781,
      "logps/rejected": -1880.988037109375,
      "loss": 0.0032,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.6098282933235168,
      "rewards/margins": 17.456111907958984,
      "rewards/rejected": -18.065940856933594,
      "step": 1040
    },
    {
      "epoch": 0.2622213897733658,
      "grad_norm": 3.886222839355469e-05,
      "learning_rate": 4.610282355068707e-06,
      "logits/chosen": 0.577286422252655,
      "logits/rejected": 1.723755121231079,
      "logps/chosen": -109.06168365478516,
      "logps/rejected": -1825.4671630859375,
      "loss": 0.002,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.6522443890571594,
      "rewards/margins": 16.784276962280273,
      "rewards/rejected": -17.4365234375,
      "step": 1050
    },
    {
      "epoch": 0.26471873634263593,
      "grad_norm": 0.078125,
      "learning_rate": 4.598514708921006e-06,
      "logits/chosen": 0.6790138483047485,
      "logits/rejected": 1.8542677164077759,
      "logps/chosen": -101.43669128417969,
      "logps/rejected": -1781.3316650390625,
      "loss": 0.0024,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.5860346555709839,
      "rewards/margins": 16.492624282836914,
      "rewards/rejected": -17.078659057617188,
      "step": 1060
    },
    {
      "epoch": 0.26721608291190607,
      "grad_norm": 0.0002994537353515625,
      "learning_rate": 4.5865875187692695e-06,
      "logits/chosen": 0.6998518109321594,
      "logits/rejected": 1.8515506982803345,
      "logps/chosen": -90.31967163085938,
      "logps/rejected": -1559.104736328125,
      "loss": 0.0043,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.46949252486228943,
      "rewards/margins": 14.420549392700195,
      "rewards/rejected": -14.890042304992676,
      "step": 1070
    },
    {
      "epoch": 0.26971342948117627,
      "grad_norm": 0.04296875,
      "learning_rate": 4.57450169140327e-06,
      "logits/chosen": 0.6541129350662231,
      "logits/rejected": 1.944573163986206,
      "logps/chosen": -94.61952209472656,
      "logps/rejected": -1980.4349365234375,
      "loss": 0.0018,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.5051703453063965,
      "rewards/margins": 18.555133819580078,
      "rewards/rejected": -19.060306549072266,
      "step": 1080
    },
    {
      "epoch": 0.2722107760504464,
      "grad_norm": 0.0250244140625,
      "learning_rate": 4.562258145673507e-06,
      "logits/chosen": 0.624032199382782,
      "logits/rejected": 1.8918708562850952,
      "logps/chosen": -105.9957275390625,
      "logps/rejected": -1974.2584228515625,
      "loss": 0.0009,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.6295837759971619,
      "rewards/margins": 18.393098831176758,
      "rewards/rejected": -19.022680282592773,
      "step": 1090
    },
    {
      "epoch": 0.27470812261971655,
      "grad_norm": 0.0026092529296875,
      "learning_rate": 4.549857812421353e-06,
      "logits/chosen": 0.64922696352005,
      "logits/rejected": 1.8475501537322998,
      "logps/chosen": -89.56166076660156,
      "logps/rejected": -1663.010986328125,
      "loss": 0.0012,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.46423882246017456,
      "rewards/margins": 15.462870597839355,
      "rewards/rejected": -15.92711067199707,
      "step": 1100
    },
    {
      "epoch": 0.2772054691889867,
      "grad_norm": 0.0084228515625,
      "learning_rate": 4.537301634408281e-06,
      "logits/chosen": 0.6925086975097656,
      "logits/rejected": 1.7738326787948608,
      "logps/chosen": -85.5665512084961,
      "logps/rejected": -1501.200439453125,
      "loss": 0.0023,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.4182400703430176,
      "rewards/margins": 13.875646591186523,
      "rewards/rejected": -14.293886184692383,
      "step": 1110
    },
    {
      "epoch": 0.27970281575825684,
      "grad_norm": 0.0732421875,
      "learning_rate": 4.52459056624419e-06,
      "logits/chosen": 0.7249046564102173,
      "logits/rejected": 1.795248031616211,
      "logps/chosen": -102.11865997314453,
      "logps/rejected": -1670.522216796875,
      "loss": 0.0035,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.5903103351593018,
      "rewards/margins": 15.393072128295898,
      "rewards/rejected": -15.983380317687988,
      "step": 1120
    },
    {
      "epoch": 0.282200162327527,
      "grad_norm": 0.06494140625,
      "learning_rate": 4.51172557431483e-06,
      "logits/chosen": 0.6399365663528442,
      "logits/rejected": 1.7282390594482422,
      "logps/chosen": -105.56150817871094,
      "logps/rejected": -1699.7261962890625,
      "loss": 0.0022,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.6241176724433899,
      "rewards/margins": 15.525866508483887,
      "rewards/rejected": -16.14998435974121,
      "step": 1130
    },
    {
      "epoch": 0.2846975088967972,
      "grad_norm": 0.054443359375,
      "learning_rate": 4.49870763670833e-06,
      "logits/chosen": 0.6207230687141418,
      "logits/rejected": 1.878230333328247,
      "logps/chosen": -95.18315124511719,
      "logps/rejected": -1844.5869140625,
      "loss": 0.0013,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.5091260671615601,
      "rewards/margins": 17.207239151000977,
      "rewards/rejected": -17.71636390686035,
      "step": 1140
    },
    {
      "epoch": 0.2871948554660673,
      "grad_norm": 0.06591796875,
      "learning_rate": 4.4855377431408335e-06,
      "logits/chosen": 0.6628460884094238,
      "logits/rejected": 1.7432994842529297,
      "logps/chosen": -109.8287124633789,
      "logps/rejected": -1642.527099609375,
      "loss": 0.002,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.6535288095474243,
      "rewards/margins": 14.924878120422363,
      "rewards/rejected": -15.578405380249023,
      "step": 1150
    },
    {
      "epoch": 0.28969220203533746,
      "grad_norm": 0.005096435546875,
      "learning_rate": 4.472216894881261e-06,
      "logits/chosen": 0.6672796010971069,
      "logits/rejected": 1.738856315612793,
      "logps/chosen": -89.06207275390625,
      "logps/rejected": -1540.048583984375,
      "loss": 0.0018,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.46248659491539,
      "rewards/margins": 14.235052108764648,
      "rewards/rejected": -14.697538375854492,
      "step": 1160
    },
    {
      "epoch": 0.2921895486046076,
      "grad_norm": 0.047607421875,
      "learning_rate": 4.4587461046751815e-06,
      "logits/chosen": 0.6774252653121948,
      "logits/rejected": 1.7696081399917603,
      "logps/chosen": -81.09745788574219,
      "logps/rejected": -1620.4942626953125,
      "loss": 0.003,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.38098251819610596,
      "rewards/margins": 15.126324653625488,
      "rewards/rejected": -15.507307052612305,
      "step": 1170
    },
    {
      "epoch": 0.29468689517387775,
      "grad_norm": 0.008544921875,
      "learning_rate": 4.44512639666781e-06,
      "logits/chosen": 0.6951876878738403,
      "logits/rejected": 1.7908748388290405,
      "logps/chosen": -76.070068359375,
      "logps/rejected": -1503.576416015625,
      "loss": 0.0032,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.32203441858291626,
      "rewards/margins": 14.029817581176758,
      "rewards/rejected": -14.351852416992188,
      "step": 1180
    },
    {
      "epoch": 0.2971842417431479,
      "grad_norm": 0.890625,
      "learning_rate": 4.431358806326158e-06,
      "logits/chosen": 0.5664582848548889,
      "logits/rejected": 1.6636062860488892,
      "logps/chosen": -77.29322814941406,
      "logps/rejected": -1649.8541259765625,
      "loss": 0.004,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.32812389731407166,
      "rewards/margins": 15.302160263061523,
      "rewards/rejected": -15.630284309387207,
      "step": 1190
    },
    {
      "epoch": 0.29968158831241803,
      "grad_norm": 0.765625,
      "learning_rate": 4.4174443803603e-06,
      "logits/chosen": 0.7006584405899048,
      "logits/rejected": 1.8050626516342163,
      "logps/chosen": -101.17628479003906,
      "logps/rejected": -1638.1669921875,
      "loss": 0.004,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.5764530301094055,
      "rewards/margins": 15.088088989257812,
      "rewards/rejected": -15.664541244506836,
      "step": 1200
    },
    {
      "epoch": 0.30217893488168823,
      "grad_norm": 0.045166015625,
      "learning_rate": 4.4033841766438e-06,
      "logits/chosen": 0.5828436613082886,
      "logits/rejected": 1.6060895919799805,
      "logps/chosen": -79.80711364746094,
      "logps/rejected": -1499.616943359375,
      "loss": 0.0038,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.35304346680641174,
      "rewards/margins": 13.869488716125488,
      "rewards/rejected": -14.222529411315918,
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.3046762814509584, |
|
"grad_norm": 0.0005035400390625, |
|
"learning_rate": 4.389179264133281e-06, |
|
"logits/chosen": 0.6191063523292542, |
|
"logits/rejected": 1.7791473865509033, |
|
"logps/chosen": -74.18888854980469, |
|
"logps/rejected": -1617.736572265625, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.31279683113098145, |
|
"rewards/margins": 15.179719924926758, |
|
"rewards/rejected": -15.492517471313477, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.3071736280202285, |
|
"grad_norm": 0.10205078125, |
|
"learning_rate": 4.374830722787159e-06, |
|
"logits/chosen": 0.5632847547531128, |
|
"logits/rejected": 1.709839105606079, |
|
"logps/chosen": -71.81803894042969, |
|
"logps/rejected": -1728.1201171875, |
|
"loss": 0.0035, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2758982479572296, |
|
"rewards/margins": 16.288021087646484, |
|
"rewards/rejected": -16.563919067382812, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.30967097458949866, |
|
"grad_norm": 0.08984375, |
|
"learning_rate": 4.360339643483533e-06, |
|
"logits/chosen": 0.5613077878952026, |
|
"logits/rejected": 1.6441980600357056, |
|
"logps/chosen": -71.94982147216797, |
|
"logps/rejected": -1693.345947265625, |
|
"loss": 0.0031, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.27554553747177124, |
|
"rewards/margins": 15.830523490905762, |
|
"rewards/rejected": -16.106069564819336, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.3121683211587688, |
|
"grad_norm": 0.00022411346435546875, |
|
"learning_rate": 4.345707127937253e-06, |
|
"logits/chosen": 0.5210096836090088, |
|
"logits/rejected": 1.81972336769104, |
|
"logps/chosen": -72.21741485595703, |
|
"logps/rejected": -1985.244873046875, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2866365611553192, |
|
"rewards/margins": 18.848371505737305, |
|
"rewards/rejected": -19.135007858276367, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.31466566772803894, |
|
"grad_norm": 0.0087890625, |
|
"learning_rate": 4.330934288616154e-06, |
|
"logits/chosen": 0.6370071172714233, |
|
"logits/rejected": 1.816506028175354, |
|
"logps/chosen": -77.37321472167969, |
|
"logps/rejected": -1700.324951171875, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.33770519495010376, |
|
"rewards/margins": 15.96662712097168, |
|
"rewards/rejected": -16.304332733154297, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.3171630142973091, |
|
"grad_norm": 0.043701171875, |
|
"learning_rate": 4.316022248656485e-06, |
|
"logits/chosen": 0.5022410154342651, |
|
"logits/rejected": 1.5646175146102905, |
|
"logps/chosen": -71.90743255615234, |
|
"logps/rejected": -1543.3675537109375, |
|
"loss": 0.0019, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2772095799446106, |
|
"rewards/margins": 14.190910339355469, |
|
"rewards/rejected": -14.468118667602539, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.3196603608665793, |
|
"grad_norm": 0.01177978515625, |
|
"learning_rate": 4.3009721417775166e-06, |
|
"logits/chosen": 0.5786353349685669, |
|
"logits/rejected": 1.7490533590316772, |
|
"logps/chosen": -76.0634536743164, |
|
"logps/rejected": -1822.3128662109375, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.33201277256011963, |
|
"rewards/margins": 17.15824317932129, |
|
"rewards/rejected": -17.49025535583496, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.3221577074358494, |
|
"grad_norm": 0.006378173828125, |
|
"learning_rate": 4.285785112195346e-06, |
|
"logits/chosen": 0.5005044341087341, |
|
"logits/rejected": 1.6343857049942017, |
|
"logps/chosen": -79.80322265625, |
|
"logps/rejected": -1840.0863037109375, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.360287070274353, |
|
"rewards/margins": 17.2882080078125, |
|
"rewards/rejected": -17.648494720458984, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.32465505400511957, |
|
"grad_norm": 0.0218505859375, |
|
"learning_rate": 4.27046231453591e-06, |
|
"logits/chosen": 0.5454662442207336, |
|
"logits/rejected": 1.7699216604232788, |
|
"logps/chosen": -73.63867950439453, |
|
"logps/rejected": -1785.0687255859375, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3033478856086731, |
|
"rewards/margins": 16.751049041748047, |
|
"rewards/rejected": -17.054393768310547, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.3271524005743897, |
|
"grad_norm": 3.4458935260772705e-08, |
|
"learning_rate": 4.255004913747196e-06, |
|
"logits/chosen": 0.5776439905166626, |
|
"logits/rejected": 1.7879540920257568, |
|
"logps/chosen": -73.9155502319336, |
|
"logps/rejected": -1853.062744140625, |
|
"loss": 0.0017, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3109303414821625, |
|
"rewards/margins": 17.477283477783203, |
|
"rewards/rejected": -17.78821563720703, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.32964974714365985, |
|
"grad_norm": 0.0025787353515625, |
|
"learning_rate": 4.2394140850106825e-06, |
|
"logits/chosen": 0.5839067697525024, |
|
"logits/rejected": 1.7082710266113281, |
|
"logps/chosen": -79.85846710205078, |
|
"logps/rejected": -1769.608154296875, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.36045509576797485, |
|
"rewards/margins": 16.53636360168457, |
|
"rewards/rejected": -16.896818161010742, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.33214709371293, |
|
"grad_norm": 0.049072265625, |
|
"learning_rate": 4.223691013651986e-06, |
|
"logits/chosen": 0.4838981032371521, |
|
"logits/rejected": 1.5925347805023193, |
|
"logps/chosen": -78.32035827636719, |
|
"logps/rejected": -1797.3795166015625, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.33012980222702026, |
|
"rewards/margins": 16.690799713134766, |
|
"rewards/rejected": -17.02092933654785, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.33464444028220014, |
|
"grad_norm": 0.07421875, |
|
"learning_rate": 4.207836895050748e-06, |
|
"logits/chosen": 0.5183486342430115, |
|
"logits/rejected": 1.813838243484497, |
|
"logps/chosen": -78.78418731689453, |
|
"logps/rejected": -2234.604248046875, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.35720717906951904, |
|
"rewards/margins": 21.229446411132812, |
|
"rewards/rejected": -21.586654663085938, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.33714178685147034, |
|
"grad_norm": 0.034912109375, |
|
"learning_rate": 4.1918529345497525e-06, |
|
"logits/chosen": 0.6919676661491394, |
|
"logits/rejected": 1.7151187658309937, |
|
"logps/chosen": -73.4660415649414, |
|
"logps/rejected": -1405.526123046875, |
|
"loss": 0.0024, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2937282919883728, |
|
"rewards/margins": 13.013958930969238, |
|
"rewards/rejected": -13.307687759399414, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.3396391334207405, |
|
"grad_norm": 0.0673828125, |
|
"learning_rate": 4.175740347363289e-06, |
|
"logits/chosen": 0.58757483959198, |
|
"logits/rejected": 1.651389718055725, |
|
"logps/chosen": -76.53401947021484, |
|
"logps/rejected": -1477.0704345703125, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3171566426753998, |
|
"rewards/margins": 13.620218276977539, |
|
"rewards/rejected": -13.93737506866455, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.3421364799900106, |
|
"grad_norm": 0.00014400482177734375, |
|
"learning_rate": 4.159500358484759e-06, |
|
"logits/chosen": 0.5347609519958496, |
|
"logits/rejected": 1.775714635848999, |
|
"logps/chosen": -80.72816467285156, |
|
"logps/rejected": -2101.271240234375, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.37172675132751465, |
|
"rewards/margins": 19.874296188354492, |
|
"rewards/rejected": -20.246021270751953, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.34463382655928076, |
|
"grad_norm": 0.08544921875, |
|
"learning_rate": 4.143134202593549e-06, |
|
"logits/chosen": 0.624781608581543, |
|
"logits/rejected": 1.6769899129867554, |
|
"logps/chosen": -73.6835708618164, |
|
"logps/rejected": -1517.2562255859375, |
|
"loss": 0.002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.28604602813720703, |
|
"rewards/margins": 14.03973388671875, |
|
"rewards/rejected": -14.325779914855957, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.3471311731285509, |
|
"grad_norm": 0.000701904296875, |
|
"learning_rate": 4.126643123961158e-06, |
|
"logits/chosen": 0.5619300007820129, |
|
"logits/rejected": 1.733264684677124, |
|
"logps/chosen": -85.01399230957031, |
|
"logps/rejected": -1977.3433837890625, |
|
"loss": 0.0019, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4228588938713074, |
|
"rewards/margins": 18.610515594482422, |
|
"rewards/rejected": -19.033374786376953, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.34962851969782105, |
|
"grad_norm": 0.054931640625, |
|
"learning_rate": 4.110028376356599e-06, |
|
"logits/chosen": 0.6396419405937195, |
|
"logits/rejected": 1.709763765335083, |
|
"logps/chosen": -78.08328247070312, |
|
"logps/rejected": -1361.017822265625, |
|
"loss": 0.0018, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.32667404413223267, |
|
"rewards/margins": 12.484647750854492, |
|
"rewards/rejected": -12.811322212219238, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.3521258662670912, |
|
"grad_norm": 0.0230712890625, |
|
"learning_rate": 4.093291222951079e-06, |
|
"logits/chosen": 0.59341961145401, |
|
"logits/rejected": 1.863669991493225, |
|
"logps/chosen": -88.14995574951172, |
|
"logps/rejected": -1872.766845703125, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4488135874271393, |
|
"rewards/margins": 17.484996795654297, |
|
"rewards/rejected": -17.93381118774414, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.3546232128363614, |
|
"grad_norm": 0.058837890625, |
|
"learning_rate": 4.076432936221965e-06, |
|
"logits/chosen": 0.7002652287483215, |
|
"logits/rejected": 1.8483015298843384, |
|
"logps/chosen": -82.04905700683594, |
|
"logps/rejected": -1564.48388671875, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.38699063658714294, |
|
"rewards/margins": 14.584707260131836, |
|
"rewards/rejected": -14.971699714660645, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.35712055940563153, |
|
"grad_norm": 0.0228271484375, |
|
"learning_rate": 4.059454797856039e-06, |
|
"logits/chosen": 0.6757210493087769, |
|
"logits/rejected": 1.8517526388168335, |
|
"logps/chosen": -79.04359436035156, |
|
"logps/rejected": -1546.5145263671875, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.36757320165634155, |
|
"rewards/margins": 14.412260055541992, |
|
"rewards/rejected": -14.77983570098877, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.3596179059749017, |
|
"grad_norm": 2.86102294921875e-05, |
|
"learning_rate": 4.042358098652057e-06, |
|
"logits/chosen": 0.6149075627326965, |
|
"logits/rejected": 1.7819246053695679, |
|
"logps/chosen": -79.50230407714844, |
|
"logps/rejected": -1636.815673828125, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.36546218395233154, |
|
"rewards/margins": 15.302419662475586, |
|
"rewards/rejected": -15.667881965637207, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.3621152525441718, |
|
"grad_norm": 0.00115203857421875, |
|
"learning_rate": 4.025144138422615e-06, |
|
"logits/chosen": 0.6270621418952942, |
|
"logits/rejected": 1.8290207386016846, |
|
"logps/chosen": -94.74217224121094, |
|
"logps/rejected": -1927.5875244140625, |
|
"loss": 0.0019, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5115745663642883, |
|
"rewards/margins": 18.033849716186523, |
|
"rewards/rejected": -18.54542350769043, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.36461259911344196, |
|
"grad_norm": 0.01483154296875, |
|
"learning_rate": 4.007814225895321e-06, |
|
"logits/chosen": 0.6495813131332397, |
|
"logits/rejected": 1.905644178390503, |
|
"logps/chosen": -77.37786865234375, |
|
"logps/rejected": -1849.2958984375, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.35049691796302795, |
|
"rewards/margins": 17.428863525390625, |
|
"rewards/rejected": -17.779361724853516, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.3671099456827121, |
|
"grad_norm": 0.030029296875, |
|
"learning_rate": 3.990369678613303e-06, |
|
"logits/chosen": 0.5495260953903198, |
|
"logits/rejected": 1.7052236795425415, |
|
"logps/chosen": -80.6564712524414, |
|
"logps/rejected": -1763.9212646484375, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3531542718410492, |
|
"rewards/margins": 16.24311637878418, |
|
"rewards/rejected": -16.596271514892578, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.36960729225198224, |
|
"grad_norm": 0.005859375, |
|
"learning_rate": 3.97281182283504e-06, |
|
"logits/chosen": 0.625984251499176, |
|
"logits/rejected": 1.8606735467910767, |
|
"logps/chosen": -79.91282653808594, |
|
"logps/rejected": -2002.6253662109375, |
|
"loss": 0.0015, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.37286117672920227, |
|
"rewards/margins": 18.913448333740234, |
|
"rewards/rejected": -19.28631019592285, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.37210463882125244, |
|
"grad_norm": 0.0966796875, |
|
"learning_rate": 3.955141993433526e-06, |
|
"logits/chosen": 0.6155849695205688, |
|
"logits/rejected": 1.8059221506118774, |
|
"logps/chosen": -83.35047912597656, |
|
"logps/rejected": -1705.9954833984375, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.40526407957077026, |
|
"rewards/margins": 15.947067260742188, |
|
"rewards/rejected": -16.352331161499023, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.3746019853905226, |
|
"grad_norm": 0.028076171875, |
|
"learning_rate": 3.937361533794784e-06, |
|
"logits/chosen": 0.6340751647949219, |
|
"logits/rejected": 1.7404279708862305, |
|
"logps/chosen": -88.5922622680664, |
|
"logps/rejected": -1672.023193359375, |
|
"loss": 0.0018, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4479256570339203, |
|
"rewards/margins": 15.48670768737793, |
|
"rewards/rejected": -15.934633255004883, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.3770993319597927, |
|
"grad_norm": 0.005706787109375, |
|
"learning_rate": 3.919471795715738e-06, |
|
"logits/chosen": 0.6204045414924622, |
|
"logits/rejected": 1.7921810150146484, |
|
"logps/chosen": -76.07710266113281, |
|
"logps/rejected": -1581.0501708984375, |
|
"loss": 0.0012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3369273245334625, |
|
"rewards/margins": 14.815042495727539, |
|
"rewards/rejected": -15.151969909667969, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.37959667852906287, |
|
"grad_norm": 0.033203125, |
|
"learning_rate": 3.901474139301433e-06, |
|
"logits/chosen": 0.5973562002182007, |
|
"logits/rejected": 1.781730055809021, |
|
"logps/chosen": -83.27436065673828, |
|
"logps/rejected": -1706.674560546875, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3964506983757019, |
|
"rewards/margins": 15.901094436645508, |
|
"rewards/rejected": -16.297544479370117, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.382094025098333, |
|
"grad_norm": 0.020263671875, |
|
"learning_rate": 3.883369932861634e-06, |
|
"logits/chosen": 0.66780024766922, |
|
"logits/rejected": 1.8160803318023682, |
|
"logps/chosen": -88.08844757080078, |
|
"logps/rejected": -1613.07373046875, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.44633254408836365, |
|
"rewards/margins": 15.01531982421875, |
|
"rewards/rejected": -15.461652755737305, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.38459137166760315, |
|
"grad_norm": 5.245208740234375e-05, |
|
"learning_rate": 3.865160552806796e-06, |
|
"logits/chosen": 0.6651610136032104, |
|
"logits/rejected": 1.8406972885131836, |
|
"logps/chosen": -79.48463439941406, |
|
"logps/rejected": -1628.993896484375, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.36581045389175415, |
|
"rewards/margins": 15.247782707214355, |
|
"rewards/rejected": -15.613592147827148, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.3870887182368733, |
|
"grad_norm": 1.7404556274414062e-05, |
|
"learning_rate": 3.84684738354342e-06, |
|
"logits/chosen": 0.6299249529838562, |
|
"logits/rejected": 1.8017246723175049, |
|
"logps/chosen": -78.25436401367188, |
|
"logps/rejected": -1699.4664306640625, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.34191855788230896, |
|
"rewards/margins": 15.943153381347656, |
|
"rewards/rejected": -16.285072326660156, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.3895860648061435, |
|
"grad_norm": 0.043212890625, |
|
"learning_rate": 3.828431817368798e-06, |
|
"logits/chosen": 0.6114810705184937, |
|
"logits/rejected": 1.7776029109954834, |
|
"logps/chosen": -83.11959075927734, |
|
"logps/rejected": -1808.9710693359375, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.39526110887527466, |
|
"rewards/margins": 16.893442153930664, |
|
"rewards/rejected": -17.2887020111084, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.39208341137541364, |
|
"grad_norm": 0.0859375, |
|
"learning_rate": 3.8099152543651684e-06, |
|
"logits/chosen": 0.5259889364242554, |
|
"logits/rejected": 1.8491640090942383, |
|
"logps/chosen": -76.72280883789062, |
|
"logps/rejected": -1907.3863525390625, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3375067710876465, |
|
"rewards/margins": 18.016918182373047, |
|
"rewards/rejected": -18.354427337646484, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.3945807579446838, |
|
"grad_norm": 0.07177734375, |
|
"learning_rate": 3.791299102293261e-06, |
|
"logits/chosen": 0.5979338884353638, |
|
"logits/rejected": 1.8080129623413086, |
|
"logps/chosen": -85.99974060058594, |
|
"logps/rejected": -1962.4896240234375, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4262419641017914, |
|
"rewards/margins": 18.461589813232422, |
|
"rewards/rejected": -18.88783073425293, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.3970781045139539, |
|
"grad_norm": 0.0078125, |
|
"learning_rate": 3.7725847764852774e-06, |
|
"logits/chosen": 0.5477781891822815, |
|
"logits/rejected": 1.7578785419464111, |
|
"logps/chosen": -83.63060760498047, |
|
"logps/rejected": -1990.638671875, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.39986157417297363, |
|
"rewards/margins": 18.64605140686035, |
|
"rewards/rejected": -19.045909881591797, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.39957545108322406, |
|
"grad_norm": 0.0067138671875, |
|
"learning_rate": 3.7537736997372833e-06, |
|
"logits/chosen": 0.5983849167823792, |
|
"logits/rejected": 1.6318690776824951, |
|
"logps/chosen": -74.38432312011719, |
|
"logps/rejected": -1474.78759765625, |
|
"loss": 0.0019, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.30779850482940674, |
|
"rewards/margins": 13.598607063293457, |
|
"rewards/rejected": -13.906405448913574, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.4020727976524942, |
|
"grad_norm": 0.0035247802734375, |
|
"learning_rate": 3.734867302201038e-06, |
|
"logits/chosen": 0.620630145072937, |
|
"logits/rejected": 1.7149145603179932, |
|
"logps/chosen": -75.28178405761719, |
|
"logps/rejected": -1552.66650390625, |
|
"loss": 0.0012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.32418856024742126, |
|
"rewards/margins": 14.512880325317383, |
|
"rewards/rejected": -14.837068557739258, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.40457014422176435, |
|
"grad_norm": 0.04833984375, |
|
"learning_rate": 3.7158670212752666e-06, |
|
"logits/chosen": 0.609667181968689, |
|
"logits/rejected": 1.8217569589614868, |
|
"logps/chosen": -75.36375427246094, |
|
"logps/rejected": -1846.893798828125, |
|
"loss": 0.0028, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.31798630952835083, |
|
"rewards/margins": 17.42896842956543, |
|
"rewards/rejected": -17.746957778930664, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.40706749079103455, |
|
"grad_norm": 0.0034332275390625, |
|
"learning_rate": 3.696774301496376e-06, |
|
"logits/chosen": 0.6272271871566772, |
|
"logits/rejected": 1.8513765335083008, |
|
"logps/chosen": -76.99528503417969, |
|
"logps/rejected": -1668.758056640625, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.33525028824806213, |
|
"rewards/margins": 15.664929389953613, |
|
"rewards/rejected": -16.000181198120117, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.4095648373603047, |
|
"grad_norm": 0.0020599365234375, |
|
"learning_rate": 3.677590594428629e-06, |
|
"logits/chosen": 0.6275375485420227, |
|
"logits/rejected": 1.746649980545044, |
|
"logps/chosen": -82.9039535522461, |
|
"logps/rejected": -1647.865478515625, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.402643620967865, |
|
"rewards/margins": 15.365156173706055, |
|
"rewards/rejected": -15.767801284790039, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.41206218392957483, |
|
"grad_norm": 0.0001926422119140625, |
|
"learning_rate": 3.658317358553794e-06, |
|
"logits/chosen": 0.6051415205001831, |
|
"logits/rejected": 1.807227373123169, |
|
"logps/chosen": -78.21363830566406, |
|
"logps/rejected": -1698.2001953125, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3519694209098816, |
|
"rewards/margins": 15.862031936645508, |
|
"rewards/rejected": -16.214000701904297, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.414559530498845, |
|
"grad_norm": 0.05908203125, |
|
"learning_rate": 3.638956059160252e-06, |
|
"logits/chosen": 0.659566342830658, |
|
"logits/rejected": 1.9395606517791748, |
|
"logps/chosen": -79.38732147216797, |
|
"logps/rejected": -1887.150390625, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.369088351726532, |
|
"rewards/margins": 17.81894874572754, |
|
"rewards/rejected": -18.188034057617188, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.4170568770681151, |
|
"grad_norm": 0.0064697265625, |
|
"learning_rate": 3.6195081682315972e-06, |
|
"logits/chosen": 0.6888834834098816, |
|
"logits/rejected": 1.855298638343811, |
|
"logps/chosen": -87.92467498779297, |
|
"logps/rejected": -1717.685546875, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.45286068320274353, |
|
"rewards/margins": 16.04346466064453, |
|
"rewards/rejected": -16.496326446533203, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.41955422363738526, |
|
"grad_norm": 0.026123046875, |
|
"learning_rate": 3.5999751643347342e-06, |
|
"logits/chosen": 0.5452974438667297, |
|
"logits/rejected": 1.710627794265747, |
|
"logps/chosen": -84.69573974609375, |
|
"logps/rejected": -1964.759033203125, |
|
"loss": 0.0015, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4223009943962097, |
|
"rewards/margins": 18.474639892578125, |
|
"rewards/rejected": -18.896940231323242, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.4220515702066554, |
|
"grad_norm": 0.050537109375, |
|
"learning_rate": 3.5803585325074536e-06, |
|
"logits/chosen": 0.5890778303146362, |
|
"logits/rejected": 1.8013776540756226, |
|
"logps/chosen": -78.17984008789062, |
|
"logps/rejected": -1845.5576171875, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3491733968257904, |
|
"rewards/margins": 17.39130210876465, |
|
"rewards/rejected": -17.740474700927734, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.4245489167759256, |
|
"grad_norm": 0.0849609375, |
|
"learning_rate": 3.5606597641455387e-06, |
|
"logits/chosen": 0.6665171384811401, |
|
"logits/rejected": 1.7867825031280518, |
|
"logps/chosen": -82.6786117553711, |
|
"logps/rejected": -1745.9921875, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4073053300380707, |
|
"rewards/margins": 16.353519439697266, |
|
"rewards/rejected": -16.760822296142578, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.42704626334519574, |
|
"grad_norm": 0.054443359375, |
|
"learning_rate": 3.540880356889376e-06, |
|
"logits/chosen": 0.6666916012763977, |
|
"logits/rejected": 1.773199439048767, |
|
"logps/chosen": -83.08412170410156, |
|
"logps/rejected": -1565.4173583984375, |
|
"loss": 0.0018, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.39527103304862976, |
|
"rewards/margins": 14.483154296875, |
|
"rewards/rejected": -14.878425598144531, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.4295436099144659, |
|
"grad_norm": 0.036865234375, |
|
"learning_rate": 3.5210218145100934e-06, |
|
"logits/chosen": 0.6796804666519165, |
|
"logits/rejected": 1.826575517654419, |
|
"logps/chosen": -76.29814147949219, |
|
"logps/rejected": -1558.442626953125, |
|
"loss": 0.0028, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3252686560153961, |
|
"rewards/margins": 14.555532455444336, |
|
"rewards/rejected": -14.8808012008667, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.432040956483736, |
|
"grad_norm": 0.037841796875, |
|
"learning_rate": 3.5010856467952335e-06, |
|
"logits/chosen": 0.6157870292663574, |
|
"logits/rejected": 1.7099930047988892, |
|
"logps/chosen": -81.55091094970703, |
|
"logps/rejected": -1618.1546630859375, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3764324188232422, |
|
"rewards/margins": 14.980290412902832, |
|
"rewards/rejected": -15.356722831726074, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.43453830305300617, |
|
"grad_norm": 0.1484375, |
|
"learning_rate": 3.4810733694339687e-06, |
|
"logits/chosen": 0.5888208150863647, |
|
"logits/rejected": 1.7958781719207764, |
|
"logps/chosen": -84.57051086425781, |
|
"logps/rejected": -1871.361083984375, |
|
"loss": 0.0017, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4017999768257141, |
|
"rewards/margins": 17.520339965820312, |
|
"rewards/rejected": -17.922138214111328, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.4370356496222763, |
|
"grad_norm": 0.032958984375, |
|
"learning_rate": 3.4609865039018676e-06, |
|
"logits/chosen": 0.682072639465332, |
|
"logits/rejected": 1.7670128345489502, |
|
"logps/chosen": -83.45240783691406, |
|
"logps/rejected": -1766.69140625, |
|
"loss": 0.0031, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.40751171112060547, |
|
"rewards/margins": 16.532672882080078, |
|
"rewards/rejected": -16.940181732177734, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.43953299619154645, |
|
"grad_norm": 0.024658203125, |
|
"learning_rate": 3.4408265773452226e-06, |
|
"logits/chosen": 0.6357883214950562, |
|
"logits/rejected": 1.7647396326065063, |
|
"logps/chosen": -75.90863037109375, |
|
"logps/rejected": -1793.2620849609375, |
|
"loss": 0.003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.32462552189826965, |
|
"rewards/margins": 16.89077377319336, |
|
"rewards/rejected": -17.21540069580078, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.44203034276081665, |
|
"grad_norm": 0.00022411346435546875, |
|
"learning_rate": 3.420595122464942e-06, |
|
"logits/chosen": 0.5758832693099976, |
|
"logits/rejected": 1.7814972400665283, |
|
"logps/chosen": -79.77733612060547, |
|
"logps/rejected": -1759.0218505859375, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.36440029740333557, |
|
"rewards/margins": 16.545846939086914, |
|
"rewards/rejected": -16.910245895385742, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.4445276893300868, |
|
"grad_norm": 0.0712890625, |
|
"learning_rate": 3.4002936774000284e-06, |
|
"logits/chosen": 0.5318555235862732, |
|
"logits/rejected": 1.8811362981796265, |
|
"logps/chosen": -77.92293548583984, |
|
"logps/rejected": -2195.729736328125, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3443797826766968, |
|
"rewards/margins": 20.8763370513916, |
|
"rewards/rejected": -21.22071647644043, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.44702503589935694, |
|
"grad_norm": 0.035888671875, |
|
"learning_rate": 3.3799237856106348e-06, |
|
"logits/chosen": 0.5407482385635376, |
|
"logits/rejected": 1.725608229637146, |
|
"logps/chosen": -77.46542358398438, |
|
"logps/rejected": -1750.359619140625, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.34000295400619507, |
|
"rewards/margins": 16.431018829345703, |
|
"rewards/rejected": -16.77102279663086, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.4495223824686271, |
|
"grad_norm": 0.0003185272216796875, |
|
"learning_rate": 3.35948699576072e-06, |
|
"logits/chosen": 0.5788090825080872, |
|
"logits/rejected": 1.8735895156860352, |
|
"logps/chosen": -83.07856750488281, |
|
"logps/rejected": -2100.754638671875, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3939378559589386, |
|
"rewards/margins": 19.880746841430664, |
|
"rewards/rejected": -20.274681091308594, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.4520197290378972, |
|
"grad_norm": 0.13671875, |
|
"learning_rate": 3.3389848616003085e-06, |
|
"logits/chosen": 0.5929907560348511, |
|
"logits/rejected": 1.6974430084228516, |
|
"logps/chosen": -79.56812286376953, |
|
"logps/rejected": -1787.6068115234375, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3681466281414032, |
|
"rewards/margins": 16.794055938720703, |
|
"rewards/rejected": -17.16220474243164, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.45451707560716736, |
|
"grad_norm": 0.0390625, |
|
"learning_rate": 3.3184189418473674e-06, |
|
"logits/chosen": 0.5751794576644897, |
|
"logits/rejected": 1.7812267541885376, |
|
"logps/chosen": -77.59693908691406, |
|
"logps/rejected": -1778.1439208984375, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3408336043357849, |
|
"rewards/margins": 16.739145278930664, |
|
"rewards/rejected": -17.079978942871094, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.45701442217643756, |
|
"grad_norm": 0.001373291015625, |
|
"learning_rate": 3.2977908000692925e-06, |
|
"logits/chosen": 0.5487096905708313, |
|
"logits/rejected": 1.74447500705719, |
|
"logps/chosen": -80.25045013427734, |
|
"logps/rejected": -1946.0765380859375, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3701193928718567, |
|
"rewards/margins": 18.401947021484375, |
|
"rewards/rejected": -18.77206802368164, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.4595117687457077, |
|
"grad_norm": 0.048095703125, |
|
"learning_rate": 3.2771020045640435e-06, |
|
"logits/chosen": 0.6444208025932312, |
|
"logits/rejected": 1.6972076892852783, |
|
"logps/chosen": -78.00311279296875, |
|
"logps/rejected": -1579.7557373046875, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3551621735095978, |
|
"rewards/margins": 14.743237495422363, |
|
"rewards/rejected": -15.09839916229248, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.46200911531497785, |
|
"grad_norm": 0.07177734375, |
|
"learning_rate": 3.256354128240907e-06, |
|
"logits/chosen": 0.6255194544792175, |
|
"logits/rejected": 1.7124531269073486, |
|
"logps/chosen": -85.12455749511719, |
|
"logps/rejected": -1608.01171875, |
|
"loss": 0.0024, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.401846319437027, |
|
"rewards/margins": 14.84345531463623, |
|
"rewards/rejected": -15.245302200317383, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.464506461884248, |
|
"grad_norm": 1.191438059322536e-10, |
|
"learning_rate": 3.235548748500914e-06, |
|
"logits/chosen": 0.5620906352996826, |
|
"logits/rejected": 1.8212181329727173, |
|
"logps/chosen": -78.25764465332031, |
|
"logps/rejected": -1836.809326171875, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3476276397705078, |
|
"rewards/margins": 17.330211639404297, |
|
"rewards/rejected": -17.677841186523438, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.46700380845351813, |
|
"grad_norm": 0.038330078125, |
|
"learning_rate": 3.214687447116913e-06, |
|
"logits/chosen": 0.5774132609367371, |
|
"logits/rejected": 1.7261114120483398, |
|
"logps/chosen": -76.27984619140625, |
|
"logps/rejected": -1707.5220947265625, |
|
"loss": 0.002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3342064321041107, |
|
"rewards/margins": 15.912832260131836, |
|
"rewards/rejected": -16.247039794921875, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.4695011550227883, |
|
"grad_norm": 0.002532958984375, |
|
"learning_rate": 3.193771810113313e-06, |
|
"logits/chosen": 0.5532559752464294, |
|
"logits/rejected": 1.8629133701324463, |
|
"logps/chosen": -79.43685150146484, |
|
"logps/rejected": -2138.56884765625, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3567715287208557, |
|
"rewards/margins": 20.29428482055664, |
|
"rewards/rejected": -20.65105628967285, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.4719985015920584, |
|
"grad_norm": 0.018310546875, |
|
"learning_rate": 3.1728034276455032e-06, |
|
"logits/chosen": 0.6407243609428406, |
|
"logits/rejected": 1.7773427963256836, |
|
"logps/chosen": -75.46717834472656, |
|
"logps/rejected": -1624.9876708984375, |
|
"loss": 0.0017, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3326609432697296, |
|
"rewards/margins": 15.156428337097168, |
|
"rewards/rejected": -15.489087104797363, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.4744958481613286, |
|
"grad_norm": 0.001556396484375, |
|
"learning_rate": 3.1517838938789597e-06, |
|
"logits/chosen": 0.5151150822639465, |
|
"logits/rejected": 1.6990512609481812, |
|
"logps/chosen": -79.35011291503906, |
|
"logps/rejected": -1993.761962890625, |
|
"loss": 0.0018, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3504992127418518, |
|
"rewards/margins": 18.6396427154541, |
|
"rewards/rejected": -18.990140914916992, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.47699319473059876, |
|
"grad_norm": 0.036376953125, |
|
"learning_rate": 3.130714806868041e-06, |
|
"logits/chosen": 0.5437807440757751, |
|
"logits/rejected": 1.6498979330062866, |
|
"logps/chosen": -77.74958801269531, |
|
"logps/rejected": -1746.6015625, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3441064655780792, |
|
"rewards/margins": 16.398990631103516, |
|
"rewards/rejected": -16.74309730529785, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.4794905412998689, |
|
"grad_norm": 0.0269775390625, |
|
"learning_rate": 3.1095977684344976e-06, |
|
"logits/chosen": 0.6197426319122314, |
|
"logits/rejected": 1.865501046180725, |
|
"logps/chosen": -83.05316162109375, |
|
"logps/rejected": -1912.490234375, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.40501928329467773, |
|
"rewards/margins": 18.013139724731445, |
|
"rewards/rejected": -18.41815757751465, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.48198788786913904, |
|
"grad_norm": 0.004058837890625, |
|
"learning_rate": 3.0884343840456874e-06, |
|
"logits/chosen": 0.5581328868865967, |
|
"logits/rejected": 1.818427324295044, |
|
"logps/chosen": -82.58245849609375, |
|
"logps/rejected": -2075.47509765625, |
|
"loss": 0.0012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.39019304513931274, |
|
"rewards/margins": 19.626462936401367, |
|
"rewards/rejected": -20.016658782958984, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.4844852344384092, |
|
"grad_norm": 5.0067901611328125e-06, |
|
"learning_rate": 3.0672262626925174e-06, |
|
"logits/chosen": 0.49209919571876526, |
|
"logits/rejected": 1.718467354774475, |
|
"logps/chosen": -82.48509216308594, |
|
"logps/rejected": -1921.3333740234375, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.35821837186813354, |
|
"rewards/margins": 18.051937103271484, |
|
"rewards/rejected": -18.410158157348633, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.4869825810076793, |
|
"grad_norm": 0.0255126953125, |
|
"learning_rate": 3.0459750167671147e-06, |
|
"logits/chosen": 0.4969088137149811, |
|
"logits/rejected": 1.7654139995574951, |
|
"logps/chosen": -79.9859390258789, |
|
"logps/rejected": -2075.444580078125, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3472353518009186, |
|
"rewards/margins": 19.543991088867188, |
|
"rewards/rejected": -19.89122772216797, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.48947992757694947, |
|
"grad_norm": 0.1552734375, |
|
"learning_rate": 3.024682261940247e-06, |
|
"logits/chosen": 0.5588921904563904, |
|
"logits/rejected": 1.6852495670318604, |
|
"logps/chosen": -83.98374938964844, |
|
"logps/rejected": -1691.5599365234375, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.39910078048706055, |
|
"rewards/margins": 15.737649917602539, |
|
"rewards/rejected": -16.136751174926758, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.49197727414621967, |
|
"grad_norm": 1.4185905456542969e-05, |
|
"learning_rate": 3.0033496170384803e-06, |
|
"logits/chosen": 0.6266374588012695, |
|
"logits/rejected": 1.8179903030395508, |
|
"logps/chosen": -77.69737243652344, |
|
"logps/rejected": -1697.776123046875, |
|
"loss": 0.0017, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3449680805206299, |
|
"rewards/margins": 15.93467903137207, |
|
"rewards/rejected": -16.279645919799805, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.4944746207154898, |
|
"grad_norm": 0.0458984375, |
|
"learning_rate": 2.9819787039211068e-06, |
|
"logits/chosen": 0.5513324737548828, |
|
"logits/rejected": 1.6900783777236938, |
|
"logps/chosen": -76.17434692382812, |
|
"logps/rejected": -1829.5364990234375, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.31986111402511597, |
|
"rewards/margins": 17.185441970825195, |
|
"rewards/rejected": -17.50530242919922, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.49697196728475995, |
|
"grad_norm": 0.002777099609375, |
|
"learning_rate": 2.960571147356845e-06, |
|
"logits/chosen": 0.5562096834182739, |
|
"logits/rejected": 1.8595008850097656, |
|
"logps/chosen": -83.13392639160156, |
|
"logps/rejected": -2010.169921875, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.395429790019989, |
|
"rewards/margins": 18.9808292388916, |
|
"rewards/rejected": -19.37626075744629, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.4994693138540301, |
|
"grad_norm": 0.000293731689453125, |
|
"learning_rate": 2.9391285749003046e-06, |
|
"logits/chosen": 0.5312787294387817, |
|
"logits/rejected": 1.7356303930282593, |
|
"logps/chosen": -95.15312194824219, |
|
"logps/rejected": -2160.408935546875, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5149081349372864, |
|
"rewards/margins": 20.321773529052734, |
|
"rewards/rejected": -20.836681365966797, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.4994693138540301, |
|
"eval_logits/chosen": 0.6455119848251343, |
|
"eval_logits/rejected": 1.5546293258666992, |
|
"eval_logps/chosen": -82.33647155761719, |
|
"eval_logps/rejected": -980.0130615234375, |
|
"eval_loss": 0.0030529608484357595, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -0.37980592250823975, |
|
"eval_rewards/margins": 8.809426307678223, |
|
"eval_rewards/rejected": -9.189233779907227, |
|
"eval_runtime": 0.6247, |
|
"eval_samples_per_second": 8.004, |
|
"eval_steps_per_second": 8.004, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.5019666604233003, |
|
"grad_norm": 0.06494140625, |
|
"learning_rate": 2.9176526167682543e-06, |
|
"logits/chosen": 0.6404844522476196, |
|
"logits/rejected": 1.8602796792984009, |
|
"logps/chosen": -82.97758483886719, |
|
"logps/rejected": -1836.1568603515625, |
|
"loss": 0.0024, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4049321115016937, |
|
"rewards/margins": 17.25887107849121, |
|
"rewards/rejected": -17.663803100585938, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.5044640069925704, |
|
"grad_norm": 0.0166015625, |
|
"learning_rate": 2.8961449057156775e-06, |
|
"logits/chosen": 0.5347205400466919, |
|
"logits/rejected": 1.700486421585083, |
|
"logps/chosen": -84.12736511230469, |
|
"logps/rejected": -1874.214111328125, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3977189064025879, |
|
"rewards/margins": 17.567378997802734, |
|
"rewards/rejected": -17.965099334716797, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.5069613535618406, |
|
"grad_norm": 0.0013427734375, |
|
"learning_rate": 2.874607076911642e-06, |
|
"logits/chosen": 0.5987354516983032, |
|
"logits/rejected": 1.7991135120391846, |
|
"logps/chosen": -81.83995819091797, |
|
"logps/rejected": -1828.4136962890625, |
|
"loss": 0.0015, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.38373640179634094, |
|
"rewards/margins": 17.181564331054688, |
|
"rewards/rejected": -17.5653018951416, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.5094587001311107, |
|
"grad_norm": 0.000850677490234375, |
|
"learning_rate": 2.8530407678149806e-06, |
|
"logits/chosen": 0.6027461886405945, |
|
"logits/rejected": 1.730103850364685, |
|
"logps/chosen": -81.79703521728516, |
|
"logps/rejected": -1646.1201171875, |
|
"loss": 0.0018, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.37951746582984924, |
|
"rewards/margins": 15.314178466796875, |
|
"rewards/rejected": -15.693696975708008, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.5119560467003809, |
|
"grad_norm": 0.00055694580078125, |
|
"learning_rate": 2.8314476180498003e-06, |
|
"logits/chosen": 0.6401151418685913, |
|
"logits/rejected": 1.7924197912216187, |
|
"logps/chosen": -85.15666198730469, |
|
"logps/rejected": -1746.1314697265625, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.421166330575943, |
|
"rewards/margins": 16.335796356201172, |
|
"rewards/rejected": -16.756961822509766, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.514453393269651, |
|
"grad_norm": 0.032958984375, |
|
"learning_rate": 2.8098292692808253e-06, |
|
"logits/chosen": 0.6529192328453064, |
|
"logits/rejected": 1.7113368511199951, |
|
"logps/chosen": -83.26264953613281, |
|
"logps/rejected": -1448.375732421875, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3913651704788208, |
|
"rewards/margins": 13.431114196777344, |
|
"rewards/rejected": -13.822479248046875, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.5169507398389211, |
|
"grad_norm": 0.035400390625, |
|
"learning_rate": 2.7881873650885904e-06, |
|
"logits/chosen": 0.6235641241073608, |
|
"logits/rejected": 1.7722196578979492, |
|
"logps/chosen": -85.73550415039062, |
|
"logps/rejected": -1683.2877197265625, |
|
"loss": 0.0015, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.42536306381225586, |
|
"rewards/margins": 15.712361335754395, |
|
"rewards/rejected": -16.13772201538086, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.5194480864081913, |
|
"grad_norm": 0.07470703125, |
|
"learning_rate": 2.7665235508444772e-06, |
|
"logits/chosen": 0.5478901267051697, |
|
"logits/rejected": 1.8091357946395874, |
|
"logps/chosen": -79.84373474121094, |
|
"logps/rejected": -1996.998779296875, |
|
"loss": 0.0018, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.368811696767807, |
|
"rewards/margins": 18.87316131591797, |
|
"rewards/rejected": -19.241975784301758, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.5219454329774614, |
|
"grad_norm": 0.000518798828125, |
|
"learning_rate": 2.7448394735856275e-06, |
|
"logits/chosen": 0.5016141533851624, |
|
"logits/rejected": 1.7318010330200195, |
|
"logps/chosen": -88.0289306640625, |
|
"logps/rejected": -2137.91552734375, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.43785446882247925, |
|
"rewards/margins": 20.17669105529785, |
|
"rewards/rejected": -20.614543914794922, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.5244427795467316, |
|
"grad_norm": 0.041748046875, |
|
"learning_rate": 2.723136781889722e-06, |
|
"logits/chosen": 0.6009372472763062, |
|
"logits/rejected": 1.8194977045059204, |
|
"logps/chosen": -82.27381896972656, |
|
"logps/rejected": -1805.30859375, |
|
"loss": 0.0026, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.38621821999549866, |
|
"rewards/margins": 16.96898651123047, |
|
"rewards/rejected": -17.355205535888672, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.5269401261160017, |
|
"grad_norm": 0.0242919921875, |
|
"learning_rate": 2.7014171257496414e-06, |
|
"logits/chosen": 0.5697668790817261, |
|
"logits/rejected": 1.7131723165512085, |
|
"logps/chosen": -84.2120132446289, |
|
"logps/rejected": -1700.6185302734375, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3910997211933136, |
|
"rewards/margins": 15.752738952636719, |
|
"rewards/rejected": -16.14383888244629, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.5294374726852719, |
|
"grad_norm": 0.052978515625, |
|
"learning_rate": 2.6796821564480237e-06, |
|
"logits/chosen": 0.601753294467926, |
|
"logits/rejected": 1.729688048362732, |
|
"logps/chosen": -76.5484390258789, |
|
"logps/rejected": -1640.7828369140625, |
|
"loss": 0.0017, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.32075661420822144, |
|
"rewards/margins": 15.291044235229492, |
|
"rewards/rejected": -15.611801147460938, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.531934819254542, |
|
"grad_norm": 0.0012664794921875, |
|
"learning_rate": 2.6579335264317253e-06, |
|
"logits/chosen": 0.5816048383712769, |
|
"logits/rejected": 1.7906360626220703, |
|
"logps/chosen": -85.20054626464844, |
|
"logps/rejected": -1883.8203125, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.40961331129074097, |
|
"rewards/margins": 17.597332000732422, |
|
"rewards/rejected": -18.006946563720703, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.5344321658238121, |
|
"grad_norm": 0.00101470947265625, |
|
"learning_rate": 2.6361728891861843e-06, |
|
"logits/chosen": 0.5752017498016357, |
|
"logits/rejected": 1.6957403421401978, |
|
"logps/chosen": -86.33828735351562, |
|
"logps/rejected": -1814.455322265625, |
|
"loss": 0.0015, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4084859788417816, |
|
"rewards/margins": 16.83043670654297, |
|
"rewards/rejected": -17.238922119140625, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.5369295123930824, |
|
"grad_norm": 0.009521484375, |
|
"learning_rate": 2.614401899109716e-06, |
|
"logits/chosen": 0.5796340703964233, |
|
"logits/rejected": 1.7896589040756226, |
|
"logps/chosen": -78.46866607666016, |
|
"logps/rejected": -1804.115234375, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3526671528816223, |
|
"rewards/margins": 16.97123908996582, |
|
"rewards/rejected": -17.32390785217285, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.5394268589623525, |
|
"grad_norm": 0.0263671875, |
|
"learning_rate": 2.5926222113877282e-06, |
|
"logits/chosen": 0.5532792806625366, |
|
"logits/rejected": 1.7575023174285889, |
|
"logps/chosen": -86.9544906616211, |
|
"logps/rejected": -1865.7669677734375, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4337650239467621, |
|
"rewards/margins": 17.272930145263672, |
|
"rewards/rejected": -17.706693649291992, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.5419242055316227, |
|
"grad_norm": 0.0419921875, |
|
"learning_rate": 2.570835481866889e-06, |
|
"logits/chosen": 0.6227487921714783, |
|
"logits/rejected": 1.7569319009780884, |
|
"logps/chosen": -83.56333923339844, |
|
"logps/rejected": -1739.373046875, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4034454822540283, |
|
"rewards/margins": 16.28359031677246, |
|
"rewards/rejected": -16.687036514282227, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.5444215521008928, |
|
"grad_norm": 0.0240478515625, |
|
"learning_rate": 2.5490433669292337e-06, |
|
"logits/chosen": 0.5318483114242554, |
|
"logits/rejected": 1.7802917957305908, |
|
"logps/chosen": -83.16242218017578, |
|
"logps/rejected": -2065.672119140625, |
|
"loss": 0.0012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3906503915786743, |
|
"rewards/margins": 19.537628173828125, |
|
"rewards/rejected": -19.92827796936035, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.546918898670163, |
|
"grad_norm": 0.00081634521484375, |
|
"learning_rate": 2.527247523366232e-06, |
|
"logits/chosen": 0.55711829662323, |
|
"logits/rejected": 1.7834659814834595, |
|
"logps/chosen": -89.28174591064453, |
|
"logps/rejected": -1952.4468994140625, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.45531249046325684, |
|
"rewards/margins": 18.357410430908203, |
|
"rewards/rejected": -18.812725067138672, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.5494162452394331, |
|
"grad_norm": 0.0498046875, |
|
"learning_rate": 2.5054496082528336e-06, |
|
"logits/chosen": 0.6078628897666931, |
|
"logits/rejected": 1.8645546436309814, |
|
"logps/chosen": -78.72160339355469, |
|
"logps/rejected": -1901.3984375, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3558036983013153, |
|
"rewards/margins": 17.979564666748047, |
|
"rewards/rejected": -18.335365295410156, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.5519135918087033, |
|
"grad_norm": 0.030029296875, |
|
"learning_rate": 2.483651278821481e-06, |
|
"logits/chosen": 0.6357477903366089, |
|
"logits/rejected": 1.8168609142303467, |
|
"logps/chosen": -86.10234069824219, |
|
"logps/rejected": -1748.644775390625, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.43243059515953064, |
|
"rewards/margins": 16.351675033569336, |
|
"rewards/rejected": -16.78410530090332, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.5544109383779734, |
|
"grad_norm": 0.01177978515625, |
|
"learning_rate": 2.4618541923361166e-06, |
|
"logits/chosen": 0.6292850971221924, |
|
"logits/rejected": 1.7243268489837646, |
|
"logps/chosen": -83.60933685302734, |
|
"logps/rejected": -1522.750244140625, |
|
"loss": 0.0015, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.39073851704597473, |
|
"rewards/margins": 14.038189888000488, |
|
"rewards/rejected": -14.428926467895508, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.5569082849472435, |
|
"grad_norm": 0.00070953369140625, |
|
"learning_rate": 2.4400600059661836e-06, |
|
"logits/chosen": 0.5282065868377686, |
|
"logits/rejected": 1.8849893808364868, |
|
"logps/chosen": -86.94523620605469, |
|
"logps/rejected": -2129.426025390625, |
|
"loss": 0.0015, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.42179185152053833, |
|
"rewards/margins": 20.111438751220703, |
|
"rewards/rejected": -20.53322982788086, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.5594056315165137, |
|
"grad_norm": 0.008544921875, |
|
"learning_rate": 2.41827037666064e-06, |
|
"logits/chosen": 0.6786268353462219, |
|
"logits/rejected": 1.8249973058700562, |
|
"logps/chosen": -76.79341888427734, |
|
"logps/rejected": -1632.072265625, |
|
"loss": 0.0015, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.33309558033943176, |
|
"rewards/margins": 15.309873580932617, |
|
"rewards/rejected": -15.642970085144043, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.5619029780857838, |
|
"grad_norm": 0.018798828125, |
|
"learning_rate": 2.396486961021983e-06, |
|
"logits/chosen": 0.617296576499939, |
|
"logits/rejected": 1.860708236694336, |
|
"logps/chosen": -89.55140686035156, |
|
"logps/rejected": -1905.1920166015625, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4716108441352844, |
|
"rewards/margins": 17.87620735168457, |
|
"rewards/rejected": -18.347820281982422, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.564400324655054, |
|
"grad_norm": 0.00726318359375, |
|
"learning_rate": 2.3747114151802993e-06, |
|
"logits/chosen": 0.6001085638999939, |
|
"logits/rejected": 1.8411632776260376, |
|
"logps/chosen": -79.05329895019531, |
|
"logps/rejected": -1769.6693115234375, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3584744334220886, |
|
"rewards/margins": 16.644119262695312, |
|
"rewards/rejected": -17.002593994140625, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.5668976712243241, |
|
"grad_norm": 0.038330078125, |
|
"learning_rate": 2.352945394667363e-06, |
|
"logits/chosen": 0.5482415556907654, |
|
"logits/rejected": 1.7739299535751343, |
|
"logps/chosen": -88.25926971435547, |
|
"logps/rejected": -2113.56884765625, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.43419378995895386, |
|
"rewards/margins": 19.882293701171875, |
|
"rewards/rejected": -20.31648826599121, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.5693950177935944, |
|
"grad_norm": 0.06494140625, |
|
"learning_rate": 2.3311905542907627e-06, |
|
"logits/chosen": 0.6261372566223145, |
|
"logits/rejected": 1.787941336631775, |
|
"logps/chosen": -80.4935302734375, |
|
"logps/rejected": -1684.802001953125, |
|
"loss": 0.002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.378140389919281, |
|
"rewards/margins": 15.795267105102539, |
|
"rewards/rejected": -16.17340660095215, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.5718923643628645, |
|
"grad_norm": 0.033935546875, |
|
"learning_rate": 2.30944854800809e-06, |
|
"logits/chosen": 0.6286464929580688, |
|
"logits/rejected": 1.8051517009735107, |
|
"logps/chosen": -80.61420440673828, |
|
"logps/rejected": -1804.312744140625, |
|
"loss": 0.0019, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.37235134840011597, |
|
"rewards/margins": 16.959814071655273, |
|
"rewards/rejected": -17.332164764404297, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.5743897109321346, |
|
"grad_norm": 0.0019683837890625, |
|
"learning_rate": 2.287721028801204e-06, |
|
"logits/chosen": 0.5823894739151001, |
|
"logits/rejected": 1.7553184032440186, |
|
"logps/chosen": -89.6335678100586, |
|
"logps/rejected": -1704.480712890625, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.45537322759628296, |
|
"rewards/margins": 15.833767890930176, |
|
"rewards/rejected": -16.289142608642578, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.5768870575014048, |
|
"grad_norm": 0.021728515625, |
|
"learning_rate": 2.26600964855055e-06, |
|
"logits/chosen": 0.6238933205604553, |
|
"logits/rejected": 1.7979097366333008, |
|
"logps/chosen": -79.57666778564453, |
|
"logps/rejected": -1692.2340087890625, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3568623960018158, |
|
"rewards/margins": 15.8856201171875, |
|
"rewards/rejected": -16.242483139038086, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.5793844040706749, |
|
"grad_norm": 0.007476806640625, |
|
"learning_rate": 2.244316057909573e-06, |
|
"logits/chosen": 0.6190879344940186, |
|
"logits/rejected": 1.7797908782958984, |
|
"logps/chosen": -86.66450500488281, |
|
"logps/rejected": -1799.299560546875, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4307888448238373, |
|
"rewards/margins": 16.876020431518555, |
|
"rewards/rejected": -17.30681037902832, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.5818817506399451, |
|
"grad_norm": 0.005279541015625, |
|
"learning_rate": 2.2226419061792282e-06, |
|
"logits/chosen": 0.5849915742874146, |
|
"logits/rejected": 1.8162180185317993, |
|
"logps/chosen": -85.55912017822266, |
|
"logps/rejected": -1866.6416015625, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.42593854665756226, |
|
"rewards/margins": 17.541982650756836, |
|
"rewards/rejected": -17.96792221069336, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.5843790972092152, |
|
"grad_norm": 0.002044677734375, |
|
"learning_rate": 2.200988841182589e-06, |
|
"logits/chosen": 0.6237704157829285, |
|
"logits/rejected": 1.8964016437530518, |
|
"logps/chosen": -95.87744140625, |
|
"logps/rejected": -2077.869140625, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5327093005180359, |
|
"rewards/margins": 19.543682098388672, |
|
"rewards/rejected": -20.076391220092773, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.5868764437784854, |
|
"grad_norm": 0.00469970703125, |
|
"learning_rate": 2.179358509139559e-06, |
|
"logits/chosen": 0.6188510060310364, |
|
"logits/rejected": 1.7551387548446655, |
|
"logps/chosen": -82.06452941894531, |
|
"logps/rejected": -1564.6396484375, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.359652578830719, |
|
"rewards/margins": 14.480381965637207, |
|
"rewards/rejected": -14.840034484863281, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.5893737903477555, |
|
"grad_norm": 0.05322265625, |
|
"learning_rate": 2.1577525545417254e-06, |
|
"logits/chosen": 0.642662525177002, |
|
"logits/rejected": 1.8487951755523682, |
|
"logps/chosen": -85.49614715576172, |
|
"logps/rejected": -1861.0296630859375, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.41231250762939453, |
|
"rewards/margins": 17.494285583496094, |
|
"rewards/rejected": -17.906597137451172, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.5918711369170256, |
|
"grad_norm": 0.059326171875, |
|
"learning_rate": 2.1361726200273293e-06, |
|
"logits/chosen": 0.6013755202293396, |
|
"logits/rejected": 1.8614768981933594, |
|
"logps/chosen": -82.80476379394531, |
|
"logps/rejected": -1880.697021484375, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.38074877858161926, |
|
"rewards/margins": 17.66275978088379, |
|
"rewards/rejected": -18.043506622314453, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.5943684834862958, |
|
"grad_norm": 0.04638671875, |
|
"learning_rate": 2.1146203462563773e-06, |
|
"logits/chosen": 0.6672108769416809, |
|
"logits/rejected": 1.8760160207748413, |
|
"logps/chosen": -85.3284912109375, |
|
"logps/rejected": -1663.7691650390625, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.42937812209129333, |
|
"rewards/margins": 15.528106689453125, |
|
"rewards/rejected": -15.957483291625977, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.5968658300555659, |
|
"grad_norm": 0.037109375, |
|
"learning_rate": 2.0930973717859117e-06, |
|
"logits/chosen": 0.5693127512931824, |
|
"logits/rejected": 1.8059355020523071, |
|
"logps/chosen": -86.84693145751953, |
|
"logps/rejected": -1841.8323974609375, |
|
"loss": 0.0018, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4406910836696625, |
|
"rewards/margins": 17.286680221557617, |
|
"rewards/rejected": -17.727371215820312, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.5993631766248361, |
|
"grad_norm": 4.8160552978515625e-05, |
|
"learning_rate": 2.0716053329454337e-06, |
|
"logits/chosen": 0.633589506149292, |
|
"logits/rejected": 1.8425014019012451, |
|
"logps/chosen": -84.12596130371094, |
|
"logps/rejected": -1987.7154541015625, |
|
"loss": 0.0029, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.40977373719215393, |
|
"rewards/margins": 18.74795913696289, |
|
"rewards/rejected": -19.157733917236328, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.6018605231941062, |
|
"grad_norm": 0.020751953125, |
|
"learning_rate": 2.0501458637124963e-06, |
|
"logits/chosen": 0.6005128026008606, |
|
"logits/rejected": 1.9649635553359985, |
|
"logps/chosen": -89.92936706542969, |
|
"logps/rejected": -2303.25341796875, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.46929341554641724, |
|
"rewards/margins": 21.84885597229004, |
|
"rewards/rejected": -22.31814956665039, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.6043578697633765, |
|
"grad_norm": 0.0059814453125, |
|
"learning_rate": 2.0287205955884812e-06, |
|
"logits/chosen": 0.5659859776496887, |
|
"logits/rejected": 1.7156604528427124, |
|
"logps/chosen": -82.71956634521484, |
|
"logps/rejected": -1716.356689453125, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.37312421202659607, |
|
"rewards/margins": 15.841397285461426, |
|
"rewards/rejected": -16.214521408081055, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.6068552163326466, |
|
"grad_norm": 0.031494140625, |
|
"learning_rate": 2.0073311574745583e-06, |
|
"logits/chosen": 0.615592896938324, |
|
"logits/rejected": 1.8998111486434937, |
|
"logps/chosen": -83.0928726196289, |
|
"logps/rejected": -2058.5458984375, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3958067297935486, |
|
"rewards/margins": 19.46796226501465, |
|
"rewards/rejected": -19.863767623901367, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.6093525629019167, |
|
"grad_norm": 0.0108642578125, |
|
"learning_rate": 1.9859791755478453e-06, |
|
"logits/chosen": 0.612500786781311, |
|
"logits/rejected": 1.7525627613067627, |
|
"logps/chosen": -79.78617858886719, |
|
"logps/rejected": -1609.921142578125, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3665863871574402, |
|
"rewards/margins": 15.052395820617676, |
|
"rewards/rejected": -15.418981552124023, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.6118499094711869, |
|
"grad_norm": 0.05419921875, |
|
"learning_rate": 1.9646662731377737e-06, |
|
"logits/chosen": 0.6692113876342773, |
|
"logits/rejected": 1.816349983215332, |
|
"logps/chosen": -84.40538787841797, |
|
"logps/rejected": -1675.842041015625, |
|
"loss": 0.002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.40326443314552307, |
|
"rewards/margins": 15.622156143188477, |
|
"rewards/rejected": -16.025419235229492, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.614347256040457, |
|
"grad_norm": 0.00130462646484375, |
|
"learning_rate": 1.9433940706026743e-06, |
|
"logits/chosen": 0.5840574502944946, |
|
"logits/rejected": 1.8281028270721436, |
|
"logps/chosen": -86.7694320678711, |
|
"logps/rejected": -2069.530517578125, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4303639531135559, |
|
"rewards/margins": 19.5331974029541, |
|
"rewards/rejected": -19.963563919067383, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.6168446026097272, |
|
"grad_norm": 0.00012493133544921875, |
|
"learning_rate": 1.9221641852065807e-06, |
|
"logits/chosen": 0.6755739450454712, |
|
"logits/rejected": 1.845654845237732, |
|
"logps/chosen": -88.67765045166016, |
|
"logps/rejected": -1714.5185546875, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.46009987592697144, |
|
"rewards/margins": 15.997251510620117, |
|
"rewards/rejected": -16.457351684570312, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.6193419491789973, |
|
"grad_norm": 0.039794921875, |
|
"learning_rate": 1.9009782309962805e-06, |
|
"logits/chosen": 0.5677890181541443, |
|
"logits/rejected": 1.8127906322479248, |
|
"logps/chosen": -76.20381164550781, |
|
"logps/rejected": -1766.592529296875, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3160540759563446, |
|
"rewards/margins": 16.549949645996094, |
|
"rewards/rejected": -16.866003036499023, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.6218392957482675, |
|
"grad_norm": 0.037841796875, |
|
"learning_rate": 1.8798378186785979e-06, |
|
"logits/chosen": 0.6165963411331177, |
|
"logits/rejected": 1.7844451665878296, |
|
"logps/chosen": -80.53484344482422, |
|
"logps/rejected": -1799.0015869140625, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.37462836503982544, |
|
"rewards/margins": 16.907718658447266, |
|
"rewards/rejected": -17.282346725463867, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.6243366423175376, |
|
"grad_norm": 0.01007080078125, |
|
"learning_rate": 1.8587445554979404e-06, |
|
"logits/chosen": 0.6141692399978638, |
|
"logits/rejected": 1.8811423778533936, |
|
"logps/chosen": -87.24476623535156, |
|
"logps/rejected": -2009.0035400390625, |
|
"loss": 0.0019, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4570063054561615, |
|
"rewards/margins": 18.914798736572266, |
|
"rewards/rejected": -19.371807098388672, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.6268339888868077, |
|
"grad_norm": 0.00011968612670898438, |
|
"learning_rate": 1.8377000451141013e-06, |
|
"logits/chosen": 0.6391327977180481, |
|
"logits/rejected": 1.9281005859375, |
|
"logps/chosen": -86.39270782470703, |
|
"logps/rejected": -1954.652099609375, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4386569857597351, |
|
"rewards/margins": 18.369953155517578, |
|
"rewards/rejected": -18.808609008789062, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.6293313354560779, |
|
"grad_norm": 0.0277099609375, |
|
"learning_rate": 1.8167058874803405e-06, |
|
"logits/chosen": 0.5556064248085022, |
|
"logits/rejected": 1.718269944190979, |
|
"logps/chosen": -86.69864654541016, |
|
"logps/rejected": -1939.4332275390625, |
|
"loss": 0.0017, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4233566224575043, |
|
"rewards/margins": 18.154285430908203, |
|
"rewards/rejected": -18.5776424407959, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.631828682025348, |
|
"grad_norm": 0.0771484375, |
|
"learning_rate": 1.7957636787217451e-06, |
|
"logits/chosen": 0.5710119009017944, |
|
"logits/rejected": 1.7915983200073242, |
|
"logps/chosen": -79.21806335449219, |
|
"logps/rejected": -1942.8629150390625, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3619151711463928, |
|
"rewards/margins": 18.352802276611328, |
|
"rewards/rejected": -18.71471405029297, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.6343260285946182, |
|
"grad_norm": 0.0155029296875, |
|
"learning_rate": 1.7748750110138768e-06, |
|
"logits/chosen": 0.5197774171829224, |
|
"logits/rejected": 1.737969160079956, |
|
"logps/chosen": -88.46708679199219, |
|
"logps/rejected": -2104.29296875, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.42627063393592834, |
|
"rewards/margins": 19.79252052307129, |
|
"rewards/rejected": -20.21879005432129, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.6368233751638883, |
|
"grad_norm": 0.021484375, |
|
"learning_rate": 1.7540414724617282e-06, |
|
"logits/chosen": 0.5759893655776978, |
|
"logits/rejected": 1.7303228378295898, |
|
"logps/chosen": -76.38877868652344, |
|
"logps/rejected": -1828.7796630859375, |
|
"loss": 0.0012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.32031676173210144, |
|
"rewards/margins": 17.18129539489746, |
|
"rewards/rejected": -17.50161361694336, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.6393207217331586, |
|
"grad_norm": 0.0196533203125, |
|
"learning_rate": 1.7332646469789827e-06, |
|
"logits/chosen": 0.6225037574768066, |
|
"logits/rejected": 1.779985785484314, |
|
"logps/chosen": -85.09439086914062, |
|
"logps/rejected": -1516.5433349609375, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4056459069252014, |
|
"rewards/margins": 14.090934753417969, |
|
"rewards/rejected": -14.496580123901367, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.6418180683024287, |
|
"grad_norm": 0.036376953125, |
|
"learning_rate": 1.7125461141675881e-06, |
|
"logits/chosen": 0.643700122833252, |
|
"logits/rejected": 1.8465179204940796, |
|
"logps/chosen": -80.20843505859375, |
|
"logps/rejected": -1810.455078125, |
|
"loss": 0.0015, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3701106905937195, |
|
"rewards/margins": 16.961139678955078, |
|
"rewards/rejected": -17.331249237060547, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.6443154148716989, |
|
"grad_norm": 0.0289306640625, |
|
"learning_rate": 1.6918874491976744e-06, |
|
"logits/chosen": 0.5704053640365601, |
|
"logits/rejected": 1.6976230144500732, |
|
"logps/chosen": -84.80411529541016, |
|
"logps/rejected": -1765.397216796875, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4006038308143616, |
|
"rewards/margins": 16.490291595458984, |
|
"rewards/rejected": -16.89089584350586, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.646812761440969, |
|
"grad_norm": 0.000751495361328125, |
|
"learning_rate": 1.6712902226877917e-06, |
|
"logits/chosen": 0.6289549469947815, |
|
"logits/rejected": 1.850502610206604, |
|
"logps/chosen": -88.18513488769531, |
|
"logps/rejected": -1988.833984375, |
|
"loss": 0.0012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4497924745082855, |
|
"rewards/margins": 18.730804443359375, |
|
"rewards/rejected": -19.180593490600586, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.6493101080102391, |
|
"grad_norm": 0.0012664794921875, |
|
"learning_rate": 1.6507560005854977e-06, |
|
"logits/chosen": 0.5509642362594604, |
|
"logits/rejected": 1.7071406841278076, |
|
"logps/chosen": -83.9954833984375, |
|
"logps/rejected": -1830.5101318359375, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3968578577041626, |
|
"rewards/margins": 17.073257446289062, |
|
"rewards/rejected": -17.470115661621094, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.6518074545795093, |
|
"grad_norm": 0.008544921875, |
|
"learning_rate": 1.6302863440483121e-06, |
|
"logits/chosen": 0.5004338026046753, |
|
"logits/rejected": 1.7150554656982422, |
|
"logps/chosen": -82.43248748779297, |
|
"logps/rejected": -1880.423828125, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.36883944272994995, |
|
"rewards/margins": 17.624969482421875, |
|
"rewards/rejected": -17.99380874633789, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.6543048011487794, |
|
"grad_norm": 0.0341796875, |
|
"learning_rate": 1.6098828093250203e-06, |
|
"logits/chosen": 0.5160735845565796, |
|
"logits/rejected": 1.7385032176971436, |
|
"logps/chosen": -79.65149688720703, |
|
"logps/rejected": -2075.0966796875, |
|
"loss": 0.0017, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3529607653617859, |
|
"rewards/margins": 19.472675323486328, |
|
"rewards/rejected": -19.825634002685547, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.6568021477180496, |
|
"grad_norm": 0.027099609375, |
|
"learning_rate": 1.5895469476373545e-06, |
|
"logits/chosen": 0.5671316385269165, |
|
"logits/rejected": 1.722346305847168, |
|
"logps/chosen": -81.55863189697266, |
|
"logps/rejected": -1681.589111328125, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3755113184452057, |
|
"rewards/margins": 15.65942096710205, |
|
"rewards/rejected": -16.03493309020996, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.6592994942873197, |
|
"grad_norm": 0.091796875, |
|
"learning_rate": 1.5692803050620642e-06, |
|
"logits/chosen": 0.6067586541175842, |
|
"logits/rejected": 1.7199970483779907, |
|
"logps/chosen": -83.18370056152344, |
|
"logps/rejected": -1680.830322265625, |
|
"loss": 0.0012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.38230761885643005, |
|
"rewards/margins": 15.612162590026855, |
|
"rewards/rejected": -15.994470596313477, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.6617968408565899, |
|
"grad_norm": 0.007720947265625, |
|
"learning_rate": 1.5490844224133717e-06, |
|
"logits/chosen": 0.6019744873046875, |
|
"logits/rejected": 1.8558555841445923, |
|
"logps/chosen": -80.30780792236328, |
|
"logps/rejected": -1929.556640625, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.37255150079727173, |
|
"rewards/margins": 18.213796615600586, |
|
"rewards/rejected": -18.586347579956055, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.66429418742586, |
|
"grad_norm": 0.050537109375, |
|
"learning_rate": 1.528960835125822e-06, |
|
"logits/chosen": 0.6771095991134644, |
|
"logits/rejected": 1.8372013568878174, |
|
"logps/chosen": -80.95626068115234, |
|
"logps/rejected": -1653.2200927734375, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3823738992214203, |
|
"rewards/margins": 15.478759765625, |
|
"rewards/rejected": -15.86113452911377, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.6667915339951301, |
|
"grad_norm": 0.0147705078125, |
|
"learning_rate": 1.5089110731375568e-06, |
|
"logits/chosen": 0.581728994846344, |
|
"logits/rejected": 1.7974720001220703, |
|
"logps/chosen": -79.06498718261719, |
|
"logps/rejected": -1824.8765869140625, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.35732579231262207, |
|
"rewards/margins": 17.178985595703125, |
|
"rewards/rejected": -17.53631019592285, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.6692888805644003, |
|
"grad_norm": 0.032470703125, |
|
"learning_rate": 1.4889366607739925e-06, |
|
"logits/chosen": 0.6092284917831421, |
|
"logits/rejected": 1.6554501056671143, |
|
"logps/chosen": -78.19693756103516, |
|
"logps/rejected": -1454.85107421875, |
|
"loss": 0.0015, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3566651940345764, |
|
"rewards/margins": 13.518289566040039, |
|
"rewards/rejected": -13.874954223632812, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.6717862271336704, |
|
"grad_norm": 0.017578125, |
|
"learning_rate": 1.4690391166319307e-06, |
|
"logits/chosen": 0.5935393571853638, |
|
"logits/rejected": 1.761783242225647, |
|
"logps/chosen": -85.10191345214844, |
|
"logps/rejected": -1834.072021484375, |
|
"loss": 0.0012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4155469536781311, |
|
"rewards/margins": 17.14066505432129, |
|
"rewards/rejected": -17.556209564208984, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.6742835737029407, |
|
"grad_norm": 0.036376953125, |
|
"learning_rate": 1.4492199534641055e-06, |
|
"logits/chosen": 0.6022766828536987, |
|
"logits/rejected": 1.8546326160430908, |
|
"logps/chosen": -84.67176055908203, |
|
"logps/rejected": -1870.0103759765625, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.41111135482788086, |
|
"rewards/margins": 17.593767166137695, |
|
"rewards/rejected": -18.0048770904541, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.6767809202722108, |
|
"grad_norm": 0.001617431640625, |
|
"learning_rate": 1.429480678064174e-06, |
|
"logits/chosen": 0.4885890483856201, |
|
"logits/rejected": 1.7658706903457642, |
|
"logps/chosen": -85.19550323486328, |
|
"logps/rejected": -2298.07373046875, |
|
"loss": 0.0005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4054490923881531, |
|
"rewards/margins": 21.81135368347168, |
|
"rewards/rejected": -22.2168025970459, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.679278266841481, |
|
"grad_norm": 0.021484375, |
|
"learning_rate": 1.4098227911521523e-06, |
|
"logits/chosen": 0.6109157204627991, |
|
"logits/rejected": 1.8104369640350342, |
|
"logps/chosen": -92.25324249267578, |
|
"logps/rejected": -1919.9534912109375, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4862859845161438, |
|
"rewards/margins": 18.005590438842773, |
|
"rewards/rejected": -18.49187660217285, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.6817756134107511, |
|
"grad_norm": 0.0732421875, |
|
"learning_rate": 1.3902477872603295e-06, |
|
"logits/chosen": 0.6768110990524292, |
|
"logits/rejected": 1.7166074514389038, |
|
"logps/chosen": -80.58265686035156, |
|
"logps/rejected": -1554.531982421875, |
|
"loss": 0.0024, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.36700159311294556, |
|
"rewards/margins": 14.375741958618164, |
|
"rewards/rejected": -14.742744445800781, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.6842729599800212, |
|
"grad_norm": 0.0478515625, |
|
"learning_rate": 1.370757154619638e-06, |
|
"logits/chosen": 0.5914765000343323, |
|
"logits/rejected": 1.823325753211975, |
|
"logps/chosen": -86.78914642333984, |
|
"logps/rejected": -1906.5355224609375, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.42601895332336426, |
|
"rewards/margins": 17.833972930908203, |
|
"rewards/rejected": -18.259990692138672, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.6867703065492914, |
|
"grad_norm": 0.005523681640625, |
|
"learning_rate": 1.3513523750465049e-06, |
|
"logits/chosen": 0.5616365075111389, |
|
"logits/rejected": 1.7267478704452515, |
|
"logps/chosen": -81.25291442871094, |
|
"logps/rejected": -1686.3486328125, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.36524245142936707, |
|
"rewards/margins": 15.707315444946289, |
|
"rewards/rejected": -16.07255744934082, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.6892676531185615, |
|
"grad_norm": 0.00193023681640625, |
|
"learning_rate": 1.332034923830199e-06, |
|
"logits/chosen": 0.5695074200630188, |
|
"logits/rejected": 1.8329308032989502, |
|
"logps/chosen": -82.82709503173828, |
|
"logps/rejected": -1805.4775390625, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.39727023243904114, |
|
"rewards/margins": 16.94883918762207, |
|
"rewards/rejected": -17.346107482910156, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.6917649996878317, |
|
"grad_norm": 0.040771484375, |
|
"learning_rate": 1.31280626962067e-06, |
|
"logits/chosen": 0.6029590368270874, |
|
"logits/rejected": 1.6939897537231445, |
|
"logps/chosen": -86.7250747680664, |
|
"logps/rejected": -1587.8193359375, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.42630377411842346, |
|
"rewards/margins": 14.680148124694824, |
|
"rewards/rejected": -15.106452941894531, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.6942623462571018, |
|
"grad_norm": 0.0003566741943359375, |
|
"learning_rate": 1.2936678743168813e-06, |
|
"logits/chosen": 0.5795254707336426, |
|
"logits/rejected": 1.7682584524154663, |
|
"logps/chosen": -83.47227478027344, |
|
"logps/rejected": -1894.138916015625, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.39181336760520935, |
|
"rewards/margins": 17.82851791381836, |
|
"rewards/rejected": -18.22032928466797, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.696759692826372, |
|
"grad_norm": 0.01373291015625, |
|
"learning_rate": 1.2746211929556777e-06, |
|
"logits/chosen": 0.5124091506004333, |
|
"logits/rejected": 2.0397300720214844, |
|
"logps/chosen": -85.71356201171875, |
|
"logps/rejected": -2490.38232421875, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.42116060853004456, |
|
"rewards/margins": 23.745744705200195, |
|
"rewards/rejected": -24.16690444946289, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.6992570393956421, |
|
"grad_norm": 1.30385160446167e-08, |
|
"learning_rate": 1.2556676736011558e-06, |
|
"logits/chosen": 0.6134932637214661, |
|
"logits/rejected": 1.816425085067749, |
|
"logps/chosen": -85.68560791015625, |
|
"logps/rejected": -1998.1363525390625, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.43286705017089844, |
|
"rewards/margins": 18.802690505981445, |
|
"rewards/rejected": -19.235559463500977, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.7017543859649122, |
|
"grad_norm": 0.0089111328125, |
|
"learning_rate": 1.2368087572345772e-06, |
|
"logits/chosen": 0.6667296886444092, |
|
"logits/rejected": 1.7410768270492554, |
|
"logps/chosen": -84.29058837890625, |
|
"logps/rejected": -1482.312255859375, |
|
"loss": 0.0019, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4156969487667084, |
|
"rewards/margins": 13.738250732421875, |
|
"rewards/rejected": -14.153947830200195, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.7042517325341824, |
|
"grad_norm": 0.0927734375, |
|
"learning_rate": 1.2180458776448067e-06, |
|
"logits/chosen": 0.5982272028923035, |
|
"logits/rejected": 1.7856439352035522, |
|
"logps/chosen": -89.98011016845703, |
|
"logps/rejected": -1943.0396728515625, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4640469551086426, |
|
"rewards/margins": 18.235332489013672, |
|
"rewards/rejected": -18.699377059936523, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.7067490791034525, |
|
"grad_norm": 0.02392578125, |
|
"learning_rate": 1.1993804613193158e-06, |
|
"logits/chosen": 0.6234604120254517, |
|
"logits/rejected": 1.765428900718689, |
|
"logps/chosen": -87.09599304199219, |
|
"logps/rejected": -1579.3837890625, |
|
"loss": 0.0012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.427617609500885, |
|
"rewards/margins": 14.572957038879395, |
|
"rewards/rejected": -15.000572204589844, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.7092464256727228, |
|
"grad_norm": 3.7401914596557617e-06, |
|
"learning_rate": 1.1808139273357232e-06, |
|
"logits/chosen": 0.5544342398643494, |
|
"logits/rejected": 1.7727603912353516, |
|
"logps/chosen": -83.8676528930664, |
|
"logps/rejected": -1906.515380859375, |
|
"loss": 0.0018, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3836463987827301, |
|
"rewards/margins": 17.823671340942383, |
|
"rewards/rejected": -18.207317352294922, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.7117437722419929, |
|
"grad_norm": 5.5789947509765625e-05, |
|
"learning_rate": 1.1623476872539108e-06, |
|
"logits/chosen": 0.5153034925460815, |
|
"logits/rejected": 1.8462998867034912, |
|
"logps/chosen": -94.67253112792969, |
|
"logps/rejected": -2197.599609375, |
|
"loss": 0.0012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5202997326850891, |
|
"rewards/margins": 20.727157592773438, |
|
"rewards/rejected": -21.247455596923828, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.7142411188112631, |
|
"grad_norm": 0.042724609375, |
|
"learning_rate": 1.1439831450087032e-06, |
|
"logits/chosen": 0.580392062664032, |
|
"logits/rejected": 1.876275658607483, |
|
"logps/chosen": -87.93695068359375, |
|
"logps/rejected": -2129.860107421875, |
|
"loss": 0.002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4418310225009918, |
|
"rewards/margins": 20.121156692504883, |
|
"rewards/rejected": -20.56298828125, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.7167384653805332, |
|
"grad_norm": 0.0458984375, |
|
"learning_rate": 1.1257216968031357e-06, |
|
"logits/chosen": 0.6597843170166016, |
|
"logits/rejected": 1.8998768329620361, |
|
"logps/chosen": -80.04630279541016, |
|
"logps/rejected": -1752.5791015625, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.37010782957077026, |
|
"rewards/margins": 16.46237564086914, |
|
"rewards/rejected": -16.832483291625977, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.7192358119498033, |
|
"grad_norm": 0.00104522705078125, |
|
"learning_rate": 1.1075647310022974e-06, |
|
"logits/chosen": 0.634041965007782, |
|
"logits/rejected": 1.8106848001480103, |
|
"logps/chosen": -78.93439483642578, |
|
"logps/rejected": -1525.8900146484375, |
|
"loss": 0.0012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.36263298988342285, |
|
"rewards/margins": 14.232080459594727, |
|
"rewards/rejected": -14.594714164733887, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.7217331585190735, |
|
"grad_norm": 0.002899169921875, |
|
"learning_rate": 1.0895136280277863e-06, |
|
"logits/chosen": 0.5515082478523254, |
|
"logits/rejected": 1.7851063013076782, |
|
"logps/chosen": -87.03413391113281, |
|
"logps/rejected": -2093.734375, |
|
"loss": 0.0012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4326532781124115, |
|
"rewards/margins": 19.687620162963867, |
|
"rewards/rejected": -20.120275497436523, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.7242305050883436, |
|
"grad_norm": 0.060546875, |
|
"learning_rate": 1.0715697602527542e-06, |
|
"logits/chosen": 0.5289216041564941, |
|
"logits/rejected": 1.7743902206420898, |
|
"logps/chosen": -85.22486114501953, |
|
"logps/rejected": -1992.9351806640625, |
|
"loss": 0.0012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4111485481262207, |
|
"rewards/margins": 18.597354888916016, |
|
"rewards/rejected": -19.00850486755371, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.7267278516576138, |
|
"grad_norm": 0.04248046875, |
|
"learning_rate": 1.0537344918975708e-06, |
|
"logits/chosen": 0.654784083366394, |
|
"logits/rejected": 1.7333883047103882, |
|
"logps/chosen": -85.55310821533203, |
|
"logps/rejected": -1545.492919921875, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4073718190193176, |
|
"rewards/margins": 14.238784790039062, |
|
"rewards/rejected": -14.646156311035156, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.7292251982268839, |
|
"grad_norm": 1.5079975128173828e-05, |
|
"learning_rate": 1.036009178926107e-06, |
|
"logits/chosen": 0.570530891418457, |
|
"logits/rejected": 1.8232314586639404, |
|
"logps/chosen": -87.81031799316406, |
|
"logps/rejected": -1891.7252197265625, |
|
"loss": 0.0012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.44701051712036133, |
|
"rewards/margins": 17.766794204711914, |
|
"rewards/rejected": -18.213804244995117, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.7317225447961541, |
|
"grad_norm": 0.016357421875, |
|
"learning_rate": 1.0183951689426438e-06, |
|
"logits/chosen": 0.5162047147750854, |
|
"logits/rejected": 1.80562424659729, |
|
"logps/chosen": -78.40940856933594, |
|
"logps/rejected": -2212.948974609375, |
|
"loss": 0.0017, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.33928531408309937, |
|
"rewards/margins": 21.02674674987793, |
|
"rewards/rejected": -21.366031646728516, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.7342198913654242, |
|
"grad_norm": 0.01409912109375, |
|
"learning_rate": 1.0008938010894156e-06, |
|
"logits/chosen": 0.5077947974205017, |
|
"logits/rejected": 1.8344638347625732, |
|
"logps/chosen": -81.39566802978516, |
|
"logps/rejected": -2270.706298828125, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3777496814727783, |
|
"rewards/margins": 21.60778045654297, |
|
"rewards/rejected": -21.98552894592285, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.7367172379346943, |
|
"grad_norm": 0.0198974609375, |
|
"learning_rate": 9.83506405944804e-07, |
|
"logits/chosen": 0.5673650503158569, |
|
"logits/rejected": 1.745111107826233, |
|
"logps/chosen": -77.10914611816406, |
|
"logps/rejected": -1838.6285400390625, |
|
"loss": 0.0015, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.34020406007766724, |
|
"rewards/margins": 17.217037200927734, |
|
"rewards/rejected": -17.55724334716797, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.7392145845039645, |
|
"grad_norm": 0.00946044921875, |
|
"learning_rate": 9.662343054221743e-07, |
|
"logits/chosen": 0.5164293050765991, |
|
"logits/rejected": 1.726947546005249, |
|
"logps/chosen": -88.59376525878906, |
|
"logps/rejected": -2064.08642578125, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4376547336578369, |
|
"rewards/margins": 19.29999351501465, |
|
"rewards/rejected": -19.73764991760254, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.7417119310732347, |
|
"grad_norm": 0.0595703125, |
|
"learning_rate": 9.490788126693754e-07, |
|
"logits/chosen": 0.6247397661209106, |
|
"logits/rejected": 1.8680105209350586, |
|
"logps/chosen": -86.50829315185547, |
|
"logps/rejected": -1925.3023681640625, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4314423203468323, |
|
"rewards/margins": 17.977245330810547, |
|
"rewards/rejected": -18.408687591552734, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.7442092776425049, |
|
"grad_norm": 0.0030975341796875, |
|
"learning_rate": 9.32041231968904e-07, |
|
"logits/chosen": 0.582064151763916, |
|
"logits/rejected": 1.8307263851165771, |
|
"logps/chosen": -87.89469909667969, |
|
"logps/rejected": -2049.106689453125, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4329482614994049, |
|
"rewards/margins": 19.329082489013672, |
|
"rewards/rejected": -19.762033462524414, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.746706624211775, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 9.151228586387464e-07, |
|
"logits/chosen": 0.6141242384910583, |
|
"logits/rejected": 1.747831106185913, |
|
"logps/chosen": -80.1594467163086, |
|
"logps/rejected": -1789.0576171875, |
|
"loss": 0.003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.36252784729003906, |
|
"rewards/margins": 16.812475204467773, |
|
"rewards/rejected": -17.175004959106445, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.7492039707810452, |
|
"grad_norm": 0.06494140625, |
|
"learning_rate": 8.983249789338941e-07, |
|
"logits/chosen": 0.6428495645523071, |
|
"logits/rejected": 1.7919700145721436, |
|
"logps/chosen": -82.71188354492188, |
|
"logps/rejected": -1678.5035400390625, |
|
"loss": 0.0019, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3992861807346344, |
|
"rewards/margins": 15.69371509552002, |
|
"rewards/rejected": -16.092998504638672, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.7492039707810452, |
|
"eval_logits/chosen": 0.656849205493927, |
|
"eval_logits/rejected": 1.5703133344650269, |
|
"eval_logps/chosen": -84.02084350585938, |
|
"eval_logps/rejected": -995.490234375, |
|
"eval_loss": 0.0028192740865051746, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -0.39664965867996216, |
|
"eval_rewards/margins": 8.947355270385742, |
|
"eval_rewards/rejected": -9.34400463104248, |
|
"eval_runtime": 0.621, |
|
"eval_samples_per_second": 8.052, |
|
"eval_steps_per_second": 8.052, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.7517013173503153, |
|
"grad_norm": 0.0205078125, |
|
"learning_rate": 8.816488699485593e-07, |
|
"logits/chosen": 0.634880006313324, |
|
"logits/rejected": 1.8458068370819092, |
|
"logps/chosen": -89.79926300048828, |
|
"logps/rejected": -1852.7974853515625, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4646337628364563, |
|
"rewards/margins": 17.353429794311523, |
|
"rewards/rejected": -17.81806182861328, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.7541986639195855, |
|
"grad_norm": 4.553794860839844e-05, |
|
"learning_rate": 8.650957995190784e-07, |
|
"logits/chosen": 0.5151122212409973, |
|
"logits/rejected": 1.7481235265731812, |
|
"logps/chosen": -79.8306884765625, |
|
"logps/rejected": -2110.71923828125, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3657647669315338, |
|
"rewards/margins": 20.01117706298828, |
|
"rewards/rejected": -20.37693977355957, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.7566960104888556, |
|
"grad_norm": 0.045166015625, |
|
"learning_rate": 8.486670261275193e-07, |
|
"logits/chosen": 0.6202859878540039, |
|
"logits/rejected": 1.8134170770645142, |
|
"logps/chosen": -84.73997497558594, |
|
"logps/rejected": -1783.783203125, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.42085719108581543, |
|
"rewards/margins": 16.739408493041992, |
|
"rewards/rejected": -17.160266876220703, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.7591933570581257, |
|
"grad_norm": 0.0111083984375, |
|
"learning_rate": 8.32363798806011e-07, |
|
"logits/chosen": 0.5721080303192139, |
|
"logits/rejected": 1.739031195640564, |
|
"logps/chosen": -85.13414764404297, |
|
"logps/rejected": -1891.197509765625, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.42727169394493103, |
|
"rewards/margins": 17.766677856445312, |
|
"rewards/rejected": -18.193950653076172, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.7616907036273959, |
|
"grad_norm": 0.0208740234375, |
|
"learning_rate": 8.161873570417742e-07, |
|
"logits/chosen": 0.5966504812240601, |
|
"logits/rejected": 1.8666013479232788, |
|
"logps/chosen": -89.76878356933594, |
|
"logps/rejected": -1943.291015625, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4609198570251465, |
|
"rewards/margins": 18.261484146118164, |
|
"rewards/rejected": -18.722402572631836, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.764188050196666, |
|
"grad_norm": 0.0098876953125, |
|
"learning_rate": 8.001389306828897e-07, |
|
"logits/chosen": 0.4526674151420593, |
|
"logits/rejected": 1.7495372295379639, |
|
"logps/chosen": -82.58873748779297, |
|
"logps/rejected": -2175.00244140625, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3694322407245636, |
|
"rewards/margins": 20.39395523071289, |
|
"rewards/rejected": -20.763385772705078, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.7666853967659362, |
|
"grad_norm": 1.7508864402770996e-06, |
|
"learning_rate": 7.842197398447993e-07, |
|
"logits/chosen": 0.5828143358230591, |
|
"logits/rejected": 1.8392832279205322, |
|
"logps/chosen": -79.72120666503906, |
|
"logps/rejected": -1959.4478759765625, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3589129149913788, |
|
"rewards/margins": 18.44549560546875, |
|
"rewards/rejected": -18.804407119750977, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.7691827433352063, |
|
"grad_norm": 0.00104522705078125, |
|
"learning_rate": 7.684309948175414e-07, |
|
"logits/chosen": 0.5747276544570923, |
|
"logits/rejected": 1.7614377737045288, |
|
"logps/chosen": -83.3620834350586, |
|
"logps/rejected": -1877.7086181640625, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4084179401397705, |
|
"rewards/margins": 17.644577026367188, |
|
"rewards/rejected": -18.052997589111328, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.7716800899044765, |
|
"grad_norm": 0.00160980224609375, |
|
"learning_rate": 7.527738959737371e-07, |
|
"logits/chosen": 0.536163330078125, |
|
"logits/rejected": 1.8368165493011475, |
|
"logps/chosen": -81.3559799194336, |
|
"logps/rejected": -1827.6383056640625, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3685862720012665, |
|
"rewards/margins": 17.127788543701172, |
|
"rewards/rejected": -17.496374130249023, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.7741774364737466, |
|
"grad_norm": 0.061279296875, |
|
"learning_rate": 7.372496336773269e-07, |
|
"logits/chosen": 0.6259430050849915, |
|
"logits/rejected": 1.7605386972427368, |
|
"logps/chosen": -82.03521728515625, |
|
"logps/rejected": -1697.513916015625, |
|
"loss": 0.0024, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3883189260959625, |
|
"rewards/margins": 15.871994018554688, |
|
"rewards/rejected": -16.260311126708984, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.7766747830430168, |
|
"grad_norm": 0.0439453125, |
|
"learning_rate": 7.218593881930744e-07, |
|
"logits/chosen": 0.6127210259437561, |
|
"logits/rejected": 1.7632982730865479, |
|
"logps/chosen": -77.52657318115234, |
|
"logps/rejected": -1801.5595703125, |
|
"loss": 0.0017, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3411308526992798, |
|
"rewards/margins": 16.98233985900879, |
|
"rewards/rejected": -17.323471069335938, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.779172129612287, |
|
"grad_norm": 0.017333984375, |
|
"learning_rate": 7.066043295968342e-07, |
|
"logits/chosen": 0.5858328938484192, |
|
"logits/rejected": 1.7214057445526123, |
|
"logps/chosen": -82.28968048095703, |
|
"logps/rejected": -1686.96875, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.377600759267807, |
|
"rewards/margins": 15.575152397155762, |
|
"rewards/rejected": -15.952753067016602, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.7816694761815571, |
|
"grad_norm": 0.00028228759765625, |
|
"learning_rate": 6.914856176865891e-07, |
|
"logits/chosen": 0.5658802390098572, |
|
"logits/rejected": 1.7670371532440186, |
|
"logps/chosen": -78.00981140136719, |
|
"logps/rejected": -1716.184814453125, |
|
"loss": 0.0012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.34367048740386963, |
|
"rewards/margins": 16.066198348999023, |
|
"rewards/rejected": -16.409870147705078, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.7841668227508273, |
|
"grad_norm": 6.198883056640625e-05, |
|
"learning_rate": 6.765044018942804e-07, |
|
"logits/chosen": 0.6243360042572021, |
|
"logits/rejected": 1.8233163356781006, |
|
"logps/chosen": -77.52008819580078, |
|
"logps/rejected": -1712.4619140625, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3422839641571045, |
|
"rewards/margins": 16.079269409179688, |
|
"rewards/rejected": -16.421554565429688, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.7866641693200974, |
|
"grad_norm": 0.134765625, |
|
"learning_rate": 6.616618211984169e-07, |
|
"logits/chosen": 0.617003321647644, |
|
"logits/rejected": 1.855446219444275, |
|
"logps/chosen": -81.59101104736328, |
|
"logps/rejected": -1846.211669921875, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.37706810235977173, |
|
"rewards/margins": 17.38241958618164, |
|
"rewards/rejected": -17.75948715209961, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.7891615158893676, |
|
"grad_norm": 0.04833984375, |
|
"learning_rate": 6.469590040374799e-07, |
|
"logits/chosen": 0.5514385104179382, |
|
"logits/rejected": 1.7037725448608398, |
|
"logps/chosen": -92.5042724609375, |
|
"logps/rejected": -1909.711181640625, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4816574156284332, |
|
"rewards/margins": 17.822921752929688, |
|
"rewards/rejected": -18.304576873779297, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.7916588624586377, |
|
"grad_norm": 0.03173828125, |
|
"learning_rate": 6.32397068224136e-07, |
|
"logits/chosen": 0.528624415397644, |
|
"logits/rejected": 1.6811710596084595, |
|
"logps/chosen": -89.05570983886719, |
|
"logps/rejected": -1862.1185302734375, |
|
"loss": 0.002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.44779324531555176, |
|
"rewards/margins": 17.432941436767578, |
|
"rewards/rejected": -17.880733489990234, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.7941562090279078, |
|
"grad_norm": 0.04052734375, |
|
"learning_rate": 6.17977120860249e-07, |
|
"logits/chosen": 0.5938631296157837, |
|
"logits/rejected": 1.7992160320281982, |
|
"logps/chosen": -80.42100524902344, |
|
"logps/rejected": -1828.5582275390625, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.37270471453666687, |
|
"rewards/margins": 17.214147567749023, |
|
"rewards/rejected": -17.586851119995117, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.796653555597178, |
|
"grad_norm": 0.07177734375, |
|
"learning_rate": 6.037002582527121e-07, |
|
"logits/chosen": 0.6156030893325806, |
|
"logits/rejected": 1.7690448760986328, |
|
"logps/chosen": -83.84468078613281, |
|
"logps/rejected": -1731.0732421875, |
|
"loss": 0.002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4016353189945221, |
|
"rewards/margins": 16.079975128173828, |
|
"rewards/rejected": -16.481611251831055, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.7991509021664481, |
|
"grad_norm": 0.03759765625, |
|
"learning_rate": 5.895675658300981e-07, |
|
"logits/chosen": 0.6333300471305847, |
|
"logits/rejected": 1.8136202096939087, |
|
"logps/chosen": -79.72032165527344, |
|
"logps/rejected": -1559.860595703125, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.37166255712509155, |
|
"rewards/margins": 14.575735092163086, |
|
"rewards/rejected": -14.947400093078613, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.8016482487357183, |
|
"grad_norm": 0.032470703125, |
|
"learning_rate": 5.755801180601381e-07, |
|
"logits/chosen": 0.5778881907463074, |
|
"logits/rejected": 1.754577875137329, |
|
"logps/chosen": -85.21940612792969, |
|
"logps/rejected": -1787.017333984375, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.40619927644729614, |
|
"rewards/margins": 16.748323440551758, |
|
"rewards/rejected": -17.154521942138672, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.8041455953049884, |
|
"grad_norm": 0.050537109375, |
|
"learning_rate": 5.617389783680307e-07, |
|
"logits/chosen": 0.5147963762283325, |
|
"logits/rejected": 1.858233094215393, |
|
"logps/chosen": -85.53825378417969, |
|
"logps/rejected": -2189.0078125, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.41369113326072693, |
|
"rewards/margins": 20.725154876708984, |
|
"rewards/rejected": -21.13884925842285, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.8066429418742586, |
|
"grad_norm": 0.035888671875, |
|
"learning_rate": 5.48045199055596e-07, |
|
"logits/chosen": 0.6537925004959106, |
|
"logits/rejected": 1.8616943359375, |
|
"logps/chosen": -81.87962341308594, |
|
"logps/rejected": -1831.1129150390625, |
|
"loss": 0.0024, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3843618333339691, |
|
"rewards/margins": 17.22643280029297, |
|
"rewards/rejected": -17.610795974731445, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.8091402884435287, |
|
"grad_norm": 0.02197265625, |
|
"learning_rate": 5.344998212212704e-07, |
|
"logits/chosen": 0.5282970070838928, |
|
"logits/rejected": 1.810681939125061, |
|
"logps/chosen": -85.24520111083984, |
|
"logps/rejected": -2183.88037109375, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4112313389778137, |
|
"rewards/margins": 20.611225128173828, |
|
"rewards/rejected": -21.022457122802734, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.811637635012799, |
|
"grad_norm": 0.0023956298828125, |
|
"learning_rate": 5.211038746809551e-07, |
|
"logits/chosen": 0.6539278030395508, |
|
"logits/rejected": 1.8353042602539062, |
|
"logps/chosen": -83.72335815429688, |
|
"logps/rejected": -1798.7239990234375, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4129239618778229, |
|
"rewards/margins": 16.881694793701172, |
|
"rewards/rejected": -17.294618606567383, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.8141349815820691, |
|
"grad_norm": 0.0294189453125, |
|
"learning_rate": 5.078583778897216e-07, |
|
"logits/chosen": 0.6602455377578735, |
|
"logits/rejected": 1.7690246105194092, |
|
"logps/chosen": -93.29869079589844, |
|
"logps/rejected": -1809.647216796875, |
|
"loss": 0.0017, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4940834641456604, |
|
"rewards/margins": 16.90252113342285, |
|
"rewards/rejected": -17.396602630615234, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.8166323281513392, |
|
"grad_norm": 0.0830078125, |
|
"learning_rate": 4.94764337864384e-07, |
|
"logits/chosen": 0.6102297902107239, |
|
"logits/rejected": 1.7652736902236938, |
|
"logps/chosen": -81.92073059082031, |
|
"logps/rejected": -1667.0634765625, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.38310879468917847, |
|
"rewards/margins": 15.526937484741211, |
|
"rewards/rejected": -15.910046577453613, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.8191296747206094, |
|
"grad_norm": 0.04150390625, |
|
"learning_rate": 4.818227501069328e-07, |
|
"logits/chosen": 0.5220754742622375, |
|
"logits/rejected": 1.9412825107574463, |
|
"logps/chosen": -81.39790344238281, |
|
"logps/rejected": -2294.091064453125, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3824032247066498, |
|
"rewards/margins": 21.823511123657227, |
|
"rewards/rejected": -22.205913543701172, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.8216270212898795, |
|
"grad_norm": 0.025146484375, |
|
"learning_rate": 4.690345985288572e-07, |
|
"logits/chosen": 0.5984300971031189, |
|
"logits/rejected": 1.787674903869629, |
|
"logps/chosen": -85.31007385253906, |
|
"logps/rejected": -1911.9349365234375, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.41559547185897827, |
|
"rewards/margins": 17.892559051513672, |
|
"rewards/rejected": -18.308155059814453, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.8241243678591497, |
|
"grad_norm": 8.791685104370117e-07, |
|
"learning_rate": 4.5640085537633633e-07, |
|
"logits/chosen": 0.5342472791671753, |
|
"logits/rejected": 1.8117504119873047, |
|
"logps/chosen": -77.17405700683594, |
|
"logps/rejected": -2120.84912109375, |
|
"loss": 0.0005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.33397001028060913, |
|
"rewards/margins": 20.135189056396484, |
|
"rewards/rejected": -20.469158172607422, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.8266217144284198, |
|
"grad_norm": 7.581710815429688e-05, |
|
"learning_rate": 4.439224811563211e-07, |
|
"logits/chosen": 0.5074091553688049, |
|
"logits/rejected": 1.69931960105896, |
|
"logps/chosen": -87.99649047851562, |
|
"logps/rejected": -1984.584228515625, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.442207008600235, |
|
"rewards/margins": 18.59341049194336, |
|
"rewards/rejected": -19.03561782836914, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.82911906099769, |
|
"grad_norm": 0.00012111663818359375, |
|
"learning_rate": 4.316004245635158e-07, |
|
"logits/chosen": 0.533842921257019, |
|
"logits/rejected": 1.7812063694000244, |
|
"logps/chosen": -89.41615295410156, |
|
"logps/rejected": -2132.177734375, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4604712426662445, |
|
"rewards/margins": 20.115650177001953, |
|
"rewards/rejected": -20.576122283935547, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.8316164075669601, |
|
"grad_norm": 6.8247318267822266e-06, |
|
"learning_rate": 4.194356224082455e-07, |
|
"logits/chosen": 0.4998435378074646, |
|
"logits/rejected": 1.818884253501892, |
|
"logps/chosen": -89.97554779052734, |
|
"logps/rejected": -2246.93017578125, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4607468247413635, |
|
"rewards/margins": 21.1903133392334, |
|
"rewards/rejected": -21.651060104370117, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.8341137541362302, |
|
"grad_norm": 0.0008697509765625, |
|
"learning_rate": 4.074289995452338e-07, |
|
"logits/chosen": 0.663809597492218, |
|
"logits/rejected": 1.8902143239974976, |
|
"logps/chosen": -79.80634307861328, |
|
"logps/rejected": -1863.5374755859375, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3643765449523926, |
|
"rewards/margins": 17.57371711730957, |
|
"rewards/rejected": -17.938095092773438, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.8366111007055004, |
|
"grad_norm": 9.715557098388672e-06, |
|
"learning_rate": 3.9558146880329246e-07, |
|
"logits/chosen": 0.5806099772453308, |
|
"logits/rejected": 1.7180675268173218, |
|
"logps/chosen": -88.5436782836914, |
|
"logps/rejected": -1847.456298828125, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.44766122102737427, |
|
"rewards/margins": 17.159259796142578, |
|
"rewards/rejected": -17.606922149658203, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.8391084472747705, |
|
"grad_norm": 0.011962890625, |
|
"learning_rate": 3.838939309159187e-07, |
|
"logits/chosen": 0.6112891435623169, |
|
"logits/rejected": 1.7461353540420532, |
|
"logps/chosen": -85.6560287475586, |
|
"logps/rejected": -1812.320068359375, |
|
"loss": 0.0015, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.42908936738967896, |
|
"rewards/margins": 16.986312866210938, |
|
"rewards/rejected": -17.415403366088867, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.8416057938440407, |
|
"grad_norm": 0.00148773193359375, |
|
"learning_rate": 3.723672744528162e-07, |
|
"logits/chosen": 0.5621702671051025, |
|
"logits/rejected": 1.8088220357894897, |
|
"logps/chosen": -78.2458724975586, |
|
"logps/rejected": -1943.078125, |
|
"loss": 0.0005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.34909874200820923, |
|
"rewards/margins": 18.354211807250977, |
|
"rewards/rejected": -18.703310012817383, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.8441031404133108, |
|
"grad_norm": 0.00182342529296875, |
|
"learning_rate": 3.6100237575233647e-07, |
|
"logits/chosen": 0.6288230419158936, |
|
"logits/rejected": 1.7829002141952515, |
|
"logps/chosen": -84.52963256835938, |
|
"logps/rejected": -1607.1246337890625, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.41369467973709106, |
|
"rewards/margins": 15.001932144165039, |
|
"rewards/rejected": -15.415626525878906, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.8466004869825811, |
|
"grad_norm": 0.0247802734375, |
|
"learning_rate": 3.4980009885486054e-07, |
|
"logits/chosen": 0.6984633803367615, |
|
"logits/rejected": 1.8022708892822266, |
|
"logps/chosen": -77.57188415527344, |
|
"logps/rejected": -1505.264892578125, |
|
"loss": 0.0019, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3510386645793915, |
|
"rewards/margins": 14.059832572937012, |
|
"rewards/rejected": -14.410870552062988, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.8490978335518512, |
|
"grad_norm": 0.0277099609375, |
|
"learning_rate": 3.3876129543710197e-07, |
|
"logits/chosen": 0.5471528768539429, |
|
"logits/rejected": 1.768148422241211, |
|
"logps/chosen": -87.0343246459961, |
|
"logps/rejected": -2079.26708984375, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.44154462218284607, |
|
"rewards/margins": 19.6082706451416, |
|
"rewards/rejected": -20.049814224243164, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.8515951801211213, |
|
"grad_norm": 0.00250244140625, |
|
"learning_rate": 3.2788680474735687e-07, |
|
"logits/chosen": 0.5990682244300842, |
|
"logits/rejected": 1.8559329509735107, |
|
"logps/chosen": -80.894287109375, |
|
"logps/rejected": -1869.932373046875, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3692544400691986, |
|
"rewards/margins": 17.629741668701172, |
|
"rewards/rejected": -17.99899673461914, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.8540925266903915, |
|
"grad_norm": 0.00060272216796875, |
|
"learning_rate": 3.1717745354170214e-07, |
|
"logits/chosen": 0.550452470779419, |
|
"logits/rejected": 1.9075467586517334, |
|
"logps/chosen": -88.12527465820312, |
|
"logps/rejected": -2150.641357421875, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4492851793766022, |
|
"rewards/margins": 20.334678649902344, |
|
"rewards/rejected": -20.78396224975586, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.8565898732596616, |
|
"grad_norm": 0.000537872314453125, |
|
"learning_rate": 3.0663405602113727e-07, |
|
"logits/chosen": 0.5784090757369995, |
|
"logits/rejected": 1.8440923690795898, |
|
"logps/chosen": -77.76791381835938, |
|
"logps/rejected": -1944.1751708984375, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.34689050912857056, |
|
"rewards/margins": 18.372732162475586, |
|
"rewards/rejected": -18.719623565673828, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.8590872198289318, |
|
"grad_norm": 0.01416015625, |
|
"learning_rate": 2.9625741376968107e-07, |
|
"logits/chosen": 0.5425665378570557, |
|
"logits/rejected": 1.7546192407608032, |
|
"logps/chosen": -84.84912109375, |
|
"logps/rejected": -2032.4622802734375, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.40073734521865845, |
|
"rewards/margins": 18.93886375427246, |
|
"rewards/rejected": -19.339599609375, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.8615845663982019, |
|
"grad_norm": 0.0206298828125, |
|
"learning_rate": 2.8604831569343324e-07, |
|
"logits/chosen": 0.5840142965316772, |
|
"logits/rejected": 1.6774394512176514, |
|
"logps/chosen": -87.08283996582031, |
|
"logps/rejected": -1656.0628662109375, |
|
"loss": 0.0017, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4280741214752197, |
|
"rewards/margins": 15.362825393676758, |
|
"rewards/rejected": -15.790898323059082, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.864081912967472, |
|
"grad_norm": 0.00439453125, |
|
"learning_rate": 2.760075379605942e-07, |
|
"logits/chosen": 0.5762545466423035, |
|
"logits/rejected": 1.788022756576538, |
|
"logps/chosen": -84.1275634765625, |
|
"logps/rejected": -1882.4407958984375, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.40409308671951294, |
|
"rewards/margins": 17.698461532592773, |
|
"rewards/rejected": -18.102556228637695, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.8665792595367422, |
|
"grad_norm": 0.042724609375, |
|
"learning_rate": 2.661358439424552e-07, |
|
"logits/chosen": 0.6203972697257996, |
|
"logits/rejected": 1.7815377712249756, |
|
"logps/chosen": -79.66865539550781, |
|
"logps/rejected": -1772.3359375, |
|
"loss": 0.0019, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.36624962091445923, |
|
"rewards/margins": 16.665634155273438, |
|
"rewards/rejected": -17.031885147094727, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.8690766061060123, |
|
"grad_norm": 0.0026397705078125, |
|
"learning_rate": 2.564339841553615e-07, |
|
"logits/chosen": 0.6519962549209595, |
|
"logits/rejected": 1.8045860528945923, |
|
"logps/chosen": -84.95247650146484, |
|
"logps/rejected": -1720.0745849609375, |
|
"loss": 0.0018, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4275601804256439, |
|
"rewards/margins": 16.091691970825195, |
|
"rewards/rejected": -16.51925277709961, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.8715739526752825, |
|
"grad_norm": 0.1845703125, |
|
"learning_rate": 2.469026962036539e-07, |
|
"logits/chosen": 0.5797117352485657, |
|
"logits/rejected": 1.6833524703979492, |
|
"logps/chosen": -88.14852905273438, |
|
"logps/rejected": -1696.3714599609375, |
|
"loss": 0.003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4267314374446869, |
|
"rewards/margins": 15.646469116210938, |
|
"rewards/rejected": -16.07320213317871, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.8740712992445526, |
|
"grad_norm": 0.046142578125, |
|
"learning_rate": 2.3754270472358786e-07, |
|
"logits/chosen": 0.6232503652572632, |
|
"logits/rejected": 1.6990222930908203, |
|
"logps/chosen": -83.14488983154297, |
|
"logps/rejected": -1672.453857421875, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.39773061871528625, |
|
"rewards/margins": 15.525070190429688, |
|
"rewards/rejected": -15.922798156738281, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.8765686458138228, |
|
"grad_norm": 0.07373046875, |
|
"learning_rate": 2.283547213282458e-07, |
|
"logits/chosen": 0.5654767155647278, |
|
"logits/rejected": 1.7425930500030518, |
|
"logps/chosen": -85.06395721435547, |
|
"logps/rejected": -1774.28125, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4012150168418884, |
|
"rewards/margins": 16.552427291870117, |
|
"rewards/rejected": -16.953643798828125, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.8790659923830929, |
|
"grad_norm": 0.052734375, |
|
"learning_rate": 2.1933944455343166e-07, |
|
"logits/chosen": 0.5508383512496948, |
|
"logits/rejected": 1.7986376285552979, |
|
"logps/chosen": -81.19587707519531, |
|
"logps/rejected": -2142.4736328125, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.374092161655426, |
|
"rewards/margins": 20.307262420654297, |
|
"rewards/rejected": -20.681354522705078, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.8815633389523632, |
|
"grad_norm": 0.003265380859375, |
|
"learning_rate": 2.104975598045647e-07, |
|
"logits/chosen": 0.5937038660049438, |
|
"logits/rejected": 1.7039823532104492, |
|
"logps/chosen": -81.80549621582031, |
|
"logps/rejected": -1617.2646484375, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.38183221220970154, |
|
"rewards/margins": 15.109405517578125, |
|
"rewards/rejected": -15.491238594055176, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.8840606855216333, |
|
"grad_norm": 0.032470703125, |
|
"learning_rate": 2.018297393045701e-07, |
|
"logits/chosen": 0.6291056871414185, |
|
"logits/rejected": 1.7945873737335205, |
|
"logps/chosen": -81.0144271850586, |
|
"logps/rejected": -1751.724609375, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3717043697834015, |
|
"rewards/margins": 16.441532135009766, |
|
"rewards/rejected": -16.813236236572266, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.8865580320909034, |
|
"grad_norm": 0.039306640625, |
|
"learning_rate": 1.9333664204277236e-07, |
|
"logits/chosen": 0.5141820907592773, |
|
"logits/rejected": 1.6924489736557007, |
|
"logps/chosen": -83.84037780761719, |
|
"logps/rejected": -2023.642822265625, |
|
"loss": 0.0015, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3969913423061371, |
|
"rewards/margins": 18.992799758911133, |
|
"rewards/rejected": -19.38979148864746, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.8890553786601736, |
|
"grad_norm": 2.4437904357910156e-06, |
|
"learning_rate": 1.8501891372479124e-07, |
|
"logits/chosen": 0.5262492895126343, |
|
"logits/rejected": 1.801138162612915, |
|
"logps/chosen": -82.75626373291016, |
|
"logps/rejected": -1913.7564697265625, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.37326088547706604, |
|
"rewards/margins": 17.95614242553711, |
|
"rewards/rejected": -18.329402923583984, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.8915527252294437, |
|
"grad_norm": 0.0230712890625, |
|
"learning_rate": 1.7687718672345533e-07, |
|
"logits/chosen": 0.5257088541984558, |
|
"logits/rejected": 1.7338473796844482, |
|
"logps/chosen": -84.81585693359375, |
|
"logps/rejected": -1994.5963134765625, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4195898473262787, |
|
"rewards/margins": 18.773387908935547, |
|
"rewards/rejected": -19.19297981262207, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.8940500717987139, |
|
"grad_norm": 0.032958984375, |
|
"learning_rate": 1.689120800307212e-07, |
|
"logits/chosen": 0.43529587984085083, |
|
"logits/rejected": 1.6352293491363525, |
|
"logps/chosen": -84.8902816772461, |
|
"logps/rejected": -2162.602294921875, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4021669030189514, |
|
"rewards/margins": 20.277816772460938, |
|
"rewards/rejected": -20.679983139038086, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.896547418367984, |
|
"grad_norm": 0.09033203125, |
|
"learning_rate": 1.6112419921061357e-07, |
|
"logits/chosen": 0.6369230151176453, |
|
"logits/rejected": 1.848402976989746, |
|
"logps/chosen": -89.04129791259766, |
|
"logps/rejected": -1788.65234375, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4632226526737213, |
|
"rewards/margins": 16.736385345458984, |
|
"rewards/rejected": -17.199607849121094, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.8990447649372542, |
|
"grad_norm": 0.039306640625, |
|
"learning_rate": 1.5351413635318807e-07, |
|
"logits/chosen": 0.5430204272270203, |
|
"logits/rejected": 1.6954717636108398, |
|
"logps/chosen": -80.37650299072266, |
|
"logps/rejected": -1739.5556640625, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.36847007274627686, |
|
"rewards/margins": 16.253259658813477, |
|
"rewards/rejected": -16.621726989746094, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.9015421115065243, |
|
"grad_norm": 0.00982666015625, |
|
"learning_rate": 1.460824700295138e-07, |
|
"logits/chosen": 0.5976991653442383, |
|
"logits/rejected": 1.8408482074737549, |
|
"logps/chosen": -83.05894470214844, |
|
"logps/rejected": -1974.3404541015625, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.40586838126182556, |
|
"rewards/margins": 18.630273818969727, |
|
"rewards/rejected": -19.036144256591797, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.9040394580757944, |
|
"grad_norm": 0.01531982421875, |
|
"learning_rate": 1.3882976524768694e-07, |
|
"logits/chosen": 0.6637327075004578, |
|
"logits/rejected": 1.75222909450531, |
|
"logps/chosen": -82.24571228027344, |
|
"logps/rejected": -1613.786865234375, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.39647185802459717, |
|
"rewards/margins": 15.058723449707031, |
|
"rewards/rejected": -15.455195426940918, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.9065368046450646, |
|
"grad_norm": 0.00054931640625, |
|
"learning_rate": 1.3175657340987664e-07, |
|
"logits/chosen": 0.6287505030632019, |
|
"logits/rejected": 1.799709677696228, |
|
"logps/chosen": -84.08810424804688, |
|
"logps/rejected": -1803.7301025390625, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4151592254638672, |
|
"rewards/margins": 16.90250015258789, |
|
"rewards/rejected": -17.317657470703125, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.9090341512143347, |
|
"grad_norm": 0.0010986328125, |
|
"learning_rate": 1.2486343227040122e-07, |
|
"logits/chosen": 0.5875022411346436, |
|
"logits/rejected": 1.7384836673736572, |
|
"logps/chosen": -87.7680435180664, |
|
"logps/rejected": -1834.406982421875, |
|
"loss": 0.0029, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4344062805175781, |
|
"rewards/margins": 17.189672470092773, |
|
"rewards/rejected": -17.62407875061035, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.9115314977836049, |
|
"grad_norm": 0.0419921875, |
|
"learning_rate": 1.181508658948452e-07, |
|
"logits/chosen": 0.6155994534492493, |
|
"logits/rejected": 1.7817541360855103, |
|
"logps/chosen": -80.59324645996094, |
|
"logps/rejected": -1767.1129150390625, |
|
"loss": 0.0012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3722308278083801, |
|
"rewards/margins": 16.59187889099121, |
|
"rewards/rejected": -16.964111328125, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.9140288443528751, |
|
"grad_norm": 0.0458984375, |
|
"learning_rate": 1.1161938462021627e-07, |
|
"logits/chosen": 0.6269813776016235, |
|
"logits/rejected": 1.7340434789657593, |
|
"logps/chosen": -86.62757110595703, |
|
"logps/rejected": -1731.770751953125, |
|
"loss": 0.0028, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4310578405857086, |
|
"rewards/margins": 16.17133903503418, |
|
"rewards/rejected": -16.602397918701172, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.9165261909221453, |
|
"grad_norm": 0.1083984375, |
|
"learning_rate": 1.0526948501614536e-07, |
|
"logits/chosen": 0.5681526064872742, |
|
"logits/rejected": 1.8455768823623657, |
|
"logps/chosen": -88.42396545410156, |
|
"logps/rejected": -1962.5306396484375, |
|
"loss": 0.0017, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.44573092460632324, |
|
"rewards/margins": 18.391765594482422, |
|
"rewards/rejected": -18.837499618530273, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.9190235374914154, |
|
"grad_norm": 0.0791015625, |
|
"learning_rate": 9.910164984713477e-08, |
|
"logits/chosen": 0.5716847777366638, |
|
"logits/rejected": 1.790804147720337, |
|
"logps/chosen": -88.99705505371094, |
|
"logps/rejected": -2003.9739990234375, |
|
"loss": 0.0015, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.457981675863266, |
|
"rewards/margins": 18.8723087310791, |
|
"rewards/rejected": -19.330291748046875, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.9215208840606856, |
|
"grad_norm": 0.0830078125, |
|
"learning_rate": 9.311634803585323e-08, |
|
"logits/chosen": 0.5493127107620239, |
|
"logits/rejected": 1.8056682348251343, |
|
"logps/chosen": -80.04996490478516, |
|
"logps/rejected": -2063.024169921875, |
|
"loss": 0.0024, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3674994707107544, |
|
"rewards/margins": 19.52413558959961, |
|
"rewards/rejected": -19.891637802124023, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.9240182306299557, |
|
"grad_norm": 0.000400543212890625, |
|
"learning_rate": 8.7314034627487e-08, |
|
"logits/chosen": 0.5750405192375183, |
|
"logits/rejected": 1.8101009130477905, |
|
"logps/chosen": -78.46788024902344, |
|
"logps/rejected": -1988.633056640625, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.35628947615623474, |
|
"rewards/margins": 18.821575164794922, |
|
"rewards/rejected": -19.1778621673584, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.9265155771992258, |
|
"grad_norm": 0.03515625, |
|
"learning_rate": 8.16951507551439e-08, |
|
"logits/chosen": 0.6284887194633484, |
|
"logits/rejected": 1.7544790506362915, |
|
"logps/chosen": -78.0561752319336, |
|
"logps/rejected": -1719.0638427734375, |
|
"loss": 0.0012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3490171432495117, |
|
"rewards/margins": 16.02911949157715, |
|
"rewards/rejected": -16.378137588500977, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.929012923768496, |
|
"grad_norm": 6.4849853515625e-05, |
|
"learning_rate": 7.626012360631291e-08, |
|
"logits/chosen": 0.5767999887466431, |
|
"logits/rejected": 1.8027598857879639, |
|
"logps/chosen": -87.67066192626953, |
|
"logps/rejected": -1751.861328125, |
|
"loss": 0.0015, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.44937849044799805, |
|
"rewards/margins": 16.375062942504883, |
|
"rewards/rejected": -16.82444190979004, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.9315102703377661, |
|
"grad_norm": 0.0006256103515625, |
|
"learning_rate": 7.100936639038936e-08, |
|
"logits/chosen": 0.5324774384498596, |
|
"logits/rejected": 1.9097219705581665, |
|
"logps/chosen": -89.81242370605469, |
|
"logps/rejected": -2373.812744140625, |
|
"loss": 0.0005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4651169776916504, |
|
"rewards/margins": 22.532739639282227, |
|
"rewards/rejected": -22.997854232788086, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.9340076169070363, |
|
"grad_norm": 1.0356307029724121e-06, |
|
"learning_rate": 6.594327830725916e-08, |
|
"logits/chosen": 0.5782414674758911, |
|
"logits/rejected": 1.906734824180603, |
|
"logps/chosen": -80.02381896972656, |
|
"logps/rejected": -1915.576416015625, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3699369728565216, |
|
"rewards/margins": 18.08548927307129, |
|
"rewards/rejected": -18.455425262451172, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.9365049634763064, |
|
"grad_norm": 0.039306640625, |
|
"learning_rate": 6.106224451694592e-08, |
|
"logits/chosen": 0.5905268788337708, |
|
"logits/rejected": 1.7933048009872437, |
|
"logps/chosen": -77.8465347290039, |
|
"logps/rejected": -1931.9468994140625, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3471309244632721, |
|
"rewards/margins": 18.231327056884766, |
|
"rewards/rejected": -18.57845687866211, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.9390023100455765, |
|
"grad_norm": 0.07861328125, |
|
"learning_rate": 5.636663611033266e-08, |
|
"logits/chosen": 0.6453654170036316, |
|
"logits/rejected": 1.9545791149139404, |
|
"logps/chosen": -88.19773864746094, |
|
"logps/rejected": -2014.8583984375, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4550137519836426, |
|
"rewards/margins": 18.988948822021484, |
|
"rewards/rejected": -19.443960189819336, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.9414996566148467, |
|
"grad_norm": 0.00152587890625, |
|
"learning_rate": 5.185681008094579e-08, |
|
"logits/chosen": 0.5483246445655823, |
|
"logits/rejected": 1.754540205001831, |
|
"logps/chosen": -81.78535461425781, |
|
"logps/rejected": -1835.6578369140625, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3718385696411133, |
|
"rewards/margins": 17.2070369720459, |
|
"rewards/rejected": -17.578876495361328, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.9439970031841168, |
|
"grad_norm": 0.00086212158203125, |
|
"learning_rate": 4.753310929781513e-08, |
|
"logits/chosen": 0.6061893701553345, |
|
"logits/rejected": 1.7712287902832031, |
|
"logps/chosen": -84.43482971191406, |
|
"logps/rejected": -1739.1148681640625, |
|
"loss": 0.0015, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4173789620399475, |
|
"rewards/margins": 16.286354064941406, |
|
"rewards/rejected": -16.703731536865234, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.946494349753387, |
|
"grad_norm": 1.2874603271484375e-05, |
|
"learning_rate": 4.3395862479405914e-08, |
|
"logits/chosen": 0.5530301928520203, |
|
"logits/rejected": 1.790492296218872, |
|
"logps/chosen": -98.83070373535156, |
|
"logps/rejected": -1927.699462890625, |
|
"loss": 0.0005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5457450747489929, |
|
"rewards/margins": 17.921890258789062, |
|
"rewards/rejected": -18.467636108398438, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.9489916963226572, |
|
"grad_norm": 0.0028228759765625, |
|
"learning_rate": 3.9445384168628474e-08, |
|
"logits/chosen": 0.5836749076843262, |
|
"logits/rejected": 1.8246829509735107, |
|
"logps/chosen": -80.40269470214844, |
|
"logps/rejected": -1708.9019775390625, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3665417730808258, |
|
"rewards/margins": 15.90452766418457, |
|
"rewards/rejected": -16.271068572998047, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.9514890428919274, |
|
"grad_norm": 0.017578125, |
|
"learning_rate": 3.5681974708923484e-08, |
|
"logits/chosen": 0.6176645159721375, |
|
"logits/rejected": 1.7562223672866821, |
|
"logps/chosen": -82.15937805175781, |
|
"logps/rejected": -1655.8538818359375, |
|
"loss": 0.0012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3880414068698883, |
|
"rewards/margins": 15.394001960754395, |
|
"rewards/rejected": -15.78204345703125, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.9539863894611975, |
|
"grad_norm": 0.003021240234375, |
|
"learning_rate": 3.210592022142717e-08, |
|
"logits/chosen": 0.6430649161338806, |
|
"logits/rejected": 1.7840299606323242, |
|
"logps/chosen": -88.11245727539062, |
|
"logps/rejected": -1835.171630859375, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4654541611671448, |
|
"rewards/margins": 17.16687774658203, |
|
"rewards/rejected": -17.6323299407959, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.9564837360304677, |
|
"grad_norm": 0.0260009765625, |
|
"learning_rate": 2.8717492583220095e-08, |
|
"logits/chosen": 0.6011831164360046, |
|
"logits/rejected": 1.8058007955551147, |
|
"logps/chosen": -83.06114959716797, |
|
"logps/rejected": -1898.2939453125, |
|
"loss": 0.0012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.39993369579315186, |
|
"rewards/margins": 17.886281967163086, |
|
"rewards/rejected": -18.28621482849121, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.9589810825997378, |
|
"grad_norm": 0.109375, |
|
"learning_rate": 2.551694940665539e-08, |
|
"logits/chosen": 0.600081741809845, |
|
"logits/rejected": 1.7849693298339844, |
|
"logps/chosen": -82.53587341308594, |
|
"logps/rejected": -1787.7008056640625, |
|
"loss": 0.0015, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.39744192361831665, |
|
"rewards/margins": 16.777345657348633, |
|
"rewards/rejected": -17.174787521362305, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.9614784291690079, |
|
"grad_norm": 5.4836273193359375e-05, |
|
"learning_rate": 2.2504534019774092e-08, |
|
"logits/chosen": 0.712979257106781, |
|
"logits/rejected": 1.8468831777572632, |
|
"logps/chosen": -80.92863464355469, |
|
"logps/rejected": -1629.561767578125, |
|
"loss": 0.0012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3758103847503662, |
|
"rewards/margins": 15.236900329589844, |
|
"rewards/rejected": -15.612710952758789, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.9639757757382781, |
|
"grad_norm": 0.058349609375, |
|
"learning_rate": 1.9680475447805826e-08, |
|
"logits/chosen": 0.6279615759849548, |
|
"logits/rejected": 1.7799345254898071, |
|
"logps/chosen": -81.30252838134766, |
|
"logps/rejected": -1729.193115234375, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3813311457633972, |
|
"rewards/margins": 16.206878662109375, |
|
"rewards/rejected": -16.58820915222168, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.9664731223075482, |
|
"grad_norm": 0.000812530517578125, |
|
"learning_rate": 1.70449883957563e-08, |
|
"logits/chosen": 0.5945799350738525, |
|
"logits/rejected": 1.7449548244476318, |
|
"logps/chosen": -79.51522064208984, |
|
"logps/rejected": -1772.429931640625, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3488074243068695, |
|
"rewards/margins": 16.572925567626953, |
|
"rewards/rejected": -16.921733856201172, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.9689704688768184, |
|
"grad_norm": 0.03662109375, |
|
"learning_rate": 1.4598273232083182e-08, |
|
"logits/chosen": 0.5940654873847961, |
|
"logits/rejected": 1.737255334854126, |
|
"logps/chosen": -89.48096466064453, |
|
"logps/rejected": -1763.216064453125, |
|
"loss": 0.0026, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4539431631565094, |
|
"rewards/margins": 16.4827880859375, |
|
"rewards/rejected": -16.93673324584961, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.9714678154460885, |
|
"grad_norm": 7.677078247070312e-05, |
|
"learning_rate": 1.2340515973464917e-08, |
|
"logits/chosen": 0.5371723771095276, |
|
"logits/rejected": 1.6967008113861084, |
|
"logps/chosen": -89.69468688964844, |
|
"logps/rejected": -1855.769287109375, |
|
"loss": 0.0005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4602131247520447, |
|
"rewards/margins": 17.378524780273438, |
|
"rewards/rejected": -17.8387393951416, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.9739651620153587, |
|
"grad_norm": 0.01055908203125, |
|
"learning_rate": 1.0271888270655118e-08, |
|
"logits/chosen": 0.5918472409248352, |
|
"logits/rejected": 1.6669203042984009, |
|
"logps/chosen": -84.76191711425781, |
|
"logps/rejected": -1762.7152099609375, |
|
"loss": 0.0017, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.40269866585731506, |
|
"rewards/margins": 16.396747589111328, |
|
"rewards/rejected": -16.79944610595703, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.9764625085846288, |
|
"grad_norm": 0.005584716796875, |
|
"learning_rate": 8.392547395435769e-09, |
|
"logits/chosen": 0.6482867002487183, |
|
"logits/rejected": 1.7531925439834595, |
|
"logps/chosen": -84.86217498779297, |
|
"logps/rejected": -1594.6502685546875, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4176376461982727, |
|
"rewards/margins": 14.827906608581543, |
|
"rewards/rejected": -15.245546340942383, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.9789598551538989, |
|
"grad_norm": 1.0251998901367188e-05, |
|
"learning_rate": 6.702636228657911e-09, |
|
"logits/chosen": 0.6012560129165649, |
|
"logits/rejected": 1.7790956497192383, |
|
"logps/chosen": -85.35179138183594, |
|
"logps/rejected": -1768.960693359375, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.41833052039146423, |
|
"rewards/margins": 16.56346321105957, |
|
"rewards/rejected": -16.981792449951172, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.9814572017231691, |
|
"grad_norm": 0.049560546875, |
|
"learning_rate": 5.2022832493800465e-09, |
|
"logits/chosen": 0.575610339641571, |
|
"logits/rejected": 1.6641845703125, |
|
"logps/chosen": -87.38862609863281, |
|
"logps/rejected": -1609.995849609375, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.43747106194496155, |
|
"rewards/margins": 15.003010749816895, |
|
"rewards/rejected": -15.440483093261719, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.9839545482924393, |
|
"grad_norm": 0.00927734375, |
|
"learning_rate": 3.891602525100124e-09, |
|
"logits/chosen": 0.5365520119667053, |
|
"logits/rejected": 1.7841014862060547, |
|
"logps/chosen": -77.71090698242188, |
|
"logps/rejected": -1843.400634765625, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.33150431513786316, |
|
"rewards/margins": 17.311891555786133, |
|
"rewards/rejected": -17.643396377563477, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.9864518948617095, |
|
"grad_norm": 0.000896453857421875, |
|
"learning_rate": 2.7706937030827495e-09, |
|
"logits/chosen": 0.6269220113754272, |
|
"logits/rejected": 1.821447730064392, |
|
"logps/chosen": -79.47364807128906, |
|
"logps/rejected": -1583.856689453125, |
|
"loss": 0.0012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.35821038484573364, |
|
"rewards/margins": 14.733774185180664, |
|
"rewards/rejected": -15.091984748840332, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.9889492414309796, |
|
"grad_norm": 0.021728515625, |
|
"learning_rate": 1.839642002783859e-09, |
|
"logits/chosen": 0.7017726302146912, |
|
"logits/rejected": 1.8069097995758057, |
|
"logps/chosen": -79.8754653930664, |
|
"logps/rejected": -1646.0501708984375, |
|
"loss": 0.002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3667237162590027, |
|
"rewards/margins": 15.337608337402344, |
|
"rewards/rejected": -15.704330444335938, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.9914465880002498, |
|
"grad_norm": 0.021728515625, |
|
"learning_rate": 1.0985182093714574e-09, |
|
"logits/chosen": 0.6416125297546387, |
|
"logits/rejected": 1.760498046875, |
|
"logps/chosen": -85.49261474609375, |
|
"logps/rejected": -1687.677978515625, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.42397341132164, |
|
"rewards/margins": 15.762815475463867, |
|
"rewards/rejected": -16.18678855895996, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.9939439345695199, |
|
"grad_norm": 0.003143310546875, |
|
"learning_rate": 5.473786683440896e-10, |
|
"logits/chosen": 0.5962403416633606, |
|
"logits/rejected": 1.8247146606445312, |
|
"logps/chosen": -83.6251220703125, |
|
"logps/rejected": -2010.3323974609375, |
|
"loss": 0.0019, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4074554443359375, |
|
"rewards/margins": 18.965452194213867, |
|
"rewards/rejected": -19.372909545898438, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.99644128113879, |
|
"grad_norm": 0.049560546875, |
|
"learning_rate": 1.862652812467669e-10, |
|
"logits/chosen": 0.5162760615348816, |
|
"logits/rejected": 1.6673294305801392, |
|
"logps/chosen": -83.96633911132812, |
|
"logps/rejected": -1847.5804443359375, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.39398157596588135, |
|
"rewards/margins": 17.087627410888672, |
|
"rewards/rejected": -17.481609344482422, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.9989386277080602, |
|
"grad_norm": 2.9802322387695312e-05, |
|
"learning_rate": 1.5205502486292932e-11, |
|
"logits/chosen": 0.582720935344696, |
|
"logits/rejected": 1.8548141717910767, |
|
"logps/chosen": -80.02593994140625, |
|
"logps/rejected": -1968.4027099609375, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.35573673248291016, |
|
"rewards/margins": 18.611202239990234, |
|
"rewards/rejected": -18.96693992614746, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.9989386277080602, |
|
"eval_logits/chosen": 0.6540641784667969, |
|
"eval_logits/rejected": 1.569779634475708, |
|
"eval_logps/chosen": -84.51407623291016, |
|
"eval_logps/rejected": -994.2071533203125, |
|
"eval_loss": 0.00282670627348125, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -0.40158194303512573, |
|
"eval_rewards/margins": 8.929591178894043, |
|
"eval_rewards/rejected": -9.33117389678955, |
|
"eval_runtime": 0.6151, |
|
"eval_samples_per_second": 8.128, |
|
"eval_steps_per_second": 8.128, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.9999375663357682, |
|
"step": 4004, |
|
"total_flos": 0.0, |
|
"train_loss": 0.05122942747121405, |
|
"train_runtime": 6577.5594, |
|
"train_samples_per_second": 2.435, |
|
"train_steps_per_second": 0.609 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 4004, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|