|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9994767137624281, |
|
"eval_steps": 100, |
|
"global_step": 955, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0010465724751439038, |
|
"grad_norm": 21.102116873134612, |
|
"learning_rate": 5.208333333333333e-09, |
|
"logits/chosen": -2.924262046813965, |
|
"logits/rejected": -2.7925047874450684, |
|
"logps/chosen": -380.8447570800781, |
|
"logps/rejected": -358.51123046875, |
|
"loss": 4.6506, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.010465724751439037, |
|
"grad_norm": 15.822543074567085, |
|
"learning_rate": 5.208333333333333e-08, |
|
"logits/chosen": -2.595761299133301, |
|
"logits/rejected": -2.569227457046509, |
|
"logps/chosen": -256.6064453125, |
|
"logps/rejected": -234.93408203125, |
|
"loss": 4.5621, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.00042897689854726195, |
|
"rewards/margins": 0.0009927540086209774, |
|
"rewards/rejected": -0.0005637770518660545, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.020931449502878074, |
|
"grad_norm": 18.010820015079055, |
|
"learning_rate": 1.0416666666666667e-07, |
|
"logits/chosen": -2.613164186477661, |
|
"logits/rejected": -2.5756287574768066, |
|
"logps/chosen": -283.0158996582031, |
|
"logps/rejected": -282.265869140625, |
|
"loss": 4.4053, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.0006733193295076489, |
|
"rewards/margins": 0.0005819452926516533, |
|
"rewards/rejected": 9.137402230408043e-05, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03139717425431711, |
|
"grad_norm": 21.44807572026145, |
|
"learning_rate": 1.5624999999999999e-07, |
|
"logits/chosen": -2.691143035888672, |
|
"logits/rejected": -2.6666667461395264, |
|
"logps/chosen": -269.9042053222656, |
|
"logps/rejected": -276.4795837402344, |
|
"loss": 5.105, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.0037794082891196012, |
|
"rewards/margins": 0.0018267262494191527, |
|
"rewards/rejected": 0.0019526820397004485, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04186289900575615, |
|
"grad_norm": 17.302023991146115, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"logits/chosen": -2.6577816009521484, |
|
"logits/rejected": -2.5818943977355957, |
|
"logps/chosen": -288.9285888671875, |
|
"logps/rejected": -280.9770202636719, |
|
"loss": 4.9032, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.020702064037322998, |
|
"rewards/margins": 0.009830506518483162, |
|
"rewards/rejected": 0.01087155845016241, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.052328623757195186, |
|
"grad_norm": 22.46337927130885, |
|
"learning_rate": 2.604166666666667e-07, |
|
"logits/chosen": -2.6507585048675537, |
|
"logits/rejected": -2.5627222061157227, |
|
"logps/chosen": -263.1905212402344, |
|
"logps/rejected": -234.9305419921875, |
|
"loss": 4.8274, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.044054824858903885, |
|
"rewards/margins": 0.02749818004667759, |
|
"rewards/rejected": 0.016556641086935997, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06279434850863422, |
|
"grad_norm": 18.98737987603255, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -2.5976526737213135, |
|
"logits/rejected": -2.5587098598480225, |
|
"logps/chosen": -299.9574890136719, |
|
"logps/rejected": -276.1783142089844, |
|
"loss": 4.5279, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.040667824447155, |
|
"rewards/margins": 0.04492232948541641, |
|
"rewards/rejected": -0.004254504106938839, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07326007326007326, |
|
"grad_norm": 20.501382800234886, |
|
"learning_rate": 3.645833333333333e-07, |
|
"logits/chosen": -2.469130039215088, |
|
"logits/rejected": -2.452857732772827, |
|
"logps/chosen": -265.96978759765625, |
|
"logps/rejected": -271.6788330078125, |
|
"loss": 4.6703, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.0633089542388916, |
|
"rewards/margins": 0.07126398384571075, |
|
"rewards/rejected": -0.13457295298576355, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.0837257980115123, |
|
"grad_norm": 25.49997843488533, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -2.4551777839660645, |
|
"logits/rejected": -2.3624327182769775, |
|
"logps/chosen": -285.5320739746094, |
|
"logps/rejected": -276.4596252441406, |
|
"loss": 4.5605, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.09489366412162781, |
|
"rewards/margins": 0.15657536685466766, |
|
"rewards/rejected": -0.2514690160751343, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.09419152276295134, |
|
"grad_norm": 30.61647338954573, |
|
"learning_rate": 4.6874999999999996e-07, |
|
"logits/chosen": -2.3756256103515625, |
|
"logits/rejected": -2.332918882369995, |
|
"logps/chosen": -277.46014404296875, |
|
"logps/rejected": -290.0049743652344, |
|
"loss": 4.1231, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.21862252056598663, |
|
"rewards/margins": 0.033695660531520844, |
|
"rewards/rejected": -0.25231820344924927, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.10465724751439037, |
|
"grad_norm": 38.124561793065574, |
|
"learning_rate": 4.999732492681437e-07, |
|
"logits/chosen": -2.332035779953003, |
|
"logits/rejected": -2.2253689765930176, |
|
"logps/chosen": -314.4341125488281, |
|
"logps/rejected": -317.18695068359375, |
|
"loss": 4.5854, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.1460995227098465, |
|
"rewards/margins": 0.22573721408843994, |
|
"rewards/rejected": -0.37183672189712524, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.10465724751439037, |
|
"eval_logits/chosen": -2.2812609672546387, |
|
"eval_logits/rejected": -2.192293167114258, |
|
"eval_logps/chosen": -309.1551818847656, |
|
"eval_logps/rejected": -310.1242370605469, |
|
"eval_loss": 4.381103515625, |
|
"eval_rewards/accuracies": 0.648809552192688, |
|
"eval_rewards/chosen": -0.2718724012374878, |
|
"eval_rewards/margins": 0.2273014634847641, |
|
"eval_rewards/rejected": -0.4991738498210907, |
|
"eval_runtime": 176.2372, |
|
"eval_samples_per_second": 11.348, |
|
"eval_steps_per_second": 0.357, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.1151229722658294, |
|
"grad_norm": 47.336977780094564, |
|
"learning_rate": 4.996723692767926e-07, |
|
"logits/chosen": -2.0436112880706787, |
|
"logits/rejected": -1.9534924030303955, |
|
"logps/chosen": -310.6973571777344, |
|
"logps/rejected": -324.1681823730469, |
|
"loss": 3.758, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.6924275755882263, |
|
"rewards/margins": 0.17653007805347443, |
|
"rewards/rejected": -0.8689576387405396, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.12558869701726844, |
|
"grad_norm": 109.43376131471078, |
|
"learning_rate": 4.990375746213598e-07, |
|
"logits/chosen": -0.08515436947345734, |
|
"logits/rejected": 0.34949326515197754, |
|
"logps/chosen": -343.26495361328125, |
|
"logps/rejected": -412.98577880859375, |
|
"loss": 4.0333, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.8068662881851196, |
|
"rewards/margins": 0.438527911901474, |
|
"rewards/rejected": -1.2453943490982056, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.1360544217687075, |
|
"grad_norm": 95.04671304091885, |
|
"learning_rate": 4.980697142834314e-07, |
|
"logits/chosen": 0.396954745054245, |
|
"logits/rejected": 1.0232269763946533, |
|
"logps/chosen": -406.28521728515625, |
|
"logps/rejected": -430.10760498046875, |
|
"loss": 4.2005, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.2501262426376343, |
|
"rewards/margins": 0.5063079595565796, |
|
"rewards/rejected": -1.7564342021942139, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.14652014652014653, |
|
"grad_norm": 144.39035434160894, |
|
"learning_rate": 4.967700826904229e-07, |
|
"logits/chosen": -0.1560676395893097, |
|
"logits/rejected": 0.6105406880378723, |
|
"logps/chosen": -416.2538146972656, |
|
"logps/rejected": -463.2472229003906, |
|
"loss": 3.7876, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.2516638040542603, |
|
"rewards/margins": 0.49105915427207947, |
|
"rewards/rejected": -1.7427231073379517, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.15698587127158556, |
|
"grad_norm": 125.21681673589694, |
|
"learning_rate": 4.951404179843962e-07, |
|
"logits/chosen": 2.0407581329345703, |
|
"logits/rejected": 2.8481547832489014, |
|
"logps/chosen": -510.521484375, |
|
"logps/rejected": -534.6341552734375, |
|
"loss": 3.898, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.226250648498535, |
|
"rewards/margins": 0.6501102447509766, |
|
"rewards/rejected": -2.876361131668091, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.1674515960230246, |
|
"grad_norm": 66.88313091855639, |
|
"learning_rate": 4.931828996974498e-07, |
|
"logits/chosen": 2.163175106048584, |
|
"logits/rejected": 3.5420451164245605, |
|
"logps/chosen": -585.4688720703125, |
|
"logps/rejected": -635.2697143554688, |
|
"loss": 3.9393, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.89656138420105, |
|
"rewards/margins": 0.8964195251464844, |
|
"rewards/rejected": -3.792980909347534, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.17791732077446362, |
|
"grad_norm": 188.98325062900707, |
|
"learning_rate": 4.909001458367866e-07, |
|
"logits/chosen": 0.49319368600845337, |
|
"logits/rejected": 1.3766599893569946, |
|
"logps/chosen": -599.5331420898438, |
|
"logps/rejected": -654.1383056640625, |
|
"loss": 3.9922, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -3.210639476776123, |
|
"rewards/margins": 0.922272801399231, |
|
"rewards/rejected": -4.132911682128906, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.18838304552590268, |
|
"grad_norm": 320.6202106283321, |
|
"learning_rate": 4.882952093833627e-07, |
|
"logits/chosen": 0.6820823550224304, |
|
"logits/rejected": 1.588409185409546, |
|
"logps/chosen": -1040.5491943359375, |
|
"logps/rejected": -1233.1207275390625, |
|
"loss": 3.36, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -8.006011009216309, |
|
"rewards/margins": 1.8573882579803467, |
|
"rewards/rejected": -9.86340045928955, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.1988487702773417, |
|
"grad_norm": 157.79546381015746, |
|
"learning_rate": 4.853715742087946e-07, |
|
"logits/chosen": 3.3087031841278076, |
|
"logits/rejected": 4.11985445022583, |
|
"logps/chosen": -1690.8167724609375, |
|
"logps/rejected": -1890.6634521484375, |
|
"loss": 2.6799, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -14.548372268676758, |
|
"rewards/margins": 1.8595011234283447, |
|
"rewards/rejected": -16.407875061035156, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.20931449502878074, |
|
"grad_norm": 178.97245767319544, |
|
"learning_rate": 4.821331504159906e-07, |
|
"logits/chosen": 0.3337511122226715, |
|
"logits/rejected": 1.9961885213851929, |
|
"logps/chosen": -1578.712158203125, |
|
"logps/rejected": -1801.65625, |
|
"loss": 2.6464, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -12.680551528930664, |
|
"rewards/margins": 2.764849901199341, |
|
"rewards/rejected": -15.445402145385742, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.20931449502878074, |
|
"eval_logits/chosen": -0.35622134804725647, |
|
"eval_logits/rejected": 0.6981890797615051, |
|
"eval_logps/chosen": -1244.43603515625, |
|
"eval_logps/rejected": -1423.3580322265625, |
|
"eval_loss": 2.606262683868408, |
|
"eval_rewards/accuracies": 0.625, |
|
"eval_rewards/chosen": -9.624680519104004, |
|
"eval_rewards/margins": 2.0068302154541016, |
|
"eval_rewards/rejected": -11.631510734558105, |
|
"eval_runtime": 177.3795, |
|
"eval_samples_per_second": 11.275, |
|
"eval_steps_per_second": 0.355, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.21978021978021978, |
|
"grad_norm": 221.39959720400535, |
|
"learning_rate": 4.785842691097342e-07, |
|
"logits/chosen": 0.43124809861183167, |
|
"logits/rejected": 1.6196168661117554, |
|
"logps/chosen": -1394.329345703125, |
|
"logps/rejected": -1612.8701171875, |
|
"loss": 2.2192, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -11.115188598632812, |
|
"rewards/margins": 2.29093337059021, |
|
"rewards/rejected": -13.406122207641602, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.2302459445316588, |
|
"grad_norm": 107.97254065213261, |
|
"learning_rate": 4.7472967660421603e-07, |
|
"logits/chosen": 0.5400440096855164, |
|
"logits/rejected": 1.9760030508041382, |
|
"logps/chosen": -1507.001220703125, |
|
"logps/rejected": -1713.616455078125, |
|
"loss": 2.018, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -12.197932243347168, |
|
"rewards/margins": 2.5764663219451904, |
|
"rewards/rejected": -14.774396896362305, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.24071166928309787, |
|
"grad_norm": 217.88193736039008, |
|
"learning_rate": 4.705745280752585e-07, |
|
"logits/chosen": 1.4225207567214966, |
|
"logits/rejected": 2.4756038188934326, |
|
"logps/chosen": -1726.320068359375, |
|
"logps/rejected": -2005.7041015625, |
|
"loss": 1.9719, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -14.375930786132812, |
|
"rewards/margins": 2.995753526687622, |
|
"rewards/rejected": -17.37168312072754, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.25117739403453687, |
|
"grad_norm": 109.77258728949327, |
|
"learning_rate": 4.6612438066572555e-07, |
|
"logits/chosen": 2.2113587856292725, |
|
"logits/rejected": 3.125591993331909, |
|
"logps/chosen": -1894.770751953125, |
|
"logps/rejected": -2110.86376953125, |
|
"loss": 1.9847, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -16.27196502685547, |
|
"rewards/margins": 2.294943332672119, |
|
"rewards/rejected": -18.56690788269043, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.2616431187859759, |
|
"grad_norm": 276.53415343052893, |
|
"learning_rate": 4.6138518605333664e-07, |
|
"logits/chosen": 1.203977108001709, |
|
"logits/rejected": 1.9225616455078125, |
|
"logps/chosen": -1561.0047607421875, |
|
"logps/rejected": -1763.075439453125, |
|
"loss": 2.257, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -12.832204818725586, |
|
"rewards/margins": 2.222775936126709, |
|
"rewards/rejected": -15.05497932434082, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.272108843537415, |
|
"grad_norm": 159.14963627253198, |
|
"learning_rate": 4.5636328249082514e-07, |
|
"logits/chosen": 1.134037733078003, |
|
"logits/rejected": 2.1568219661712646, |
|
"logps/chosen": -1608.8623046875, |
|
"logps/rejected": -1763.599853515625, |
|
"loss": 2.2606, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -13.198956489562988, |
|
"rewards/margins": 1.6145280599594116, |
|
"rewards/rejected": -14.813486099243164, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.282574568288854, |
|
"grad_norm": 199.45417630865836, |
|
"learning_rate": 4.510653863290871e-07, |
|
"logits/chosen": 0.3547247350215912, |
|
"logits/rejected": 1.2751286029815674, |
|
"logps/chosen": -1781.0726318359375, |
|
"logps/rejected": -2089.05615234375, |
|
"loss": 1.7211, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -14.814226150512695, |
|
"rewards/margins": 3.540767192840576, |
|
"rewards/rejected": -18.354991912841797, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.29304029304029305, |
|
"grad_norm": 162.5497817330968, |
|
"learning_rate": 4.4549858303465737e-07, |
|
"logits/chosen": 0.21130748093128204, |
|
"logits/rejected": 1.2269564867019653, |
|
"logps/chosen": -1743.0787353515625, |
|
"logps/rejected": -2033.669921875, |
|
"loss": 1.9445, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -14.468345642089844, |
|
"rewards/margins": 3.0355849266052246, |
|
"rewards/rejected": -17.50392723083496, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.3035060177917321, |
|
"grad_norm": 307.15808847538113, |
|
"learning_rate": 4.396703177135261e-07, |
|
"logits/chosen": 0.7419403791427612, |
|
"logits/rejected": 1.9202260971069336, |
|
"logps/chosen": -1948.1787109375, |
|
"logps/rejected": -2273.5205078125, |
|
"loss": 1.9864, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -16.753948211669922, |
|
"rewards/margins": 3.6111111640930176, |
|
"rewards/rejected": -20.36505699157715, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.3139717425431711, |
|
"grad_norm": 90.00202577382801, |
|
"learning_rate": 4.335883851539693e-07, |
|
"logits/chosen": 0.30849236249923706, |
|
"logits/rejected": 1.1072229146957397, |
|
"logps/chosen": -1431.3275146484375, |
|
"logps/rejected": -1653.4029541015625, |
|
"loss": 1.9069, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -11.623054504394531, |
|
"rewards/margins": 2.2337088584899902, |
|
"rewards/rejected": -13.856762886047363, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.3139717425431711, |
|
"eval_logits/chosen": 0.45899611711502075, |
|
"eval_logits/rejected": 1.5569082498550415, |
|
"eval_logps/chosen": -1266.6490478515625, |
|
"eval_logps/rejected": -1452.7674560546875, |
|
"eval_loss": 2.262396812438965, |
|
"eval_rewards/accuracies": 0.6329365372657776, |
|
"eval_rewards/chosen": -9.846811294555664, |
|
"eval_rewards/margins": 2.0787949562072754, |
|
"eval_rewards/rejected": -11.925606727600098, |
|
"eval_runtime": 176.5188, |
|
"eval_samples_per_second": 11.33, |
|
"eval_steps_per_second": 0.357, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.32443746729461015, |
|
"grad_norm": 177.8700388917398, |
|
"learning_rate": 4.272609194017105e-07, |
|
"logits/chosen": 0.647371768951416, |
|
"logits/rejected": 2.9104599952697754, |
|
"logps/chosen": -1395.496826171875, |
|
"logps/rejected": -1711.9573974609375, |
|
"loss": 2.3095, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -11.117349624633789, |
|
"rewards/margins": 3.674748182296753, |
|
"rewards/rejected": -14.792098999023438, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.3349031920460492, |
|
"grad_norm": 180.92515200199898, |
|
"learning_rate": 4.2069638288135547e-07, |
|
"logits/chosen": 0.9543863534927368, |
|
"logits/rejected": 1.7447538375854492, |
|
"logps/chosen": -1926.299560546875, |
|
"logps/rejected": -2217.88037109375, |
|
"loss": 2.1724, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -16.73282814025879, |
|
"rewards/margins": 2.939984083175659, |
|
"rewards/rejected": -19.672813415527344, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.3453689167974882, |
|
"grad_norm": 145.6894284610869, |
|
"learning_rate": 4.139035550786494e-07, |
|
"logits/chosen": -0.039321091026067734, |
|
"logits/rejected": 0.5018073320388794, |
|
"logps/chosen": -1734.091796875, |
|
"logps/rejected": -1908.339111328125, |
|
"loss": 1.716, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -14.77747917175293, |
|
"rewards/margins": 1.9848415851593018, |
|
"rewards/rejected": -16.76232147216797, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.35583464154892724, |
|
"grad_norm": 183.78050890033984, |
|
"learning_rate": 4.0689152079869306e-07, |
|
"logits/chosen": -0.5724295377731323, |
|
"logits/rejected": 0.023262571543455124, |
|
"logps/chosen": -1660.732177734375, |
|
"logps/rejected": -1876.1025390625, |
|
"loss": 1.8439, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -13.912447929382324, |
|
"rewards/margins": 2.488671064376831, |
|
"rewards/rejected": -16.401119232177734, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.3663003663003663, |
|
"grad_norm": 149.28700648360655, |
|
"learning_rate": 3.99669658015821e-07, |
|
"logits/chosen": 0.006322336383163929, |
|
"logits/rejected": 0.6332755088806152, |
|
"logps/chosen": -1966.5765380859375, |
|
"logps/rejected": -2201.843505859375, |
|
"loss": 1.6671, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -16.753414154052734, |
|
"rewards/margins": 2.7041499614715576, |
|
"rewards/rejected": -19.457565307617188, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.37676609105180536, |
|
"grad_norm": 237.65668361495474, |
|
"learning_rate": 3.92247625331392e-07, |
|
"logits/chosen": -0.21500203013420105, |
|
"logits/rejected": 0.6255682110786438, |
|
"logps/chosen": -1989.7509765625, |
|
"logps/rejected": -2207.83349609375, |
|
"loss": 1.6927, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -17.038667678833008, |
|
"rewards/margins": 2.4120330810546875, |
|
"rewards/rejected": -19.450698852539062, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.3872318158032444, |
|
"grad_norm": 152.55773033990448, |
|
"learning_rate": 3.846353490562664e-07, |
|
"logits/chosen": -0.39199286699295044, |
|
"logits/rejected": -0.043508779257535934, |
|
"logps/chosen": -1889.5286865234375, |
|
"logps/rejected": -2139.589111328125, |
|
"loss": 1.7098, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -16.262523651123047, |
|
"rewards/margins": 2.7768733501434326, |
|
"rewards/rejected": -19.039398193359375, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.3976975405546834, |
|
"grad_norm": 239.86422108427834, |
|
"learning_rate": 3.768430099352445e-07, |
|
"logits/chosen": -0.5338395833969116, |
|
"logits/rejected": -0.10323655605316162, |
|
"logps/chosen": -1830.7080078125, |
|
"logps/rejected": -2104.773681640625, |
|
"loss": 1.786, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -15.76060962677002, |
|
"rewards/margins": 2.8237688541412354, |
|
"rewards/rejected": -18.58437728881836, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.40816326530612246, |
|
"grad_norm": 137.89263121746114, |
|
"learning_rate": 3.6888102953122304e-07, |
|
"logits/chosen": -0.3421451449394226, |
|
"logits/rejected": 0.2877078056335449, |
|
"logps/chosen": -1774.384765625, |
|
"logps/rejected": -2007.7366943359375, |
|
"loss": 1.9274, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -14.92773151397705, |
|
"rewards/margins": 2.465951442718506, |
|
"rewards/rejected": -17.393680572509766, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.4186289900575615, |
|
"grad_norm": 164.86784545063486, |
|
"learning_rate": 3.607600562872785e-07, |
|
"logits/chosen": -0.7335325479507446, |
|
"logits/rejected": -0.33919858932495117, |
|
"logps/chosen": -1733.375244140625, |
|
"logps/rejected": -1963.0279541015625, |
|
"loss": 1.6642, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -14.63383960723877, |
|
"rewards/margins": 2.319460391998291, |
|
"rewards/rejected": -16.95330047607422, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.4186289900575615, |
|
"eval_logits/chosen": -0.7751028537750244, |
|
"eval_logits/rejected": -0.08748837560415268, |
|
"eval_logps/chosen": -1731.152587890625, |
|
"eval_logps/rejected": -2045.1492919921875, |
|
"eval_loss": 1.6421091556549072, |
|
"eval_rewards/accuracies": 0.625, |
|
"eval_rewards/chosen": -14.491846084594727, |
|
"eval_rewards/margins": 3.3575782775878906, |
|
"eval_rewards/rejected": -17.849422454833984, |
|
"eval_runtime": 176.0651, |
|
"eval_samples_per_second": 11.359, |
|
"eval_steps_per_second": 0.358, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.4290947148090005, |
|
"grad_norm": 128.91689311765836, |
|
"learning_rate": 3.5249095128531856e-07, |
|
"logits/chosen": -0.10633065551519394, |
|
"logits/rejected": 0.350477933883667, |
|
"logps/chosen": -1862.1099853515625, |
|
"logps/rejected": -2067.15673828125, |
|
"loss": 1.7556, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -15.903741836547852, |
|
"rewards/margins": 2.2993245124816895, |
|
"rewards/rejected": -18.203065872192383, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.43956043956043955, |
|
"grad_norm": 187.2282869549343, |
|
"learning_rate": 3.4408477372034736e-07, |
|
"logits/chosen": -0.2209610939025879, |
|
"logits/rejected": 0.7663095593452454, |
|
"logps/chosen": -1825.959228515625, |
|
"logps/rejected": -2182.580810546875, |
|
"loss": 1.8542, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -15.437875747680664, |
|
"rewards/margins": 3.867755174636841, |
|
"rewards/rejected": -19.30562973022461, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.4500261643118786, |
|
"grad_norm": 150.13979068919696, |
|
"learning_rate": 3.3555276610977276e-07, |
|
"logits/chosen": -1.128701090812683, |
|
"logits/rejected": -0.5558885335922241, |
|
"logps/chosen": -1832.6103515625, |
|
"logps/rejected": -2176.197265625, |
|
"loss": 1.5079, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -15.788568496704102, |
|
"rewards/margins": 3.47161602973938, |
|
"rewards/rejected": -19.26018714904785, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.4604918890633176, |
|
"grad_norm": 163.41066719667168, |
|
"learning_rate": 3.269063392575352e-07, |
|
"logits/chosen": -0.6949409246444702, |
|
"logits/rejected": -0.05746125057339668, |
|
"logps/chosen": -1597.5341796875, |
|
"logps/rejected": -1821.0198974609375, |
|
"loss": 1.4868, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -13.023595809936523, |
|
"rewards/margins": 2.632272481918335, |
|
"rewards/rejected": -15.655868530273438, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.47095761381475665, |
|
"grad_norm": 133.46596474594617, |
|
"learning_rate": 3.1815705699316964e-07, |
|
"logits/chosen": -0.4808398187160492, |
|
"logits/rejected": 0.3264926075935364, |
|
"logps/chosen": -1599.6370849609375, |
|
"logps/rejected": -1936.6884765625, |
|
"loss": 1.5413, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -13.172491073608398, |
|
"rewards/margins": 3.4112179279327393, |
|
"rewards/rejected": -16.583707809448242, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.48142333856619574, |
|
"grad_norm": 155.84007478164062, |
|
"learning_rate": 3.0931662070620794e-07, |
|
"logits/chosen": -0.719369113445282, |
|
"logits/rejected": -0.06152462959289551, |
|
"logps/chosen": -1643.2447509765625, |
|
"logps/rejected": -1872.9976806640625, |
|
"loss": 1.7906, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -13.85786247253418, |
|
"rewards/margins": 2.4219117164611816, |
|
"rewards/rejected": -16.279773712158203, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.49188906331763477, |
|
"grad_norm": 203.3322056694353, |
|
"learning_rate": 3.003968536966078e-07, |
|
"logits/chosen": -0.4609583020210266, |
|
"logits/rejected": -0.09374441206455231, |
|
"logps/chosen": -1654.1614990234375, |
|
"logps/rejected": -1845.5618896484375, |
|
"loss": 1.7718, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -13.703729629516602, |
|
"rewards/margins": 2.2525086402893066, |
|
"rewards/rejected": -15.956239700317383, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.5023547880690737, |
|
"grad_norm": 156.4799546194198, |
|
"learning_rate": 2.9140968536213693e-07, |
|
"logits/chosen": -0.2353781908750534, |
|
"logits/rejected": 0.5946909785270691, |
|
"logps/chosen": -1859.3265380859375, |
|
"logps/rejected": -2325.88134765625, |
|
"loss": 1.3829, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -15.921140670776367, |
|
"rewards/margins": 4.824706077575684, |
|
"rewards/rejected": -20.745845794677734, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.5128205128205128, |
|
"grad_norm": 160.19325879757844, |
|
"learning_rate": 2.823671352438608e-07, |
|
"logits/chosen": -0.9654836654663086, |
|
"logits/rejected": -0.002035105135291815, |
|
"logps/chosen": -1637.873291015625, |
|
"logps/rejected": -2143.010986328125, |
|
"loss": 1.6206, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -13.259417533874512, |
|
"rewards/margins": 5.606515407562256, |
|
"rewards/rejected": -18.86593246459961, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.5232862375719518, |
|
"grad_norm": 221.83952267135834, |
|
"learning_rate": 2.73281296951072e-07, |
|
"logits/chosen": -0.6597784161567688, |
|
"logits/rejected": -0.14649493992328644, |
|
"logps/chosen": -1530.5738525390625, |
|
"logps/rejected": -1781.8070068359375, |
|
"loss": 1.6328, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -12.7192964553833, |
|
"rewards/margins": 2.8244967460632324, |
|
"rewards/rejected": -15.543792724609375, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.5232862375719518, |
|
"eval_logits/chosen": -0.6590258479118347, |
|
"eval_logits/rejected": -0.091790109872818, |
|
"eval_logps/chosen": -1589.3370361328125, |
|
"eval_logps/rejected": -1890.562255859375, |
|
"eval_loss": 1.5119922161102295, |
|
"eval_rewards/accuracies": 0.6388888955116272, |
|
"eval_rewards/chosen": -13.073691368103027, |
|
"eval_rewards/margins": 3.229863166809082, |
|
"eval_rewards/rejected": -16.303556442260742, |
|
"eval_runtime": 177.8158, |
|
"eval_samples_per_second": 11.248, |
|
"eval_steps_per_second": 0.354, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.533751962323391, |
|
"grad_norm": 187.4336485549293, |
|
"learning_rate": 2.641643219871597e-07, |
|
"logits/chosen": -0.5598984360694885, |
|
"logits/rejected": -0.2727218270301819, |
|
"logps/chosen": -1694.568359375, |
|
"logps/rejected": -2086.98193359375, |
|
"loss": 1.4069, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -14.106300354003906, |
|
"rewards/margins": 4.125433921813965, |
|
"rewards/rejected": -18.231733322143555, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.54421768707483, |
|
"grad_norm": 180.24950333654212, |
|
"learning_rate": 2.550284034980507e-07, |
|
"logits/chosen": -0.652435302734375, |
|
"logits/rejected": -0.25857192277908325, |
|
"logps/chosen": -1941.6849365234375, |
|
"logps/rejected": -2231.46337890625, |
|
"loss": 1.6022, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -16.69613265991211, |
|
"rewards/margins": 3.1190426349639893, |
|
"rewards/rejected": -19.815174102783203, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.554683411826269, |
|
"grad_norm": 147.71519410172087, |
|
"learning_rate": 2.4588575996495794e-07, |
|
"logits/chosen": -0.6198351979255676, |
|
"logits/rejected": -0.19036616384983063, |
|
"logps/chosen": -1601.6470947265625, |
|
"logps/rejected": -1820.4556884765625, |
|
"loss": 1.5136, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -13.329002380371094, |
|
"rewards/margins": 2.3253164291381836, |
|
"rewards/rejected": -15.654316902160645, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.565149136577708, |
|
"grad_norm": 146.6770433780799, |
|
"learning_rate": 2.367486188632446e-07, |
|
"logits/chosen": -0.7303057909011841, |
|
"logits/rejected": 0.15564236044883728, |
|
"logps/chosen": -1670.916015625, |
|
"logps/rejected": -2011.5406494140625, |
|
"loss": 1.5458, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -13.805659294128418, |
|
"rewards/margins": 3.703829288482666, |
|
"rewards/rejected": -17.509489059448242, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.5756148613291471, |
|
"grad_norm": 206.94359776232758, |
|
"learning_rate": 2.276292003092593e-07, |
|
"logits/chosen": -0.22513580322265625, |
|
"logits/rejected": 0.4895138740539551, |
|
"logps/chosen": -1914.7532958984375, |
|
"logps/rejected": -2300.30322265625, |
|
"loss": 1.6801, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -16.434810638427734, |
|
"rewards/margins": 4.275403022766113, |
|
"rewards/rejected": -20.71021270751953, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.5860805860805861, |
|
"grad_norm": 175.41735239090949, |
|
"learning_rate": 2.185397007170141e-07, |
|
"logits/chosen": -0.1453290730714798, |
|
"logits/rejected": 0.3121495842933655, |
|
"logps/chosen": -1876.300537109375, |
|
"logps/rejected": -2229.38134765625, |
|
"loss": 1.3878, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -15.934832572937012, |
|
"rewards/margins": 3.8099570274353027, |
|
"rewards/rejected": -19.744789123535156, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.5965463108320251, |
|
"grad_norm": 142.79294258337345, |
|
"learning_rate": 2.094922764865619e-07, |
|
"logits/chosen": -0.276650995016098, |
|
"logits/rejected": 0.13945253193378448, |
|
"logps/chosen": -1827.0634765625, |
|
"logps/rejected": -2034.280517578125, |
|
"loss": 1.4902, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -15.475687980651855, |
|
"rewards/margins": 2.2050392627716064, |
|
"rewards/rejected": -17.680728912353516, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.6070120355834642, |
|
"grad_norm": 245.80968468908674, |
|
"learning_rate": 2.0049902774588797e-07, |
|
"logits/chosen": -0.011815989390015602, |
|
"logits/rejected": 0.42436084151268005, |
|
"logps/chosen": -1794.5543212890625, |
|
"logps/rejected": -2061.93310546875, |
|
"loss": 1.4461, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -15.412150382995605, |
|
"rewards/margins": 3.078895330429077, |
|
"rewards/rejected": -18.491044998168945, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.6174777603349032, |
|
"grad_norm": 175.38280547329734, |
|
"learning_rate": 1.9157198216806238e-07, |
|
"logits/chosen": -0.3044319152832031, |
|
"logits/rejected": 0.3406422734260559, |
|
"logps/chosen": -1649.8509521484375, |
|
"logps/rejected": -2006.366455078125, |
|
"loss": 1.5446, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -13.71898365020752, |
|
"rewards/margins": 3.657163143157959, |
|
"rewards/rejected": -17.376148223876953, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.6279434850863422, |
|
"grad_norm": 203.04339818262545, |
|
"learning_rate": 1.8272307888529274e-07, |
|
"logits/chosen": 0.16477735340595245, |
|
"logits/rejected": 0.6171606183052063, |
|
"logps/chosen": -1870.41015625, |
|
"logps/rejected": -2165.638427734375, |
|
"loss": 1.6032, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -15.878863334655762, |
|
"rewards/margins": 3.1766743659973145, |
|
"rewards/rejected": -19.055538177490234, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.6279434850863422, |
|
"eval_logits/chosen": 0.01903720200061798, |
|
"eval_logits/rejected": 0.6402472853660583, |
|
"eval_logps/chosen": -2015.7071533203125, |
|
"eval_logps/rejected": -2402.58447265625, |
|
"eval_loss": 1.4751698970794678, |
|
"eval_rewards/accuracies": 0.6230158805847168, |
|
"eval_rewards/chosen": -17.33738899230957, |
|
"eval_rewards/margins": 4.086385250091553, |
|
"eval_rewards/rejected": -21.42377471923828, |
|
"eval_runtime": 176.4506, |
|
"eval_samples_per_second": 11.335, |
|
"eval_steps_per_second": 0.357, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.6384092098377813, |
|
"grad_norm": 184.64896406440843, |
|
"learning_rate": 1.7396415252139288e-07, |
|
"logits/chosen": 0.0034784465096890926, |
|
"logits/rejected": 0.6044633388519287, |
|
"logps/chosen": -2050.113037109375, |
|
"logps/rejected": -2622.564453125, |
|
"loss": 1.5229, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -17.44542694091797, |
|
"rewards/margins": 6.333140850067139, |
|
"rewards/rejected": -23.778566360473633, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.6488749345892203, |
|
"grad_norm": 150.92780625161797, |
|
"learning_rate": 1.6530691736402316e-07, |
|
"logits/chosen": -0.05873150750994682, |
|
"logits/rejected": 0.2572210133075714, |
|
"logps/chosen": -1822.690185546875, |
|
"logps/rejected": -2140.002685546875, |
|
"loss": 1.3047, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -15.516670227050781, |
|
"rewards/margins": 3.519291400909424, |
|
"rewards/rejected": -19.035961151123047, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.6593406593406593, |
|
"grad_norm": 158.62413320623054, |
|
"learning_rate": 1.5676295169786864e-07, |
|
"logits/chosen": -0.5535549521446228, |
|
"logits/rejected": -0.16974008083343506, |
|
"logps/chosen": -1799.411376953125, |
|
"logps/rejected": -2184.095458984375, |
|
"loss": 1.4004, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -15.247261047363281, |
|
"rewards/margins": 4.233900547027588, |
|
"rewards/rejected": -19.481159210205078, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.6698063840920984, |
|
"grad_norm": 174.63990723873954, |
|
"learning_rate": 1.483436823197092e-07, |
|
"logits/chosen": -0.49727511405944824, |
|
"logits/rejected": -0.09024439752101898, |
|
"logps/chosen": -1910.181396484375, |
|
"logps/rejected": -2272.175537109375, |
|
"loss": 1.2582, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -16.431602478027344, |
|
"rewards/margins": 4.003415107727051, |
|
"rewards/rejected": -20.43501853942871, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.6802721088435374, |
|
"grad_norm": 212.30897956956616, |
|
"learning_rate": 1.4006036925609243e-07, |
|
"logits/chosen": -0.5441917777061462, |
|
"logits/rejected": -0.3759006857872009, |
|
"logps/chosen": -1762.1038818359375, |
|
"logps/rejected": -1993.853515625, |
|
"loss": 1.3183, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -14.94421100616455, |
|
"rewards/margins": 2.2772457599639893, |
|
"rewards/rejected": -17.221454620361328, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.6907378335949764, |
|
"grad_norm": 122.40725726992933, |
|
"learning_rate": 1.319240907040458e-07, |
|
"logits/chosen": -0.578727126121521, |
|
"logits/rejected": -0.15290720760822296, |
|
"logps/chosen": -1786.3648681640625, |
|
"logps/rejected": -2103.92919921875, |
|
"loss": 1.5482, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -15.042015075683594, |
|
"rewards/margins": 3.422727584838867, |
|
"rewards/rejected": -18.46474266052246, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.7012035583464155, |
|
"grad_norm": 273.40146184819037, |
|
"learning_rate": 1.239457282149695e-07, |
|
"logits/chosen": -0.6542818546295166, |
|
"logits/rejected": -0.6405806541442871, |
|
"logps/chosen": -1718.8697509765625, |
|
"logps/rejected": -2025.167236328125, |
|
"loss": 1.1528, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -14.303213119506836, |
|
"rewards/margins": 3.1175124645233154, |
|
"rewards/rejected": -17.420726776123047, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.7116692830978545, |
|
"grad_norm": 246.28508875936285, |
|
"learning_rate": 1.1613595214152711e-07, |
|
"logits/chosen": -0.6755629777908325, |
|
"logits/rejected": -0.26193898916244507, |
|
"logps/chosen": -1862.4000244140625, |
|
"logps/rejected": -2191.969482421875, |
|
"loss": 1.3671, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -15.507433891296387, |
|
"rewards/margins": 3.5333023071289062, |
|
"rewards/rejected": -19.04073715209961, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.7221350078492935, |
|
"grad_norm": 216.14843384209277, |
|
"learning_rate": 1.0850520736699362e-07, |
|
"logits/chosen": -0.6002136468887329, |
|
"logits/rejected": 0.03606845811009407, |
|
"logps/chosen": -1838.6982421875, |
|
"logps/rejected": -2214.07470703125, |
|
"loss": 1.3895, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -15.680435180664062, |
|
"rewards/margins": 4.133326053619385, |
|
"rewards/rejected": -19.813762664794922, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.7326007326007326, |
|
"grad_norm": 162.01079027631573, |
|
"learning_rate": 1.0106369933615042e-07, |
|
"logits/chosen": -0.7846351861953735, |
|
"logits/rejected": -0.5915166735649109, |
|
"logps/chosen": -1752.784423828125, |
|
"logps/rejected": -2021.7802734375, |
|
"loss": 1.5039, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -14.986889839172363, |
|
"rewards/margins": 2.845428943634033, |
|
"rewards/rejected": -17.832317352294922, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.7326007326007326, |
|
"eval_logits/chosen": -0.8898468017578125, |
|
"eval_logits/rejected": -0.4967605769634247, |
|
"eval_logps/chosen": -1694.96240234375, |
|
"eval_logps/rejected": -2016.4490966796875, |
|
"eval_loss": 1.3852962255477905, |
|
"eval_rewards/accuracies": 0.6527777910232544, |
|
"eval_rewards/chosen": -14.129942893981934, |
|
"eval_rewards/margins": 3.432478666305542, |
|
"eval_rewards/rejected": -17.562421798706055, |
|
"eval_runtime": 176.0679, |
|
"eval_samples_per_second": 11.359, |
|
"eval_steps_per_second": 0.358, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.7430664573521716, |
|
"grad_norm": 177.45761000957364, |
|
"learning_rate": 9.382138040640714e-08, |
|
"logits/chosen": -1.012629747390747, |
|
"logits/rejected": -0.6268833875656128, |
|
"logps/chosen": -1776.499755859375, |
|
"logps/rejected": -2017.539794921875, |
|
"loss": 1.5264, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -15.180249214172363, |
|
"rewards/margins": 2.565770387649536, |
|
"rewards/rejected": -17.74601936340332, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.7535321821036107, |
|
"grad_norm": 140.94359920373847, |
|
"learning_rate": 8.678793653740632e-08, |
|
"logits/chosen": -0.9271895289421082, |
|
"logits/rejected": -0.47789469361305237, |
|
"logps/chosen": -1664.4437255859375, |
|
"logps/rejected": -1977.908447265625, |
|
"loss": 1.3295, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -13.8070650100708, |
|
"rewards/margins": 3.4972636699676514, |
|
"rewards/rejected": -17.3043270111084, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.7639979068550498, |
|
"grad_norm": 190.75937551525504, |
|
"learning_rate": 7.997277433690983e-08, |
|
"logits/chosen": -0.8303499221801758, |
|
"logits/rejected": -0.2948521077632904, |
|
"logps/chosen": -1813.2340087890625, |
|
"logps/rejected": -2049.240234375, |
|
"loss": 1.4631, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -15.478715896606445, |
|
"rewards/margins": 2.6438088417053223, |
|
"rewards/rejected": -18.12252426147461, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.7744636316064888, |
|
"grad_norm": 164.74206538760382, |
|
"learning_rate": 7.338500848029602e-08, |
|
"logits/chosen": -0.6835179924964905, |
|
"logits/rejected": -0.42263850569725037, |
|
"logps/chosen": -1808.6490478515625, |
|
"logps/rejected": -2096.81396484375, |
|
"loss": 1.2242, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -15.557826042175293, |
|
"rewards/margins": 3.0879101753234863, |
|
"rewards/rejected": -18.645736694335938, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.7849293563579278, |
|
"grad_norm": 135.0757551116068, |
|
"learning_rate": 6.70334495204884e-08, |
|
"logits/chosen": -0.5583680868148804, |
|
"logits/rejected": -0.36530551314353943, |
|
"logps/chosen": -1854.912353515625, |
|
"logps/rejected": -2177.262451171875, |
|
"loss": 1.3344, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -16.08046531677246, |
|
"rewards/margins": 3.2556281089782715, |
|
"rewards/rejected": -19.33609390258789, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.7953950811093669, |
|
"grad_norm": 158.01784405358154, |
|
"learning_rate": 6.092659210462231e-08, |
|
"logits/chosen": -0.653573215007782, |
|
"logits/rejected": -0.4876467287540436, |
|
"logps/chosen": -1903.880615234375, |
|
"logps/rejected": -2182.48291015625, |
|
"loss": 1.4038, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -16.625337600708008, |
|
"rewards/margins": 2.7693800926208496, |
|
"rewards/rejected": -19.394718170166016, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.8058608058608059, |
|
"grad_norm": 169.97964049682443, |
|
"learning_rate": 5.507260361320737e-08, |
|
"logits/chosen": -1.0366981029510498, |
|
"logits/rejected": -0.9037246704101562, |
|
"logps/chosen": -1879.755126953125, |
|
"logps/rejected": -2001.697265625, |
|
"loss": 1.286, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -15.764042854309082, |
|
"rewards/margins": 1.033178687095642, |
|
"rewards/rejected": -16.797222137451172, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.8163265306122449, |
|
"grad_norm": 162.02338031146334, |
|
"learning_rate": 4.947931323697982e-08, |
|
"logits/chosen": -1.0304605960845947, |
|
"logits/rejected": -0.9400796890258789, |
|
"logps/chosen": -1669.2073974609375, |
|
"logps/rejected": -2004.525390625, |
|
"loss": 1.5927, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -13.502462387084961, |
|
"rewards/margins": 3.8671538829803467, |
|
"rewards/rejected": -17.369617462158203, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.826792255363684, |
|
"grad_norm": 140.7368428333841, |
|
"learning_rate": 4.415420150605398e-08, |
|
"logits/chosen": -1.0811887979507446, |
|
"logits/rejected": -0.5253428220748901, |
|
"logps/chosen": -1726.182373046875, |
|
"logps/rejected": -2063.27099609375, |
|
"loss": 1.4648, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -14.476069450378418, |
|
"rewards/margins": 3.7039875984191895, |
|
"rewards/rejected": -18.180057525634766, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.837257980115123, |
|
"grad_norm": 202.82775780509928, |
|
"learning_rate": 3.9104390285376374e-08, |
|
"logits/chosen": -0.835501492023468, |
|
"logits/rejected": -0.5900505781173706, |
|
"logps/chosen": -1749.853759765625, |
|
"logps/rejected": -1951.329345703125, |
|
"loss": 1.3527, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -14.83747673034668, |
|
"rewards/margins": 2.0425891876220703, |
|
"rewards/rejected": -16.88006591796875, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.837257980115123, |
|
"eval_logits/chosen": -1.0374784469604492, |
|
"eval_logits/rejected": -0.6750361919403076, |
|
"eval_logps/chosen": -1672.130615234375, |
|
"eval_logps/rejected": -1986.035888671875, |
|
"eval_loss": 1.366306185722351, |
|
"eval_rewards/accuracies": 0.6448412537574768, |
|
"eval_rewards/chosen": -13.901623725891113, |
|
"eval_rewards/margins": 3.3566668033599854, |
|
"eval_rewards/rejected": -17.25829315185547, |
|
"eval_runtime": 176.0547, |
|
"eval_samples_per_second": 11.36, |
|
"eval_steps_per_second": 0.358, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.847723704866562, |
|
"grad_norm": 218.80895490878117, |
|
"learning_rate": 3.433663324986208e-08, |
|
"logits/chosen": -1.2597501277923584, |
|
"logits/rejected": -0.7243804931640625, |
|
"logps/chosen": -1665.3489990234375, |
|
"logps/rejected": -2045.541259765625, |
|
"loss": 1.4186, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -13.741106986999512, |
|
"rewards/margins": 4.249786853790283, |
|
"rewards/rejected": -17.990894317626953, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.858189429618001, |
|
"grad_norm": 175.67069527310957, |
|
"learning_rate": 2.9857306851953897e-08, |
|
"logits/chosen": -1.075448751449585, |
|
"logits/rejected": -0.8459098935127258, |
|
"logps/chosen": -1705.802734375, |
|
"logps/rejected": -1971.207275390625, |
|
"loss": 1.1819, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -14.010282516479492, |
|
"rewards/margins": 2.8707687854766846, |
|
"rewards/rejected": -16.881052017211914, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.8686551543694401, |
|
"grad_norm": 150.14969837730865, |
|
"learning_rate": 2.567240179368185e-08, |
|
"logits/chosen": -0.8211779594421387, |
|
"logits/rejected": -0.672277569770813, |
|
"logps/chosen": -1724.1959228515625, |
|
"logps/rejected": -1975.289306640625, |
|
"loss": 1.3771, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -14.66050910949707, |
|
"rewards/margins": 2.5944151878356934, |
|
"rewards/rejected": -17.25492286682129, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.8791208791208791, |
|
"grad_norm": 143.51050018041488, |
|
"learning_rate": 2.1787515014630357e-08, |
|
"logits/chosen": -0.9592329263687134, |
|
"logits/rejected": -0.6304475665092468, |
|
"logps/chosen": -1664.050048828125, |
|
"logps/rejected": -2090.85107421875, |
|
"loss": 1.1841, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -13.817761421203613, |
|
"rewards/margins": 4.430028915405273, |
|
"rewards/rejected": -18.247791290283203, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.8895866038723181, |
|
"grad_norm": 160.09590738302992, |
|
"learning_rate": 1.820784220652766e-08, |
|
"logits/chosen": -0.8976573944091797, |
|
"logits/rejected": -0.619744598865509, |
|
"logps/chosen": -1732.6185302734375, |
|
"logps/rejected": -2009.6126708984375, |
|
"loss": 1.3946, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -14.51783561706543, |
|
"rewards/margins": 3.2437214851379395, |
|
"rewards/rejected": -17.761554718017578, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.9000523286237572, |
|
"grad_norm": 140.45079725700174, |
|
"learning_rate": 1.4938170864468636e-08, |
|
"logits/chosen": -1.2183126211166382, |
|
"logits/rejected": -0.7451462149620056, |
|
"logps/chosen": -1663.8861083984375, |
|
"logps/rejected": -2030.5501708984375, |
|
"loss": 1.403, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -13.755941390991211, |
|
"rewards/margins": 3.9360270500183105, |
|
"rewards/rejected": -17.691970825195312, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.9105180533751962, |
|
"grad_norm": 177.87764974909854, |
|
"learning_rate": 1.1982873884064465e-08, |
|
"logits/chosen": -1.142114281654358, |
|
"logits/rejected": -0.8570957183837891, |
|
"logps/chosen": -1702.1165771484375, |
|
"logps/rejected": -2053.07568359375, |
|
"loss": 1.364, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -14.30346393585205, |
|
"rewards/margins": 3.762028932571411, |
|
"rewards/rejected": -18.06549072265625, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.9209837781266352, |
|
"grad_norm": 138.3301348415624, |
|
"learning_rate": 9.345903713082304e-09, |
|
"logits/chosen": -1.0760080814361572, |
|
"logits/rejected": -0.866096019744873, |
|
"logps/chosen": -1735.3382568359375, |
|
"logps/rejected": -2023.660888671875, |
|
"loss": 1.355, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -14.312044143676758, |
|
"rewards/margins": 2.978205680847168, |
|
"rewards/rejected": -17.290246963500977, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.9314495028780743, |
|
"grad_norm": 179.16273994251034, |
|
"learning_rate": 7.030787065396865e-09, |
|
"logits/chosen": -1.0234577655792236, |
|
"logits/rejected": -0.9720734357833862, |
|
"logps/chosen": -1736.5269775390625, |
|
"logps/rejected": -2083.37939453125, |
|
"loss": 1.4332, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -14.488537788391113, |
|
"rewards/margins": 3.543290376663208, |
|
"rewards/rejected": -18.031827926635742, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.9419152276295133, |
|
"grad_norm": 163.4835379161221, |
|
"learning_rate": 5.04062020432286e-09, |
|
"logits/chosen": -0.8189510107040405, |
|
"logits/rejected": -0.8584410548210144, |
|
"logps/chosen": -1706.8818359375, |
|
"logps/rejected": -1968.8441162109375, |
|
"loss": 1.5137, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -14.264904975891113, |
|
"rewards/margins": 2.609503984451294, |
|
"rewards/rejected": -16.874408721923828, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.9419152276295133, |
|
"eval_logits/chosen": -1.001752495765686, |
|
"eval_logits/rejected": -0.673967182636261, |
|
"eval_logps/chosen": -1735.9151611328125, |
|
"eval_logps/rejected": -2073.3388671875, |
|
"eval_loss": 1.3373700380325317, |
|
"eval_rewards/accuracies": 0.6408730149269104, |
|
"eval_rewards/chosen": -14.539473533630371, |
|
"eval_rewards/margins": 3.5918467044830322, |
|
"eval_rewards/rejected": -18.13132095336914, |
|
"eval_runtime": 176.3334, |
|
"eval_samples_per_second": 11.342, |
|
"eval_steps_per_second": 0.357, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.9523809523809523, |
|
"grad_norm": 190.32378571700949, |
|
"learning_rate": 3.3780648016376866e-09, |
|
"logits/chosen": -0.9321626424789429, |
|
"logits/rejected": -0.5902298092842102, |
|
"logps/chosen": -1696.779296875, |
|
"logps/rejected": -1922.1607666015625, |
|
"loss": 1.4578, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -14.7335786819458, |
|
"rewards/margins": 2.3592441082000732, |
|
"rewards/rejected": -17.092823028564453, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.9628466771323915, |
|
"grad_norm": 183.98567167006505, |
|
"learning_rate": 2.0453443778310766e-09, |
|
"logits/chosen": -1.0600922107696533, |
|
"logits/rejected": -0.7931039929389954, |
|
"logps/chosen": -1763.392822265625, |
|
"logps/rejected": -2107.805419921875, |
|
"loss": 1.3202, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -14.767558097839355, |
|
"rewards/margins": 3.794466495513916, |
|
"rewards/rejected": -18.562023162841797, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.9733124018838305, |
|
"grad_norm": 181.56437725274117, |
|
"learning_rate": 1.0442413283435758e-09, |
|
"logits/chosen": -1.1890182495117188, |
|
"logits/rejected": -0.5295430421829224, |
|
"logps/chosen": -1729.0921630859375, |
|
"logps/rejected": -1985.2783203125, |
|
"loss": 1.5669, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -14.14258098602295, |
|
"rewards/margins": 3.068037748336792, |
|
"rewards/rejected": -17.210617065429688, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.9837781266352695, |
|
"grad_norm": 173.28786175289625, |
|
"learning_rate": 3.760945397705828e-10, |
|
"logits/chosen": -0.856045126914978, |
|
"logits/rejected": -0.7398639917373657, |
|
"logps/chosen": -1713.3883056640625, |
|
"logps/rejected": -2039.740966796875, |
|
"loss": 1.266, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -14.502069473266602, |
|
"rewards/margins": 3.2981293201446533, |
|
"rewards/rejected": -17.800199508666992, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.9942438513867086, |
|
"grad_norm": 188.65879146663107, |
|
"learning_rate": 4.17975992204056e-11, |
|
"logits/chosen": -1.168084740638733, |
|
"logits/rejected": -0.8855546116828918, |
|
"logps/chosen": -1736.102783203125, |
|
"logps/rejected": -1955.3255615234375, |
|
"loss": 1.4604, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -14.33259391784668, |
|
"rewards/margins": 2.370856761932373, |
|
"rewards/rejected": -16.70345115661621, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.9994767137624281, |
|
"step": 955, |
|
"total_flos": 0.0, |
|
"train_loss": 2.1165736393154604, |
|
"train_runtime": 18133.1885, |
|
"train_samples_per_second": 3.371, |
|
"train_steps_per_second": 0.053 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 955, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |