zephyr-7b-dpo-full-ensemble-mixv3 / trainer_state.json
bigheiniuJ's picture
Model save
6b46ae5 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9994767137624281,
"eval_steps": 100,
"global_step": 955,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0010465724751439038,
"grad_norm": 9.51283368668585,
"learning_rate": 5.208333333333333e-09,
"logits/chosen": -3.21875,
"logits/rejected": -3.21875,
"logps/chosen": -250.0,
"logps/rejected": -364.0,
"loss": 0.6914,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.010465724751439037,
"grad_norm": 9.570547962527824,
"learning_rate": 5.208333333333333e-08,
"logits/chosen": -3.25,
"logits/rejected": -3.28125,
"logps/chosen": -298.0,
"logps/rejected": -278.0,
"loss": 0.6918,
"rewards/accuracies": 0.1875,
"rewards/chosen": -0.0004787445068359375,
"rewards/margins": -0.000843048095703125,
"rewards/rejected": 0.0003643035888671875,
"step": 10
},
{
"epoch": 0.020931449502878074,
"grad_norm": 9.308254903442457,
"learning_rate": 1.0416666666666667e-07,
"logits/chosen": -3.078125,
"logits/rejected": -3.171875,
"logps/chosen": -286.0,
"logps/rejected": -294.0,
"loss": 0.6916,
"rewards/accuracies": 0.25,
"rewards/chosen": -0.0016326904296875,
"rewards/margins": 0.00031280517578125,
"rewards/rejected": -0.00194549560546875,
"step": 20
},
{
"epoch": 0.03139717425431711,
"grad_norm": 8.758887635948838,
"learning_rate": 1.5624999999999999e-07,
"logits/chosen": -3.21875,
"logits/rejected": -3.25,
"logps/chosen": -294.0,
"logps/rejected": -264.0,
"loss": 0.6899,
"rewards/accuracies": 0.44999998807907104,
"rewards/chosen": -0.00150299072265625,
"rewards/margins": 0.002899169921875,
"rewards/rejected": -0.00439453125,
"step": 30
},
{
"epoch": 0.04186289900575615,
"grad_norm": 9.50313648843035,
"learning_rate": 2.0833333333333333e-07,
"logits/chosen": -3.1875,
"logits/rejected": -3.125,
"logps/chosen": -288.0,
"logps/rejected": -302.0,
"loss": 0.684,
"rewards/accuracies": 0.65625,
"rewards/chosen": -0.00579833984375,
"rewards/margins": 0.0184326171875,
"rewards/rejected": -0.0242919921875,
"step": 40
},
{
"epoch": 0.052328623757195186,
"grad_norm": 8.682508739901571,
"learning_rate": 2.604166666666667e-07,
"logits/chosen": -3.25,
"logits/rejected": -3.21875,
"logps/chosen": -320.0,
"logps/rejected": -302.0,
"loss": 0.6706,
"rewards/accuracies": 0.7875000238418579,
"rewards/chosen": -0.025146484375,
"rewards/margins": 0.0498046875,
"rewards/rejected": -0.07470703125,
"step": 50
},
{
"epoch": 0.06279434850863422,
"grad_norm": 11.261511786558849,
"learning_rate": 3.1249999999999997e-07,
"logits/chosen": -3.15625,
"logits/rejected": -3.171875,
"logps/chosen": -296.0,
"logps/rejected": -316.0,
"loss": 0.6462,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": -0.047119140625,
"rewards/margins": 0.09326171875,
"rewards/rejected": -0.140625,
"step": 60
},
{
"epoch": 0.07326007326007326,
"grad_norm": 12.224158124661995,
"learning_rate": 3.645833333333333e-07,
"logits/chosen": -3.109375,
"logits/rejected": -3.109375,
"logps/chosen": -310.0,
"logps/rejected": -304.0,
"loss": 0.5955,
"rewards/accuracies": 0.856249988079071,
"rewards/chosen": -0.0294189453125,
"rewards/margins": 0.26171875,
"rewards/rejected": -0.29296875,
"step": 70
},
{
"epoch": 0.0837257980115123,
"grad_norm": 24.94058449952297,
"learning_rate": 4.1666666666666667e-07,
"logits/chosen": -3.046875,
"logits/rejected": -3.078125,
"logps/chosen": -326.0,
"logps/rejected": -348.0,
"loss": 0.5178,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.10498046875,
"rewards/margins": 0.57421875,
"rewards/rejected": -0.6796875,
"step": 80
},
{
"epoch": 0.09419152276295134,
"grad_norm": 25.450706815664475,
"learning_rate": 4.6874999999999996e-07,
"logits/chosen": -3.0625,
"logits/rejected": -3.046875,
"logps/chosen": -328.0,
"logps/rejected": -356.0,
"loss": 0.4379,
"rewards/accuracies": 0.856249988079071,
"rewards/chosen": -0.283203125,
"rewards/margins": 0.7109375,
"rewards/rejected": -0.9921875,
"step": 90
},
{
"epoch": 0.10465724751439037,
"grad_norm": 27.228385560373255,
"learning_rate": 4.999732492681437e-07,
"logits/chosen": -2.953125,
"logits/rejected": -3.0,
"logps/chosen": -350.0,
"logps/rejected": -498.0,
"loss": 0.3176,
"rewards/accuracies": 0.918749988079071,
"rewards/chosen": -0.498046875,
"rewards/margins": 1.5078125,
"rewards/rejected": -2.0,
"step": 100
},
{
"epoch": 0.1151229722658294,
"grad_norm": 45.59466990825789,
"learning_rate": 4.996723692767926e-07,
"logits/chosen": -2.96875,
"logits/rejected": -2.96875,
"logps/chosen": -344.0,
"logps/rejected": -540.0,
"loss": 0.2646,
"rewards/accuracies": 0.9125000238418579,
"rewards/chosen": -0.64453125,
"rewards/margins": 1.9609375,
"rewards/rejected": -2.609375,
"step": 110
},
{
"epoch": 0.12558869701726844,
"grad_norm": 27.15030514765242,
"learning_rate": 4.990375746213598e-07,
"logits/chosen": -2.875,
"logits/rejected": -2.84375,
"logps/chosen": -358.0,
"logps/rejected": -604.0,
"loss": 0.2701,
"rewards/accuracies": 0.887499988079071,
"rewards/chosen": -0.87890625,
"rewards/margins": 2.1875,
"rewards/rejected": -3.0625,
"step": 120
},
{
"epoch": 0.1360544217687075,
"grad_norm": 27.883434269977027,
"learning_rate": 4.980697142834314e-07,
"logits/chosen": -2.796875,
"logits/rejected": -2.765625,
"logps/chosen": -390.0,
"logps/rejected": -588.0,
"loss": 0.2224,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": -0.9296875,
"rewards/margins": 2.296875,
"rewards/rejected": -3.21875,
"step": 130
},
{
"epoch": 0.14652014652014653,
"grad_norm": 29.52150895374023,
"learning_rate": 4.967700826904229e-07,
"logits/chosen": -2.875,
"logits/rejected": -2.796875,
"logps/chosen": -368.0,
"logps/rejected": -672.0,
"loss": 0.2261,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.95703125,
"rewards/margins": 2.921875,
"rewards/rejected": -3.875,
"step": 140
},
{
"epoch": 0.15698587127158556,
"grad_norm": 45.4969620914659,
"learning_rate": 4.951404179843962e-07,
"logits/chosen": -2.75,
"logits/rejected": -2.625,
"logps/chosen": -438.0,
"logps/rejected": -688.0,
"loss": 0.2018,
"rewards/accuracies": 0.90625,
"rewards/chosen": -1.2734375,
"rewards/margins": 3.015625,
"rewards/rejected": -4.28125,
"step": 150
},
{
"epoch": 0.1674515960230246,
"grad_norm": 32.79270513780942,
"learning_rate": 4.931828996974498e-07,
"logits/chosen": -2.75,
"logits/rejected": -2.65625,
"logps/chosen": -440.0,
"logps/rejected": -700.0,
"loss": 0.2027,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -1.1640625,
"rewards/margins": 3.015625,
"rewards/rejected": -4.1875,
"step": 160
},
{
"epoch": 0.17791732077446362,
"grad_norm": 30.949462462321232,
"learning_rate": 4.909001458367866e-07,
"logits/chosen": -2.671875,
"logits/rejected": -2.609375,
"logps/chosen": -372.0,
"logps/rejected": -724.0,
"loss": 0.1997,
"rewards/accuracies": 0.9375,
"rewards/chosen": -1.0703125,
"rewards/margins": 3.171875,
"rewards/rejected": -4.25,
"step": 170
},
{
"epoch": 0.18838304552590268,
"grad_norm": 26.340401250142335,
"learning_rate": 4.882952093833627e-07,
"logits/chosen": -2.703125,
"logits/rejected": -2.578125,
"logps/chosen": -382.0,
"logps/rejected": -716.0,
"loss": 0.1926,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": -1.078125,
"rewards/margins": 3.359375,
"rewards/rejected": -4.4375,
"step": 180
},
{
"epoch": 0.1988487702773417,
"grad_norm": 18.968097604368335,
"learning_rate": 4.853715742087946e-07,
"logits/chosen": -2.53125,
"logits/rejected": -2.484375,
"logps/chosen": -428.0,
"logps/rejected": -752.0,
"loss": 0.1612,
"rewards/accuracies": 0.918749988079071,
"rewards/chosen": -1.5,
"rewards/margins": 3.53125,
"rewards/rejected": -5.03125,
"step": 190
},
{
"epoch": 0.20931449502878074,
"grad_norm": 20.782646308534286,
"learning_rate": 4.821331504159906e-07,
"logits/chosen": -2.6875,
"logits/rejected": -2.484375,
"logps/chosen": -462.0,
"logps/rejected": -784.0,
"loss": 0.1607,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": -1.5703125,
"rewards/margins": 3.5,
"rewards/rejected": -5.09375,
"step": 200
},
{
"epoch": 0.21978021978021978,
"grad_norm": 23.863300215477395,
"learning_rate": 4.785842691097342e-07,
"logits/chosen": -2.609375,
"logits/rejected": -2.484375,
"logps/chosen": -410.0,
"logps/rejected": -820.0,
"loss": 0.1596,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": -1.21875,
"rewards/margins": 4.25,
"rewards/rejected": -5.46875,
"step": 210
},
{
"epoch": 0.2302459445316588,
"grad_norm": 40.35606043050439,
"learning_rate": 4.7472967660421603e-07,
"logits/chosen": -2.59375,
"logits/rejected": -2.46875,
"logps/chosen": -494.0,
"logps/rejected": -932.0,
"loss": 0.1764,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -1.9609375,
"rewards/margins": 4.46875,
"rewards/rejected": -6.4375,
"step": 220
},
{
"epoch": 0.24071166928309787,
"grad_norm": 25.181446808734336,
"learning_rate": 4.705745280752585e-07,
"logits/chosen": -2.6875,
"logits/rejected": -2.515625,
"logps/chosen": -442.0,
"logps/rejected": -832.0,
"loss": 0.1588,
"rewards/accuracies": 0.9375,
"rewards/chosen": -1.46875,
"rewards/margins": 4.0625,
"rewards/rejected": -5.53125,
"step": 230
},
{
"epoch": 0.25117739403453687,
"grad_norm": 32.49967985504463,
"learning_rate": 4.6612438066572555e-07,
"logits/chosen": -2.4375,
"logits/rejected": -2.1875,
"logps/chosen": -480.0,
"logps/rejected": -940.0,
"loss": 0.16,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -1.6875,
"rewards/margins": 4.78125,
"rewards/rejected": -6.4375,
"step": 240
},
{
"epoch": 0.2616431187859759,
"grad_norm": 31.230972977495156,
"learning_rate": 4.6138518605333664e-07,
"logits/chosen": -2.5625,
"logits/rejected": -2.421875,
"logps/chosen": -426.0,
"logps/rejected": -800.0,
"loss": 0.1537,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": -1.421875,
"rewards/margins": 4.0625,
"rewards/rejected": -5.46875,
"step": 250
},
{
"epoch": 0.272108843537415,
"grad_norm": 29.71689511008149,
"learning_rate": 4.5636328249082514e-07,
"logits/chosen": -2.453125,
"logits/rejected": -2.296875,
"logps/chosen": -474.0,
"logps/rejected": -948.0,
"loss": 0.1394,
"rewards/accuracies": 0.9375,
"rewards/chosen": -1.828125,
"rewards/margins": 4.625,
"rewards/rejected": -6.4375,
"step": 260
},
{
"epoch": 0.282574568288854,
"grad_norm": 28.295583463414996,
"learning_rate": 4.510653863290871e-07,
"logits/chosen": -2.40625,
"logits/rejected": -2.078125,
"logps/chosen": -438.0,
"logps/rejected": -896.0,
"loss": 0.1481,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -1.390625,
"rewards/margins": 4.84375,
"rewards/rejected": -6.25,
"step": 270
},
{
"epoch": 0.29304029304029305,
"grad_norm": 28.398306620399453,
"learning_rate": 4.4549858303465737e-07,
"logits/chosen": -2.5,
"logits/rejected": -2.3125,
"logps/chosen": -484.0,
"logps/rejected": -944.0,
"loss": 0.1392,
"rewards/accuracies": 0.887499988079071,
"rewards/chosen": -1.9453125,
"rewards/margins": 4.5625,
"rewards/rejected": -6.5,
"step": 280
},
{
"epoch": 0.3035060177917321,
"grad_norm": 40.00756374405055,
"learning_rate": 4.396703177135261e-07,
"logits/chosen": -2.28125,
"logits/rejected": -2.109375,
"logps/chosen": -544.0,
"logps/rejected": -1072.0,
"loss": 0.1322,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": -2.734375,
"rewards/margins": 5.53125,
"rewards/rejected": -8.25,
"step": 290
},
{
"epoch": 0.3139717425431711,
"grad_norm": 37.30671040470767,
"learning_rate": 4.335883851539693e-07,
"logits/chosen": -2.34375,
"logits/rejected": -2.015625,
"logps/chosen": -680.0,
"logps/rejected": -1200.0,
"loss": 0.1326,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -3.65625,
"rewards/margins": 5.46875,
"rewards/rejected": -9.125,
"step": 300
},
{
"epoch": 0.32443746729461015,
"grad_norm": 34.70455152314579,
"learning_rate": 4.272609194017105e-07,
"logits/chosen": -2.21875,
"logits/rejected": -1.7265625,
"logps/chosen": -832.0,
"logps/rejected": -1368.0,
"loss": 0.1371,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": -5.25,
"rewards/margins": 5.40625,
"rewards/rejected": -10.625,
"step": 310
},
{
"epoch": 0.3349031920460492,
"grad_norm": 24.846518493181428,
"learning_rate": 4.2069638288135547e-07,
"logits/chosen": -2.34375,
"logits/rejected": -2.109375,
"logps/chosen": -696.0,
"logps/rejected": -1264.0,
"loss": 0.1284,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": -3.9375,
"rewards/margins": 5.71875,
"rewards/rejected": -9.625,
"step": 320
},
{
"epoch": 0.3453689167974882,
"grad_norm": 45.84342601245407,
"learning_rate": 4.139035550786494e-07,
"logits/chosen": -2.484375,
"logits/rejected": -2.375,
"logps/chosen": -684.0,
"logps/rejected": -1200.0,
"loss": 0.1228,
"rewards/accuracies": 0.918749988079071,
"rewards/chosen": -4.03125,
"rewards/margins": 5.0625,
"rewards/rejected": -9.125,
"step": 330
},
{
"epoch": 0.35583464154892724,
"grad_norm": 49.94021964049473,
"learning_rate": 4.0689152079869306e-07,
"logits/chosen": -2.21875,
"logits/rejected": -1.8515625,
"logps/chosen": -740.0,
"logps/rejected": -1456.0,
"loss": 0.1026,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -4.8125,
"rewards/margins": 7.34375,
"rewards/rejected": -12.1875,
"step": 340
},
{
"epoch": 0.3663003663003663,
"grad_norm": 39.72539996550781,
"learning_rate": 3.99669658015821e-07,
"logits/chosen": -2.25,
"logits/rejected": -1.9609375,
"logps/chosen": -696.0,
"logps/rejected": -1328.0,
"loss": 0.1202,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": -4.375,
"rewards/margins": 6.59375,
"rewards/rejected": -10.9375,
"step": 350
},
{
"epoch": 0.37676609105180536,
"grad_norm": 45.84961303802077,
"learning_rate": 3.92247625331392e-07,
"logits/chosen": -2.21875,
"logits/rejected": -2.0,
"logps/chosen": -764.0,
"logps/rejected": -1384.0,
"loss": 0.1073,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": -4.71875,
"rewards/margins": 6.40625,
"rewards/rejected": -11.125,
"step": 360
},
{
"epoch": 0.3872318158032444,
"grad_norm": 20.54553148683269,
"learning_rate": 3.846353490562664e-07,
"logits/chosen": -2.3125,
"logits/rejected": -1.796875,
"logps/chosen": -680.0,
"logps/rejected": -1360.0,
"loss": 0.1308,
"rewards/accuracies": 0.96875,
"rewards/chosen": -4.1875,
"rewards/margins": 6.90625,
"rewards/rejected": -11.125,
"step": 370
},
{
"epoch": 0.3976975405546834,
"grad_norm": 30.114773714834385,
"learning_rate": 3.768430099352445e-07,
"logits/chosen": -2.4375,
"logits/rejected": -2.234375,
"logps/chosen": -668.0,
"logps/rejected": -1200.0,
"loss": 0.1285,
"rewards/accuracies": 0.918749988079071,
"rewards/chosen": -3.609375,
"rewards/margins": 5.53125,
"rewards/rejected": -9.125,
"step": 380
},
{
"epoch": 0.40816326530612246,
"grad_norm": 22.860559479058303,
"learning_rate": 3.6888102953122304e-07,
"logits/chosen": -2.421875,
"logits/rejected": -2.15625,
"logps/chosen": -652.0,
"logps/rejected": -1272.0,
"loss": 0.1018,
"rewards/accuracies": 0.981249988079071,
"rewards/chosen": -3.609375,
"rewards/margins": 6.5,
"rewards/rejected": -10.125,
"step": 390
},
{
"epoch": 0.4186289900575615,
"grad_norm": 19.494795072649943,
"learning_rate": 3.607600562872785e-07,
"logits/chosen": -2.4375,
"logits/rejected": -2.03125,
"logps/chosen": -892.0,
"logps/rejected": -1640.0,
"loss": 0.091,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": -5.46875,
"rewards/margins": 7.90625,
"rewards/rejected": -13.375,
"step": 400
},
{
"epoch": 0.4290947148090005,
"grad_norm": 34.802523467101764,
"learning_rate": 3.5249095128531856e-07,
"logits/chosen": -2.453125,
"logits/rejected": -2.203125,
"logps/chosen": -812.0,
"logps/rejected": -1392.0,
"loss": 0.1044,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": -4.75,
"rewards/margins": 6.4375,
"rewards/rejected": -11.1875,
"step": 410
},
{
"epoch": 0.43956043956043955,
"grad_norm": 16.064797665253533,
"learning_rate": 3.4408477372034736e-07,
"logits/chosen": -2.421875,
"logits/rejected": -2.03125,
"logps/chosen": -856.0,
"logps/rejected": -1536.0,
"loss": 0.1125,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": -5.40625,
"rewards/margins": 7.375,
"rewards/rejected": -12.75,
"step": 420
},
{
"epoch": 0.4500261643118786,
"grad_norm": 26.48155910496492,
"learning_rate": 3.3555276610977276e-07,
"logits/chosen": -2.40625,
"logits/rejected": -2.171875,
"logps/chosen": -776.0,
"logps/rejected": -1456.0,
"loss": 0.0939,
"rewards/accuracies": 0.96875,
"rewards/chosen": -4.90625,
"rewards/margins": 6.78125,
"rewards/rejected": -11.6875,
"step": 430
},
{
"epoch": 0.4604918890633176,
"grad_norm": 20.80477653317148,
"learning_rate": 3.269063392575352e-07,
"logits/chosen": -2.4375,
"logits/rejected": -2.1875,
"logps/chosen": -748.0,
"logps/rejected": -1472.0,
"loss": 0.1046,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -4.75,
"rewards/margins": 7.21875,
"rewards/rejected": -12.0,
"step": 440
},
{
"epoch": 0.47095761381475665,
"grad_norm": 29.44200512203946,
"learning_rate": 3.1815705699316964e-07,
"logits/chosen": -2.359375,
"logits/rejected": -2.0625,
"logps/chosen": -792.0,
"logps/rejected": -1496.0,
"loss": 0.0963,
"rewards/accuracies": 0.981249988079071,
"rewards/chosen": -4.90625,
"rewards/margins": 7.125,
"rewards/rejected": -12.0625,
"step": 450
},
{
"epoch": 0.48142333856619574,
"grad_norm": 26.271372385880735,
"learning_rate": 3.0931662070620794e-07,
"logits/chosen": -2.203125,
"logits/rejected": -2.0,
"logps/chosen": -908.0,
"logps/rejected": -1608.0,
"loss": 0.0973,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -6.34375,
"rewards/margins": 7.1875,
"rewards/rejected": -13.5625,
"step": 460
},
{
"epoch": 0.49188906331763477,
"grad_norm": 31.154091413440536,
"learning_rate": 3.003968536966078e-07,
"logits/chosen": -2.390625,
"logits/rejected": -2.109375,
"logps/chosen": -868.0,
"logps/rejected": -1600.0,
"loss": 0.0846,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -5.53125,
"rewards/margins": 7.40625,
"rewards/rejected": -12.9375,
"step": 470
},
{
"epoch": 0.5023547880690737,
"grad_norm": 25.37606130345961,
"learning_rate": 2.9140968536213693e-07,
"logits/chosen": -2.5,
"logits/rejected": -2.125,
"logps/chosen": -812.0,
"logps/rejected": -1504.0,
"loss": 0.0895,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": -5.0625,
"rewards/margins": 7.25,
"rewards/rejected": -12.3125,
"step": 480
},
{
"epoch": 0.5128205128205128,
"grad_norm": 27.895788314669506,
"learning_rate": 2.823671352438608e-07,
"logits/chosen": -2.515625,
"logits/rejected": -2.265625,
"logps/chosen": -820.0,
"logps/rejected": -1488.0,
"loss": 0.0997,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": -5.34375,
"rewards/margins": 6.8125,
"rewards/rejected": -12.1875,
"step": 490
},
{
"epoch": 0.5232862375719518,
"grad_norm": 31.69949419586544,
"learning_rate": 2.73281296951072e-07,
"logits/chosen": -2.453125,
"logits/rejected": -2.03125,
"logps/chosen": -952.0,
"logps/rejected": -1672.0,
"loss": 0.0871,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": -6.5625,
"rewards/margins": 7.59375,
"rewards/rejected": -14.125,
"step": 500
},
{
"epoch": 0.533751962323391,
"grad_norm": 18.91909774862979,
"learning_rate": 2.641643219871597e-07,
"logits/chosen": -2.375,
"logits/rejected": -2.046875,
"logps/chosen": -824.0,
"logps/rejected": -1472.0,
"loss": 0.0982,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -5.1875,
"rewards/margins": 6.84375,
"rewards/rejected": -12.0,
"step": 510
},
{
"epoch": 0.54421768707483,
"grad_norm": 31.301196837566955,
"learning_rate": 2.550284034980507e-07,
"logits/chosen": -2.46875,
"logits/rejected": -2.3125,
"logps/chosen": -736.0,
"logps/rejected": -1368.0,
"loss": 0.1158,
"rewards/accuracies": 0.96875,
"rewards/chosen": -4.34375,
"rewards/margins": 6.5,
"rewards/rejected": -10.8125,
"step": 520
},
{
"epoch": 0.554683411826269,
"grad_norm": 35.72913678676417,
"learning_rate": 2.4588575996495794e-07,
"logits/chosen": -2.640625,
"logits/rejected": -2.34375,
"logps/chosen": -684.0,
"logps/rejected": -1336.0,
"loss": 0.1081,
"rewards/accuracies": 0.9375,
"rewards/chosen": -4.125,
"rewards/margins": 6.78125,
"rewards/rejected": -10.9375,
"step": 530
},
{
"epoch": 0.565149136577708,
"grad_norm": 22.499329779979547,
"learning_rate": 2.367486188632446e-07,
"logits/chosen": -2.609375,
"logits/rejected": -2.34375,
"logps/chosen": -668.0,
"logps/rejected": -1336.0,
"loss": 0.0892,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -3.84375,
"rewards/margins": 6.65625,
"rewards/rejected": -10.5,
"step": 540
},
{
"epoch": 0.5756148613291471,
"grad_norm": 18.90148104971076,
"learning_rate": 2.276292003092593e-07,
"logits/chosen": -2.484375,
"logits/rejected": -2.203125,
"logps/chosen": -816.0,
"logps/rejected": -1520.0,
"loss": 0.0834,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": -5.28125,
"rewards/margins": 7.28125,
"rewards/rejected": -12.5625,
"step": 550
},
{
"epoch": 0.5860805860805861,
"grad_norm": 34.885386057496255,
"learning_rate": 2.185397007170141e-07,
"logits/chosen": -2.421875,
"logits/rejected": -2.0625,
"logps/chosen": -952.0,
"logps/rejected": -1680.0,
"loss": 0.0802,
"rewards/accuracies": 0.981249988079071,
"rewards/chosen": -6.71875,
"rewards/margins": 7.75,
"rewards/rejected": -14.4375,
"step": 560
},
{
"epoch": 0.5965463108320251,
"grad_norm": 31.26740690349814,
"learning_rate": 2.094922764865619e-07,
"logits/chosen": -2.34375,
"logits/rejected": -2.21875,
"logps/chosen": -892.0,
"logps/rejected": -1640.0,
"loss": 0.073,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -5.9375,
"rewards/margins": 7.71875,
"rewards/rejected": -13.625,
"step": 570
},
{
"epoch": 0.6070120355834642,
"grad_norm": 26.503557168812993,
"learning_rate": 2.0049902774588797e-07,
"logits/chosen": -2.4375,
"logits/rejected": -2.125,
"logps/chosen": -892.0,
"logps/rejected": -1680.0,
"loss": 0.0786,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": -6.1875,
"rewards/margins": 8.0,
"rewards/rejected": -14.25,
"step": 580
},
{
"epoch": 0.6174777603349032,
"grad_norm": 26.760637852696387,
"learning_rate": 1.9157198216806238e-07,
"logits/chosen": -2.453125,
"logits/rejected": -2.09375,
"logps/chosen": -888.0,
"logps/rejected": -1736.0,
"loss": 0.0796,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -6.125,
"rewards/margins": 8.4375,
"rewards/rejected": -14.5625,
"step": 590
},
{
"epoch": 0.6279434850863422,
"grad_norm": 33.575414387335336,
"learning_rate": 1.8272307888529274e-07,
"logits/chosen": -2.625,
"logits/rejected": -2.40625,
"logps/chosen": -908.0,
"logps/rejected": -1776.0,
"loss": 0.0901,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -5.9375,
"rewards/margins": 8.5,
"rewards/rejected": -14.4375,
"step": 600
},
{
"epoch": 0.6384092098377813,
"grad_norm": 30.280173464201145,
"learning_rate": 1.7396415252139288e-07,
"logits/chosen": -2.484375,
"logits/rejected": -2.171875,
"logps/chosen": -820.0,
"logps/rejected": -1584.0,
"loss": 0.0848,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": -5.28125,
"rewards/margins": 7.78125,
"rewards/rejected": -13.0625,
"step": 610
},
{
"epoch": 0.6488749345892203,
"grad_norm": 31.809087141096686,
"learning_rate": 1.6530691736402316e-07,
"logits/chosen": -2.515625,
"logits/rejected": -2.25,
"logps/chosen": -828.0,
"logps/rejected": -1576.0,
"loss": 0.0699,
"rewards/accuracies": 0.956250011920929,
"rewards/chosen": -5.375,
"rewards/margins": 7.6875,
"rewards/rejected": -13.0625,
"step": 620
},
{
"epoch": 0.6593406593406593,
"grad_norm": 20.492927408780787,
"learning_rate": 1.5676295169786864e-07,
"logits/chosen": -2.515625,
"logits/rejected": -2.203125,
"logps/chosen": -928.0,
"logps/rejected": -1608.0,
"loss": 0.0906,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": -6.1875,
"rewards/margins": 7.15625,
"rewards/rejected": -13.375,
"step": 630
},
{
"epoch": 0.6698063840920984,
"grad_norm": 24.302749821696764,
"learning_rate": 1.483436823197092e-07,
"logits/chosen": -2.484375,
"logits/rejected": -2.265625,
"logps/chosen": -916.0,
"logps/rejected": -1680.0,
"loss": 0.0811,
"rewards/accuracies": 0.96875,
"rewards/chosen": -6.375,
"rewards/margins": 7.78125,
"rewards/rejected": -14.1875,
"step": 640
},
{
"epoch": 0.6802721088435374,
"grad_norm": 30.640711717907518,
"learning_rate": 1.4006036925609243e-07,
"logits/chosen": -2.484375,
"logits/rejected": -2.28125,
"logps/chosen": -984.0,
"logps/rejected": -1704.0,
"loss": 0.0736,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -6.8125,
"rewards/margins": 7.53125,
"rewards/rejected": -14.3125,
"step": 650
},
{
"epoch": 0.6907378335949764,
"grad_norm": 28.697044539390294,
"learning_rate": 1.319240907040458e-07,
"logits/chosen": -2.421875,
"logits/rejected": -2.125,
"logps/chosen": -960.0,
"logps/rejected": -1752.0,
"loss": 0.0737,
"rewards/accuracies": 0.96875,
"rewards/chosen": -6.59375,
"rewards/margins": 7.84375,
"rewards/rejected": -14.4375,
"step": 660
},
{
"epoch": 0.7012035583464155,
"grad_norm": 17.345360984589735,
"learning_rate": 1.2394572821496948e-07,
"logits/chosen": -2.3125,
"logits/rejected": -2.03125,
"logps/chosen": -988.0,
"logps/rejected": -1848.0,
"loss": 0.0695,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -7.15625,
"rewards/margins": 8.625,
"rewards/rejected": -15.75,
"step": 670
},
{
"epoch": 0.7116692830978545,
"grad_norm": 33.23147099676755,
"learning_rate": 1.1613595214152711e-07,
"logits/chosen": -2.375,
"logits/rejected": -2.09375,
"logps/chosen": -1064.0,
"logps/rejected": -1880.0,
"loss": 0.0806,
"rewards/accuracies": 0.956250011920929,
"rewards/chosen": -7.53125,
"rewards/margins": 8.4375,
"rewards/rejected": -15.9375,
"step": 680
},
{
"epoch": 0.7221350078492935,
"grad_norm": 20.63657009936635,
"learning_rate": 1.0850520736699362e-07,
"logits/chosen": -2.515625,
"logits/rejected": -2.265625,
"logps/chosen": -1004.0,
"logps/rejected": -1792.0,
"loss": 0.08,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": -6.875,
"rewards/margins": 8.0,
"rewards/rejected": -14.875,
"step": 690
},
{
"epoch": 0.7326007326007326,
"grad_norm": 18.557890534440453,
"learning_rate": 1.0106369933615042e-07,
"logits/chosen": -2.390625,
"logits/rejected": -2.234375,
"logps/chosen": -960.0,
"logps/rejected": -1840.0,
"loss": 0.0614,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -6.5625,
"rewards/margins": 8.75,
"rewards/rejected": -15.25,
"step": 700
},
{
"epoch": 0.7430664573521716,
"grad_norm": 17.0748564532371,
"learning_rate": 9.382138040640714e-08,
"logits/chosen": -2.53125,
"logits/rejected": -2.15625,
"logps/chosen": -960.0,
"logps/rejected": -1888.0,
"loss": 0.0656,
"rewards/accuracies": 0.96875,
"rewards/chosen": -6.71875,
"rewards/margins": 9.125,
"rewards/rejected": -15.875,
"step": 710
},
{
"epoch": 0.7535321821036107,
"grad_norm": 28.894806455872907,
"learning_rate": 8.678793653740632e-08,
"logits/chosen": -2.5,
"logits/rejected": -2.203125,
"logps/chosen": -1040.0,
"logps/rejected": -1840.0,
"loss": 0.0632,
"rewards/accuracies": 0.96875,
"rewards/chosen": -7.0625,
"rewards/margins": 8.4375,
"rewards/rejected": -15.5,
"step": 720
},
{
"epoch": 0.7639979068550498,
"grad_norm": 26.829195320759613,
"learning_rate": 7.997277433690983e-08,
"logits/chosen": -2.34375,
"logits/rejected": -2.15625,
"logps/chosen": -1024.0,
"logps/rejected": -1872.0,
"loss": 0.0744,
"rewards/accuracies": 0.96875,
"rewards/chosen": -7.46875,
"rewards/margins": 8.5625,
"rewards/rejected": -16.0,
"step": 730
},
{
"epoch": 0.7744636316064888,
"grad_norm": 19.48203742338746,
"learning_rate": 7.338500848029602e-08,
"logits/chosen": -2.375,
"logits/rejected": -2.0625,
"logps/chosen": -1056.0,
"logps/rejected": -1976.0,
"loss": 0.0672,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -7.53125,
"rewards/margins": 9.4375,
"rewards/rejected": -17.0,
"step": 740
},
{
"epoch": 0.7849293563579278,
"grad_norm": 14.527749472798101,
"learning_rate": 6.70334495204884e-08,
"logits/chosen": -2.265625,
"logits/rejected": -1.9453125,
"logps/chosen": -996.0,
"logps/rejected": -1920.0,
"loss": 0.0481,
"rewards/accuracies": 0.96875,
"rewards/chosen": -7.375,
"rewards/margins": 9.375,
"rewards/rejected": -16.75,
"step": 750
},
{
"epoch": 0.7953950811093669,
"grad_norm": 15.609318913343236,
"learning_rate": 6.092659210462231e-08,
"logits/chosen": -2.40625,
"logits/rejected": -1.9921875,
"logps/chosen": -1056.0,
"logps/rejected": -1976.0,
"loss": 0.0532,
"rewards/accuracies": 0.96875,
"rewards/chosen": -7.78125,
"rewards/margins": 9.4375,
"rewards/rejected": -17.25,
"step": 760
},
{
"epoch": 0.8058608058608059,
"grad_norm": 7.809802817809847,
"learning_rate": 5.507260361320737e-08,
"logits/chosen": -2.25,
"logits/rejected": -1.96875,
"logps/chosen": -1056.0,
"logps/rejected": -2064.0,
"loss": 0.0419,
"rewards/accuracies": 0.981249988079071,
"rewards/chosen": -7.6875,
"rewards/margins": 10.25,
"rewards/rejected": -17.875,
"step": 770
},
{
"epoch": 0.8163265306122449,
"grad_norm": 22.462937925254167,
"learning_rate": 4.947931323697982e-08,
"logits/chosen": -2.359375,
"logits/rejected": -2.015625,
"logps/chosen": -1120.0,
"logps/rejected": -2064.0,
"loss": 0.0616,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": -8.125,
"rewards/margins": 9.75,
"rewards/rejected": -17.875,
"step": 780
},
{
"epoch": 0.826792255363684,
"grad_norm": 25.68320010476297,
"learning_rate": 4.415420150605398e-08,
"logits/chosen": -2.171875,
"logits/rejected": -1.890625,
"logps/chosen": -1048.0,
"logps/rejected": -2040.0,
"loss": 0.0657,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -7.8125,
"rewards/margins": 9.8125,
"rewards/rejected": -17.625,
"step": 790
},
{
"epoch": 0.837257980115123,
"grad_norm": 26.750079370530244,
"learning_rate": 3.9104390285376374e-08,
"logits/chosen": -2.296875,
"logits/rejected": -1.9765625,
"logps/chosen": -1040.0,
"logps/rejected": -2016.0,
"loss": 0.0513,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -7.5,
"rewards/margins": 9.6875,
"rewards/rejected": -17.125,
"step": 800
},
{
"epoch": 0.847723704866562,
"grad_norm": 15.408792175860983,
"learning_rate": 3.433663324986208e-08,
"logits/chosen": -2.3125,
"logits/rejected": -2.015625,
"logps/chosen": -1024.0,
"logps/rejected": -1936.0,
"loss": 0.0679,
"rewards/accuracies": 0.96875,
"rewards/chosen": -7.28125,
"rewards/margins": 9.0625,
"rewards/rejected": -16.375,
"step": 810
},
{
"epoch": 0.858189429618001,
"grad_norm": 16.721851816987886,
"learning_rate": 2.9857306851953897e-08,
"logits/chosen": -2.34375,
"logits/rejected": -1.875,
"logps/chosen": -1032.0,
"logps/rejected": -1960.0,
"loss": 0.0581,
"rewards/accuracies": 0.96875,
"rewards/chosen": -7.59375,
"rewards/margins": 9.5,
"rewards/rejected": -17.125,
"step": 820
},
{
"epoch": 0.8686551543694401,
"grad_norm": 25.788818922540266,
"learning_rate": 2.567240179368185e-08,
"logits/chosen": -2.34375,
"logits/rejected": -1.75,
"logps/chosen": -1072.0,
"logps/rejected": -2064.0,
"loss": 0.052,
"rewards/accuracies": 0.96875,
"rewards/chosen": -7.6875,
"rewards/margins": 10.3125,
"rewards/rejected": -18.0,
"step": 830
},
{
"epoch": 0.8791208791208791,
"grad_norm": 14.284086558067754,
"learning_rate": 2.1787515014630357e-08,
"logits/chosen": -2.203125,
"logits/rejected": -1.875,
"logps/chosen": -1040.0,
"logps/rejected": -2128.0,
"loss": 0.0524,
"rewards/accuracies": 0.96875,
"rewards/chosen": -7.75,
"rewards/margins": 10.75,
"rewards/rejected": -18.5,
"step": 840
},
{
"epoch": 0.8895866038723181,
"grad_norm": 30.104620767056684,
"learning_rate": 1.820784220652766e-08,
"logits/chosen": -2.25,
"logits/rejected": -1.9375,
"logps/chosen": -1088.0,
"logps/rejected": -2112.0,
"loss": 0.0408,
"rewards/accuracies": 0.981249988079071,
"rewards/chosen": -7.84375,
"rewards/margins": 10.4375,
"rewards/rejected": -18.25,
"step": 850
},
{
"epoch": 0.9000523286237572,
"grad_norm": 30.017914965835946,
"learning_rate": 1.4938170864468636e-08,
"logits/chosen": -2.234375,
"logits/rejected": -1.75,
"logps/chosen": -1072.0,
"logps/rejected": -2192.0,
"loss": 0.0575,
"rewards/accuracies": 0.96875,
"rewards/chosen": -8.0625,
"rewards/margins": 11.25,
"rewards/rejected": -19.375,
"step": 860
},
{
"epoch": 0.9105180533751962,
"grad_norm": 22.849493479324984,
"learning_rate": 1.1982873884064465e-08,
"logits/chosen": -2.296875,
"logits/rejected": -1.875,
"logps/chosen": -1120.0,
"logps/rejected": -2080.0,
"loss": 0.0585,
"rewards/accuracies": 0.981249988079071,
"rewards/chosen": -8.125,
"rewards/margins": 10.0625,
"rewards/rejected": -18.125,
"step": 870
},
{
"epoch": 0.9209837781266352,
"grad_norm": 13.839322356316236,
"learning_rate": 9.345903713082304e-09,
"logits/chosen": -2.234375,
"logits/rejected": -1.7109375,
"logps/chosen": -1184.0,
"logps/rejected": -2208.0,
"loss": 0.0595,
"rewards/accuracies": 0.96875,
"rewards/chosen": -8.5,
"rewards/margins": 10.5625,
"rewards/rejected": -19.125,
"step": 880
},
{
"epoch": 0.9314495028780743,
"grad_norm": 17.72445198042151,
"learning_rate": 7.030787065396865e-09,
"logits/chosen": -2.21875,
"logits/rejected": -1.7578125,
"logps/chosen": -1080.0,
"logps/rejected": -2176.0,
"loss": 0.0598,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": -7.84375,
"rewards/margins": 11.25,
"rewards/rejected": -19.125,
"step": 890
},
{
"epoch": 0.9419152276295133,
"grad_norm": 7.302557618391421,
"learning_rate": 5.04062020432286e-09,
"logits/chosen": -2.40625,
"logits/rejected": -1.859375,
"logps/chosen": -1112.0,
"logps/rejected": -2128.0,
"loss": 0.0641,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -8.0,
"rewards/margins": 10.4375,
"rewards/rejected": -18.5,
"step": 900
},
{
"epoch": 0.9523809523809523,
"grad_norm": 26.036561376029788,
"learning_rate": 3.3780648016376866e-09,
"logits/chosen": -2.234375,
"logits/rejected": -1.890625,
"logps/chosen": -1120.0,
"logps/rejected": -2064.0,
"loss": 0.0606,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": -8.0625,
"rewards/margins": 9.6875,
"rewards/rejected": -17.75,
"step": 910
},
{
"epoch": 0.9628466771323915,
"grad_norm": 42.338882753255156,
"learning_rate": 2.0453443778310766e-09,
"logits/chosen": -2.25,
"logits/rejected": -1.8203125,
"logps/chosen": -1048.0,
"logps/rejected": -2032.0,
"loss": 0.0514,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -7.59375,
"rewards/margins": 10.0625,
"rewards/rejected": -17.625,
"step": 920
},
{
"epoch": 0.9733124018838305,
"grad_norm": 25.61689367855155,
"learning_rate": 1.0442413283435758e-09,
"logits/chosen": -2.25,
"logits/rejected": -1.7265625,
"logps/chosen": -1040.0,
"logps/rejected": -1992.0,
"loss": 0.048,
"rewards/accuracies": 0.956250011920929,
"rewards/chosen": -7.78125,
"rewards/margins": 9.625,
"rewards/rejected": -17.375,
"step": 930
},
{
"epoch": 0.9837781266352695,
"grad_norm": 23.291401299662382,
"learning_rate": 3.760945397705828e-10,
"logits/chosen": -2.25,
"logits/rejected": -1.96875,
"logps/chosen": -1064.0,
"logps/rejected": -2096.0,
"loss": 0.0548,
"rewards/accuracies": 0.96875,
"rewards/chosen": -7.5625,
"rewards/margins": 10.375,
"rewards/rejected": -17.875,
"step": 940
},
{
"epoch": 0.9942438513867086,
"grad_norm": 25.219313507903824,
"learning_rate": 4.17975992204056e-11,
"logits/chosen": -2.21875,
"logits/rejected": -1.8125,
"logps/chosen": -1096.0,
"logps/rejected": -2128.0,
"loss": 0.0409,
"rewards/accuracies": 0.981249988079071,
"rewards/chosen": -7.84375,
"rewards/margins": 10.5,
"rewards/rejected": -18.375,
"step": 950
},
{
"epoch": 0.9994767137624281,
"step": 955,
"total_flos": 0.0,
"train_loss": 0.15506600241386453,
"train_runtime": 14070.209,
"train_samples_per_second": 8.69,
"train_steps_per_second": 0.068
}
],
"logging_steps": 10,
"max_steps": 955,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}