|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9994767137624281, |
|
"eval_steps": 100, |
|
"global_step": 955, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0010465724751439038, |
|
"grad_norm": 9.51283368668585, |
|
"learning_rate": 5.208333333333333e-09, |
|
"logits/chosen": -3.21875, |
|
"logits/rejected": -3.21875, |
|
"logps/chosen": -250.0, |
|
"logps/rejected": -364.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.010465724751439037, |
|
"grad_norm": 9.570547962527824, |
|
"learning_rate": 5.208333333333333e-08, |
|
"logits/chosen": -3.25, |
|
"logits/rejected": -3.28125, |
|
"logps/chosen": -298.0, |
|
"logps/rejected": -278.0, |
|
"loss": 0.6918, |
|
"rewards/accuracies": 0.1875, |
|
"rewards/chosen": -0.0004787445068359375, |
|
"rewards/margins": -0.000843048095703125, |
|
"rewards/rejected": 0.0003643035888671875, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.020931449502878074, |
|
"grad_norm": 9.308254903442457, |
|
"learning_rate": 1.0416666666666667e-07, |
|
"logits/chosen": -3.078125, |
|
"logits/rejected": -3.171875, |
|
"logps/chosen": -286.0, |
|
"logps/rejected": -294.0, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -0.0016326904296875, |
|
"rewards/margins": 0.00031280517578125, |
|
"rewards/rejected": -0.00194549560546875, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03139717425431711, |
|
"grad_norm": 8.758887635948838, |
|
"learning_rate": 1.5624999999999999e-07, |
|
"logits/chosen": -3.21875, |
|
"logits/rejected": -3.25, |
|
"logps/chosen": -294.0, |
|
"logps/rejected": -264.0, |
|
"loss": 0.6899, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.00150299072265625, |
|
"rewards/margins": 0.002899169921875, |
|
"rewards/rejected": -0.00439453125, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04186289900575615, |
|
"grad_norm": 9.50313648843035, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"logits/chosen": -3.1875, |
|
"logits/rejected": -3.125, |
|
"logps/chosen": -288.0, |
|
"logps/rejected": -302.0, |
|
"loss": 0.684, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.00579833984375, |
|
"rewards/margins": 0.0184326171875, |
|
"rewards/rejected": -0.0242919921875, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.052328623757195186, |
|
"grad_norm": 8.682508739901571, |
|
"learning_rate": 2.604166666666667e-07, |
|
"logits/chosen": -3.25, |
|
"logits/rejected": -3.21875, |
|
"logps/chosen": -320.0, |
|
"logps/rejected": -302.0, |
|
"loss": 0.6706, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.025146484375, |
|
"rewards/margins": 0.0498046875, |
|
"rewards/rejected": -0.07470703125, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06279434850863422, |
|
"grad_norm": 11.261511786558849, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -3.15625, |
|
"logits/rejected": -3.171875, |
|
"logps/chosen": -296.0, |
|
"logps/rejected": -316.0, |
|
"loss": 0.6462, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.047119140625, |
|
"rewards/margins": 0.09326171875, |
|
"rewards/rejected": -0.140625, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07326007326007326, |
|
"grad_norm": 12.224158124661995, |
|
"learning_rate": 3.645833333333333e-07, |
|
"logits/chosen": -3.109375, |
|
"logits/rejected": -3.109375, |
|
"logps/chosen": -310.0, |
|
"logps/rejected": -304.0, |
|
"loss": 0.5955, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -0.0294189453125, |
|
"rewards/margins": 0.26171875, |
|
"rewards/rejected": -0.29296875, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.0837257980115123, |
|
"grad_norm": 24.94058449952297, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -3.046875, |
|
"logits/rejected": -3.078125, |
|
"logps/chosen": -326.0, |
|
"logps/rejected": -348.0, |
|
"loss": 0.5178, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.10498046875, |
|
"rewards/margins": 0.57421875, |
|
"rewards/rejected": -0.6796875, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.09419152276295134, |
|
"grad_norm": 25.450706815664475, |
|
"learning_rate": 4.6874999999999996e-07, |
|
"logits/chosen": -3.0625, |
|
"logits/rejected": -3.046875, |
|
"logps/chosen": -328.0, |
|
"logps/rejected": -356.0, |
|
"loss": 0.4379, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -0.283203125, |
|
"rewards/margins": 0.7109375, |
|
"rewards/rejected": -0.9921875, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.10465724751439037, |
|
"grad_norm": 27.228385560373255, |
|
"learning_rate": 4.999732492681437e-07, |
|
"logits/chosen": -2.953125, |
|
"logits/rejected": -3.0, |
|
"logps/chosen": -350.0, |
|
"logps/rejected": -498.0, |
|
"loss": 0.3176, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -0.498046875, |
|
"rewards/margins": 1.5078125, |
|
"rewards/rejected": -2.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.1151229722658294, |
|
"grad_norm": 45.59466990825789, |
|
"learning_rate": 4.996723692767926e-07, |
|
"logits/chosen": -2.96875, |
|
"logits/rejected": -2.96875, |
|
"logps/chosen": -344.0, |
|
"logps/rejected": -540.0, |
|
"loss": 0.2646, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.64453125, |
|
"rewards/margins": 1.9609375, |
|
"rewards/rejected": -2.609375, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.12558869701726844, |
|
"grad_norm": 27.15030514765242, |
|
"learning_rate": 4.990375746213598e-07, |
|
"logits/chosen": -2.875, |
|
"logits/rejected": -2.84375, |
|
"logps/chosen": -358.0, |
|
"logps/rejected": -604.0, |
|
"loss": 0.2701, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -0.87890625, |
|
"rewards/margins": 2.1875, |
|
"rewards/rejected": -3.0625, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.1360544217687075, |
|
"grad_norm": 27.883434269977027, |
|
"learning_rate": 4.980697142834314e-07, |
|
"logits/chosen": -2.796875, |
|
"logits/rejected": -2.765625, |
|
"logps/chosen": -390.0, |
|
"logps/rejected": -588.0, |
|
"loss": 0.2224, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -0.9296875, |
|
"rewards/margins": 2.296875, |
|
"rewards/rejected": -3.21875, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.14652014652014653, |
|
"grad_norm": 29.52150895374023, |
|
"learning_rate": 4.967700826904229e-07, |
|
"logits/chosen": -2.875, |
|
"logits/rejected": -2.796875, |
|
"logps/chosen": -368.0, |
|
"logps/rejected": -672.0, |
|
"loss": 0.2261, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.95703125, |
|
"rewards/margins": 2.921875, |
|
"rewards/rejected": -3.875, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.15698587127158556, |
|
"grad_norm": 45.4969620914659, |
|
"learning_rate": 4.951404179843962e-07, |
|
"logits/chosen": -2.75, |
|
"logits/rejected": -2.625, |
|
"logps/chosen": -438.0, |
|
"logps/rejected": -688.0, |
|
"loss": 0.2018, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -1.2734375, |
|
"rewards/margins": 3.015625, |
|
"rewards/rejected": -4.28125, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.1674515960230246, |
|
"grad_norm": 32.79270513780942, |
|
"learning_rate": 4.931828996974498e-07, |
|
"logits/chosen": -2.75, |
|
"logits/rejected": -2.65625, |
|
"logps/chosen": -440.0, |
|
"logps/rejected": -700.0, |
|
"loss": 0.2027, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.1640625, |
|
"rewards/margins": 3.015625, |
|
"rewards/rejected": -4.1875, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.17791732077446362, |
|
"grad_norm": 30.949462462321232, |
|
"learning_rate": 4.909001458367866e-07, |
|
"logits/chosen": -2.671875, |
|
"logits/rejected": -2.609375, |
|
"logps/chosen": -372.0, |
|
"logps/rejected": -724.0, |
|
"loss": 0.1997, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.0703125, |
|
"rewards/margins": 3.171875, |
|
"rewards/rejected": -4.25, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.18838304552590268, |
|
"grad_norm": 26.340401250142335, |
|
"learning_rate": 4.882952093833627e-07, |
|
"logits/chosen": -2.703125, |
|
"logits/rejected": -2.578125, |
|
"logps/chosen": -382.0, |
|
"logps/rejected": -716.0, |
|
"loss": 0.1926, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -1.078125, |
|
"rewards/margins": 3.359375, |
|
"rewards/rejected": -4.4375, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.1988487702773417, |
|
"grad_norm": 18.968097604368335, |
|
"learning_rate": 4.853715742087946e-07, |
|
"logits/chosen": -2.53125, |
|
"logits/rejected": -2.484375, |
|
"logps/chosen": -428.0, |
|
"logps/rejected": -752.0, |
|
"loss": 0.1612, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -1.5, |
|
"rewards/margins": 3.53125, |
|
"rewards/rejected": -5.03125, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.20931449502878074, |
|
"grad_norm": 20.782646308534286, |
|
"learning_rate": 4.821331504159906e-07, |
|
"logits/chosen": -2.6875, |
|
"logits/rejected": -2.484375, |
|
"logps/chosen": -462.0, |
|
"logps/rejected": -784.0, |
|
"loss": 0.1607, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -1.5703125, |
|
"rewards/margins": 3.5, |
|
"rewards/rejected": -5.09375, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.21978021978021978, |
|
"grad_norm": 23.863300215477395, |
|
"learning_rate": 4.785842691097342e-07, |
|
"logits/chosen": -2.609375, |
|
"logits/rejected": -2.484375, |
|
"logps/chosen": -410.0, |
|
"logps/rejected": -820.0, |
|
"loss": 0.1596, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -1.21875, |
|
"rewards/margins": 4.25, |
|
"rewards/rejected": -5.46875, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.2302459445316588, |
|
"grad_norm": 40.35606043050439, |
|
"learning_rate": 4.7472967660421603e-07, |
|
"logits/chosen": -2.59375, |
|
"logits/rejected": -2.46875, |
|
"logps/chosen": -494.0, |
|
"logps/rejected": -932.0, |
|
"loss": 0.1764, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.9609375, |
|
"rewards/margins": 4.46875, |
|
"rewards/rejected": -6.4375, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.24071166928309787, |
|
"grad_norm": 25.181446808734336, |
|
"learning_rate": 4.705745280752585e-07, |
|
"logits/chosen": -2.6875, |
|
"logits/rejected": -2.515625, |
|
"logps/chosen": -442.0, |
|
"logps/rejected": -832.0, |
|
"loss": 0.1588, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.46875, |
|
"rewards/margins": 4.0625, |
|
"rewards/rejected": -5.53125, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.25117739403453687, |
|
"grad_norm": 32.49967985504463, |
|
"learning_rate": 4.6612438066572555e-07, |
|
"logits/chosen": -2.4375, |
|
"logits/rejected": -2.1875, |
|
"logps/chosen": -480.0, |
|
"logps/rejected": -940.0, |
|
"loss": 0.16, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.6875, |
|
"rewards/margins": 4.78125, |
|
"rewards/rejected": -6.4375, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.2616431187859759, |
|
"grad_norm": 31.230972977495156, |
|
"learning_rate": 4.6138518605333664e-07, |
|
"logits/chosen": -2.5625, |
|
"logits/rejected": -2.421875, |
|
"logps/chosen": -426.0, |
|
"logps/rejected": -800.0, |
|
"loss": 0.1537, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -1.421875, |
|
"rewards/margins": 4.0625, |
|
"rewards/rejected": -5.46875, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.272108843537415, |
|
"grad_norm": 29.71689511008149, |
|
"learning_rate": 4.5636328249082514e-07, |
|
"logits/chosen": -2.453125, |
|
"logits/rejected": -2.296875, |
|
"logps/chosen": -474.0, |
|
"logps/rejected": -948.0, |
|
"loss": 0.1394, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.828125, |
|
"rewards/margins": 4.625, |
|
"rewards/rejected": -6.4375, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.282574568288854, |
|
"grad_norm": 28.295583463414996, |
|
"learning_rate": 4.510653863290871e-07, |
|
"logits/chosen": -2.40625, |
|
"logits/rejected": -2.078125, |
|
"logps/chosen": -438.0, |
|
"logps/rejected": -896.0, |
|
"loss": 0.1481, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.390625, |
|
"rewards/margins": 4.84375, |
|
"rewards/rejected": -6.25, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.29304029304029305, |
|
"grad_norm": 28.398306620399453, |
|
"learning_rate": 4.4549858303465737e-07, |
|
"logits/chosen": -2.5, |
|
"logits/rejected": -2.3125, |
|
"logps/chosen": -484.0, |
|
"logps/rejected": -944.0, |
|
"loss": 0.1392, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -1.9453125, |
|
"rewards/margins": 4.5625, |
|
"rewards/rejected": -6.5, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.3035060177917321, |
|
"grad_norm": 40.00756374405055, |
|
"learning_rate": 4.396703177135261e-07, |
|
"logits/chosen": -2.28125, |
|
"logits/rejected": -2.109375, |
|
"logps/chosen": -544.0, |
|
"logps/rejected": -1072.0, |
|
"loss": 0.1322, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -2.734375, |
|
"rewards/margins": 5.53125, |
|
"rewards/rejected": -8.25, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.3139717425431711, |
|
"grad_norm": 37.30671040470767, |
|
"learning_rate": 4.335883851539693e-07, |
|
"logits/chosen": -2.34375, |
|
"logits/rejected": -2.015625, |
|
"logps/chosen": -680.0, |
|
"logps/rejected": -1200.0, |
|
"loss": 0.1326, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.65625, |
|
"rewards/margins": 5.46875, |
|
"rewards/rejected": -9.125, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.32443746729461015, |
|
"grad_norm": 34.70455152314579, |
|
"learning_rate": 4.272609194017105e-07, |
|
"logits/chosen": -2.21875, |
|
"logits/rejected": -1.7265625, |
|
"logps/chosen": -832.0, |
|
"logps/rejected": -1368.0, |
|
"loss": 0.1371, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -5.25, |
|
"rewards/margins": 5.40625, |
|
"rewards/rejected": -10.625, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.3349031920460492, |
|
"grad_norm": 24.846518493181428, |
|
"learning_rate": 4.2069638288135547e-07, |
|
"logits/chosen": -2.34375, |
|
"logits/rejected": -2.109375, |
|
"logps/chosen": -696.0, |
|
"logps/rejected": -1264.0, |
|
"loss": 0.1284, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -3.9375, |
|
"rewards/margins": 5.71875, |
|
"rewards/rejected": -9.625, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.3453689167974882, |
|
"grad_norm": 45.84342601245407, |
|
"learning_rate": 4.139035550786494e-07, |
|
"logits/chosen": -2.484375, |
|
"logits/rejected": -2.375, |
|
"logps/chosen": -684.0, |
|
"logps/rejected": -1200.0, |
|
"loss": 0.1228, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -4.03125, |
|
"rewards/margins": 5.0625, |
|
"rewards/rejected": -9.125, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.35583464154892724, |
|
"grad_norm": 49.94021964049473, |
|
"learning_rate": 4.0689152079869306e-07, |
|
"logits/chosen": -2.21875, |
|
"logits/rejected": -1.8515625, |
|
"logps/chosen": -740.0, |
|
"logps/rejected": -1456.0, |
|
"loss": 0.1026, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -4.8125, |
|
"rewards/margins": 7.34375, |
|
"rewards/rejected": -12.1875, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.3663003663003663, |
|
"grad_norm": 39.72539996550781, |
|
"learning_rate": 3.99669658015821e-07, |
|
"logits/chosen": -2.25, |
|
"logits/rejected": -1.9609375, |
|
"logps/chosen": -696.0, |
|
"logps/rejected": -1328.0, |
|
"loss": 0.1202, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -4.375, |
|
"rewards/margins": 6.59375, |
|
"rewards/rejected": -10.9375, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.37676609105180536, |
|
"grad_norm": 45.84961303802077, |
|
"learning_rate": 3.92247625331392e-07, |
|
"logits/chosen": -2.21875, |
|
"logits/rejected": -2.0, |
|
"logps/chosen": -764.0, |
|
"logps/rejected": -1384.0, |
|
"loss": 0.1073, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -4.71875, |
|
"rewards/margins": 6.40625, |
|
"rewards/rejected": -11.125, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.3872318158032444, |
|
"grad_norm": 20.54553148683269, |
|
"learning_rate": 3.846353490562664e-07, |
|
"logits/chosen": -2.3125, |
|
"logits/rejected": -1.796875, |
|
"logps/chosen": -680.0, |
|
"logps/rejected": -1360.0, |
|
"loss": 0.1308, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -4.1875, |
|
"rewards/margins": 6.90625, |
|
"rewards/rejected": -11.125, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.3976975405546834, |
|
"grad_norm": 30.114773714834385, |
|
"learning_rate": 3.768430099352445e-07, |
|
"logits/chosen": -2.4375, |
|
"logits/rejected": -2.234375, |
|
"logps/chosen": -668.0, |
|
"logps/rejected": -1200.0, |
|
"loss": 0.1285, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -3.609375, |
|
"rewards/margins": 5.53125, |
|
"rewards/rejected": -9.125, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.40816326530612246, |
|
"grad_norm": 22.860559479058303, |
|
"learning_rate": 3.6888102953122304e-07, |
|
"logits/chosen": -2.421875, |
|
"logits/rejected": -2.15625, |
|
"logps/chosen": -652.0, |
|
"logps/rejected": -1272.0, |
|
"loss": 0.1018, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -3.609375, |
|
"rewards/margins": 6.5, |
|
"rewards/rejected": -10.125, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.4186289900575615, |
|
"grad_norm": 19.494795072649943, |
|
"learning_rate": 3.607600562872785e-07, |
|
"logits/chosen": -2.4375, |
|
"logits/rejected": -2.03125, |
|
"logps/chosen": -892.0, |
|
"logps/rejected": -1640.0, |
|
"loss": 0.091, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -5.46875, |
|
"rewards/margins": 7.90625, |
|
"rewards/rejected": -13.375, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.4290947148090005, |
|
"grad_norm": 34.802523467101764, |
|
"learning_rate": 3.5249095128531856e-07, |
|
"logits/chosen": -2.453125, |
|
"logits/rejected": -2.203125, |
|
"logps/chosen": -812.0, |
|
"logps/rejected": -1392.0, |
|
"loss": 0.1044, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -4.75, |
|
"rewards/margins": 6.4375, |
|
"rewards/rejected": -11.1875, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.43956043956043955, |
|
"grad_norm": 16.064797665253533, |
|
"learning_rate": 3.4408477372034736e-07, |
|
"logits/chosen": -2.421875, |
|
"logits/rejected": -2.03125, |
|
"logps/chosen": -856.0, |
|
"logps/rejected": -1536.0, |
|
"loss": 0.1125, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -5.40625, |
|
"rewards/margins": 7.375, |
|
"rewards/rejected": -12.75, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.4500261643118786, |
|
"grad_norm": 26.48155910496492, |
|
"learning_rate": 3.3555276610977276e-07, |
|
"logits/chosen": -2.40625, |
|
"logits/rejected": -2.171875, |
|
"logps/chosen": -776.0, |
|
"logps/rejected": -1456.0, |
|
"loss": 0.0939, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -4.90625, |
|
"rewards/margins": 6.78125, |
|
"rewards/rejected": -11.6875, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.4604918890633176, |
|
"grad_norm": 20.80477653317148, |
|
"learning_rate": 3.269063392575352e-07, |
|
"logits/chosen": -2.4375, |
|
"logits/rejected": -2.1875, |
|
"logps/chosen": -748.0, |
|
"logps/rejected": -1472.0, |
|
"loss": 0.1046, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -4.75, |
|
"rewards/margins": 7.21875, |
|
"rewards/rejected": -12.0, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.47095761381475665, |
|
"grad_norm": 29.44200512203946, |
|
"learning_rate": 3.1815705699316964e-07, |
|
"logits/chosen": -2.359375, |
|
"logits/rejected": -2.0625, |
|
"logps/chosen": -792.0, |
|
"logps/rejected": -1496.0, |
|
"loss": 0.0963, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -4.90625, |
|
"rewards/margins": 7.125, |
|
"rewards/rejected": -12.0625, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.48142333856619574, |
|
"grad_norm": 26.271372385880735, |
|
"learning_rate": 3.0931662070620794e-07, |
|
"logits/chosen": -2.203125, |
|
"logits/rejected": -2.0, |
|
"logps/chosen": -908.0, |
|
"logps/rejected": -1608.0, |
|
"loss": 0.0973, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -6.34375, |
|
"rewards/margins": 7.1875, |
|
"rewards/rejected": -13.5625, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.49188906331763477, |
|
"grad_norm": 31.154091413440536, |
|
"learning_rate": 3.003968536966078e-07, |
|
"logits/chosen": -2.390625, |
|
"logits/rejected": -2.109375, |
|
"logps/chosen": -868.0, |
|
"logps/rejected": -1600.0, |
|
"loss": 0.0846, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -5.53125, |
|
"rewards/margins": 7.40625, |
|
"rewards/rejected": -12.9375, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.5023547880690737, |
|
"grad_norm": 25.37606130345961, |
|
"learning_rate": 2.9140968536213693e-07, |
|
"logits/chosen": -2.5, |
|
"logits/rejected": -2.125, |
|
"logps/chosen": -812.0, |
|
"logps/rejected": -1504.0, |
|
"loss": 0.0895, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -5.0625, |
|
"rewards/margins": 7.25, |
|
"rewards/rejected": -12.3125, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.5128205128205128, |
|
"grad_norm": 27.895788314669506, |
|
"learning_rate": 2.823671352438608e-07, |
|
"logits/chosen": -2.515625, |
|
"logits/rejected": -2.265625, |
|
"logps/chosen": -820.0, |
|
"logps/rejected": -1488.0, |
|
"loss": 0.0997, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -5.34375, |
|
"rewards/margins": 6.8125, |
|
"rewards/rejected": -12.1875, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.5232862375719518, |
|
"grad_norm": 31.69949419586544, |
|
"learning_rate": 2.73281296951072e-07, |
|
"logits/chosen": -2.453125, |
|
"logits/rejected": -2.03125, |
|
"logps/chosen": -952.0, |
|
"logps/rejected": -1672.0, |
|
"loss": 0.0871, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -6.5625, |
|
"rewards/margins": 7.59375, |
|
"rewards/rejected": -14.125, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.533751962323391, |
|
"grad_norm": 18.91909774862979, |
|
"learning_rate": 2.641643219871597e-07, |
|
"logits/chosen": -2.375, |
|
"logits/rejected": -2.046875, |
|
"logps/chosen": -824.0, |
|
"logps/rejected": -1472.0, |
|
"loss": 0.0982, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -5.1875, |
|
"rewards/margins": 6.84375, |
|
"rewards/rejected": -12.0, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.54421768707483, |
|
"grad_norm": 31.301196837566955, |
|
"learning_rate": 2.550284034980507e-07, |
|
"logits/chosen": -2.46875, |
|
"logits/rejected": -2.3125, |
|
"logps/chosen": -736.0, |
|
"logps/rejected": -1368.0, |
|
"loss": 0.1158, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -4.34375, |
|
"rewards/margins": 6.5, |
|
"rewards/rejected": -10.8125, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.554683411826269, |
|
"grad_norm": 35.72913678676417, |
|
"learning_rate": 2.4588575996495794e-07, |
|
"logits/chosen": -2.640625, |
|
"logits/rejected": -2.34375, |
|
"logps/chosen": -684.0, |
|
"logps/rejected": -1336.0, |
|
"loss": 0.1081, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -4.125, |
|
"rewards/margins": 6.78125, |
|
"rewards/rejected": -10.9375, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.565149136577708, |
|
"grad_norm": 22.499329779979547, |
|
"learning_rate": 2.367486188632446e-07, |
|
"logits/chosen": -2.609375, |
|
"logits/rejected": -2.34375, |
|
"logps/chosen": -668.0, |
|
"logps/rejected": -1336.0, |
|
"loss": 0.0892, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.84375, |
|
"rewards/margins": 6.65625, |
|
"rewards/rejected": -10.5, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.5756148613291471, |
|
"grad_norm": 18.90148104971076, |
|
"learning_rate": 2.276292003092593e-07, |
|
"logits/chosen": -2.484375, |
|
"logits/rejected": -2.203125, |
|
"logps/chosen": -816.0, |
|
"logps/rejected": -1520.0, |
|
"loss": 0.0834, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -5.28125, |
|
"rewards/margins": 7.28125, |
|
"rewards/rejected": -12.5625, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.5860805860805861, |
|
"grad_norm": 34.885386057496255, |
|
"learning_rate": 2.185397007170141e-07, |
|
"logits/chosen": -2.421875, |
|
"logits/rejected": -2.0625, |
|
"logps/chosen": -952.0, |
|
"logps/rejected": -1680.0, |
|
"loss": 0.0802, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -6.71875, |
|
"rewards/margins": 7.75, |
|
"rewards/rejected": -14.4375, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.5965463108320251, |
|
"grad_norm": 31.26740690349814, |
|
"learning_rate": 2.094922764865619e-07, |
|
"logits/chosen": -2.34375, |
|
"logits/rejected": -2.21875, |
|
"logps/chosen": -892.0, |
|
"logps/rejected": -1640.0, |
|
"loss": 0.073, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.9375, |
|
"rewards/margins": 7.71875, |
|
"rewards/rejected": -13.625, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.6070120355834642, |
|
"grad_norm": 26.503557168812993, |
|
"learning_rate": 2.0049902774588797e-07, |
|
"logits/chosen": -2.4375, |
|
"logits/rejected": -2.125, |
|
"logps/chosen": -892.0, |
|
"logps/rejected": -1680.0, |
|
"loss": 0.0786, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -6.1875, |
|
"rewards/margins": 8.0, |
|
"rewards/rejected": -14.25, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.6174777603349032, |
|
"grad_norm": 26.760637852696387, |
|
"learning_rate": 1.9157198216806238e-07, |
|
"logits/chosen": -2.453125, |
|
"logits/rejected": -2.09375, |
|
"logps/chosen": -888.0, |
|
"logps/rejected": -1736.0, |
|
"loss": 0.0796, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -6.125, |
|
"rewards/margins": 8.4375, |
|
"rewards/rejected": -14.5625, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.6279434850863422, |
|
"grad_norm": 33.575414387335336, |
|
"learning_rate": 1.8272307888529274e-07, |
|
"logits/chosen": -2.625, |
|
"logits/rejected": -2.40625, |
|
"logps/chosen": -908.0, |
|
"logps/rejected": -1776.0, |
|
"loss": 0.0901, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.9375, |
|
"rewards/margins": 8.5, |
|
"rewards/rejected": -14.4375, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.6384092098377813, |
|
"grad_norm": 30.280173464201145, |
|
"learning_rate": 1.7396415252139288e-07, |
|
"logits/chosen": -2.484375, |
|
"logits/rejected": -2.171875, |
|
"logps/chosen": -820.0, |
|
"logps/rejected": -1584.0, |
|
"loss": 0.0848, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -5.28125, |
|
"rewards/margins": 7.78125, |
|
"rewards/rejected": -13.0625, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.6488749345892203, |
|
"grad_norm": 31.809087141096686, |
|
"learning_rate": 1.6530691736402316e-07, |
|
"logits/chosen": -2.515625, |
|
"logits/rejected": -2.25, |
|
"logps/chosen": -828.0, |
|
"logps/rejected": -1576.0, |
|
"loss": 0.0699, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -5.375, |
|
"rewards/margins": 7.6875, |
|
"rewards/rejected": -13.0625, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.6593406593406593, |
|
"grad_norm": 20.492927408780787, |
|
"learning_rate": 1.5676295169786864e-07, |
|
"logits/chosen": -2.515625, |
|
"logits/rejected": -2.203125, |
|
"logps/chosen": -928.0, |
|
"logps/rejected": -1608.0, |
|
"loss": 0.0906, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -6.1875, |
|
"rewards/margins": 7.15625, |
|
"rewards/rejected": -13.375, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.6698063840920984, |
|
"grad_norm": 24.302749821696764, |
|
"learning_rate": 1.483436823197092e-07, |
|
"logits/chosen": -2.484375, |
|
"logits/rejected": -2.265625, |
|
"logps/chosen": -916.0, |
|
"logps/rejected": -1680.0, |
|
"loss": 0.0811, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -6.375, |
|
"rewards/margins": 7.78125, |
|
"rewards/rejected": -14.1875, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.6802721088435374, |
|
"grad_norm": 30.640711717907518, |
|
"learning_rate": 1.4006036925609243e-07, |
|
"logits/chosen": -2.484375, |
|
"logits/rejected": -2.28125, |
|
"logps/chosen": -984.0, |
|
"logps/rejected": -1704.0, |
|
"loss": 0.0736, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -6.8125, |
|
"rewards/margins": 7.53125, |
|
"rewards/rejected": -14.3125, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.6907378335949764, |
|
"grad_norm": 28.697044539390294, |
|
"learning_rate": 1.319240907040458e-07, |
|
"logits/chosen": -2.421875, |
|
"logits/rejected": -2.125, |
|
"logps/chosen": -960.0, |
|
"logps/rejected": -1752.0, |
|
"loss": 0.0737, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -6.59375, |
|
"rewards/margins": 7.84375, |
|
"rewards/rejected": -14.4375, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.7012035583464155, |
|
"grad_norm": 17.345360984589735, |
|
"learning_rate": 1.2394572821496948e-07, |
|
"logits/chosen": -2.3125, |
|
"logits/rejected": -2.03125, |
|
"logps/chosen": -988.0, |
|
"logps/rejected": -1848.0, |
|
"loss": 0.0695, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -7.15625, |
|
"rewards/margins": 8.625, |
|
"rewards/rejected": -15.75, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.7116692830978545, |
|
"grad_norm": 33.23147099676755, |
|
"learning_rate": 1.1613595214152711e-07, |
|
"logits/chosen": -2.375, |
|
"logits/rejected": -2.09375, |
|
"logps/chosen": -1064.0, |
|
"logps/rejected": -1880.0, |
|
"loss": 0.0806, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -7.53125, |
|
"rewards/margins": 8.4375, |
|
"rewards/rejected": -15.9375, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.7221350078492935, |
|
"grad_norm": 20.63657009936635, |
|
"learning_rate": 1.0850520736699362e-07, |
|
"logits/chosen": -2.515625, |
|
"logits/rejected": -2.265625, |
|
"logps/chosen": -1004.0, |
|
"logps/rejected": -1792.0, |
|
"loss": 0.08, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -6.875, |
|
"rewards/margins": 8.0, |
|
"rewards/rejected": -14.875, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.7326007326007326, |
|
"grad_norm": 18.557890534440453, |
|
"learning_rate": 1.0106369933615042e-07, |
|
"logits/chosen": -2.390625, |
|
"logits/rejected": -2.234375, |
|
"logps/chosen": -960.0, |
|
"logps/rejected": -1840.0, |
|
"loss": 0.0614, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -6.5625, |
|
"rewards/margins": 8.75, |
|
"rewards/rejected": -15.25, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.7430664573521716, |
|
"grad_norm": 17.0748564532371, |
|
"learning_rate": 9.382138040640714e-08, |
|
"logits/chosen": -2.53125, |
|
"logits/rejected": -2.15625, |
|
"logps/chosen": -960.0, |
|
"logps/rejected": -1888.0, |
|
"loss": 0.0656, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -6.71875, |
|
"rewards/margins": 9.125, |
|
"rewards/rejected": -15.875, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.7535321821036107, |
|
"grad_norm": 28.894806455872907, |
|
"learning_rate": 8.678793653740632e-08, |
|
"logits/chosen": -2.5, |
|
"logits/rejected": -2.203125, |
|
"logps/chosen": -1040.0, |
|
"logps/rejected": -1840.0, |
|
"loss": 0.0632, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -7.0625, |
|
"rewards/margins": 8.4375, |
|
"rewards/rejected": -15.5, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.7639979068550498, |
|
"grad_norm": 26.829195320759613, |
|
"learning_rate": 7.997277433690983e-08, |
|
"logits/chosen": -2.34375, |
|
"logits/rejected": -2.15625, |
|
"logps/chosen": -1024.0, |
|
"logps/rejected": -1872.0, |
|
"loss": 0.0744, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -7.46875, |
|
"rewards/margins": 8.5625, |
|
"rewards/rejected": -16.0, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.7744636316064888, |
|
"grad_norm": 19.48203742338746, |
|
"learning_rate": 7.338500848029602e-08, |
|
"logits/chosen": -2.375, |
|
"logits/rejected": -2.0625, |
|
"logps/chosen": -1056.0, |
|
"logps/rejected": -1976.0, |
|
"loss": 0.0672, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -7.53125, |
|
"rewards/margins": 9.4375, |
|
"rewards/rejected": -17.0, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.7849293563579278, |
|
"grad_norm": 14.527749472798101, |
|
"learning_rate": 6.70334495204884e-08, |
|
"logits/chosen": -2.265625, |
|
"logits/rejected": -1.9453125, |
|
"logps/chosen": -996.0, |
|
"logps/rejected": -1920.0, |
|
"loss": 0.0481, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -7.375, |
|
"rewards/margins": 9.375, |
|
"rewards/rejected": -16.75, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.7953950811093669, |
|
"grad_norm": 15.609318913343236, |
|
"learning_rate": 6.092659210462231e-08, |
|
"logits/chosen": -2.40625, |
|
"logits/rejected": -1.9921875, |
|
"logps/chosen": -1056.0, |
|
"logps/rejected": -1976.0, |
|
"loss": 0.0532, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -7.78125, |
|
"rewards/margins": 9.4375, |
|
"rewards/rejected": -17.25, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.8058608058608059, |
|
"grad_norm": 7.809802817809847, |
|
"learning_rate": 5.507260361320737e-08, |
|
"logits/chosen": -2.25, |
|
"logits/rejected": -1.96875, |
|
"logps/chosen": -1056.0, |
|
"logps/rejected": -2064.0, |
|
"loss": 0.0419, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -7.6875, |
|
"rewards/margins": 10.25, |
|
"rewards/rejected": -17.875, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.8163265306122449, |
|
"grad_norm": 22.462937925254167, |
|
"learning_rate": 4.947931323697982e-08, |
|
"logits/chosen": -2.359375, |
|
"logits/rejected": -2.015625, |
|
"logps/chosen": -1120.0, |
|
"logps/rejected": -2064.0, |
|
"loss": 0.0616, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -8.125, |
|
"rewards/margins": 9.75, |
|
"rewards/rejected": -17.875, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.826792255363684, |
|
"grad_norm": 25.68320010476297, |
|
"learning_rate": 4.415420150605398e-08, |
|
"logits/chosen": -2.171875, |
|
"logits/rejected": -1.890625, |
|
"logps/chosen": -1048.0, |
|
"logps/rejected": -2040.0, |
|
"loss": 0.0657, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -7.8125, |
|
"rewards/margins": 9.8125, |
|
"rewards/rejected": -17.625, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.837257980115123, |
|
"grad_norm": 26.750079370530244, |
|
"learning_rate": 3.9104390285376374e-08, |
|
"logits/chosen": -2.296875, |
|
"logits/rejected": -1.9765625, |
|
"logps/chosen": -1040.0, |
|
"logps/rejected": -2016.0, |
|
"loss": 0.0513, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -7.5, |
|
"rewards/margins": 9.6875, |
|
"rewards/rejected": -17.125, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.847723704866562, |
|
"grad_norm": 15.408792175860983, |
|
"learning_rate": 3.433663324986208e-08, |
|
"logits/chosen": -2.3125, |
|
"logits/rejected": -2.015625, |
|
"logps/chosen": -1024.0, |
|
"logps/rejected": -1936.0, |
|
"loss": 0.0679, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -7.28125, |
|
"rewards/margins": 9.0625, |
|
"rewards/rejected": -16.375, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.858189429618001, |
|
"grad_norm": 16.721851816987886, |
|
"learning_rate": 2.9857306851953897e-08, |
|
"logits/chosen": -2.34375, |
|
"logits/rejected": -1.875, |
|
"logps/chosen": -1032.0, |
|
"logps/rejected": -1960.0, |
|
"loss": 0.0581, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -7.59375, |
|
"rewards/margins": 9.5, |
|
"rewards/rejected": -17.125, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.8686551543694401, |
|
"grad_norm": 25.788818922540266, |
|
"learning_rate": 2.567240179368185e-08, |
|
"logits/chosen": -2.34375, |
|
"logits/rejected": -1.75, |
|
"logps/chosen": -1072.0, |
|
"logps/rejected": -2064.0, |
|
"loss": 0.052, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -7.6875, |
|
"rewards/margins": 10.3125, |
|
"rewards/rejected": -18.0, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.8791208791208791, |
|
"grad_norm": 14.284086558067754, |
|
"learning_rate": 2.1787515014630357e-08, |
|
"logits/chosen": -2.203125, |
|
"logits/rejected": -1.875, |
|
"logps/chosen": -1040.0, |
|
"logps/rejected": -2128.0, |
|
"loss": 0.0524, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -7.75, |
|
"rewards/margins": 10.75, |
|
"rewards/rejected": -18.5, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.8895866038723181, |
|
"grad_norm": 30.104620767056684, |
|
"learning_rate": 1.820784220652766e-08, |
|
"logits/chosen": -2.25, |
|
"logits/rejected": -1.9375, |
|
"logps/chosen": -1088.0, |
|
"logps/rejected": -2112.0, |
|
"loss": 0.0408, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -7.84375, |
|
"rewards/margins": 10.4375, |
|
"rewards/rejected": -18.25, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.9000523286237572, |
|
"grad_norm": 30.017914965835946, |
|
"learning_rate": 1.4938170864468636e-08, |
|
"logits/chosen": -2.234375, |
|
"logits/rejected": -1.75, |
|
"logps/chosen": -1072.0, |
|
"logps/rejected": -2192.0, |
|
"loss": 0.0575, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -8.0625, |
|
"rewards/margins": 11.25, |
|
"rewards/rejected": -19.375, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.9105180533751962, |
|
"grad_norm": 22.849493479324984, |
|
"learning_rate": 1.1982873884064465e-08, |
|
"logits/chosen": -2.296875, |
|
"logits/rejected": -1.875, |
|
"logps/chosen": -1120.0, |
|
"logps/rejected": -2080.0, |
|
"loss": 0.0585, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -8.125, |
|
"rewards/margins": 10.0625, |
|
"rewards/rejected": -18.125, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.9209837781266352, |
|
"grad_norm": 13.839322356316236, |
|
"learning_rate": 9.345903713082304e-09, |
|
"logits/chosen": -2.234375, |
|
"logits/rejected": -1.7109375, |
|
"logps/chosen": -1184.0, |
|
"logps/rejected": -2208.0, |
|
"loss": 0.0595, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -8.5, |
|
"rewards/margins": 10.5625, |
|
"rewards/rejected": -19.125, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.9314495028780743, |
|
"grad_norm": 17.72445198042151, |
|
"learning_rate": 7.030787065396865e-09, |
|
"logits/chosen": -2.21875, |
|
"logits/rejected": -1.7578125, |
|
"logps/chosen": -1080.0, |
|
"logps/rejected": -2176.0, |
|
"loss": 0.0598, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -7.84375, |
|
"rewards/margins": 11.25, |
|
"rewards/rejected": -19.125, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.9419152276295133, |
|
"grad_norm": 7.302557618391421, |
|
"learning_rate": 5.04062020432286e-09, |
|
"logits/chosen": -2.40625, |
|
"logits/rejected": -1.859375, |
|
"logps/chosen": -1112.0, |
|
"logps/rejected": -2128.0, |
|
"loss": 0.0641, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -8.0, |
|
"rewards/margins": 10.4375, |
|
"rewards/rejected": -18.5, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.9523809523809523, |
|
"grad_norm": 26.036561376029788, |
|
"learning_rate": 3.3780648016376866e-09, |
|
"logits/chosen": -2.234375, |
|
"logits/rejected": -1.890625, |
|
"logps/chosen": -1120.0, |
|
"logps/rejected": -2064.0, |
|
"loss": 0.0606, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -8.0625, |
|
"rewards/margins": 9.6875, |
|
"rewards/rejected": -17.75, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.9628466771323915, |
|
"grad_norm": 42.338882753255156, |
|
"learning_rate": 2.0453443778310766e-09, |
|
"logits/chosen": -2.25, |
|
"logits/rejected": -1.8203125, |
|
"logps/chosen": -1048.0, |
|
"logps/rejected": -2032.0, |
|
"loss": 0.0514, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -7.59375, |
|
"rewards/margins": 10.0625, |
|
"rewards/rejected": -17.625, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.9733124018838305, |
|
"grad_norm": 25.61689367855155, |
|
"learning_rate": 1.0442413283435758e-09, |
|
"logits/chosen": -2.25, |
|
"logits/rejected": -1.7265625, |
|
"logps/chosen": -1040.0, |
|
"logps/rejected": -1992.0, |
|
"loss": 0.048, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -7.78125, |
|
"rewards/margins": 9.625, |
|
"rewards/rejected": -17.375, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.9837781266352695, |
|
"grad_norm": 23.291401299662382, |
|
"learning_rate": 3.760945397705828e-10, |
|
"logits/chosen": -2.25, |
|
"logits/rejected": -1.96875, |
|
"logps/chosen": -1064.0, |
|
"logps/rejected": -2096.0, |
|
"loss": 0.0548, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -7.5625, |
|
"rewards/margins": 10.375, |
|
"rewards/rejected": -17.875, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.9942438513867086, |
|
"grad_norm": 25.219313507903824, |
|
"learning_rate": 4.17975992204056e-11, |
|
"logits/chosen": -2.21875, |
|
"logits/rejected": -1.8125, |
|
"logps/chosen": -1096.0, |
|
"logps/rejected": -2128.0, |
|
"loss": 0.0409, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -7.84375, |
|
"rewards/margins": 10.5, |
|
"rewards/rejected": -18.375, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.9994767137624281, |
|
"step": 955, |
|
"total_flos": 0.0, |
|
"train_loss": 0.15506600241386453, |
|
"train_runtime": 14070.209, |
|
"train_samples_per_second": 8.69, |
|
"train_steps_per_second": 0.068 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 955, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|