|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 3821, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00026171159382360636, |
|
"grad_norm": 1.421875, |
|
"learning_rate": 1.3054830287206266e-09, |
|
"logits/chosen": -2.9892377853393555, |
|
"logits/rejected": -2.938478946685791, |
|
"logps/chosen": -307.68707275390625, |
|
"logps/rejected": -392.1196594238281, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0026171159382360636, |
|
"grad_norm": 1.359375, |
|
"learning_rate": 1.3054830287206264e-08, |
|
"logits/chosen": -2.846788167953491, |
|
"logits/rejected": -2.834296941757202, |
|
"logps/chosen": -299.1590881347656, |
|
"logps/rejected": -260.9870300292969, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.4791666567325592, |
|
"rewards/chosen": 0.00017009497969411314, |
|
"rewards/margins": 0.0008415079792030156, |
|
"rewards/rejected": -0.0006714130286127329, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.005234231876472127, |
|
"grad_norm": 1.609375, |
|
"learning_rate": 2.610966057441253e-08, |
|
"logits/chosen": -2.8615875244140625, |
|
"logits/rejected": -2.8269271850585938, |
|
"logps/chosen": -325.3974609375, |
|
"logps/rejected": -252.712158203125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": 0.0003612989676184952, |
|
"rewards/margins": 2.1457055481732823e-05, |
|
"rewards/rejected": 0.00033984187757596374, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.007851347814708191, |
|
"grad_norm": 1.796875, |
|
"learning_rate": 3.91644908616188e-08, |
|
"logits/chosen": -2.8635482788085938, |
|
"logits/rejected": -2.83804988861084, |
|
"logps/chosen": -269.81329345703125, |
|
"logps/rejected": -268.55670166015625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.000123085526865907, |
|
"rewards/margins": 6.434940587496385e-05, |
|
"rewards/rejected": -0.0001874349982244894, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.010468463752944255, |
|
"grad_norm": 1.1640625, |
|
"learning_rate": 5.221932114882506e-08, |
|
"logits/chosen": -2.8312931060791016, |
|
"logits/rejected": -2.821013927459717, |
|
"logps/chosen": -233.34909057617188, |
|
"logps/rejected": -238.37490844726562, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.0005193200195208192, |
|
"rewards/margins": -0.0004341518506407738, |
|
"rewards/rejected": -8.516813250025734e-05, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01308557969118032, |
|
"grad_norm": 1.15625, |
|
"learning_rate": 6.527415143603133e-08, |
|
"logits/chosen": -2.866091251373291, |
|
"logits/rejected": -2.85339093208313, |
|
"logps/chosen": -290.05963134765625, |
|
"logps/rejected": -253.92349243164062, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": 0.00046620480134151876, |
|
"rewards/margins": 0.0002595257537905127, |
|
"rewards/rejected": 0.0002066790621029213, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.015702695629416383, |
|
"grad_norm": 1.25, |
|
"learning_rate": 7.83289817232376e-08, |
|
"logits/chosen": -2.825549364089966, |
|
"logits/rejected": -2.8121423721313477, |
|
"logps/chosen": -273.64691162109375, |
|
"logps/rejected": -246.85317993164062, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": 0.00024823236162774265, |
|
"rewards/margins": 0.00039805466076359153, |
|
"rewards/rejected": -0.0001498223573435098, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.018319811567652448, |
|
"grad_norm": 1.234375, |
|
"learning_rate": 9.138381201044386e-08, |
|
"logits/chosen": -2.8805994987487793, |
|
"logits/rejected": -2.8450770378112793, |
|
"logps/chosen": -293.1197814941406, |
|
"logps/rejected": -266.08135986328125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.0004158564261160791, |
|
"rewards/margins": 7.372137770289555e-05, |
|
"rewards/rejected": 0.0003421350847929716, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.02093692750588851, |
|
"grad_norm": 1.6328125, |
|
"learning_rate": 1.0443864229765012e-07, |
|
"logits/chosen": -2.820730209350586, |
|
"logits/rejected": -2.7984094619750977, |
|
"logps/chosen": -279.29498291015625, |
|
"logps/rejected": -266.357666015625, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": 9.654175664763898e-05, |
|
"rewards/margins": -0.00033630471443757415, |
|
"rewards/rejected": 0.0004328465147409588, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.023554043444124574, |
|
"grad_norm": 1.28125, |
|
"learning_rate": 1.174934725848564e-07, |
|
"logits/chosen": -2.8342747688293457, |
|
"logits/rejected": -2.8211700916290283, |
|
"logps/chosen": -270.66888427734375, |
|
"logps/rejected": -251.8229522705078, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.0002475693472661078, |
|
"rewards/margins": 0.0005019751843065023, |
|
"rewards/rejected": -0.000254405866144225, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.02617115938236064, |
|
"grad_norm": 1.296875, |
|
"learning_rate": 1.3054830287206266e-07, |
|
"logits/chosen": -2.849017381668091, |
|
"logits/rejected": -2.842028856277466, |
|
"logps/chosen": -267.05035400390625, |
|
"logps/rejected": -248.63992309570312, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.00027873244835063815, |
|
"rewards/margins": 0.0004233802610542625, |
|
"rewards/rejected": -0.00014464779815170914, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.02617115938236064, |
|
"eval_logits/chosen": -2.865492343902588, |
|
"eval_logits/rejected": -2.838137626647949, |
|
"eval_logps/chosen": -282.7629699707031, |
|
"eval_logps/rejected": -261.4512023925781, |
|
"eval_loss": 0.6930338740348816, |
|
"eval_rewards/accuracies": 0.5134999752044678, |
|
"eval_rewards/chosen": 0.00010537073103478178, |
|
"eval_rewards/margins": 0.00023393578885588795, |
|
"eval_rewards/rejected": -0.00012856510875280946, |
|
"eval_runtime": 623.7252, |
|
"eval_samples_per_second": 3.207, |
|
"eval_steps_per_second": 0.401, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.028788275320596704, |
|
"grad_norm": 1.4375, |
|
"learning_rate": 1.4360313315926893e-07, |
|
"logits/chosen": -2.855942964553833, |
|
"logits/rejected": -2.822741985321045, |
|
"logps/chosen": -307.44110107421875, |
|
"logps/rejected": -257.2309265136719, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": 0.00018880394054576755, |
|
"rewards/margins": 0.0004303649184294045, |
|
"rewards/rejected": -0.0002415610069874674, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.031405391258832765, |
|
"grad_norm": 1.21875, |
|
"learning_rate": 1.566579634464752e-07, |
|
"logits/chosen": -2.86763334274292, |
|
"logits/rejected": -2.844106435775757, |
|
"logps/chosen": -310.5987854003906, |
|
"logps/rejected": -287.745361328125, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.00025308955810032785, |
|
"rewards/margins": 0.00023145000159274787, |
|
"rewards/rejected": 2.1639541955664754e-05, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.03402250719706883, |
|
"grad_norm": 1.4921875, |
|
"learning_rate": 1.6971279373368143e-07, |
|
"logits/chosen": -2.847980499267578, |
|
"logits/rejected": -2.8163723945617676, |
|
"logps/chosen": -271.6886291503906, |
|
"logps/rejected": -269.58660888671875, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 1.363331080028729e-06, |
|
"rewards/margins": 0.000652765913400799, |
|
"rewards/rejected": -0.0006514025735668838, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.036639623135304895, |
|
"grad_norm": 1.3125, |
|
"learning_rate": 1.8276762402088773e-07, |
|
"logits/chosen": -2.8673386573791504, |
|
"logits/rejected": -2.8119819164276123, |
|
"logps/chosen": -291.5235290527344, |
|
"logps/rejected": -247.7689971923828, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.0003679326910059899, |
|
"rewards/margins": 0.000704582198522985, |
|
"rewards/rejected": -0.00033664953662082553, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.03925673907354096, |
|
"grad_norm": 1.3828125, |
|
"learning_rate": 1.95822454308094e-07, |
|
"logits/chosen": -2.8565478324890137, |
|
"logits/rejected": -2.8365466594696045, |
|
"logps/chosen": -299.02996826171875, |
|
"logps/rejected": -255.97604370117188, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.0001471690193284303, |
|
"rewards/margins": 0.0002785170800052583, |
|
"rewards/rejected": -0.00013134813343640417, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.04187385501177702, |
|
"grad_norm": 1.359375, |
|
"learning_rate": 2.0887728459530023e-07, |
|
"logits/chosen": -2.8643641471862793, |
|
"logits/rejected": -2.8453617095947266, |
|
"logps/chosen": -275.17669677734375, |
|
"logps/rejected": -274.9828186035156, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -8.313215948874131e-05, |
|
"rewards/margins": 0.0004607086593750864, |
|
"rewards/rejected": -0.0005438407533802092, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.04449097095001309, |
|
"grad_norm": 1.546875, |
|
"learning_rate": 2.2193211488250652e-07, |
|
"logits/chosen": -2.8222973346710205, |
|
"logits/rejected": -2.803818941116333, |
|
"logps/chosen": -236.69189453125, |
|
"logps/rejected": -238.2162628173828, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.00023162660363595933, |
|
"rewards/margins": 0.00025372960953973234, |
|
"rewards/rejected": -2.2102987713878974e-05, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.04710808688824915, |
|
"grad_norm": 1.125, |
|
"learning_rate": 2.349869451697128e-07, |
|
"logits/chosen": -2.850526809692383, |
|
"logits/rejected": -2.8234286308288574, |
|
"logps/chosen": -276.2384338378906, |
|
"logps/rejected": -259.85089111328125, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.0005887501174584031, |
|
"rewards/margins": 0.000796462525613606, |
|
"rewards/rejected": -0.000207712480914779, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.04972520282648522, |
|
"grad_norm": 2.234375, |
|
"learning_rate": 2.4804177545691903e-07, |
|
"logits/chosen": -2.887956380844116, |
|
"logits/rejected": -2.8700356483459473, |
|
"logps/chosen": -291.0037841796875, |
|
"logps/rejected": -257.3691711425781, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 8.575538959121332e-05, |
|
"rewards/margins": 0.00044340407475829124, |
|
"rewards/rejected": -0.00035764873609878123, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.05234231876472128, |
|
"grad_norm": 1.3125, |
|
"learning_rate": 2.610966057441253e-07, |
|
"logits/chosen": -2.838761806488037, |
|
"logits/rejected": -2.828749179840088, |
|
"logps/chosen": -268.03924560546875, |
|
"logps/rejected": -225.5205078125, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -7.57282687118277e-05, |
|
"rewards/margins": 0.0003965885262005031, |
|
"rewards/rejected": -0.0004723168385680765, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05234231876472128, |
|
"eval_logits/chosen": -2.8625597953796387, |
|
"eval_logits/rejected": -2.8349130153656006, |
|
"eval_logps/chosen": -282.7611389160156, |
|
"eval_logps/rejected": -261.49249267578125, |
|
"eval_loss": 0.6928190588951111, |
|
"eval_rewards/accuracies": 0.546999990940094, |
|
"eval_rewards/chosen": 0.00012387627793941647, |
|
"eval_rewards/margins": 0.0006650119903497398, |
|
"eval_rewards/rejected": -0.0005411357851698995, |
|
"eval_runtime": 622.9697, |
|
"eval_samples_per_second": 3.21, |
|
"eval_steps_per_second": 0.401, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05495943470295734, |
|
"grad_norm": 1.25, |
|
"learning_rate": 2.7415143603133156e-07, |
|
"logits/chosen": -2.875335931777954, |
|
"logits/rejected": -2.841496229171753, |
|
"logps/chosen": -276.1015930175781, |
|
"logps/rejected": -245.19223022460938, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.0002998802810907364, |
|
"rewards/margins": 5.954801963525824e-05, |
|
"rewards/rejected": -0.00035942820250056684, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.05757655064119341, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 2.8720626631853785e-07, |
|
"logits/chosen": -2.8162028789520264, |
|
"logits/rejected": -2.810290575027466, |
|
"logps/chosen": -274.1748962402344, |
|
"logps/rejected": -242.88381958007812, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.00047311707749031484, |
|
"rewards/margins": 0.0010585600975900888, |
|
"rewards/rejected": -0.0005854429909959435, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.06019366657942947, |
|
"grad_norm": 1.4140625, |
|
"learning_rate": 3.002610966057441e-07, |
|
"logits/chosen": -2.886976957321167, |
|
"logits/rejected": -2.862199544906616, |
|
"logps/chosen": -322.8957824707031, |
|
"logps/rejected": -285.7581787109375, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.0009428686462342739, |
|
"rewards/margins": 0.0013226759620010853, |
|
"rewards/rejected": -0.0003798073739744723, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.06281078251766553, |
|
"grad_norm": 1.2890625, |
|
"learning_rate": 3.133159268929504e-07, |
|
"logits/chosen": -2.8522121906280518, |
|
"logits/rejected": -2.838016986846924, |
|
"logps/chosen": -312.5648498535156, |
|
"logps/rejected": -297.47650146484375, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.0015294912736862898, |
|
"rewards/margins": 0.0015876994002610445, |
|
"rewards/rejected": -5.820817386847921e-05, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.06542789845590159, |
|
"grad_norm": 1.1640625, |
|
"learning_rate": 3.263707571801567e-07, |
|
"logits/chosen": -2.815152883529663, |
|
"logits/rejected": -2.8188998699188232, |
|
"logps/chosen": -277.23309326171875, |
|
"logps/rejected": -249.0277862548828, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.0006144286599010229, |
|
"rewards/margins": 0.0015933450777083635, |
|
"rewards/rejected": -0.0009789163013920188, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.06804501439413765, |
|
"grad_norm": 1.1171875, |
|
"learning_rate": 3.3942558746736286e-07, |
|
"logits/chosen": -2.8725204467773438, |
|
"logits/rejected": -2.8254947662353516, |
|
"logps/chosen": -297.4732971191406, |
|
"logps/rejected": -277.87225341796875, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.000995874172076583, |
|
"rewards/margins": 0.0009019881254062057, |
|
"rewards/rejected": 9.38860684982501e-05, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.07066213033237373, |
|
"grad_norm": 0.99609375, |
|
"learning_rate": 3.5248041775456916e-07, |
|
"logits/chosen": -2.8370730876922607, |
|
"logits/rejected": -2.825009346008301, |
|
"logps/chosen": -281.54547119140625, |
|
"logps/rejected": -245.32528686523438, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.0012482403544709086, |
|
"rewards/margins": 0.002398666925728321, |
|
"rewards/rejected": -0.0011504264548420906, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.07327924627060979, |
|
"grad_norm": 1.203125, |
|
"learning_rate": 3.6553524804177545e-07, |
|
"logits/chosen": -2.8796634674072266, |
|
"logits/rejected": -2.836472272872925, |
|
"logps/chosen": -276.66485595703125, |
|
"logps/rejected": -253.39230346679688, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.0009085072088055313, |
|
"rewards/margins": 0.0018395546358078718, |
|
"rewards/rejected": -0.0009310474852100015, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.07589636220884585, |
|
"grad_norm": 1.359375, |
|
"learning_rate": 3.785900783289817e-07, |
|
"logits/chosen": -2.8511414527893066, |
|
"logits/rejected": -2.840785264968872, |
|
"logps/chosen": -304.3522033691406, |
|
"logps/rejected": -279.17950439453125, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.0009422661969438195, |
|
"rewards/margins": 0.0019500417402014136, |
|
"rewards/rejected": -0.0010077755432575941, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.07851347814708191, |
|
"grad_norm": 1.4453125, |
|
"learning_rate": 3.91644908616188e-07, |
|
"logits/chosen": -2.8077988624572754, |
|
"logits/rejected": -2.763946294784546, |
|
"logps/chosen": -266.3786315917969, |
|
"logps/rejected": -248.56350708007812, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.0008328545955009758, |
|
"rewards/margins": 0.0023509101010859013, |
|
"rewards/rejected": -0.0015180552145466208, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.07851347814708191, |
|
"eval_logits/chosen": -2.8650434017181396, |
|
"eval_logits/rejected": -2.8377885818481445, |
|
"eval_logps/chosen": -282.674560546875, |
|
"eval_logps/rejected": -261.546142578125, |
|
"eval_loss": 0.692122220993042, |
|
"eval_rewards/accuracies": 0.6050000190734863, |
|
"eval_rewards/chosen": 0.000989287393167615, |
|
"eval_rewards/margins": 0.0020671640522778034, |
|
"eval_rewards/rejected": -0.0010778764262795448, |
|
"eval_runtime": 622.8014, |
|
"eval_samples_per_second": 3.211, |
|
"eval_steps_per_second": 0.401, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.08113059408531798, |
|
"grad_norm": 1.4921875, |
|
"learning_rate": 4.046997389033943e-07, |
|
"logits/chosen": -2.895244598388672, |
|
"logits/rejected": -2.8767800331115723, |
|
"logps/chosen": -306.62994384765625, |
|
"logps/rejected": -250.0150909423828, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.0013422651682049036, |
|
"rewards/margins": 0.0025762903969734907, |
|
"rewards/rejected": -0.001234025345183909, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.08374771002355404, |
|
"grad_norm": 1.2265625, |
|
"learning_rate": 4.1775456919060046e-07, |
|
"logits/chosen": -2.8745856285095215, |
|
"logits/rejected": -2.8429815769195557, |
|
"logps/chosen": -273.4037170410156, |
|
"logps/rejected": -255.09585571289062, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.0011208092328161001, |
|
"rewards/margins": 0.001973007107153535, |
|
"rewards/rejected": -0.0008521980489604175, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.08636482596179011, |
|
"grad_norm": 1.234375, |
|
"learning_rate": 4.3080939947780675e-07, |
|
"logits/chosen": -2.8409087657928467, |
|
"logits/rejected": -2.8410866260528564, |
|
"logps/chosen": -277.77545166015625, |
|
"logps/rejected": -250.94821166992188, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.002120083197951317, |
|
"rewards/margins": 0.0029364789370447397, |
|
"rewards/rejected": -0.0008163956226781011, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.08898194190002617, |
|
"grad_norm": 1.3984375, |
|
"learning_rate": 4.4386422976501305e-07, |
|
"logits/chosen": -2.8706066608428955, |
|
"logits/rejected": -2.857938766479492, |
|
"logps/chosen": -307.44732666015625, |
|
"logps/rejected": -284.9738464355469, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.0030747547280043364, |
|
"rewards/margins": 0.00399099662899971, |
|
"rewards/rejected": -0.0009162420174106956, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.09159905783826224, |
|
"grad_norm": 1.234375, |
|
"learning_rate": 4.569190600522193e-07, |
|
"logits/chosen": -2.8302149772644043, |
|
"logits/rejected": -2.803089141845703, |
|
"logps/chosen": -309.71893310546875, |
|
"logps/rejected": -296.48101806640625, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.002736264606937766, |
|
"rewards/margins": 0.002880766289308667, |
|
"rewards/rejected": -0.00014450155140366405, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.0942161737764983, |
|
"grad_norm": 0.890625, |
|
"learning_rate": 4.699738903394256e-07, |
|
"logits/chosen": -2.8377342224121094, |
|
"logits/rejected": -2.8193764686584473, |
|
"logps/chosen": -256.7732238769531, |
|
"logps/rejected": -236.75698852539062, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.0019551387522369623, |
|
"rewards/margins": 0.003464858280494809, |
|
"rewards/rejected": -0.0015097195282578468, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.09683328971473436, |
|
"grad_norm": 1.4609375, |
|
"learning_rate": 4.830287206266319e-07, |
|
"logits/chosen": -2.8519506454467773, |
|
"logits/rejected": -2.822915554046631, |
|
"logps/chosen": -295.97418212890625, |
|
"logps/rejected": -251.2534637451172, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.002289209049195051, |
|
"rewards/margins": 0.004647364374250174, |
|
"rewards/rejected": -0.002358155557885766, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.09945040565297043, |
|
"grad_norm": 1.3046875, |
|
"learning_rate": 4.960835509138381e-07, |
|
"logits/chosen": -2.8553032875061035, |
|
"logits/rejected": -2.8058464527130127, |
|
"logps/chosen": -316.52178955078125, |
|
"logps/rejected": -279.59136962890625, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.0028482459019869566, |
|
"rewards/margins": 0.0039718905463814735, |
|
"rewards/rejected": -0.0011236447608098388, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.1020675215912065, |
|
"grad_norm": 1.6328125, |
|
"learning_rate": 4.999948856244767e-07, |
|
"logits/chosen": -2.8345859050750732, |
|
"logits/rejected": -2.829207420349121, |
|
"logps/chosen": -298.51348876953125, |
|
"logps/rejected": -278.06976318359375, |
|
"loss": 0.6904, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.004357966594398022, |
|
"rewards/margins": 0.00553758442401886, |
|
"rewards/rejected": -0.00117961794603616, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.10468463752944256, |
|
"grad_norm": 1.25, |
|
"learning_rate": 4.999698361256577e-07, |
|
"logits/chosen": -2.8570194244384766, |
|
"logits/rejected": -2.820826768875122, |
|
"logps/chosen": -280.4349670410156, |
|
"logps/rejected": -238.0439453125, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.003575119422748685, |
|
"rewards/margins": 0.0037900402676314116, |
|
"rewards/rejected": -0.00021492131054401398, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.10468463752944256, |
|
"eval_logits/chosen": -2.862203598022461, |
|
"eval_logits/rejected": -2.8348591327667236, |
|
"eval_logps/chosen": -282.4126892089844, |
|
"eval_logps/rejected": -261.5210876464844, |
|
"eval_loss": 0.6909525394439697, |
|
"eval_rewards/accuracies": 0.6395000219345093, |
|
"eval_rewards/chosen": 0.003608107101172209, |
|
"eval_rewards/margins": 0.004435193259268999, |
|
"eval_rewards/rejected": -0.0008270857506431639, |
|
"eval_runtime": 623.9261, |
|
"eval_samples_per_second": 3.206, |
|
"eval_steps_per_second": 0.401, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.10730175346767862, |
|
"grad_norm": 1.34375, |
|
"learning_rate": 4.99923914217458e-07, |
|
"logits/chosen": -2.8254337310791016, |
|
"logits/rejected": -2.810080051422119, |
|
"logps/chosen": -257.35760498046875, |
|
"logps/rejected": -256.613525390625, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.0025605126284062862, |
|
"rewards/margins": 0.0021402649581432343, |
|
"rewards/rejected": 0.000420247990405187, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.10991886940591468, |
|
"grad_norm": 2.890625, |
|
"learning_rate": 4.99857123734344e-07, |
|
"logits/chosen": -2.823087215423584, |
|
"logits/rejected": -2.776906967163086, |
|
"logps/chosen": -245.77804565429688, |
|
"logps/rejected": -238.0629119873047, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.0032352313864976168, |
|
"rewards/margins": 0.004795724991708994, |
|
"rewards/rejected": -0.0015604936052113771, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.11253598534415074, |
|
"grad_norm": 1.5703125, |
|
"learning_rate": 4.997694702533016e-07, |
|
"logits/chosen": -2.8463032245635986, |
|
"logits/rejected": -2.815331220626831, |
|
"logps/chosen": -295.5052490234375, |
|
"logps/rejected": -272.6297912597656, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.004903838969767094, |
|
"rewards/margins": 0.005789603106677532, |
|
"rewards/rejected": -0.0008857650682330132, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.11515310128238682, |
|
"grad_norm": 1.2578125, |
|
"learning_rate": 4.996609610933712e-07, |
|
"logits/chosen": -2.88322114944458, |
|
"logits/rejected": -2.861856460571289, |
|
"logps/chosen": -286.9052734375, |
|
"logps/rejected": -257.013427734375, |
|
"loss": 0.6898, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.005403473507612944, |
|
"rewards/margins": 0.0067200749181210995, |
|
"rewards/rejected": -0.0013166010612621903, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.11777021722062288, |
|
"grad_norm": 1.21875, |
|
"learning_rate": 4.995316053150366e-07, |
|
"logits/chosen": -2.814988374710083, |
|
"logits/rejected": -2.8178412914276123, |
|
"logps/chosen": -290.4037170410156, |
|
"logps/rejected": -260.14459228515625, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.006219993345439434, |
|
"rewards/margins": 0.005712195299565792, |
|
"rewards/rejected": 0.0005077989189885557, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.12038733315885894, |
|
"grad_norm": 2.140625, |
|
"learning_rate": 4.99381413719468e-07, |
|
"logits/chosen": -2.8341031074523926, |
|
"logits/rejected": -2.8203091621398926, |
|
"logps/chosen": -282.1357421875, |
|
"logps/rejected": -269.10565185546875, |
|
"loss": 0.6887, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.006647266447544098, |
|
"rewards/margins": 0.00890478678047657, |
|
"rewards/rejected": -0.0022575196344405413, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.123004449097095, |
|
"grad_norm": 1.3046875, |
|
"learning_rate": 4.992103988476205e-07, |
|
"logits/chosen": -2.846776247024536, |
|
"logits/rejected": -2.8195786476135254, |
|
"logps/chosen": -259.63775634765625, |
|
"logps/rejected": -245.67117309570312, |
|
"loss": 0.6905, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.005352762993425131, |
|
"rewards/margins": 0.0052968584932386875, |
|
"rewards/rejected": 5.59051513846498e-05, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.12562156503533106, |
|
"grad_norm": 1.359375, |
|
"learning_rate": 4.990185749791864e-07, |
|
"logits/chosen": -2.8792474269866943, |
|
"logits/rejected": -2.8467297554016113, |
|
"logps/chosen": -273.77880859375, |
|
"logps/rejected": -274.2110595703125, |
|
"loss": 0.6895, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.005960241891443729, |
|
"rewards/margins": 0.007407790515571833, |
|
"rewards/rejected": -0.001447548856958747, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.12823868097356714, |
|
"grad_norm": 1.3828125, |
|
"learning_rate": 4.988059581314039e-07, |
|
"logits/chosen": -2.858649730682373, |
|
"logits/rejected": -2.8390355110168457, |
|
"logps/chosen": -307.80267333984375, |
|
"logps/rejected": -269.5003662109375, |
|
"loss": 0.6891, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.007140771951526403, |
|
"rewards/margins": 0.008223852142691612, |
|
"rewards/rejected": -0.0010830799583345652, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.13085579691180318, |
|
"grad_norm": 1.34375, |
|
"learning_rate": 4.985725660577184e-07, |
|
"logits/chosen": -2.8739092350006104, |
|
"logits/rejected": -2.8554844856262207, |
|
"logps/chosen": -290.1658935546875, |
|
"logps/rejected": -249.3054656982422, |
|
"loss": 0.689, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.005505991168320179, |
|
"rewards/margins": 0.008494934067130089, |
|
"rewards/rejected": -0.0029889424331486225, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.13085579691180318, |
|
"eval_logits/chosen": -2.8655941486358643, |
|
"eval_logits/rejected": -2.838855028152466, |
|
"eval_logps/chosen": -282.2831115722656, |
|
"eval_logps/rejected": -261.68048095703125, |
|
"eval_loss": 0.6895392537117004, |
|
"eval_rewards/accuracies": 0.6700000166893005, |
|
"eval_rewards/chosen": 0.004903781693428755, |
|
"eval_rewards/margins": 0.007324740756303072, |
|
"eval_rewards/rejected": -0.00242095859721303, |
|
"eval_runtime": 622.8706, |
|
"eval_samples_per_second": 3.211, |
|
"eval_steps_per_second": 0.401, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.13347291285003926, |
|
"grad_norm": 1.546875, |
|
"learning_rate": 4.983184182463008e-07, |
|
"logits/chosen": -2.8507940769195557, |
|
"logits/rejected": -2.828244686126709, |
|
"logps/chosen": -294.1042785644531, |
|
"logps/rejected": -255.8568572998047, |
|
"loss": 0.6886, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.0062326351180672646, |
|
"rewards/margins": 0.009330684319138527, |
|
"rewards/rejected": -0.003098049433901906, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.1360900287882753, |
|
"grad_norm": 1.546875, |
|
"learning_rate": 4.980435359184203e-07, |
|
"logits/chosen": -2.8747315406799316, |
|
"logits/rejected": -2.8765642642974854, |
|
"logps/chosen": -287.198486328125, |
|
"logps/rejected": -270.84130859375, |
|
"loss": 0.6888, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.006150488276034594, |
|
"rewards/margins": 0.008788048289716244, |
|
"rewards/rejected": -0.002637560246512294, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.13870714472651138, |
|
"grad_norm": 1.6015625, |
|
"learning_rate": 4.977479420266723e-07, |
|
"logits/chosen": -2.8206260204315186, |
|
"logits/rejected": -2.8258962631225586, |
|
"logps/chosen": -280.0619201660156, |
|
"logps/rejected": -288.264892578125, |
|
"loss": 0.6891, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.005794334691017866, |
|
"rewards/margins": 0.008307529613375664, |
|
"rewards/rejected": -0.0025131958536803722, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.14132426066474746, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 4.974316612530614e-07, |
|
"logits/chosen": -2.813945770263672, |
|
"logits/rejected": -2.796184778213501, |
|
"logps/chosen": -298.8610534667969, |
|
"logps/rejected": -258.8526611328125, |
|
"loss": 0.6865, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": 0.0080220652744174, |
|
"rewards/margins": 0.01355043239891529, |
|
"rewards/rejected": -0.00552836898714304, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.1439413766029835, |
|
"grad_norm": 1.3359375, |
|
"learning_rate": 4.970947200069415e-07, |
|
"logits/chosen": -2.829272747039795, |
|
"logits/rejected": -2.816063404083252, |
|
"logps/chosen": -298.9976806640625, |
|
"logps/rejected": -276.99444580078125, |
|
"loss": 0.6894, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.005816199816763401, |
|
"rewards/margins": 0.007670801132917404, |
|
"rewards/rejected": -0.0018546013161540031, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.14655849254121958, |
|
"grad_norm": 1.1875, |
|
"learning_rate": 4.967371464228095e-07, |
|
"logits/chosen": -2.890547513961792, |
|
"logits/rejected": -2.869276762008667, |
|
"logps/chosen": -271.36053466796875, |
|
"logps/rejected": -272.12469482421875, |
|
"loss": 0.689, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.005742850713431835, |
|
"rewards/margins": 0.00846049003303051, |
|
"rewards/rejected": -0.0027176393195986748, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.14917560847945563, |
|
"grad_norm": 1.3984375, |
|
"learning_rate": 4.963589703579569e-07, |
|
"logits/chosen": -2.9156060218811035, |
|
"logits/rejected": -2.888892412185669, |
|
"logps/chosen": -315.22613525390625, |
|
"logps/rejected": -279.62847900390625, |
|
"loss": 0.6881, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.007057487033307552, |
|
"rewards/margins": 0.010187765583395958, |
|
"rewards/rejected": -0.0031302771531045437, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.1517927244176917, |
|
"grad_norm": 1.15625, |
|
"learning_rate": 4.959602233899761e-07, |
|
"logits/chosen": -2.9088664054870605, |
|
"logits/rejected": -2.8700101375579834, |
|
"logps/chosen": -314.38787841796875, |
|
"logps/rejected": -272.05511474609375, |
|
"loss": 0.6876, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.009098478592932224, |
|
"rewards/margins": 0.011330665089190006, |
|
"rewards/rejected": -0.002232185797765851, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.15440984035592778, |
|
"grad_norm": 1.4609375, |
|
"learning_rate": 4.955409388141243e-07, |
|
"logits/chosen": -2.843714475631714, |
|
"logits/rejected": -2.8304061889648438, |
|
"logps/chosen": -275.0677490234375, |
|
"logps/rejected": -249.8322296142578, |
|
"loss": 0.6884, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.005214322358369827, |
|
"rewards/margins": 0.009766822680830956, |
|
"rewards/rejected": -0.004552501253783703, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.15702695629416383, |
|
"grad_norm": 1.2109375, |
|
"learning_rate": 4.951011516405429e-07, |
|
"logits/chosen": -2.858010768890381, |
|
"logits/rejected": -2.856701374053955, |
|
"logps/chosen": -266.87335205078125, |
|
"logps/rejected": -251.0322265625, |
|
"loss": 0.6875, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.007665629498660564, |
|
"rewards/margins": 0.011526472866535187, |
|
"rewards/rejected": -0.0038608419708907604, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.15702695629416383, |
|
"eval_logits/chosen": -2.860320568084717, |
|
"eval_logits/rejected": -2.8332278728485107, |
|
"eval_logps/chosen": -282.1841125488281, |
|
"eval_logps/rejected": -261.906005859375, |
|
"eval_loss": 0.6879660487174988, |
|
"eval_rewards/accuracies": 0.6690000295639038, |
|
"eval_rewards/chosen": 0.005893694702535868, |
|
"eval_rewards/margins": 0.010570226237177849, |
|
"eval_rewards/rejected": -0.004676531068980694, |
|
"eval_runtime": 622.7218, |
|
"eval_samples_per_second": 3.212, |
|
"eval_steps_per_second": 0.401, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.1596440722323999, |
|
"grad_norm": 1.421875, |
|
"learning_rate": 4.946408985913344e-07, |
|
"logits/chosen": -2.852583169937134, |
|
"logits/rejected": -2.8311781883239746, |
|
"logps/chosen": -263.86956787109375, |
|
"logps/rejected": -244.29763793945312, |
|
"loss": 0.6885, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.007280237972736359, |
|
"rewards/margins": 0.009582052007317543, |
|
"rewards/rejected": -0.002301814965903759, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.16226118817063595, |
|
"grad_norm": 1.40625, |
|
"learning_rate": 4.941602180974958e-07, |
|
"logits/chosen": -2.8539230823516846, |
|
"logits/rejected": -2.8148884773254395, |
|
"logps/chosen": -304.7937316894531, |
|
"logps/rejected": -242.7307891845703, |
|
"loss": 0.6874, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.007980446331202984, |
|
"rewards/margins": 0.011698475107550621, |
|
"rewards/rejected": -0.003718029009178281, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.16487830410887203, |
|
"grad_norm": 1.296875, |
|
"learning_rate": 4.936591502957101e-07, |
|
"logits/chosen": -2.857060194015503, |
|
"logits/rejected": -2.83305025100708, |
|
"logps/chosen": -262.7440490722656, |
|
"logps/rejected": -254.7294464111328, |
|
"loss": 0.6863, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.00918588787317276, |
|
"rewards/margins": 0.014022831805050373, |
|
"rewards/rejected": -0.004836943931877613, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.16749542004710807, |
|
"grad_norm": 1.2734375, |
|
"learning_rate": 4.931377370249945e-07, |
|
"logits/chosen": -2.8656134605407715, |
|
"logits/rejected": -2.8077850341796875, |
|
"logps/chosen": -280.6070251464844, |
|
"logps/rejected": -258.4964599609375, |
|
"loss": 0.6866, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.005175912287086248, |
|
"rewards/margins": 0.013334142975509167, |
|
"rewards/rejected": -0.008158231154084206, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.17011253598534415, |
|
"grad_norm": 1.3125, |
|
"learning_rate": 4.925960218232072e-07, |
|
"logits/chosen": -2.84588885307312, |
|
"logits/rejected": -2.82362699508667, |
|
"logps/chosen": -269.46539306640625, |
|
"logps/rejected": -259.7959899902344, |
|
"loss": 0.6864, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.006764856167137623, |
|
"rewards/margins": 0.013819174841046333, |
|
"rewards/rejected": -0.007054319139569998, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.17272965192358022, |
|
"grad_norm": 2.046875, |
|
"learning_rate": 4.920340499234116e-07, |
|
"logits/chosen": -2.81691312789917, |
|
"logits/rejected": -2.7776267528533936, |
|
"logps/chosen": -285.55377197265625, |
|
"logps/rejected": -248.0377960205078, |
|
"loss": 0.6868, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.008050017058849335, |
|
"rewards/margins": 0.012922885827720165, |
|
"rewards/rejected": -0.0048728687688708305, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.17534676786181627, |
|
"grad_norm": 1.1875, |
|
"learning_rate": 4.914518682500995e-07, |
|
"logits/chosen": -2.8940651416778564, |
|
"logits/rejected": -2.864454507827759, |
|
"logps/chosen": -299.08636474609375, |
|
"logps/rejected": -257.27215576171875, |
|
"loss": 0.6848, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.011382282711565495, |
|
"rewards/margins": 0.017072781920433044, |
|
"rewards/rejected": -0.005690500605851412, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.17796388380005235, |
|
"grad_norm": 1.8515625, |
|
"learning_rate": 4.90849525415273e-07, |
|
"logits/chosen": -2.8536484241485596, |
|
"logits/rejected": -2.832000255584717, |
|
"logps/chosen": -289.3675231933594, |
|
"logps/rejected": -240.21029663085938, |
|
"loss": 0.6854, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.010842313058674335, |
|
"rewards/margins": 0.01590149477124214, |
|
"rewards/rejected": -0.005059181712567806, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.1805809997382884, |
|
"grad_norm": 1.3515625, |
|
"learning_rate": 4.902270717143858e-07, |
|
"logits/chosen": -2.862431049346924, |
|
"logits/rejected": -2.8454391956329346, |
|
"logps/chosen": -255.1441650390625, |
|
"logps/rejected": -264.6862487792969, |
|
"loss": 0.685, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 0.008327770046889782, |
|
"rewards/margins": 0.016681838780641556, |
|
"rewards/rejected": -0.0083540678024292, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.18319811567652447, |
|
"grad_norm": 1.65625, |
|
"learning_rate": 4.895845591221426e-07, |
|
"logits/chosen": -2.85901141166687, |
|
"logits/rejected": -2.8616912364959717, |
|
"logps/chosen": -268.5135803222656, |
|
"logps/rejected": -264.209716796875, |
|
"loss": 0.6874, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.00641227001324296, |
|
"rewards/margins": 0.011734376661479473, |
|
"rewards/rejected": -0.005322106648236513, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.18319811567652447, |
|
"eval_logits/chosen": -2.861030340194702, |
|
"eval_logits/rejected": -2.8341639041900635, |
|
"eval_logps/chosen": -281.93695068359375, |
|
"eval_logps/rejected": -261.9841613769531, |
|
"eval_loss": 0.686406135559082, |
|
"eval_rewards/accuracies": 0.6784999966621399, |
|
"eval_rewards/chosen": 0.008365440182387829, |
|
"eval_rewards/margins": 0.013823293149471283, |
|
"eval_rewards/rejected": -0.005457851104438305, |
|
"eval_runtime": 622.4947, |
|
"eval_samples_per_second": 3.213, |
|
"eval_steps_per_second": 0.402, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.18581523161476055, |
|
"grad_norm": 1.4921875, |
|
"learning_rate": 4.8892204128816e-07, |
|
"logits/chosen": -2.8912875652313232, |
|
"logits/rejected": -2.8667664527893066, |
|
"logps/chosen": -280.8445739746094, |
|
"logps/rejected": -267.3077392578125, |
|
"loss": 0.6874, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.006677532102912664, |
|
"rewards/margins": 0.011775776743888855, |
|
"rewards/rejected": -0.005098243709653616, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.1884323475529966, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 4.882395735324863e-07, |
|
"logits/chosen": -2.8655221462249756, |
|
"logits/rejected": -2.8227005004882812, |
|
"logps/chosen": -280.4420166015625, |
|
"logps/rejected": -267.8427429199219, |
|
"loss": 0.6841, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.00945108663290739, |
|
"rewards/margins": 0.01858513429760933, |
|
"rewards/rejected": -0.009134046733379364, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.19104946349123267, |
|
"grad_norm": 1.2578125, |
|
"learning_rate": 4.875372128409829e-07, |
|
"logits/chosen": -2.8432908058166504, |
|
"logits/rejected": -2.813136577606201, |
|
"logps/chosen": -282.8307800292969, |
|
"logps/rejected": -251.0844268798828, |
|
"loss": 0.6851, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.009715097956359386, |
|
"rewards/margins": 0.01650545559823513, |
|
"rewards/rejected": -0.006790356244891882, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.19366657942946872, |
|
"grad_norm": 1.0390625, |
|
"learning_rate": 4.868150178605653e-07, |
|
"logits/chosen": -2.8426120281219482, |
|
"logits/rejected": -2.8164243698120117, |
|
"logps/chosen": -242.0150146484375, |
|
"logps/rejected": -210.2700653076172, |
|
"loss": 0.6844, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.0059540546499192715, |
|
"rewards/margins": 0.017786705866456032, |
|
"rewards/rejected": -0.011832650750875473, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.1962836953677048, |
|
"grad_norm": 1.5078125, |
|
"learning_rate": 4.860730488943068e-07, |
|
"logits/chosen": -2.8057663440704346, |
|
"logits/rejected": -2.794985294342041, |
|
"logps/chosen": -250.8291778564453, |
|
"logps/rejected": -247.78134155273438, |
|
"loss": 0.6854, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.010436545126140118, |
|
"rewards/margins": 0.0157928504049778, |
|
"rewards/rejected": -0.005356303416192532, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.19890081130594087, |
|
"grad_norm": 1.578125, |
|
"learning_rate": 4.853113678964021e-07, |
|
"logits/chosen": -2.8220317363739014, |
|
"logits/rejected": -2.8117756843566895, |
|
"logps/chosen": -293.8874206542969, |
|
"logps/rejected": -279.42962646484375, |
|
"loss": 0.6839, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.012539887800812721, |
|
"rewards/margins": 0.019003767520189285, |
|
"rewards/rejected": -0.006463879253715277, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.20151792724417691, |
|
"grad_norm": 1.1484375, |
|
"learning_rate": 4.845300384669957e-07, |
|
"logits/chosen": -2.839818239212036, |
|
"logits/rejected": -2.8078839778900146, |
|
"logps/chosen": -269.07232666015625, |
|
"logps/rejected": -247.03567504882812, |
|
"loss": 0.6861, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.009587548673152924, |
|
"rewards/margins": 0.014613436535000801, |
|
"rewards/rejected": -0.005025886930525303, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.204135043182413, |
|
"grad_norm": 1.3671875, |
|
"learning_rate": 4.8372912584687e-07, |
|
"logits/chosen": -2.8615410327911377, |
|
"logits/rejected": -2.827815055847168, |
|
"logps/chosen": -300.0118103027344, |
|
"logps/rejected": -276.13140869140625, |
|
"loss": 0.6856, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.01048839557915926, |
|
"rewards/margins": 0.015544673427939415, |
|
"rewards/rejected": -0.005056279711425304, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.20675215912064904, |
|
"grad_norm": 1.9140625, |
|
"learning_rate": 4.829086969119983e-07, |
|
"logits/chosen": -2.827129602432251, |
|
"logits/rejected": -2.8344006538391113, |
|
"logps/chosen": -273.4031066894531, |
|
"logps/rejected": -268.6723937988281, |
|
"loss": 0.6875, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.0065932185389101505, |
|
"rewards/margins": 0.011614800430834293, |
|
"rewards/rejected": -0.00502158235758543, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.2093692750588851, |
|
"grad_norm": 1.265625, |
|
"learning_rate": 4.820688201679605e-07, |
|
"logits/chosen": -2.8842103481292725, |
|
"logits/rejected": -2.836937189102173, |
|
"logps/chosen": -276.0343933105469, |
|
"logps/rejected": -212.7479705810547, |
|
"loss": 0.682, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.015654196962714195, |
|
"rewards/margins": 0.022758139297366142, |
|
"rewards/rejected": -0.007103943265974522, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.2093692750588851, |
|
"eval_logits/chosen": -2.857757568359375, |
|
"eval_logits/rejected": -2.8307132720947266, |
|
"eval_logps/chosen": -281.70330810546875, |
|
"eval_logps/rejected": -262.04193115234375, |
|
"eval_loss": 0.6850252151489258, |
|
"eval_rewards/accuracies": 0.6800000071525574, |
|
"eval_rewards/chosen": 0.010701690800487995, |
|
"eval_rewards/margins": 0.016737323254346848, |
|
"eval_rewards/rejected": -0.006035633385181427, |
|
"eval_runtime": 622.1863, |
|
"eval_samples_per_second": 3.214, |
|
"eval_steps_per_second": 0.402, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.21198639099712116, |
|
"grad_norm": 1.2734375, |
|
"learning_rate": 4.812095657442231e-07, |
|
"logits/chosen": -2.8657875061035156, |
|
"logits/rejected": -2.8741297721862793, |
|
"logps/chosen": -288.73614501953125, |
|
"logps/rejected": -281.89031982421875, |
|
"loss": 0.6884, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.008292586542665958, |
|
"rewards/margins": 0.010020687244832516, |
|
"rewards/rejected": -0.001728100934997201, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.21460350693535724, |
|
"grad_norm": 1.34375, |
|
"learning_rate": 4.803310053882831e-07, |
|
"logits/chosen": -2.851111650466919, |
|
"logits/rejected": -2.863678455352783, |
|
"logps/chosen": -248.24319458007812, |
|
"logps/rejected": -259.7985534667969, |
|
"loss": 0.6865, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.009849630296230316, |
|
"rewards/margins": 0.013707734644412994, |
|
"rewards/rejected": -0.003858105046674609, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.2172206228735933, |
|
"grad_norm": 1.484375, |
|
"learning_rate": 4.794332124596775e-07, |
|
"logits/chosen": -2.881307363510132, |
|
"logits/rejected": -2.8686277866363525, |
|
"logps/chosen": -284.14605712890625, |
|
"logps/rejected": -279.83477783203125, |
|
"loss": 0.6849, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.014169754460453987, |
|
"rewards/margins": 0.017167720943689346, |
|
"rewards/rejected": -0.00299796718172729, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.21983773881182936, |
|
"grad_norm": 1.5859375, |
|
"learning_rate": 4.785162619238574e-07, |
|
"logits/chosen": -2.824626922607422, |
|
"logits/rejected": -2.7853519916534424, |
|
"logps/chosen": -269.13934326171875, |
|
"logps/rejected": -243.8739471435547, |
|
"loss": 0.6831, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.011793789453804493, |
|
"rewards/margins": 0.020573901012539864, |
|
"rewards/rejected": -0.008780110627412796, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.22245485475006543, |
|
"grad_norm": 1.625, |
|
"learning_rate": 4.775802303459287e-07, |
|
"logits/chosen": -2.8298261165618896, |
|
"logits/rejected": -2.8166985511779785, |
|
"logps/chosen": -262.9281311035156, |
|
"logps/rejected": -260.26129150390625, |
|
"loss": 0.686, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.010038264095783234, |
|
"rewards/margins": 0.014955776743590832, |
|
"rewards/rejected": -0.004917514510452747, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.22507197068830148, |
|
"grad_norm": 1.8125, |
|
"learning_rate": 4.766251958842589e-07, |
|
"logits/chosen": -2.800764799118042, |
|
"logits/rejected": -2.7910823822021484, |
|
"logps/chosen": -290.7630615234375, |
|
"logps/rejected": -278.87103271484375, |
|
"loss": 0.6842, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.012506058439612389, |
|
"rewards/margins": 0.018407398834824562, |
|
"rewards/rejected": -0.005901341326534748, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.22768908662653756, |
|
"grad_norm": 1.21875, |
|
"learning_rate": 4.756512382839506e-07, |
|
"logits/chosen": -2.82393217086792, |
|
"logits/rejected": -2.7991511821746826, |
|
"logps/chosen": -269.1101989746094, |
|
"logps/rejected": -271.6678161621094, |
|
"loss": 0.6838, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.01023150235414505, |
|
"rewards/margins": 0.01938125677406788, |
|
"rewards/rejected": -0.009149751625955105, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.23030620256477363, |
|
"grad_norm": 1.2109375, |
|
"learning_rate": 4.746584388701831e-07, |
|
"logits/chosen": -2.840731620788574, |
|
"logits/rejected": -2.840230941772461, |
|
"logps/chosen": -278.5453186035156, |
|
"logps/rejected": -264.83380126953125, |
|
"loss": 0.6835, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 0.014049595221877098, |
|
"rewards/margins": 0.019881747663021088, |
|
"rewards/rejected": -0.005832154303789139, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.23292331850300968, |
|
"grad_norm": 1.75, |
|
"learning_rate": 4.736468805414218e-07, |
|
"logits/chosen": -2.811013698577881, |
|
"logits/rejected": -2.8108229637145996, |
|
"logps/chosen": -266.8439636230469, |
|
"logps/rejected": -278.64306640625, |
|
"loss": 0.6825, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.013364280574023724, |
|
"rewards/margins": 0.021861828863620758, |
|
"rewards/rejected": -0.008497546426951885, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.23554043444124576, |
|
"grad_norm": 1.3359375, |
|
"learning_rate": 4.7261664776249595e-07, |
|
"logits/chosen": -2.783407211303711, |
|
"logits/rejected": -2.7699193954467773, |
|
"logps/chosen": -245.31137084960938, |
|
"logps/rejected": -236.346923828125, |
|
"loss": 0.6837, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.008871756494045258, |
|
"rewards/margins": 0.01949758641421795, |
|
"rewards/rejected": -0.010625829920172691, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.23554043444124576, |
|
"eval_logits/chosen": -2.8573083877563477, |
|
"eval_logits/rejected": -2.8303797245025635, |
|
"eval_logps/chosen": -281.4179992675781, |
|
"eval_logps/rejected": -261.9797058105469, |
|
"eval_loss": 0.6839740872383118, |
|
"eval_rewards/accuracies": 0.6840000152587891, |
|
"eval_rewards/chosen": 0.013555029407143593, |
|
"eval_rewards/margins": 0.01896839775145054, |
|
"eval_rewards/rejected": -0.005413366016000509, |
|
"eval_runtime": 621.6447, |
|
"eval_samples_per_second": 3.217, |
|
"eval_steps_per_second": 0.402, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.2381575503794818, |
|
"grad_norm": 1.203125, |
|
"learning_rate": 4.7156782655754624e-07, |
|
"logits/chosen": -2.847557544708252, |
|
"logits/rejected": -2.807833194732666, |
|
"logps/chosen": -298.7544250488281, |
|
"logps/rejected": -243.92807006835938, |
|
"loss": 0.6823, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.01836327835917473, |
|
"rewards/margins": 0.0223236046731472, |
|
"rewards/rejected": -0.003960323985666037, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.24077466631771788, |
|
"grad_norm": 1.25, |
|
"learning_rate": 4.705005045028414e-07, |
|
"logits/chosen": -2.8043971061706543, |
|
"logits/rejected": -2.775317430496216, |
|
"logps/chosen": -279.80584716796875, |
|
"logps/rejected": -261.52227783203125, |
|
"loss": 0.6838, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.014174017123878002, |
|
"rewards/margins": 0.01934727467596531, |
|
"rewards/rejected": -0.005173257552087307, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.24339178225595393, |
|
"grad_norm": 1.3984375, |
|
"learning_rate": 4.694147707194659e-07, |
|
"logits/chosen": -2.871277332305908, |
|
"logits/rejected": -2.861692428588867, |
|
"logps/chosen": -286.360107421875, |
|
"logps/rejected": -268.2128601074219, |
|
"loss": 0.6821, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.016537340357899666, |
|
"rewards/margins": 0.022989634424448013, |
|
"rewards/rejected": -0.00645229360088706, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.24600889819419, |
|
"grad_norm": 2.578125, |
|
"learning_rate": 4.683107158658781e-07, |
|
"logits/chosen": -2.818206548690796, |
|
"logits/rejected": -2.799956798553467, |
|
"logps/chosen": -306.58203125, |
|
"logps/rejected": -278.14752197265625, |
|
"loss": 0.678, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.021548133343458176, |
|
"rewards/margins": 0.03120315447449684, |
|
"rewards/rejected": -0.009655019268393517, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.24862601413242608, |
|
"grad_norm": 1.4609375, |
|
"learning_rate": 4.6718843213034066e-07, |
|
"logits/chosen": -2.831376314163208, |
|
"logits/rejected": -2.815030097961426, |
|
"logps/chosen": -261.57659912109375, |
|
"logps/rejected": -250.5101318359375, |
|
"loss": 0.6823, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.014541561715304852, |
|
"rewards/margins": 0.022364726290106773, |
|
"rewards/rejected": -0.007823166437447071, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.2512431300706621, |
|
"grad_norm": 1.3984375, |
|
"learning_rate": 4.660480132232224e-07, |
|
"logits/chosen": -2.8427586555480957, |
|
"logits/rejected": -2.8429322242736816, |
|
"logps/chosen": -285.56378173828125, |
|
"logps/rejected": -263.73260498046875, |
|
"loss": 0.6849, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.014430510811507702, |
|
"rewards/margins": 0.01709624193608761, |
|
"rewards/rejected": -0.002665730658918619, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.25386024600889817, |
|
"grad_norm": 1.1484375, |
|
"learning_rate": 4.64889554369174e-07, |
|
"logits/chosen": -2.8453879356384277, |
|
"logits/rejected": -2.8087105751037598, |
|
"logps/chosen": -296.9171142578125, |
|
"logps/rejected": -250.25, |
|
"loss": 0.6784, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.022362882271409035, |
|
"rewards/margins": 0.030544385313987732, |
|
"rewards/rejected": -0.008181498385965824, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.2564773619471343, |
|
"grad_norm": 1.328125, |
|
"learning_rate": 4.637131522991764e-07, |
|
"logits/chosen": -2.8417975902557373, |
|
"logits/rejected": -2.8379454612731934, |
|
"logps/chosen": -304.47784423828125, |
|
"logps/rejected": -279.02911376953125, |
|
"loss": 0.6831, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.01869601011276245, |
|
"rewards/margins": 0.02095440961420536, |
|
"rewards/rejected": -0.002258400898426771, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.2590944778853703, |
|
"grad_norm": 1.171875, |
|
"learning_rate": 4.6251890524246375e-07, |
|
"logits/chosen": -2.8454818725585938, |
|
"logits/rejected": -2.8245933055877686, |
|
"logps/chosen": -253.8804931640625, |
|
"logps/rejected": -232.57418823242188, |
|
"loss": 0.6802, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.015828203409910202, |
|
"rewards/margins": 0.02681964635848999, |
|
"rewards/rejected": -0.010991444811224937, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.26171159382360637, |
|
"grad_norm": 1.21875, |
|
"learning_rate": 4.613069129183218e-07, |
|
"logits/chosen": -2.8802895545959473, |
|
"logits/rejected": -2.840637683868408, |
|
"logps/chosen": -319.56268310546875, |
|
"logps/rejected": -281.52264404296875, |
|
"loss": 0.6819, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.018990501761436462, |
|
"rewards/margins": 0.023325005546212196, |
|
"rewards/rejected": -0.004334500525146723, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.26171159382360637, |
|
"eval_logits/chosen": -2.853975534439087, |
|
"eval_logits/rejected": -2.8269295692443848, |
|
"eval_logps/chosen": -281.1678466796875, |
|
"eval_logps/rejected": -261.9830017089844, |
|
"eval_loss": 0.6827893257141113, |
|
"eval_rewards/accuracies": 0.6809999942779541, |
|
"eval_rewards/chosen": 0.016056543216109276, |
|
"eval_rewards/margins": 0.021502956748008728, |
|
"eval_rewards/rejected": -0.005446411669254303, |
|
"eval_runtime": 621.9705, |
|
"eval_samples_per_second": 3.216, |
|
"eval_steps_per_second": 0.402, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.2643287097618425, |
|
"grad_norm": 1.265625, |
|
"learning_rate": 4.6007727652776065e-07, |
|
"logits/chosen": -2.8141977787017822, |
|
"logits/rejected": -2.7999210357666016, |
|
"logps/chosen": -249.3959197998047, |
|
"logps/rejected": -245.47830200195312, |
|
"loss": 0.6824, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.016399413347244263, |
|
"rewards/margins": 0.02237236499786377, |
|
"rewards/rejected": -0.0059729525819420815, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.2669458257000785, |
|
"grad_norm": 1.578125, |
|
"learning_rate": 4.588300987450652e-07, |
|
"logits/chosen": -2.86116099357605, |
|
"logits/rejected": -2.8352913856506348, |
|
"logps/chosen": -268.90374755859375, |
|
"logps/rejected": -237.9334259033203, |
|
"loss": 0.682, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.016041552647948265, |
|
"rewards/margins": 0.02327124960720539, |
|
"rewards/rejected": -0.007229696027934551, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.26956294163831457, |
|
"grad_norm": 1.0859375, |
|
"learning_rate": 4.5756548370922134e-07, |
|
"logits/chosen": -2.8210678100585938, |
|
"logits/rejected": -2.8014864921569824, |
|
"logps/chosen": -254.616455078125, |
|
"logps/rejected": -245.8887939453125, |
|
"loss": 0.6857, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.01615295186638832, |
|
"rewards/margins": 0.015799041837453842, |
|
"rewards/rejected": 0.0003539065073709935, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.2721800575765506, |
|
"grad_norm": 1.1328125, |
|
"learning_rate": 4.5628353701522047e-07, |
|
"logits/chosen": -2.8566808700561523, |
|
"logits/rejected": -2.823983669281006, |
|
"logps/chosen": -317.4922180175781, |
|
"logps/rejected": -287.9608154296875, |
|
"loss": 0.6771, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.024755671620368958, |
|
"rewards/margins": 0.03346611559391022, |
|
"rewards/rejected": -0.00871044397354126, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.2747971735147867, |
|
"grad_norm": 1.6875, |
|
"learning_rate": 4.549843657052429e-07, |
|
"logits/chosen": -2.8731515407562256, |
|
"logits/rejected": -2.845651388168335, |
|
"logps/chosen": -282.2266540527344, |
|
"logps/rejected": -279.3399658203125, |
|
"loss": 0.677, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.02262326516211033, |
|
"rewards/margins": 0.03333864361047745, |
|
"rewards/rejected": -0.010715381242334843, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.27741428945302277, |
|
"grad_norm": 1.140625, |
|
"learning_rate": 4.5366807825971907e-07, |
|
"logits/chosen": -2.8223326206207275, |
|
"logits/rejected": -2.813047409057617, |
|
"logps/chosen": -252.23336791992188, |
|
"logps/rejected": -246.99490356445312, |
|
"loss": 0.6827, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.015223952010273933, |
|
"rewards/margins": 0.02188166417181492, |
|
"rewards/rejected": -0.006657709833234549, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.2800314053912588, |
|
"grad_norm": 1.2734375, |
|
"learning_rate": 4.5233478458827176e-07, |
|
"logits/chosen": -2.856292247772217, |
|
"logits/rejected": -2.8288464546203613, |
|
"logps/chosen": -306.0169372558594, |
|
"logps/rejected": -254.22042846679688, |
|
"loss": 0.6782, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": 0.0232028029859066, |
|
"rewards/margins": 0.03090915083885193, |
|
"rewards/rejected": -0.007706350646913052, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.2826485213294949, |
|
"grad_norm": 1.328125, |
|
"learning_rate": 4.509845960205389e-07, |
|
"logits/chosen": -2.791149854660034, |
|
"logits/rejected": -2.7939980030059814, |
|
"logps/chosen": -294.93231201171875, |
|
"logps/rejected": -263.44830322265625, |
|
"loss": 0.6818, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.020023692399263382, |
|
"rewards/margins": 0.023503463715314865, |
|
"rewards/rejected": -0.0034797731786966324, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.28526563726773096, |
|
"grad_norm": 1.765625, |
|
"learning_rate": 4.4961762529687736e-07, |
|
"logits/chosen": -2.8478240966796875, |
|
"logits/rejected": -2.8274638652801514, |
|
"logps/chosen": -277.98541259765625, |
|
"logps/rejected": -260.0039978027344, |
|
"loss": 0.6833, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.01708926074206829, |
|
"rewards/margins": 0.020394863560795784, |
|
"rewards/rejected": -0.0033055986277759075, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.287882753205967, |
|
"grad_norm": 1.2890625, |
|
"learning_rate": 4.482339865589492e-07, |
|
"logits/chosen": -2.8545358180999756, |
|
"logits/rejected": -2.8111648559570312, |
|
"logps/chosen": -281.62652587890625, |
|
"logps/rejected": -238.7964324951172, |
|
"loss": 0.6836, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.013560217805206776, |
|
"rewards/margins": 0.019967440515756607, |
|
"rewards/rejected": -0.006407222244888544, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.287882753205967, |
|
"eval_logits/chosen": -2.852851152420044, |
|
"eval_logits/rejected": -2.825786590576172, |
|
"eval_logps/chosen": -280.98529052734375, |
|
"eval_logps/rejected": -262.00518798828125, |
|
"eval_loss": 0.6818436980247498, |
|
"eval_rewards/accuracies": 0.6784999966621399, |
|
"eval_rewards/chosen": 0.01788218505680561, |
|
"eval_rewards/margins": 0.023550525307655334, |
|
"eval_rewards/rejected": -0.00566834257915616, |
|
"eval_runtime": 621.9928, |
|
"eval_samples_per_second": 3.215, |
|
"eval_steps_per_second": 0.402, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.2904998691442031, |
|
"grad_norm": 1.2890625, |
|
"learning_rate": 4.4683379534019076e-07, |
|
"logits/chosen": -2.8489432334899902, |
|
"logits/rejected": -2.8446106910705566, |
|
"logps/chosen": -284.64337158203125, |
|
"logps/rejected": -280.3296203613281, |
|
"loss": 0.6832, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.01724550686776638, |
|
"rewards/margins": 0.020626548677682877, |
|
"rewards/rejected": -0.0033810404129326344, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.29311698508243916, |
|
"grad_norm": 1.28125, |
|
"learning_rate": 4.4541716855616593e-07, |
|
"logits/chosen": -2.822422742843628, |
|
"logits/rejected": -2.8002758026123047, |
|
"logps/chosen": -256.53265380859375, |
|
"logps/rejected": -259.35723876953125, |
|
"loss": 0.684, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.01423877477645874, |
|
"rewards/margins": 0.019155513495206833, |
|
"rewards/rejected": -0.004916741047054529, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.2957341010206752, |
|
"grad_norm": 1.203125, |
|
"learning_rate": 4.4398422449480357e-07, |
|
"logits/chosen": -2.8172736167907715, |
|
"logits/rejected": -2.7678089141845703, |
|
"logps/chosen": -278.84429931640625, |
|
"logps/rejected": -282.52862548828125, |
|
"loss": 0.6835, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.014500883407890797, |
|
"rewards/margins": 0.020229389891028404, |
|
"rewards/rejected": -0.0057285078801214695, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.29835121695891126, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 4.4253508280652036e-07, |
|
"logits/chosen": -2.838125705718994, |
|
"logits/rejected": -2.791625738143921, |
|
"logps/chosen": -301.6747741699219, |
|
"logps/rejected": -253.36123657226562, |
|
"loss": 0.6789, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.02180757373571396, |
|
"rewards/margins": 0.029516074806451797, |
|
"rewards/rejected": -0.007708498742431402, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.30096833289714736, |
|
"grad_norm": 1.1640625, |
|
"learning_rate": 4.410698644942302e-07, |
|
"logits/chosen": -2.879281997680664, |
|
"logits/rejected": -2.851644277572632, |
|
"logps/chosen": -285.1121520996094, |
|
"logps/rejected": -263.08416748046875, |
|
"loss": 0.6796, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.022570660337805748, |
|
"rewards/margins": 0.02802709862589836, |
|
"rewards/rejected": -0.005456441547721624, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.3035854488353834, |
|
"grad_norm": 1.546875, |
|
"learning_rate": 4.3958869190324057e-07, |
|
"logits/chosen": -2.8084969520568848, |
|
"logits/rejected": -2.7690627574920654, |
|
"logps/chosen": -277.8807067871094, |
|
"logps/rejected": -252.42825317382812, |
|
"loss": 0.6801, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.018787220120429993, |
|
"rewards/margins": 0.027080217376351357, |
|
"rewards/rejected": -0.008292997255921364, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.30620256477361946, |
|
"grad_norm": 1.4140625, |
|
"learning_rate": 4.380916887110365e-07, |
|
"logits/chosen": -2.869062900543213, |
|
"logits/rejected": -2.8376870155334473, |
|
"logps/chosen": -273.46063232421875, |
|
"logps/rejected": -233.0785675048828, |
|
"loss": 0.6807, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.01758132129907608, |
|
"rewards/margins": 0.026008691638708115, |
|
"rewards/rejected": -0.008427368476986885, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.30881968071185556, |
|
"grad_norm": 1.3046875, |
|
"learning_rate": 4.3657897991695394e-07, |
|
"logits/chosen": -2.7800498008728027, |
|
"logits/rejected": -2.8185646533966064, |
|
"logps/chosen": -268.2494201660156, |
|
"logps/rejected": -269.7010498046875, |
|
"loss": 0.6818, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.019908545538783073, |
|
"rewards/margins": 0.023755352944135666, |
|
"rewards/rejected": -0.0038468041457235813, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.3114367966500916, |
|
"grad_norm": 1.2265625, |
|
"learning_rate": 4.350506918317416e-07, |
|
"logits/chosen": -2.856717824935913, |
|
"logits/rejected": -2.824436664581299, |
|
"logps/chosen": -260.1253967285156, |
|
"logps/rejected": -256.15118408203125, |
|
"loss": 0.6829, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.017839016392827034, |
|
"rewards/margins": 0.021472811698913574, |
|
"rewards/rejected": -0.003633796004578471, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.31405391258832765, |
|
"grad_norm": 1.515625, |
|
"learning_rate": 4.335069520670149e-07, |
|
"logits/chosen": -2.8279824256896973, |
|
"logits/rejected": -2.802241325378418, |
|
"logps/chosen": -241.59786987304688, |
|
"logps/rejected": -255.2316131591797, |
|
"loss": 0.685, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.017605960369110107, |
|
"rewards/margins": 0.01734979636967182, |
|
"rewards/rejected": 0.0002561651053838432, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.31405391258832765, |
|
"eval_logits/chosen": -2.8509681224823, |
|
"eval_logits/rejected": -2.8237783908843994, |
|
"eval_logps/chosen": -280.56793212890625, |
|
"eval_logps/rejected": -261.7609558105469, |
|
"eval_loss": 0.6810342073440552, |
|
"eval_rewards/accuracies": 0.6809999942779541, |
|
"eval_rewards/chosen": 0.022055484354496002, |
|
"eval_rewards/margins": 0.02528143860399723, |
|
"eval_rewards/rejected": -0.003225954482331872, |
|
"eval_runtime": 623.5714, |
|
"eval_samples_per_second": 3.207, |
|
"eval_steps_per_second": 0.401, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.3166710285265637, |
|
"grad_norm": 1.2578125, |
|
"learning_rate": 4.319478895245999e-07, |
|
"logits/chosen": -2.846019744873047, |
|
"logits/rejected": -2.8138914108276367, |
|
"logps/chosen": -262.89471435546875, |
|
"logps/rejected": -238.1546173095703, |
|
"loss": 0.6785, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.02249886468052864, |
|
"rewards/margins": 0.030421704053878784, |
|
"rewards/rejected": -0.007922842167317867, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.3192881444647998, |
|
"grad_norm": 1.359375, |
|
"learning_rate": 4.3037363438577036e-07, |
|
"logits/chosen": -2.8656246662139893, |
|
"logits/rejected": -2.828981399536133, |
|
"logps/chosen": -269.7254943847656, |
|
"logps/rejected": -284.9521789550781, |
|
"loss": 0.6799, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.025678550824522972, |
|
"rewards/margins": 0.027557630091905594, |
|
"rewards/rejected": -0.001879077055491507, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.32190526040303585, |
|
"grad_norm": 1.3671875, |
|
"learning_rate": 4.2878431810037716e-07, |
|
"logits/chosen": -2.8651747703552246, |
|
"logits/rejected": -2.851069927215576, |
|
"logps/chosen": -309.12872314453125, |
|
"logps/rejected": -264.8939514160156, |
|
"loss": 0.6775, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.025112558156251907, |
|
"rewards/margins": 0.03247564285993576, |
|
"rewards/rejected": -0.007363081909716129, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.3245223763412719, |
|
"grad_norm": 1.34375, |
|
"learning_rate": 4.271800733758729e-07, |
|
"logits/chosen": -2.838114023208618, |
|
"logits/rejected": -2.8366000652313232, |
|
"logps/chosen": -301.5270690917969, |
|
"logps/rejected": -268.2883605957031, |
|
"loss": 0.6769, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.027721276506781578, |
|
"rewards/margins": 0.03389520198106766, |
|
"rewards/rejected": -0.006173927802592516, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.327139492279508, |
|
"grad_norm": 1.3359375, |
|
"learning_rate": 4.255610341662304e-07, |
|
"logits/chosen": -2.863908529281616, |
|
"logits/rejected": -2.806837797164917, |
|
"logps/chosen": -273.0283203125, |
|
"logps/rejected": -253.21798706054688, |
|
"loss": 0.6806, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.023084726184606552, |
|
"rewards/margins": 0.026232142001390457, |
|
"rewards/rejected": -0.003147417213767767, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.32975660821774405, |
|
"grad_norm": 1.4453125, |
|
"learning_rate": 4.2392733566075757e-07, |
|
"logits/chosen": -2.8411412239074707, |
|
"logits/rejected": -2.812453508377075, |
|
"logps/chosen": -271.3999938964844, |
|
"logps/rejected": -254.3863983154297, |
|
"loss": 0.6838, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.021707288920879364, |
|
"rewards/margins": 0.019517619162797928, |
|
"rewards/rejected": 0.002189669292420149, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.3323737241559801, |
|
"grad_norm": 1.2265625, |
|
"learning_rate": 4.2227911427280973e-07, |
|
"logits/chosen": -2.8064496517181396, |
|
"logits/rejected": -2.777616500854492, |
|
"logps/chosen": -263.7330627441406, |
|
"logps/rejected": -234.1592559814453, |
|
"loss": 0.6807, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.0242301132529974, |
|
"rewards/margins": 0.026022180914878845, |
|
"rewards/rejected": -0.0017920676618814468, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.33499084009421615, |
|
"grad_norm": 1.359375, |
|
"learning_rate": 4.206165076283982e-07, |
|
"logits/chosen": -2.8404831886291504, |
|
"logits/rejected": -2.81711483001709, |
|
"logps/chosen": -259.4391174316406, |
|
"logps/rejected": -243.4575653076172, |
|
"loss": 0.6796, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.02131321281194687, |
|
"rewards/margins": 0.028222400695085526, |
|
"rewards/rejected": -0.00690918555483222, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.33760795603245225, |
|
"grad_norm": 1.4296875, |
|
"learning_rate": 4.1893965455469946e-07, |
|
"logits/chosen": -2.855498790740967, |
|
"logits/rejected": -2.832735538482666, |
|
"logps/chosen": -263.16021728515625, |
|
"logps/rejected": -243.6949462890625, |
|
"loss": 0.6818, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.01883005164563656, |
|
"rewards/margins": 0.02386125549674034, |
|
"rewards/rejected": -0.005031202454119921, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.3402250719706883, |
|
"grad_norm": 1.1328125, |
|
"learning_rate": 4.172486950684626e-07, |
|
"logits/chosen": -2.851036548614502, |
|
"logits/rejected": -2.842118501663208, |
|
"logps/chosen": -266.0923767089844, |
|
"logps/rejected": -266.4703369140625, |
|
"loss": 0.6785, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.02591409906744957, |
|
"rewards/margins": 0.030536871403455734, |
|
"rewards/rejected": -0.004622773267328739, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.3402250719706883, |
|
"eval_logits/chosen": -2.85286283493042, |
|
"eval_logits/rejected": -2.825887441635132, |
|
"eval_logps/chosen": -280.6852111816406, |
|
"eval_logps/rejected": -262.0453186035156, |
|
"eval_loss": 0.6802608966827393, |
|
"eval_rewards/accuracies": 0.6840000152587891, |
|
"eval_rewards/chosen": 0.02088269591331482, |
|
"eval_rewards/margins": 0.026952272281050682, |
|
"eval_rewards/rejected": -0.006069576367735863, |
|
"eval_runtime": 623.414, |
|
"eval_samples_per_second": 3.208, |
|
"eval_steps_per_second": 0.401, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.34284218790892435, |
|
"grad_norm": 1.265625, |
|
"learning_rate": 4.155437703643181e-07, |
|
"logits/chosen": -2.877864360809326, |
|
"logits/rejected": -2.8368418216705322, |
|
"logps/chosen": -258.5326843261719, |
|
"logps/rejected": -233.0809326171875, |
|
"loss": 0.6764, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.025921067222952843, |
|
"rewards/margins": 0.034750454127788544, |
|
"rewards/rejected": -0.008829386904835701, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.34545930384716045, |
|
"grad_norm": 1.21875, |
|
"learning_rate": 4.138250228029881e-07, |
|
"logits/chosen": -2.8507418632507324, |
|
"logits/rejected": -2.8341784477233887, |
|
"logps/chosen": -270.1946716308594, |
|
"logps/rejected": -279.32489013671875, |
|
"loss": 0.6829, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.018970279023051262, |
|
"rewards/margins": 0.021766219288110733, |
|
"rewards/rejected": -0.002795940963551402, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.3480764197853965, |
|
"grad_norm": 1.1796875, |
|
"learning_rate": 4.1209259589939935e-07, |
|
"logits/chosen": -2.8348724842071533, |
|
"logits/rejected": -2.8295791149139404, |
|
"logps/chosen": -247.55770874023438, |
|
"logps/rejected": -242.4851837158203, |
|
"loss": 0.6819, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.022017816081643105, |
|
"rewards/margins": 0.02364877425134182, |
|
"rewards/rejected": -0.00163096038158983, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.35069353572363254, |
|
"grad_norm": 1.2578125, |
|
"learning_rate": 4.103466343106998e-07, |
|
"logits/chosen": -2.868483543395996, |
|
"logits/rejected": -2.855750560760498, |
|
"logps/chosen": -287.6753845214844, |
|
"logps/rejected": -257.07257080078125, |
|
"loss": 0.6827, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.01955850049853325, |
|
"rewards/margins": 0.02184746228158474, |
|
"rewards/rejected": -0.002288959687575698, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.35331065166186865, |
|
"grad_norm": 1.1328125, |
|
"learning_rate": 4.085872838241796e-07, |
|
"logits/chosen": -2.8042919635772705, |
|
"logits/rejected": -2.765010356903076, |
|
"logps/chosen": -293.25677490234375, |
|
"logps/rejected": -261.722412109375, |
|
"loss": 0.6818, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.015371786430478096, |
|
"rewards/margins": 0.023923274129629135, |
|
"rewards/rejected": -0.008551487699151039, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.3559277676001047, |
|
"grad_norm": 1.390625, |
|
"learning_rate": 4.06814691345098e-07, |
|
"logits/chosen": -2.7857346534729004, |
|
"logits/rejected": -2.756810426712036, |
|
"logps/chosen": -272.96185302734375, |
|
"logps/rejected": -253.2128448486328, |
|
"loss": 0.6801, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.02000296302139759, |
|
"rewards/margins": 0.027280131354928017, |
|
"rewards/rejected": -0.00727717112749815, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.35854488353834074, |
|
"grad_norm": 1.6328125, |
|
"learning_rate": 4.0502900488441707e-07, |
|
"logits/chosen": -2.8389689922332764, |
|
"logits/rejected": -2.8258564472198486, |
|
"logps/chosen": -283.4964294433594, |
|
"logps/rejected": -280.92559814453125, |
|
"loss": 0.6809, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.0220597293227911, |
|
"rewards/margins": 0.025511642917990685, |
|
"rewards/rejected": -0.003451913595199585, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.3611619994765768, |
|
"grad_norm": 1.3046875, |
|
"learning_rate": 4.032303735464422e-07, |
|
"logits/chosen": -2.9172284603118896, |
|
"logits/rejected": -2.868638753890991, |
|
"logps/chosen": -287.5843811035156, |
|
"logps/rejected": -264.72808837890625, |
|
"loss": 0.6775, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.02606380544602871, |
|
"rewards/margins": 0.032886773347854614, |
|
"rewards/rejected": -0.006822962313890457, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.3637791154148129, |
|
"grad_norm": 1.4296875, |
|
"learning_rate": 4.014189475163726e-07, |
|
"logits/chosen": -2.8349239826202393, |
|
"logits/rejected": -2.8192358016967773, |
|
"logps/chosen": -270.93731689453125, |
|
"logps/rejected": -261.95611572265625, |
|
"loss": 0.6779, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.019413406029343605, |
|
"rewards/margins": 0.0317561998963356, |
|
"rewards/rejected": -0.012342792935669422, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.36639623135304894, |
|
"grad_norm": 1.59375, |
|
"learning_rate": 3.995948780477605e-07, |
|
"logits/chosen": -2.8566317558288574, |
|
"logits/rejected": -2.821171998977661, |
|
"logps/chosen": -283.40374755859375, |
|
"logps/rejected": -261.3340148925781, |
|
"loss": 0.6828, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.015268507413566113, |
|
"rewards/margins": 0.022029511630535126, |
|
"rewards/rejected": -0.0067610046826303005, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.36639623135304894, |
|
"eval_logits/chosen": -2.850475549697876, |
|
"eval_logits/rejected": -2.8233399391174316, |
|
"eval_logps/chosen": -280.606201171875, |
|
"eval_logps/rejected": -262.1006774902344, |
|
"eval_loss": 0.6796398758888245, |
|
"eval_rewards/accuracies": 0.6865000128746033, |
|
"eval_rewards/chosen": 0.02167338877916336, |
|
"eval_rewards/margins": 0.028296444565057755, |
|
"eval_rewards/rejected": -0.006623056251555681, |
|
"eval_runtime": 622.3734, |
|
"eval_samples_per_second": 3.214, |
|
"eval_steps_per_second": 0.402, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.369013347291285, |
|
"grad_norm": 1.3515625, |
|
"learning_rate": 3.977583174498816e-07, |
|
"logits/chosen": -2.856717824935913, |
|
"logits/rejected": -2.8365931510925293, |
|
"logps/chosen": -283.3439025878906, |
|
"logps/rejected": -262.3489990234375, |
|
"loss": 0.676, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": 0.025825675576925278, |
|
"rewards/margins": 0.03570712357759476, |
|
"rewards/rejected": -0.009881444275379181, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.3716304632295211, |
|
"grad_norm": 1.3984375, |
|
"learning_rate": 3.9590941907501717e-07, |
|
"logits/chosen": -2.86562180519104, |
|
"logits/rejected": -2.84371280670166, |
|
"logps/chosen": -298.9716796875, |
|
"logps/rejected": -272.981689453125, |
|
"loss": 0.676, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": 0.03262133151292801, |
|
"rewards/margins": 0.03562031686306, |
|
"rewards/rejected": -0.002998985815793276, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.37424757916775714, |
|
"grad_norm": 2.0625, |
|
"learning_rate": 3.9404833730564974e-07, |
|
"logits/chosen": -2.766550302505493, |
|
"logits/rejected": -2.7500851154327393, |
|
"logps/chosen": -270.97052001953125, |
|
"logps/rejected": -261.961181640625, |
|
"loss": 0.6788, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.02307405136525631, |
|
"rewards/margins": 0.0301786120980978, |
|
"rewards/rejected": -0.007104557007551193, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.3768646951059932, |
|
"grad_norm": 1.296875, |
|
"learning_rate": 3.9217522754157117e-07, |
|
"logits/chosen": -2.84255051612854, |
|
"logits/rejected": -2.838588237762451, |
|
"logps/chosen": -266.6410827636719, |
|
"logps/rejected": -246.63916015625, |
|
"loss": 0.6756, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.02127646468579769, |
|
"rewards/margins": 0.036509204655885696, |
|
"rewards/rejected": -0.015232739970088005, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.37948181104422923, |
|
"grad_norm": 1.296875, |
|
"learning_rate": 3.9029024618690785e-07, |
|
"logits/chosen": -2.854072093963623, |
|
"logits/rejected": -2.8262181282043457, |
|
"logps/chosen": -253.68600463867188, |
|
"logps/rejected": -238.5004425048828, |
|
"loss": 0.6816, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.01315716840326786, |
|
"rewards/margins": 0.024385813623666763, |
|
"rewards/rejected": -0.011228645220398903, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.38209892698246534, |
|
"grad_norm": 1.4609375, |
|
"learning_rate": 3.883935506370605e-07, |
|
"logits/chosen": -2.816702365875244, |
|
"logits/rejected": -2.8033461570739746, |
|
"logps/chosen": -267.4139404296875, |
|
"logps/rejected": -239.7705535888672, |
|
"loss": 0.6758, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.027285242453217506, |
|
"rewards/margins": 0.036063503473997116, |
|
"rewards/rejected": -0.008778261952102184, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.3847160429207014, |
|
"grad_norm": 1.734375, |
|
"learning_rate": 3.864852992655616e-07, |
|
"logits/chosen": -2.8303184509277344, |
|
"logits/rejected": -2.813981533050537, |
|
"logps/chosen": -266.30218505859375, |
|
"logps/rejected": -254.7268524169922, |
|
"loss": 0.6749, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.02994285151362419, |
|
"rewards/margins": 0.03793361037969589, |
|
"rewards/rejected": -0.007990758866071701, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.38733315885893743, |
|
"grad_norm": 1.140625, |
|
"learning_rate": 3.845656514108515e-07, |
|
"logits/chosen": -2.8377737998962402, |
|
"logits/rejected": -2.815836191177368, |
|
"logps/chosen": -279.17205810546875, |
|
"logps/rejected": -219.8304901123047, |
|
"loss": 0.6792, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.02100013568997383, |
|
"rewards/margins": 0.029268179088830948, |
|
"rewards/rejected": -0.008268042467534542, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.38995027479717354, |
|
"grad_norm": 1.4609375, |
|
"learning_rate": 3.8263476736297375e-07, |
|
"logits/chosen": -2.8322536945343018, |
|
"logits/rejected": -2.7860350608825684, |
|
"logps/chosen": -266.8233947753906, |
|
"logps/rejected": -243.4827880859375, |
|
"loss": 0.6787, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.018916089087724686, |
|
"rewards/margins": 0.03032476268708706, |
|
"rewards/rejected": -0.011408672668039799, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.3925673907354096, |
|
"grad_norm": 1.9453125, |
|
"learning_rate": 3.8069280835019055e-07, |
|
"logits/chosen": -2.822990894317627, |
|
"logits/rejected": -2.789095878601074, |
|
"logps/chosen": -282.0010070800781, |
|
"logps/rejected": -262.74383544921875, |
|
"loss": 0.6795, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.024773618206381798, |
|
"rewards/margins": 0.028730124235153198, |
|
"rewards/rejected": -0.0039565060287714005, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.3925673907354096, |
|
"eval_logits/chosen": -2.851984977722168, |
|
"eval_logits/rejected": -2.825019121170044, |
|
"eval_logps/chosen": -280.51751708984375, |
|
"eval_logps/rejected": -262.1142883300781, |
|
"eval_loss": 0.6791806221008301, |
|
"eval_rewards/accuracies": 0.6830000281333923, |
|
"eval_rewards/chosen": 0.022559717297554016, |
|
"eval_rewards/margins": 0.029319126158952713, |
|
"eval_rewards/rejected": -0.0067594097927212715, |
|
"eval_runtime": 621.2203, |
|
"eval_samples_per_second": 3.219, |
|
"eval_steps_per_second": 0.402, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.39518450667364563, |
|
"grad_norm": 1.3125, |
|
"learning_rate": 3.7873993652552073e-07, |
|
"logits/chosen": -2.8283066749572754, |
|
"logits/rejected": -2.811255693435669, |
|
"logps/chosen": -247.559814453125, |
|
"logps/rejected": -242.0292205810547, |
|
"loss": 0.6857, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.015011796727776527, |
|
"rewards/margins": 0.016142752021551132, |
|
"rewards/rejected": -0.0011309570400044322, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.39780162261188173, |
|
"grad_norm": 1.34375, |
|
"learning_rate": 3.767763149531995e-07, |
|
"logits/chosen": -2.8359994888305664, |
|
"logits/rejected": -2.819225311279297, |
|
"logps/chosen": -277.1551208496094, |
|
"logps/rejected": -260.8001708984375, |
|
"loss": 0.6772, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.02346893586218357, |
|
"rewards/margins": 0.03324516490101814, |
|
"rewards/rejected": -0.009776233695447445, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.4004187385501178, |
|
"grad_norm": 1.5546875, |
|
"learning_rate": 3.7480210759506326e-07, |
|
"logits/chosen": -2.808189868927002, |
|
"logits/rejected": -2.8010077476501465, |
|
"logps/chosen": -292.74847412109375, |
|
"logps/rejected": -281.989013671875, |
|
"loss": 0.6814, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.02831420861184597, |
|
"rewards/margins": 0.025223467499017715, |
|
"rewards/rejected": 0.003090745070949197, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.40303585448835383, |
|
"grad_norm": 1.234375, |
|
"learning_rate": 3.728174792968582e-07, |
|
"logits/chosen": -2.8126258850097656, |
|
"logits/rejected": -2.782135248184204, |
|
"logps/chosen": -253.31851196289062, |
|
"logps/rejected": -239.48095703125, |
|
"loss": 0.6811, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.016257187351584435, |
|
"rewards/margins": 0.02541721798479557, |
|
"rewards/rejected": -0.00916003156453371, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.4056529704265899, |
|
"grad_norm": 1.3828125, |
|
"learning_rate": 3.70822595774476e-07, |
|
"logits/chosen": -2.8408150672912598, |
|
"logits/rejected": -2.8108041286468506, |
|
"logps/chosen": -285.0970153808594, |
|
"logps/rejected": -271.54425048828125, |
|
"loss": 0.6743, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.029356488958001137, |
|
"rewards/margins": 0.03932540863752365, |
|
"rewards/rejected": -0.009968922473490238, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.408270086364826, |
|
"grad_norm": 1.4609375, |
|
"learning_rate": 3.688176236001168e-07, |
|
"logits/chosen": -2.837639331817627, |
|
"logits/rejected": -2.7985403537750244, |
|
"logps/chosen": -294.97491455078125, |
|
"logps/rejected": -259.5853271484375, |
|
"loss": 0.6779, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.028945360332727432, |
|
"rewards/margins": 0.03233477473258972, |
|
"rewards/rejected": -0.0033894157968461514, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.410887202303062, |
|
"grad_norm": 1.34375, |
|
"learning_rate": 3.6680273018838016e-07, |
|
"logits/chosen": -2.847684144973755, |
|
"logits/rejected": -2.8297903537750244, |
|
"logps/chosen": -267.1182556152344, |
|
"logps/rejected": -251.4414825439453, |
|
"loss": 0.6751, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.0260789655148983, |
|
"rewards/margins": 0.03772038221359253, |
|
"rewards/rejected": -0.011641415767371655, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.4135043182412981, |
|
"grad_norm": 1.265625, |
|
"learning_rate": 3.6477808378228596e-07, |
|
"logits/chosen": -2.8226513862609863, |
|
"logits/rejected": -2.8190042972564697, |
|
"logps/chosen": -268.88055419921875, |
|
"logps/rejected": -301.400146484375, |
|
"loss": 0.678, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.022145092487335205, |
|
"rewards/margins": 0.03182849660515785, |
|
"rewards/rejected": -0.009683402255177498, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.4161214341795342, |
|
"grad_norm": 1.140625, |
|
"learning_rate": 3.6274385343922674e-07, |
|
"logits/chosen": -2.8877930641174316, |
|
"logits/rejected": -2.8846001625061035, |
|
"logps/chosen": -250.56411743164062, |
|
"logps/rejected": -259.74505615234375, |
|
"loss": 0.6826, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.015636231750249863, |
|
"rewards/margins": 0.0221557579934597, |
|
"rewards/rejected": -0.006519525311887264, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.4187385501177702, |
|
"grad_norm": 1.140625, |
|
"learning_rate": 3.6070020901685057e-07, |
|
"logits/chosen": -2.8079447746276855, |
|
"logits/rejected": -2.812084197998047, |
|
"logps/chosen": -280.13079833984375, |
|
"logps/rejected": -259.9898681640625, |
|
"loss": 0.6801, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.02125183865427971, |
|
"rewards/margins": 0.027416234835982323, |
|
"rewards/rejected": -0.00616439338773489, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.4187385501177702, |
|
"eval_logits/chosen": -2.8515522480010986, |
|
"eval_logits/rejected": -2.824509382247925, |
|
"eval_logps/chosen": -280.82861328125, |
|
"eval_logps/rejected": -262.506591796875, |
|
"eval_loss": 0.6788100600242615, |
|
"eval_rewards/accuracies": 0.684499979019165, |
|
"eval_rewards/chosen": 0.01944848708808422, |
|
"eval_rewards/margins": 0.030131228268146515, |
|
"eval_rewards/rejected": -0.010682739317417145, |
|
"eval_runtime": 623.5252, |
|
"eval_samples_per_second": 3.208, |
|
"eval_steps_per_second": 0.401, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.4213556660560063, |
|
"grad_norm": 1.3046875, |
|
"learning_rate": 3.5864732115887863e-07, |
|
"logits/chosen": -2.8428633213043213, |
|
"logits/rejected": -2.8304831981658936, |
|
"logps/chosen": -258.84423828125, |
|
"logps/rejected": -267.82415771484375, |
|
"loss": 0.6772, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.021647287532687187, |
|
"rewards/margins": 0.03337367996573448, |
|
"rewards/rejected": -0.011726390570402145, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.4239727819942423, |
|
"grad_norm": 1.515625, |
|
"learning_rate": 3.565853612808562e-07, |
|
"logits/chosen": -2.8622894287109375, |
|
"logits/rejected": -2.8286705017089844, |
|
"logps/chosen": -278.792724609375, |
|
"logps/rejected": -251.3448028564453, |
|
"loss": 0.6813, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.011435525491833687, |
|
"rewards/margins": 0.02519642934203148, |
|
"rewards/rejected": -0.013760904781520367, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.4265898979324784, |
|
"grad_norm": 1.234375, |
|
"learning_rate": 3.5451450155583984e-07, |
|
"logits/chosen": -2.776291608810425, |
|
"logits/rejected": -2.8099074363708496, |
|
"logps/chosen": -247.71432495117188, |
|
"logps/rejected": -233.94900512695312, |
|
"loss": 0.6799, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.01177397184073925, |
|
"rewards/margins": 0.027832742780447006, |
|
"rewards/rejected": -0.016058770939707756, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.42920701387071447, |
|
"grad_norm": 1.6171875, |
|
"learning_rate": 3.5243491490002055e-07, |
|
"logits/chosen": -2.8553478717803955, |
|
"logits/rejected": -2.8476006984710693, |
|
"logps/chosen": -271.2582702636719, |
|
"logps/rejected": -265.4175720214844, |
|
"loss": 0.681, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.014447445049881935, |
|
"rewards/margins": 0.026055917143821716, |
|
"rewards/rejected": -0.011608473025262356, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.4318241298089505, |
|
"grad_norm": 1.3515625, |
|
"learning_rate": 3.503467749582857e-07, |
|
"logits/chosen": -2.834573984146118, |
|
"logits/rejected": -2.7920243740081787, |
|
"logps/chosen": -269.9908142089844, |
|
"logps/rejected": -235.1788787841797, |
|
"loss": 0.6829, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.01244247704744339, |
|
"rewards/margins": 0.021913422271609306, |
|
"rewards/rejected": -0.009470945224165916, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.4344412457471866, |
|
"grad_norm": 1.15625, |
|
"learning_rate": 3.482502560897194e-07, |
|
"logits/chosen": -2.8074707984924316, |
|
"logits/rejected": -2.7939584255218506, |
|
"logps/chosen": -236.347412109375, |
|
"logps/rejected": -241.1566925048828, |
|
"loss": 0.684, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.011268834583461285, |
|
"rewards/margins": 0.019097527489066124, |
|
"rewards/rejected": -0.007828695699572563, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.43705836168542267, |
|
"grad_norm": 1.5546875, |
|
"learning_rate": 3.4614553335304403e-07, |
|
"logits/chosen": -2.8423566818237305, |
|
"logits/rejected": -2.7879586219787598, |
|
"logps/chosen": -288.4642639160156, |
|
"logps/rejected": -253.7875213623047, |
|
"loss": 0.6767, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.02120782807469368, |
|
"rewards/margins": 0.034634821116924286, |
|
"rewards/rejected": -0.013426998630166054, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.4396754776236587, |
|
"grad_norm": 1.75, |
|
"learning_rate": 3.440327824920022e-07, |
|
"logits/chosen": -2.831993579864502, |
|
"logits/rejected": -2.8056693077087402, |
|
"logps/chosen": -299.0148010253906, |
|
"logps/rejected": -260.6773681640625, |
|
"loss": 0.6741, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.026383286342024803, |
|
"rewards/margins": 0.03987019881606102, |
|
"rewards/rejected": -0.013486906886100769, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.44229259356189476, |
|
"grad_norm": 1.671875, |
|
"learning_rate": 3.4191217992068287e-07, |
|
"logits/chosen": -2.870518207550049, |
|
"logits/rejected": -2.841670513153076, |
|
"logps/chosen": -292.0727844238281, |
|
"logps/rejected": -247.9720916748047, |
|
"loss": 0.678, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.026544999331235886, |
|
"rewards/margins": 0.03184535354375839, |
|
"rewards/rejected": -0.005300348624587059, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.44490970950013087, |
|
"grad_norm": 1.3828125, |
|
"learning_rate": 3.3978390270879056e-07, |
|
"logits/chosen": -2.8237035274505615, |
|
"logits/rejected": -2.810272455215454, |
|
"logps/chosen": -227.2367706298828, |
|
"logps/rejected": -232.7034454345703, |
|
"loss": 0.6839, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.008710166439414024, |
|
"rewards/margins": 0.01971607096493244, |
|
"rewards/rejected": -0.011005903594195843, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.44490970950013087, |
|
"eval_logits/chosen": -2.853027105331421, |
|
"eval_logits/rejected": -2.8261446952819824, |
|
"eval_logps/chosen": -280.7289123535156, |
|
"eval_logps/rejected": -262.4769592285156, |
|
"eval_loss": 0.6784868240356445, |
|
"eval_rewards/accuracies": 0.6855000257492065, |
|
"eval_rewards/chosen": 0.02044598199427128, |
|
"eval_rewards/margins": 0.030832206830382347, |
|
"eval_rewards/rejected": -0.010386227630078793, |
|
"eval_runtime": 622.0841, |
|
"eval_samples_per_second": 3.215, |
|
"eval_steps_per_second": 0.402, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.4475268254383669, |
|
"grad_norm": 1.2734375, |
|
"learning_rate": 3.376481285668599e-07, |
|
"logits/chosen": -2.8446857929229736, |
|
"logits/rejected": -2.8485488891601562, |
|
"logps/chosen": -237.1801300048828, |
|
"logps/rejected": -253.5928192138672, |
|
"loss": 0.6814, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.013800742104649544, |
|
"rewards/margins": 0.024945253506302834, |
|
"rewards/rejected": -0.01114450953900814, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.45014394137660296, |
|
"grad_norm": 1.3046875, |
|
"learning_rate": 3.355050358314172e-07, |
|
"logits/chosen": -2.874572515487671, |
|
"logits/rejected": -2.8544344902038574, |
|
"logps/chosen": -282.2732238769531, |
|
"logps/rejected": -267.3336181640625, |
|
"loss": 0.6765, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.026168251410126686, |
|
"rewards/margins": 0.03479185700416565, |
|
"rewards/rejected": -0.008623604662716389, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.45276105731483907, |
|
"grad_norm": 1.3515625, |
|
"learning_rate": 3.33354803450089e-07, |
|
"logits/chosen": -2.7801265716552734, |
|
"logits/rejected": -2.7793445587158203, |
|
"logps/chosen": -282.25518798828125, |
|
"logps/rejected": -262.77978515625, |
|
"loss": 0.6811, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.016791898757219315, |
|
"rewards/margins": 0.025678789243102074, |
|
"rewards/rejected": -0.008886890485882759, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.4553781732530751, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 3.311976109666605e-07, |
|
"logits/chosen": -2.8067824840545654, |
|
"logits/rejected": -2.7824745178222656, |
|
"logps/chosen": -292.15130615234375, |
|
"logps/rejected": -263.1966247558594, |
|
"loss": 0.6782, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.027290191501379013, |
|
"rewards/margins": 0.03162240982055664, |
|
"rewards/rejected": -0.004332221113145351, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.45799528919131116, |
|
"grad_norm": 1.3125, |
|
"learning_rate": 3.2903363850608317e-07, |
|
"logits/chosen": -2.899350166320801, |
|
"logits/rejected": -2.855714797973633, |
|
"logps/chosen": -263.13433837890625, |
|
"logps/rejected": -244.21298217773438, |
|
"loss": 0.6777, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.012938638217747211, |
|
"rewards/margins": 0.03214184567332268, |
|
"rewards/rejected": -0.01920320652425289, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.46061240512954726, |
|
"grad_norm": 1.25, |
|
"learning_rate": 3.2686306675943477e-07, |
|
"logits/chosen": -2.8296382427215576, |
|
"logits/rejected": -2.8444466590881348, |
|
"logps/chosen": -271.3722839355469, |
|
"logps/rejected": -247.5081787109375, |
|
"loss": 0.6782, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.02265056222677231, |
|
"rewards/margins": 0.031140562146902084, |
|
"rewards/rejected": -0.008490001782774925, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.4632295210677833, |
|
"grad_norm": 1.5, |
|
"learning_rate": 3.2468607696883145e-07, |
|
"logits/chosen": -2.8013827800750732, |
|
"logits/rejected": -2.793203592300415, |
|
"logps/chosen": -266.6105651855469, |
|
"logps/rejected": -276.02911376953125, |
|
"loss": 0.6764, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.01664569415152073, |
|
"rewards/margins": 0.034995269030332565, |
|
"rewards/rejected": -0.018349576741456985, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.46584663700601936, |
|
"grad_norm": 1.2421875, |
|
"learning_rate": 3.2250285091229435e-07, |
|
"logits/chosen": -2.863778591156006, |
|
"logits/rejected": -2.8398165702819824, |
|
"logps/chosen": -248.42300415039062, |
|
"logps/rejected": -239.9966583251953, |
|
"loss": 0.682, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.011377891525626183, |
|
"rewards/margins": 0.02343577891588211, |
|
"rewards/rejected": -0.012057888321578503, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.4684637529442554, |
|
"grad_norm": 7.71875, |
|
"learning_rate": 3.2031357088857083e-07, |
|
"logits/chosen": -2.851457118988037, |
|
"logits/rejected": -2.8426060676574707, |
|
"logps/chosen": -291.23236083984375, |
|
"logps/rejected": -300.08843994140625, |
|
"loss": 0.6808, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.015203478746116161, |
|
"rewards/margins": 0.02637268602848053, |
|
"rewards/rejected": -0.011169209145009518, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.4710808688824915, |
|
"grad_norm": 1.4375, |
|
"learning_rate": 3.1811841970191267e-07, |
|
"logits/chosen": -2.7736434936523438, |
|
"logits/rejected": -2.7497920989990234, |
|
"logps/chosen": -245.38912963867188, |
|
"logps/rejected": -276.73297119140625, |
|
"loss": 0.6793, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.009697729721665382, |
|
"rewards/margins": 0.029323875904083252, |
|
"rewards/rejected": -0.01962614618241787, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.4710808688824915, |
|
"eval_logits/chosen": -2.851853132247925, |
|
"eval_logits/rejected": -2.8248438835144043, |
|
"eval_logps/chosen": -280.89361572265625, |
|
"eval_logps/rejected": -262.69610595703125, |
|
"eval_loss": 0.6782403588294983, |
|
"eval_rewards/accuracies": 0.6869999766349792, |
|
"eval_rewards/chosen": 0.018798967823386192, |
|
"eval_rewards/margins": 0.03137620911002159, |
|
"eval_rewards/rejected": -0.012577244080603123, |
|
"eval_runtime": 622.7177, |
|
"eval_samples_per_second": 3.212, |
|
"eval_steps_per_second": 0.401, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.47369798482072756, |
|
"grad_norm": 1.28125, |
|
"learning_rate": 3.1591758064681257e-07, |
|
"logits/chosen": -2.779759407043457, |
|
"logits/rejected": -2.7464611530303955, |
|
"logps/chosen": -269.7041015625, |
|
"logps/rejected": -234.5542755126953, |
|
"loss": 0.6773, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.017099203541874886, |
|
"rewards/margins": 0.03334728628396988, |
|
"rewards/rejected": -0.01624808833003044, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.4763151007589636, |
|
"grad_norm": 1.484375, |
|
"learning_rate": 3.13711237492698e-07, |
|
"logits/chosen": -2.8304784297943115, |
|
"logits/rejected": -2.8185439109802246, |
|
"logps/chosen": -296.6722717285156, |
|
"logps/rejected": -284.0693664550781, |
|
"loss": 0.6829, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.016499513760209084, |
|
"rewards/margins": 0.02195136621594429, |
|
"rewards/rejected": -0.005451851524412632, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.4789322166971997, |
|
"grad_norm": 1.046875, |
|
"learning_rate": 3.1149957446858767e-07, |
|
"logits/chosen": -2.82464599609375, |
|
"logits/rejected": -2.8389458656311035, |
|
"logps/chosen": -263.2919006347656, |
|
"logps/rejected": -250.74813842773438, |
|
"loss": 0.6841, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.0153631791472435, |
|
"rewards/margins": 0.019609825685620308, |
|
"rewards/rejected": -0.004246644675731659, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.48154933263543576, |
|
"grad_norm": 1.4609375, |
|
"learning_rate": 3.0928277624770736e-07, |
|
"logits/chosen": -2.8787219524383545, |
|
"logits/rejected": -2.8530170917510986, |
|
"logps/chosen": -300.767333984375, |
|
"logps/rejected": -275.71856689453125, |
|
"loss": 0.6736, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.02286524511873722, |
|
"rewards/margins": 0.041333895176649094, |
|
"rewards/rejected": -0.018468648195266724, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.4841664485736718, |
|
"grad_norm": 1.3203125, |
|
"learning_rate": 3.0706102793207073e-07, |
|
"logits/chosen": -2.8641512393951416, |
|
"logits/rejected": -2.832724094390869, |
|
"logps/chosen": -301.1673278808594, |
|
"logps/rejected": -282.6499938964844, |
|
"loss": 0.6723, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.030217334628105164, |
|
"rewards/margins": 0.043759047985076904, |
|
"rewards/rejected": -0.01354171335697174, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.48678356451190785, |
|
"grad_norm": 1.28125, |
|
"learning_rate": 3.048345150370226e-07, |
|
"logits/chosen": -2.8586244583129883, |
|
"logits/rejected": -2.8518083095550537, |
|
"logps/chosen": -300.2522888183594, |
|
"logps/rejected": -283.9441223144531, |
|
"loss": 0.6775, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.020278874784708023, |
|
"rewards/margins": 0.03307543322443962, |
|
"rewards/rejected": -0.012796561233699322, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.48940068045014395, |
|
"grad_norm": 1.1171875, |
|
"learning_rate": 3.0260342347574913e-07, |
|
"logits/chosen": -2.8430685997009277, |
|
"logits/rejected": -2.8185229301452637, |
|
"logps/chosen": -285.382568359375, |
|
"logps/rejected": -269.14886474609375, |
|
"loss": 0.6767, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.026956235989928246, |
|
"rewards/margins": 0.03441258519887924, |
|
"rewards/rejected": -0.007456351071596146, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.49201779638838, |
|
"grad_norm": 1.21875, |
|
"learning_rate": 3.0036793954375357e-07, |
|
"logits/chosen": -2.8681893348693848, |
|
"logits/rejected": -2.8438541889190674, |
|
"logps/chosen": -283.40582275390625, |
|
"logps/rejected": -243.862060546875, |
|
"loss": 0.6752, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.026450032368302345, |
|
"rewards/margins": 0.037637047469615936, |
|
"rewards/rejected": -0.01118701882660389, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.49463491232661605, |
|
"grad_norm": 1.375, |
|
"learning_rate": 2.9812824990330085e-07, |
|
"logits/chosen": -2.837024211883545, |
|
"logits/rejected": -2.825876235961914, |
|
"logps/chosen": -290.41644287109375, |
|
"logps/rejected": -267.0772705078125, |
|
"loss": 0.6775, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.017299989238381386, |
|
"rewards/margins": 0.03299538046121597, |
|
"rewards/rejected": -0.015695389360189438, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.49725202826485215, |
|
"grad_norm": 1.421875, |
|
"learning_rate": 2.958845415678316e-07, |
|
"logits/chosen": -2.8465914726257324, |
|
"logits/rejected": -2.8124914169311523, |
|
"logps/chosen": -293.70098876953125, |
|
"logps/rejected": -275.6326904296875, |
|
"loss": 0.6766, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.020625559613108635, |
|
"rewards/margins": 0.034684114158153534, |
|
"rewards/rejected": -0.01405855268239975, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.49725202826485215, |
|
"eval_logits/chosen": -2.854793071746826, |
|
"eval_logits/rejected": -2.8281238079071045, |
|
"eval_logps/chosen": -280.89208984375, |
|
"eval_logps/rejected": -262.73114013671875, |
|
"eval_loss": 0.6780784726142883, |
|
"eval_rewards/accuracies": 0.6809999942779541, |
|
"eval_rewards/chosen": 0.01881374977529049, |
|
"eval_rewards/margins": 0.03174133226275444, |
|
"eval_rewards/rejected": -0.012927580624818802, |
|
"eval_runtime": 623.0648, |
|
"eval_samples_per_second": 3.21, |
|
"eval_steps_per_second": 0.401, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.4998691442030882, |
|
"grad_norm": 1.0625, |
|
"learning_rate": 2.936370018863459e-07, |
|
"logits/chosen": -2.86594295501709, |
|
"logits/rejected": -2.852074384689331, |
|
"logps/chosen": -278.3857727050781, |
|
"logps/rejected": -242.82290649414062, |
|
"loss": 0.6795, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.016295427456498146, |
|
"rewards/margins": 0.028570901602506638, |
|
"rewards/rejected": -0.012275472283363342, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.5024862601413242, |
|
"grad_norm": 1.3125, |
|
"learning_rate": 2.913858185277605e-07, |
|
"logits/chosen": -2.8365845680236816, |
|
"logits/rejected": -2.8241991996765137, |
|
"logps/chosen": -274.61334228515625, |
|
"logps/rejected": -262.88934326171875, |
|
"loss": 0.6764, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.025087693706154823, |
|
"rewards/margins": 0.03509819880127907, |
|
"rewards/rejected": -0.010010505095124245, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.5051033760795604, |
|
"grad_norm": 1.3828125, |
|
"learning_rate": 2.89131179465238e-07, |
|
"logits/chosen": -2.802734375, |
|
"logits/rejected": -2.7592692375183105, |
|
"logps/chosen": -286.10986328125, |
|
"logps/rejected": -250.1353759765625, |
|
"loss": 0.6755, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.019024692475795746, |
|
"rewards/margins": 0.03733197599649429, |
|
"rewards/rejected": -0.018307287245988846, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.5077204920177963, |
|
"grad_norm": 1.2265625, |
|
"learning_rate": 2.8687327296049125e-07, |
|
"logits/chosen": -2.841648578643799, |
|
"logits/rejected": -2.817791700363159, |
|
"logps/chosen": -272.06378173828125, |
|
"logps/rejected": -273.47930908203125, |
|
"loss": 0.6797, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.0162358395755291, |
|
"rewards/margins": 0.02848033234477043, |
|
"rewards/rejected": -0.012244494631886482, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.5103376079560324, |
|
"grad_norm": 1.21875, |
|
"learning_rate": 2.846122875480637e-07, |
|
"logits/chosen": -2.8606886863708496, |
|
"logits/rejected": -2.822801113128662, |
|
"logps/chosen": -288.4855651855469, |
|
"logps/rejected": -264.97247314453125, |
|
"loss": 0.6769, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.024403361603617668, |
|
"rewards/margins": 0.03406291827559471, |
|
"rewards/rejected": -0.009659556671977043, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.5129547238942685, |
|
"grad_norm": 1.1484375, |
|
"learning_rate": 2.8234841201958647e-07, |
|
"logits/chosen": -2.8555562496185303, |
|
"logits/rejected": -2.819885730743408, |
|
"logps/chosen": -297.9819641113281, |
|
"logps/rejected": -261.5909423828125, |
|
"loss": 0.6754, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.02403775416314602, |
|
"rewards/margins": 0.03708943352103233, |
|
"rewards/rejected": -0.013051679357886314, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.5155718398325045, |
|
"grad_norm": 1.4375, |
|
"learning_rate": 2.800818354080148e-07, |
|
"logits/chosen": -2.83642840385437, |
|
"logits/rejected": -2.805063486099243, |
|
"logps/chosen": -287.24420166015625, |
|
"logps/rejected": -243.9567413330078, |
|
"loss": 0.6775, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.021363835781812668, |
|
"rewards/margins": 0.032995063811540604, |
|
"rewards/rejected": -0.011631224304437637, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.5181889557707406, |
|
"grad_norm": 1.265625, |
|
"learning_rate": 2.778127469718435e-07, |
|
"logits/chosen": -2.7859811782836914, |
|
"logits/rejected": -2.7979307174682617, |
|
"logps/chosen": -245.3804168701172, |
|
"logps/rejected": -266.1874694824219, |
|
"loss": 0.6803, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.015351531095802784, |
|
"rewards/margins": 0.02699609100818634, |
|
"rewards/rejected": -0.011644558981060982, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.5208060717089767, |
|
"grad_norm": 1.3515625, |
|
"learning_rate": 2.755413361793039e-07, |
|
"logits/chosen": -2.8089661598205566, |
|
"logits/rejected": -2.779783010482788, |
|
"logps/chosen": -262.85589599609375, |
|
"logps/rejected": -253.60653686523438, |
|
"loss": 0.6757, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.026462215930223465, |
|
"rewards/margins": 0.03666644170880318, |
|
"rewards/rejected": -0.010204223915934563, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.5234231876472127, |
|
"grad_norm": 1.3125, |
|
"learning_rate": 2.7326779269254356e-07, |
|
"logits/chosen": -2.8754069805145264, |
|
"logits/rejected": -2.8527140617370605, |
|
"logps/chosen": -303.8473205566406, |
|
"logps/rejected": -247.6015625, |
|
"loss": 0.6762, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.026804108172655106, |
|
"rewards/margins": 0.03586486726999283, |
|
"rewards/rejected": -0.009060760028660297, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.5234231876472127, |
|
"eval_logits/chosen": -2.853806972503662, |
|
"eval_logits/rejected": -2.8270227909088135, |
|
"eval_logps/chosen": -280.87493896484375, |
|
"eval_logps/rejected": -262.7651062011719, |
|
"eval_loss": 0.6778436303138733, |
|
"eval_rewards/accuracies": 0.6840000152587891, |
|
"eval_rewards/chosen": 0.01898558810353279, |
|
"eval_rewards/margins": 0.03225287050008774, |
|
"eval_rewards/rejected": -0.013267277739942074, |
|
"eval_runtime": 622.6334, |
|
"eval_samples_per_second": 3.212, |
|
"eval_steps_per_second": 0.402, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.5260403035854488, |
|
"grad_norm": 1.4296875, |
|
"learning_rate": 2.709923063517895e-07, |
|
"logits/chosen": -2.8158721923828125, |
|
"logits/rejected": -2.8292183876037598, |
|
"logps/chosen": -277.1296691894531, |
|
"logps/rejected": -276.7500915527344, |
|
"loss": 0.6757, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.020963847637176514, |
|
"rewards/margins": 0.03652495518326759, |
|
"rewards/rejected": -0.015561106614768505, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.528657419523685, |
|
"grad_norm": 1.5390625, |
|
"learning_rate": 2.68715067159496e-07, |
|
"logits/chosen": -2.8547072410583496, |
|
"logits/rejected": -2.8296151161193848, |
|
"logps/chosen": -266.697265625, |
|
"logps/rejected": -248.7971649169922, |
|
"loss": 0.6773, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.023486215621232986, |
|
"rewards/margins": 0.0331868901848793, |
|
"rewards/rejected": -0.009700671769678593, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.5312745354619209, |
|
"grad_norm": 1.5625, |
|
"learning_rate": 2.664362652644806e-07, |
|
"logits/chosen": -2.871127128601074, |
|
"logits/rejected": -2.859767198562622, |
|
"logps/chosen": -309.3524475097656, |
|
"logps/rejected": -267.5863342285156, |
|
"loss": 0.6745, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.03075565956532955, |
|
"rewards/margins": 0.039490751922130585, |
|
"rewards/rejected": -0.008735088631510735, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.533891651400157, |
|
"grad_norm": 1.2421875, |
|
"learning_rate": 2.6415609094604555e-07, |
|
"logits/chosen": -2.8490989208221436, |
|
"logits/rejected": -2.849595308303833, |
|
"logps/chosen": -284.77679443359375, |
|
"logps/rejected": -266.975830078125, |
|
"loss": 0.6771, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.029801562428474426, |
|
"rewards/margins": 0.034142639487981796, |
|
"rewards/rejected": -0.004341077525168657, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.5365087673383931, |
|
"grad_norm": 1.1875, |
|
"learning_rate": 2.618747345980904e-07, |
|
"logits/chosen": -2.85640287399292, |
|
"logits/rejected": -2.8106446266174316, |
|
"logps/chosen": -262.911376953125, |
|
"logps/rejected": -212.4907989501953, |
|
"loss": 0.6744, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.0176672525703907, |
|
"rewards/margins": 0.039339274168014526, |
|
"rewards/rejected": -0.021672027185559273, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.5391258832766291, |
|
"grad_norm": 1.421875, |
|
"learning_rate": 2.595923867132136e-07, |
|
"logits/chosen": -2.8832926750183105, |
|
"logits/rejected": -2.872882604598999, |
|
"logps/chosen": -296.1038818359375, |
|
"logps/rejected": -274.6143493652344, |
|
"loss": 0.6772, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.01775936782360077, |
|
"rewards/margins": 0.03422468900680542, |
|
"rewards/rejected": -0.0164653230458498, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.5417429992148652, |
|
"grad_norm": 1.3359375, |
|
"learning_rate": 2.5730923786680667e-07, |
|
"logits/chosen": -2.860802173614502, |
|
"logits/rejected": -2.862802028656006, |
|
"logps/chosen": -264.1892395019531, |
|
"logps/rejected": -275.02532958984375, |
|
"loss": 0.6787, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.016188761219382286, |
|
"rewards/margins": 0.030575359240174294, |
|
"rewards/rejected": -0.014386599883437157, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.5443601151531012, |
|
"grad_norm": 1.2890625, |
|
"learning_rate": 2.5502547870114135e-07, |
|
"logits/chosen": -2.8470053672790527, |
|
"logits/rejected": -2.8130552768707275, |
|
"logps/chosen": -269.6037902832031, |
|
"logps/rejected": -240.1824493408203, |
|
"loss": 0.6779, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.01747250370681286, |
|
"rewards/margins": 0.03254149109125137, |
|
"rewards/rejected": -0.015068987384438515, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.5469772310913373, |
|
"grad_norm": 3.09375, |
|
"learning_rate": 2.527412999094506e-07, |
|
"logits/chosen": -2.8123087882995605, |
|
"logits/rejected": -2.7862212657928467, |
|
"logps/chosen": -315.90447998046875, |
|
"logps/rejected": -302.16552734375, |
|
"loss": 0.6781, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.022080183029174805, |
|
"rewards/margins": 0.031787317246198654, |
|
"rewards/rejected": -0.009707136079668999, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.5495943470295734, |
|
"grad_norm": 1.828125, |
|
"learning_rate": 2.5045689222000636e-07, |
|
"logits/chosen": -2.798381805419922, |
|
"logits/rejected": -2.7811503410339355, |
|
"logps/chosen": -256.8103332519531, |
|
"logps/rejected": -242.29971313476562, |
|
"loss": 0.6796, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.01742711290717125, |
|
"rewards/margins": 0.02845141850411892, |
|
"rewards/rejected": -0.011024304665625095, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.5495943470295734, |
|
"eval_logits/chosen": -2.856419324874878, |
|
"eval_logits/rejected": -2.8298940658569336, |
|
"eval_logps/chosen": -280.9320983886719, |
|
"eval_logps/rejected": -262.8512878417969, |
|
"eval_loss": 0.6777089834213257, |
|
"eval_rewards/accuracies": 0.6794999837875366, |
|
"eval_rewards/chosen": 0.018413949757814407, |
|
"eval_rewards/margins": 0.03254299610853195, |
|
"eval_rewards/rejected": -0.01412904355674982, |
|
"eval_runtime": 623.6848, |
|
"eval_samples_per_second": 3.207, |
|
"eval_steps_per_second": 0.401, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.5522114629678094, |
|
"grad_norm": 1.2734375, |
|
"learning_rate": 2.481724463801933e-07, |
|
"logits/chosen": -2.837977170944214, |
|
"logits/rejected": -2.8143982887268066, |
|
"logps/chosen": -293.23687744140625, |
|
"logps/rejected": -254.9249725341797, |
|
"loss": 0.6749, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.01989181898534298, |
|
"rewards/margins": 0.03839176893234253, |
|
"rewards/rejected": -0.01849994622170925, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.5548285789060455, |
|
"grad_norm": 1.3984375, |
|
"learning_rate": 2.4588815314058154e-07, |
|
"logits/chosen": -2.828207492828369, |
|
"logits/rejected": -2.825892448425293, |
|
"logps/chosen": -257.57012939453125, |
|
"logps/rejected": -226.6698455810547, |
|
"loss": 0.677, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.021512161940336227, |
|
"rewards/margins": 0.03372306749224663, |
|
"rewards/rejected": -0.01221090741455555, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.5574456948442816, |
|
"grad_norm": 1.2265625, |
|
"learning_rate": 2.4360420323899917e-07, |
|
"logits/chosen": -2.8333821296691895, |
|
"logits/rejected": -2.8204522132873535, |
|
"logps/chosen": -294.45135498046875, |
|
"logps/rejected": -261.3866271972656, |
|
"loss": 0.6785, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.021041734144091606, |
|
"rewards/margins": 0.031500063836574554, |
|
"rewards/rejected": -0.01045832596719265, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.5600628107825176, |
|
"grad_norm": 1.2578125, |
|
"learning_rate": 2.4132078738460583e-07, |
|
"logits/chosen": -2.8641979694366455, |
|
"logits/rejected": -2.8372373580932617, |
|
"logps/chosen": -277.0169677734375, |
|
"logps/rejected": -240.1663055419922, |
|
"loss": 0.6765, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.020255176350474358, |
|
"rewards/margins": 0.03485842049121857, |
|
"rewards/rejected": -0.014603245072066784, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.5626799267207537, |
|
"grad_norm": 1.3984375, |
|
"learning_rate": 2.390380962419682e-07, |
|
"logits/chosen": -2.8402717113494873, |
|
"logits/rejected": -2.82948899269104, |
|
"logps/chosen": -248.21908569335938, |
|
"logps/rejected": -215.9970703125, |
|
"loss": 0.6813, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.013856288976967335, |
|
"rewards/margins": 0.02503989078104496, |
|
"rewards/rejected": -0.011183603666722775, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.5652970426589898, |
|
"grad_norm": 1.203125, |
|
"learning_rate": 2.3675632041513977e-07, |
|
"logits/chosen": -2.8817086219787598, |
|
"logits/rejected": -2.830371856689453, |
|
"logps/chosen": -299.80419921875, |
|
"logps/rejected": -237.6551055908203, |
|
"loss": 0.6704, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.029662128537893295, |
|
"rewards/margins": 0.0477459654211998, |
|
"rewards/rejected": -0.018083838745951653, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.5679141585972258, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 2.344756504317453e-07, |
|
"logits/chosen": -2.8310768604278564, |
|
"logits/rejected": -2.7922732830047607, |
|
"logps/chosen": -273.2166442871094, |
|
"logps/rejected": -238.2533721923828, |
|
"loss": 0.6782, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.013544699177145958, |
|
"rewards/margins": 0.031235402449965477, |
|
"rewards/rejected": -0.01769069954752922, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.5705312745354619, |
|
"grad_norm": 1.3203125, |
|
"learning_rate": 2.3219627672707237e-07, |
|
"logits/chosen": -2.814478874206543, |
|
"logits/rejected": -2.807798147201538, |
|
"logps/chosen": -271.345703125, |
|
"logps/rejected": -229.5119171142578, |
|
"loss": 0.6808, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.008285674266517162, |
|
"rewards/margins": 0.025873666629195213, |
|
"rewards/rejected": -0.017587993294000626, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.573148390473698, |
|
"grad_norm": 1.359375, |
|
"learning_rate": 2.2991838962816918e-07, |
|
"logits/chosen": -2.812224864959717, |
|
"logits/rejected": -2.787701368331909, |
|
"logps/chosen": -268.98724365234375, |
|
"logps/rejected": -268.3625793457031, |
|
"loss": 0.681, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.01591324433684349, |
|
"rewards/margins": 0.025850754231214523, |
|
"rewards/rejected": -0.009937510825693607, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.575765506411934, |
|
"grad_norm": 1.4453125, |
|
"learning_rate": 2.2764217933795297e-07, |
|
"logits/chosen": -2.8286900520324707, |
|
"logits/rejected": -2.8099260330200195, |
|
"logps/chosen": -274.784912109375, |
|
"logps/rejected": -258.30084228515625, |
|
"loss": 0.6736, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.02573958970606327, |
|
"rewards/margins": 0.04115080088376999, |
|
"rewards/rejected": -0.015411211177706718, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.575765506411934, |
|
"eval_logits/chosen": -2.857100486755371, |
|
"eval_logits/rejected": -2.8306167125701904, |
|
"eval_logps/chosen": -280.9635314941406, |
|
"eval_logps/rejected": -262.8892822265625, |
|
"eval_loss": 0.6776819825172424, |
|
"eval_rewards/accuracies": 0.6825000047683716, |
|
"eval_rewards/chosen": 0.018099820241332054, |
|
"eval_rewards/margins": 0.03260912373661995, |
|
"eval_rewards/rejected": -0.014509301632642746, |
|
"eval_runtime": 623.717, |
|
"eval_samples_per_second": 3.207, |
|
"eval_steps_per_second": 0.401, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.5783826223501701, |
|
"grad_norm": 1.6171875, |
|
"learning_rate": 2.253678359193278e-07, |
|
"logits/chosen": -2.901681423187256, |
|
"logits/rejected": -2.858135223388672, |
|
"logps/chosen": -292.3751525878906, |
|
"logps/rejected": -273.216064453125, |
|
"loss": 0.6782, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.017452511936426163, |
|
"rewards/margins": 0.0317564532160759, |
|
"rewards/rejected": -0.014303937554359436, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.5809997382884062, |
|
"grad_norm": 1.2421875, |
|
"learning_rate": 2.230955492793149e-07, |
|
"logits/chosen": -2.785632371902466, |
|
"logits/rejected": -2.791489362716675, |
|
"logps/chosen": -290.7291259765625, |
|
"logps/rejected": -277.0824890136719, |
|
"loss": 0.6816, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.015981845557689667, |
|
"rewards/margins": 0.02465016394853592, |
|
"rewards/rejected": -0.008668316528201103, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.5836168542266422, |
|
"grad_norm": 1.5, |
|
"learning_rate": 2.2082550915319468e-07, |
|
"logits/chosen": -2.797928810119629, |
|
"logits/rejected": -2.795860767364502, |
|
"logps/chosen": -292.92291259765625, |
|
"logps/rejected": -257.6455078125, |
|
"loss": 0.6766, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.02159210667014122, |
|
"rewards/margins": 0.03476356342434883, |
|
"rewards/rejected": -0.013171456754207611, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.5862339701648783, |
|
"grad_norm": 1.3203125, |
|
"learning_rate": 2.1855790508866433e-07, |
|
"logits/chosen": -2.8182015419006348, |
|
"logits/rejected": -2.815925121307373, |
|
"logps/chosen": -324.13592529296875, |
|
"logps/rejected": -299.08135986328125, |
|
"loss": 0.6768, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.02183537557721138, |
|
"rewards/margins": 0.034798912703990936, |
|
"rewards/rejected": -0.012963538058102131, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.5888510861031143, |
|
"grad_norm": 1.2421875, |
|
"learning_rate": 2.162929264300107e-07, |
|
"logits/chosen": -2.79923939704895, |
|
"logits/rejected": -2.7914280891418457, |
|
"logps/chosen": -282.1067810058594, |
|
"logps/rejected": -265.30364990234375, |
|
"loss": 0.6733, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": 0.02532428503036499, |
|
"rewards/margins": 0.04165149852633476, |
|
"rewards/rejected": -0.016327213495969772, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.5914682020413504, |
|
"grad_norm": 1.390625, |
|
"learning_rate": 2.1403076230230005e-07, |
|
"logits/chosen": -2.816969394683838, |
|
"logits/rejected": -2.7890102863311768, |
|
"logps/chosen": -290.90618896484375, |
|
"logps/rejected": -261.3204345703125, |
|
"loss": 0.6801, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.02144182100892067, |
|
"rewards/margins": 0.028353065252304077, |
|
"rewards/rejected": -0.006911243312060833, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.5940853179795865, |
|
"grad_norm": 1.703125, |
|
"learning_rate": 2.1177160159558596e-07, |
|
"logits/chosen": -2.8060302734375, |
|
"logits/rejected": -2.788020610809326, |
|
"logps/chosen": -297.41741943359375, |
|
"logps/rejected": -247.78640747070312, |
|
"loss": 0.6745, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.02653617225587368, |
|
"rewards/margins": 0.03934457153081894, |
|
"rewards/rejected": -0.01280839741230011, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.5967024339178225, |
|
"grad_norm": 1.3203125, |
|
"learning_rate": 2.0951563294913734e-07, |
|
"logits/chosen": -2.8132269382476807, |
|
"logits/rejected": -2.7866339683532715, |
|
"logps/chosen": -277.17449951171875, |
|
"logps/rejected": -250.9734344482422, |
|
"loss": 0.6754, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.01868070289492607, |
|
"rewards/margins": 0.03689347952604294, |
|
"rewards/rejected": -0.018212776631116867, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.5993195498560586, |
|
"grad_norm": 1.1875, |
|
"learning_rate": 2.072630447356869e-07, |
|
"logits/chosen": -2.8403782844543457, |
|
"logits/rejected": -2.8337533473968506, |
|
"logps/chosen": -274.53546142578125, |
|
"logps/rejected": -242.531494140625, |
|
"loss": 0.6767, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.01636110618710518, |
|
"rewards/margins": 0.03431684896349907, |
|
"rewards/rejected": -0.01795574650168419, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.6019366657942947, |
|
"grad_norm": 1.5625, |
|
"learning_rate": 2.0501402504570232e-07, |
|
"logits/chosen": -2.87614107131958, |
|
"logits/rejected": -2.8186376094818115, |
|
"logps/chosen": -293.62347412109375, |
|
"logps/rejected": -262.20794677734375, |
|
"loss": 0.6779, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.020630482584238052, |
|
"rewards/margins": 0.032361775636672974, |
|
"rewards/rejected": -0.011731292121112347, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.6019366657942947, |
|
"eval_logits/chosen": -2.8548264503479004, |
|
"eval_logits/rejected": -2.8281185626983643, |
|
"eval_logps/chosen": -281.0184326171875, |
|
"eval_logps/rejected": -262.955810546875, |
|
"eval_loss": 0.6776320934295654, |
|
"eval_rewards/accuracies": 0.6875, |
|
"eval_rewards/chosen": 0.017550628632307053, |
|
"eval_rewards/margins": 0.032725006341934204, |
|
"eval_rewards/rejected": -0.015174377709627151, |
|
"eval_runtime": 623.5172, |
|
"eval_samples_per_second": 3.208, |
|
"eval_steps_per_second": 0.401, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.6045537817325307, |
|
"grad_norm": 1.3125, |
|
"learning_rate": 2.027687616716804e-07, |
|
"logits/chosen": -2.776857614517212, |
|
"logits/rejected": -2.766300916671753, |
|
"logps/chosen": -245.21249389648438, |
|
"logps/rejected": -210.9021453857422, |
|
"loss": 0.6796, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.012185106053948402, |
|
"rewards/margins": 0.028403136879205704, |
|
"rewards/rejected": -0.016218028962612152, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.6071708976707668, |
|
"grad_norm": 1.5859375, |
|
"learning_rate": 2.005274420924668e-07, |
|
"logits/chosen": -2.842299699783325, |
|
"logits/rejected": -2.8256301879882812, |
|
"logps/chosen": -268.92669677734375, |
|
"logps/rejected": -236.4410858154297, |
|
"loss": 0.6773, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.01880219765007496, |
|
"rewards/margins": 0.03351093456149101, |
|
"rewards/rejected": -0.014708739705383778, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.6097880136090029, |
|
"grad_norm": 1.8203125, |
|
"learning_rate": 1.9829025345760121e-07, |
|
"logits/chosen": -2.8297770023345947, |
|
"logits/rejected": -2.830937147140503, |
|
"logps/chosen": -295.0563049316406, |
|
"logps/rejected": -287.86016845703125, |
|
"loss": 0.6799, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.02504206821322441, |
|
"rewards/margins": 0.02800445258617401, |
|
"rewards/rejected": -0.002962383907288313, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.6124051295472389, |
|
"grad_norm": 1.4453125, |
|
"learning_rate": 1.960573825716911e-07, |
|
"logits/chosen": -2.8119211196899414, |
|
"logits/rejected": -2.7910213470458984, |
|
"logps/chosen": -250.70425415039062, |
|
"logps/rejected": -246.34921264648438, |
|
"loss": 0.6817, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.005780251231044531, |
|
"rewards/margins": 0.024395998567342758, |
|
"rewards/rejected": -0.01861574873328209, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.615022245485475, |
|
"grad_norm": 1.4453125, |
|
"learning_rate": 1.9382901587881273e-07, |
|
"logits/chosen": -2.8759961128234863, |
|
"logits/rejected": -2.864570379257202, |
|
"logps/chosen": -273.0665588378906, |
|
"logps/rejected": -240.42294311523438, |
|
"loss": 0.6727, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.027859041467308998, |
|
"rewards/margins": 0.04289738088846207, |
|
"rewards/rejected": -0.015038339421153069, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.6176393614237111, |
|
"grad_norm": 1.53125, |
|
"learning_rate": 1.9160533944694364e-07, |
|
"logits/chosen": -2.870702028274536, |
|
"logits/rejected": -2.8234288692474365, |
|
"logps/chosen": -276.2862854003906, |
|
"logps/rejected": -267.0870666503906, |
|
"loss": 0.6738, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": 0.025392061099410057, |
|
"rewards/margins": 0.04046647623181343, |
|
"rewards/rejected": -0.015074415132403374, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.6202564773619471, |
|
"grad_norm": 1.3671875, |
|
"learning_rate": 1.8938653895242602e-07, |
|
"logits/chosen": -2.861572027206421, |
|
"logits/rejected": -2.8287158012390137, |
|
"logps/chosen": -277.0164489746094, |
|
"logps/rejected": -251.5226593017578, |
|
"loss": 0.6715, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.02161785028874874, |
|
"rewards/margins": 0.045254360884428024, |
|
"rewards/rejected": -0.02363651618361473, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.6228735933001832, |
|
"grad_norm": 1.3515625, |
|
"learning_rate": 1.8717279966446264e-07, |
|
"logits/chosen": -2.7649269104003906, |
|
"logits/rejected": -2.748934268951416, |
|
"logps/chosen": -266.9613037109375, |
|
"logps/rejected": -256.705810546875, |
|
"loss": 0.6789, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.01746024750173092, |
|
"rewards/margins": 0.030255427584052086, |
|
"rewards/rejected": -0.012795181944966316, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.6254907092384192, |
|
"grad_norm": 1.3046875, |
|
"learning_rate": 1.8496430642964694e-07, |
|
"logits/chosen": -2.8276329040527344, |
|
"logits/rejected": -2.8031527996063232, |
|
"logps/chosen": -289.2098083496094, |
|
"logps/rejected": -266.8305969238281, |
|
"loss": 0.6785, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.020906824618577957, |
|
"rewards/margins": 0.031347136944532394, |
|
"rewards/rejected": -0.010440316051244736, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.6281078251766553, |
|
"grad_norm": 1.375, |
|
"learning_rate": 1.8276124365652855e-07, |
|
"logits/chosen": -2.8458991050720215, |
|
"logits/rejected": -2.7970054149627686, |
|
"logps/chosen": -278.1546325683594, |
|
"logps/rejected": -264.06427001953125, |
|
"loss": 0.6782, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.01725105755031109, |
|
"rewards/margins": 0.031516797840595245, |
|
"rewards/rejected": -0.014265733771026134, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.6281078251766553, |
|
"eval_logits/chosen": -2.8540356159210205, |
|
"eval_logits/rejected": -2.8272616863250732, |
|
"eval_logps/chosen": -280.9810485839844, |
|
"eval_logps/rejected": -262.9154968261719, |
|
"eval_loss": 0.6776500344276428, |
|
"eval_rewards/accuracies": 0.6834999918937683, |
|
"eval_rewards/chosen": 0.01792425848543644, |
|
"eval_rewards/margins": 0.032695669680833817, |
|
"eval_rewards/rejected": -0.014771413058042526, |
|
"eval_runtime": 623.7982, |
|
"eval_samples_per_second": 3.206, |
|
"eval_steps_per_second": 0.401, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.6307249411148914, |
|
"grad_norm": 1.328125, |
|
"learning_rate": 1.805637953002149e-07, |
|
"logits/chosen": -2.8647313117980957, |
|
"logits/rejected": -2.8551206588745117, |
|
"logps/chosen": -258.0870666503906, |
|
"logps/rejected": -236.39266967773438, |
|
"loss": 0.6787, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.016582269221544266, |
|
"rewards/margins": 0.030466347932815552, |
|
"rewards/rejected": -0.013884077779948711, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.6333420570531274, |
|
"grad_norm": 1.1484375, |
|
"learning_rate": 1.7837214484701153e-07, |
|
"logits/chosen": -2.8555073738098145, |
|
"logits/rejected": -2.837476968765259, |
|
"logps/chosen": -266.9813537597656, |
|
"logps/rejected": -244.21142578125, |
|
"loss": 0.673, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.023186923936009407, |
|
"rewards/margins": 0.04209943115711212, |
|
"rewards/rejected": -0.018912509083747864, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.6359591729913635, |
|
"grad_norm": 1.28125, |
|
"learning_rate": 1.761864752991004e-07, |
|
"logits/chosen": -2.8415451049804688, |
|
"logits/rejected": -2.8176722526550293, |
|
"logps/chosen": -272.5819396972656, |
|
"logps/rejected": -260.46868896484375, |
|
"loss": 0.6765, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.019747793674468994, |
|
"rewards/margins": 0.034796275198459625, |
|
"rewards/rejected": -0.015048478730022907, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.6385762889295996, |
|
"grad_norm": 1.3515625, |
|
"learning_rate": 1.7400696915925995e-07, |
|
"logits/chosen": -2.8523507118225098, |
|
"logits/rejected": -2.8236594200134277, |
|
"logps/chosen": -287.25445556640625, |
|
"logps/rejected": -227.38986206054688, |
|
"loss": 0.6748, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.017432263121008873, |
|
"rewards/margins": 0.03874523937702179, |
|
"rewards/rejected": -0.021312978118658066, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.6411934048678356, |
|
"grad_norm": 1.6875, |
|
"learning_rate": 1.718338084156254e-07, |
|
"logits/chosen": -2.797588586807251, |
|
"logits/rejected": -2.780844211578369, |
|
"logps/chosen": -304.15277099609375, |
|
"logps/rejected": -267.3579406738281, |
|
"loss": 0.6756, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.023083947598934174, |
|
"rewards/margins": 0.036666251718997955, |
|
"rewards/rejected": -0.013582308776676655, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.6438105208060717, |
|
"grad_norm": 1.09375, |
|
"learning_rate": 1.696671745264937e-07, |
|
"logits/chosen": -2.860663652420044, |
|
"logits/rejected": -2.863227128982544, |
|
"logps/chosen": -295.4175720214844, |
|
"logps/rejected": -240.9844207763672, |
|
"loss": 0.6724, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.025130432099103928, |
|
"rewards/margins": 0.04342951625585556, |
|
"rewards/rejected": -0.018299078568816185, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.6464276367443078, |
|
"grad_norm": 1.3359375, |
|
"learning_rate": 1.67507248405171e-07, |
|
"logits/chosen": -2.846250534057617, |
|
"logits/rejected": -2.82800030708313, |
|
"logps/chosen": -270.4811706542969, |
|
"logps/rejected": -273.17401123046875, |
|
"loss": 0.6794, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.020759141072630882, |
|
"rewards/margins": 0.029174262657761574, |
|
"rewards/rejected": -0.00841512344777584, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.6490447526825438, |
|
"grad_norm": 1.2890625, |
|
"learning_rate": 1.6535421040486683e-07, |
|
"logits/chosen": -2.760650396347046, |
|
"logits/rejected": -2.743494987487793, |
|
"logps/chosen": -270.25933837890625, |
|
"logps/rejected": -240.8209228515625, |
|
"loss": 0.6748, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.016713624820113182, |
|
"rewards/margins": 0.03836838901042938, |
|
"rewards/rejected": -0.02165476605296135, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.6516618686207799, |
|
"grad_norm": 1.234375, |
|
"learning_rate": 1.6320824030363456e-07, |
|
"logits/chosen": -2.8263466358184814, |
|
"logits/rejected": -2.828117609024048, |
|
"logps/chosen": -248.79129028320312, |
|
"logps/rejected": -235.21774291992188, |
|
"loss": 0.6765, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.017327528446912766, |
|
"rewards/margins": 0.03489188104867935, |
|
"rewards/rejected": -0.01756434701383114, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.654278984559016, |
|
"grad_norm": 1.359375, |
|
"learning_rate": 1.6106951728936024e-07, |
|
"logits/chosen": -2.8797359466552734, |
|
"logits/rejected": -2.8334743976593018, |
|
"logps/chosen": -271.38677978515625, |
|
"logps/rejected": -267.6949768066406, |
|
"loss": 0.6753, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.020725153386592865, |
|
"rewards/margins": 0.03783208504319191, |
|
"rewards/rejected": -0.017106933519244194, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.654278984559016, |
|
"eval_logits/chosen": -2.852537155151367, |
|
"eval_logits/rejected": -2.8256473541259766, |
|
"eval_logps/chosen": -280.9631042480469, |
|
"eval_logps/rejected": -262.90740966796875, |
|
"eval_loss": 0.6776077151298523, |
|
"eval_rewards/accuracies": 0.6804999709129333, |
|
"eval_rewards/chosen": 0.01810392364859581, |
|
"eval_rewards/margins": 0.0327942781150341, |
|
"eval_rewards/rejected": -0.014690355397760868, |
|
"eval_runtime": 623.8344, |
|
"eval_samples_per_second": 3.206, |
|
"eval_steps_per_second": 0.401, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.656896100497252, |
|
"grad_norm": 1.3125, |
|
"learning_rate": 1.5893821994479994e-07, |
|
"logits/chosen": -2.860830307006836, |
|
"logits/rejected": -2.8481099605560303, |
|
"logps/chosen": -290.0372009277344, |
|
"logps/rejected": -253.4038848876953, |
|
"loss": 0.6757, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.024327334016561508, |
|
"rewards/margins": 0.03673567250370979, |
|
"rewards/rejected": -0.012408342212438583, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.6595132164354881, |
|
"grad_norm": 1.34375, |
|
"learning_rate": 1.5681452623266867e-07, |
|
"logits/chosen": -2.8527517318725586, |
|
"logits/rejected": -2.8060853481292725, |
|
"logps/chosen": -301.49542236328125, |
|
"logps/rejected": -247.3957977294922, |
|
"loss": 0.6683, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 0.03281102329492569, |
|
"rewards/margins": 0.05217736214399338, |
|
"rewards/rejected": -0.019366348162293434, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.6621303323737242, |
|
"grad_norm": 2.484375, |
|
"learning_rate": 1.546986134807801e-07, |
|
"logits/chosen": -2.8651843070983887, |
|
"logits/rejected": -2.8340165615081787, |
|
"logps/chosen": -263.162109375, |
|
"logps/rejected": -252.37625122070312, |
|
"loss": 0.6783, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.015812261030077934, |
|
"rewards/margins": 0.0313270129263401, |
|
"rewards/rejected": -0.015514750964939594, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.6647474483119602, |
|
"grad_norm": 1.296875, |
|
"learning_rate": 1.5259065836724034e-07, |
|
"logits/chosen": -2.7946388721466064, |
|
"logits/rejected": -2.7776710987091064, |
|
"logps/chosen": -262.2623291015625, |
|
"logps/rejected": -254.7265167236328, |
|
"loss": 0.6787, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.01517055369913578, |
|
"rewards/margins": 0.030337844043970108, |
|
"rewards/rejected": -0.015167290344834328, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.6673645642501963, |
|
"grad_norm": 1.25, |
|
"learning_rate": 1.5049083690569454e-07, |
|
"logits/chosen": -2.809542179107666, |
|
"logits/rejected": -2.793139934539795, |
|
"logps/chosen": -251.2034454345703, |
|
"logps/rejected": -249.8028106689453, |
|
"loss": 0.6761, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.018413711339235306, |
|
"rewards/margins": 0.03606470674276352, |
|
"rewards/rejected": -0.017650997266173363, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.6699816801884323, |
|
"grad_norm": 1.2421875, |
|
"learning_rate": 1.4839932443063056e-07, |
|
"logits/chosen": -2.837368965148926, |
|
"logits/rejected": -2.810147523880005, |
|
"logps/chosen": -305.4731750488281, |
|
"logps/rejected": -251.56161499023438, |
|
"loss": 0.6732, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.024250676855444908, |
|
"rewards/margins": 0.041700925678014755, |
|
"rewards/rejected": -0.017450252547860146, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.6725987961266684, |
|
"grad_norm": 1.1171875, |
|
"learning_rate": 1.46316295582738e-07, |
|
"logits/chosen": -2.8204097747802734, |
|
"logits/rejected": -2.8028788566589355, |
|
"logps/chosen": -257.53228759765625, |
|
"logps/rejected": -245.14895629882812, |
|
"loss": 0.6809, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.006483917590230703, |
|
"rewards/margins": 0.025950897485017776, |
|
"rewards/rejected": -0.019466979429125786, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.6752159120649045, |
|
"grad_norm": 1.6171875, |
|
"learning_rate": 1.4424192429432655e-07, |
|
"logits/chosen": -2.848489999771118, |
|
"logits/rejected": -2.8286855220794678, |
|
"logps/chosen": -270.4859619140625, |
|
"logps/rejected": -277.18206787109375, |
|
"loss": 0.6742, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.024375300854444504, |
|
"rewards/margins": 0.03957264870405197, |
|
"rewards/rejected": -0.015197351574897766, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.6778330280031405, |
|
"grad_norm": 2.0625, |
|
"learning_rate": 1.4217638377480158e-07, |
|
"logits/chosen": -2.829794406890869, |
|
"logits/rejected": -2.8167781829833984, |
|
"logps/chosen": -274.6668395996094, |
|
"logps/rejected": -262.61700439453125, |
|
"loss": 0.6789, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.014721376821398735, |
|
"rewards/margins": 0.02995181456208229, |
|
"rewards/rejected": -0.015230434946715832, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.6804501439413766, |
|
"grad_norm": 1.3203125, |
|
"learning_rate": 1.401198464962021e-07, |
|
"logits/chosen": -2.8342947959899902, |
|
"logits/rejected": -2.8125643730163574, |
|
"logps/chosen": -284.03253173828125, |
|
"logps/rejected": -243.7402801513672, |
|
"loss": 0.6776, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.015603385865688324, |
|
"rewards/margins": 0.03253168612718582, |
|
"rewards/rejected": -0.016928300261497498, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.6804501439413766, |
|
"eval_logits/chosen": -2.849823474884033, |
|
"eval_logits/rejected": -2.822629690170288, |
|
"eval_logps/chosen": -280.9640808105469, |
|
"eval_logps/rejected": -262.91668701171875, |
|
"eval_loss": 0.6775689721107483, |
|
"eval_rewards/accuracies": 0.6775000095367432, |
|
"eval_rewards/chosen": 0.018093857914209366, |
|
"eval_rewards/margins": 0.0328776054084301, |
|
"eval_rewards/rejected": -0.014783743768930435, |
|
"eval_runtime": 624.015, |
|
"eval_samples_per_second": 3.205, |
|
"eval_steps_per_second": 0.401, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.6830672598796127, |
|
"grad_norm": 1.15625, |
|
"learning_rate": 1.3807248417879894e-07, |
|
"logits/chosen": -2.866183280944824, |
|
"logits/rejected": -2.85908842086792, |
|
"logps/chosen": -286.482666015625, |
|
"logps/rejected": -269.18145751953125, |
|
"loss": 0.6742, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": 0.024856090545654297, |
|
"rewards/margins": 0.03993413224816322, |
|
"rewards/rejected": -0.015078043565154076, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.6856843758178487, |
|
"grad_norm": 1.40625, |
|
"learning_rate": 1.3603446777675665e-07, |
|
"logits/chosen": -2.7812695503234863, |
|
"logits/rejected": -2.7601253986358643, |
|
"logps/chosen": -280.71826171875, |
|
"logps/rejected": -258.6421813964844, |
|
"loss": 0.6757, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.018343228846788406, |
|
"rewards/margins": 0.036758117377758026, |
|
"rewards/rejected": -0.01841488853096962, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.6883014917560848, |
|
"grad_norm": 1.3359375, |
|
"learning_rate": 1.3400596746385814e-07, |
|
"logits/chosen": -2.844383716583252, |
|
"logits/rejected": -2.8064093589782715, |
|
"logps/chosen": -286.34820556640625, |
|
"logps/rejected": -258.3263244628906, |
|
"loss": 0.6781, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.018320731818675995, |
|
"rewards/margins": 0.03194695711135864, |
|
"rewards/rejected": -0.013626225292682648, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.6909186076943209, |
|
"grad_norm": 1.1484375, |
|
"learning_rate": 1.3198715261929586e-07, |
|
"logits/chosen": -2.8701038360595703, |
|
"logits/rejected": -2.835305690765381, |
|
"logps/chosen": -248.2649688720703, |
|
"logps/rejected": -243.41397094726562, |
|
"loss": 0.6755, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.015776509419083595, |
|
"rewards/margins": 0.03662073612213135, |
|
"rewards/rejected": -0.02084423042833805, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.6935357236325569, |
|
"grad_norm": 1.3359375, |
|
"learning_rate": 1.299781918135282e-07, |
|
"logits/chosen": -2.85074782371521, |
|
"logits/rejected": -2.8091163635253906, |
|
"logps/chosen": -315.61590576171875, |
|
"logps/rejected": -293.9820251464844, |
|
"loss": 0.6697, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.035952307283878326, |
|
"rewards/margins": 0.04937596619129181, |
|
"rewards/rejected": -0.013423657044768333, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.696152839570793, |
|
"grad_norm": 1.3359375, |
|
"learning_rate": 1.279792527942045e-07, |
|
"logits/chosen": -2.8586459159851074, |
|
"logits/rejected": -2.8158531188964844, |
|
"logps/chosen": -284.60882568359375, |
|
"logps/rejected": -277.387451171875, |
|
"loss": 0.6764, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.0246993750333786, |
|
"rewards/margins": 0.035598695278167725, |
|
"rewards/rejected": -0.010899320244789124, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.6987699555090291, |
|
"grad_norm": 1.6796875, |
|
"learning_rate": 1.259905024721576e-07, |
|
"logits/chosen": -2.8398678302764893, |
|
"logits/rejected": -2.823967695236206, |
|
"logps/chosen": -273.98944091796875, |
|
"logps/rejected": -254.6985321044922, |
|
"loss": 0.6742, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.019164234399795532, |
|
"rewards/margins": 0.039530009031295776, |
|
"rewards/rejected": -0.020365772768855095, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.7013870714472651, |
|
"grad_norm": 1.3984375, |
|
"learning_rate": 1.2401210690746703e-07, |
|
"logits/chosen": -2.8383288383483887, |
|
"logits/rejected": -2.8142457008361816, |
|
"logps/chosen": -283.2543029785156, |
|
"logps/rejected": -252.313720703125, |
|
"loss": 0.6763, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.02543952502310276, |
|
"rewards/margins": 0.03542017191648483, |
|
"rewards/rejected": -0.009980651549994946, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.7040041873855012, |
|
"grad_norm": 1.5625, |
|
"learning_rate": 1.2204423129559305e-07, |
|
"logits/chosen": -2.861647844314575, |
|
"logits/rejected": -2.8627748489379883, |
|
"logps/chosen": -281.1255798339844, |
|
"logps/rejected": -280.34027099609375, |
|
"loss": 0.6749, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.021706473082304, |
|
"rewards/margins": 0.03822758048772812, |
|
"rewards/rejected": -0.016521107405424118, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.7066213033237373, |
|
"grad_norm": 2.171875, |
|
"learning_rate": 1.2008703995358299e-07, |
|
"logits/chosen": -2.837878704071045, |
|
"logits/rejected": -2.8239123821258545, |
|
"logps/chosen": -279.67852783203125, |
|
"logps/rejected": -253.0082550048828, |
|
"loss": 0.6774, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.020584728568792343, |
|
"rewards/margins": 0.033195436000823975, |
|
"rewards/rejected": -0.012610706500709057, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.7066213033237373, |
|
"eval_logits/chosen": -2.85300874710083, |
|
"eval_logits/rejected": -2.82612943649292, |
|
"eval_logps/chosen": -280.95526123046875, |
|
"eval_logps/rejected": -262.92626953125, |
|
"eval_loss": 0.6774773001670837, |
|
"eval_rewards/accuracies": 0.6859999895095825, |
|
"eval_rewards/chosen": 0.018182458356022835, |
|
"eval_rewards/margins": 0.03306160494685173, |
|
"eval_rewards/rejected": -0.014879145659506321, |
|
"eval_runtime": 623.2959, |
|
"eval_samples_per_second": 3.209, |
|
"eval_steps_per_second": 0.401, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.7092384192619733, |
|
"grad_norm": 1.6171875, |
|
"learning_rate": 1.1814069630635068e-07, |
|
"logits/chosen": -2.8202016353607178, |
|
"logits/rejected": -2.8188061714172363, |
|
"logps/chosen": -286.5379333496094, |
|
"logps/rejected": -281.89678955078125, |
|
"loss": 0.6787, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.02239087037742138, |
|
"rewards/margins": 0.030664747580885887, |
|
"rewards/rejected": -0.008273878134787083, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.7118555352002094, |
|
"grad_norm": 1.7109375, |
|
"learning_rate": 1.1620536287303051e-07, |
|
"logits/chosen": -2.8520309925079346, |
|
"logits/rejected": -2.8322553634643555, |
|
"logps/chosen": -306.9613342285156, |
|
"logps/rejected": -276.6466979980469, |
|
"loss": 0.6798, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.024515343829989433, |
|
"rewards/margins": 0.02836497500538826, |
|
"rewards/rejected": -0.0038496279157698154, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.7144726511384454, |
|
"grad_norm": 1.2265625, |
|
"learning_rate": 1.1428120125340716e-07, |
|
"logits/chosen": -2.8408806324005127, |
|
"logits/rejected": -2.8193554878234863, |
|
"logps/chosen": -278.6138000488281, |
|
"logps/rejected": -233.0282440185547, |
|
"loss": 0.6728, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 0.020976107567548752, |
|
"rewards/margins": 0.042338818311691284, |
|
"rewards/rejected": -0.021362707018852234, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.7170897670766815, |
|
"grad_norm": 1.3515625, |
|
"learning_rate": 1.123683721144223e-07, |
|
"logits/chosen": -2.8390605449676514, |
|
"logits/rejected": -2.817472457885742, |
|
"logps/chosen": -299.55767822265625, |
|
"logps/rejected": -270.6512145996094, |
|
"loss": 0.6779, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.022130262106657028, |
|
"rewards/margins": 0.03234560787677765, |
|
"rewards/rejected": -0.010215344838798046, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.7197068830149176, |
|
"grad_norm": 1.4296875, |
|
"learning_rate": 1.1046703517675845e-07, |
|
"logits/chosen": -2.8513295650482178, |
|
"logits/rejected": -2.834134578704834, |
|
"logps/chosen": -269.61492919921875, |
|
"logps/rejected": -279.116455078125, |
|
"loss": 0.6781, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.020882591605186462, |
|
"rewards/margins": 0.03162946552038193, |
|
"rewards/rejected": -0.010746878571808338, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.7223239989531536, |
|
"grad_norm": 1.25, |
|
"learning_rate": 1.085773492015028e-07, |
|
"logits/chosen": -2.837613821029663, |
|
"logits/rejected": -2.811807155609131, |
|
"logps/chosen": -262.3995361328125, |
|
"logps/rejected": -229.64999389648438, |
|
"loss": 0.671, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.02249622717499733, |
|
"rewards/margins": 0.04641376808285713, |
|
"rewards/rejected": -0.023917539045214653, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.7249411148913897, |
|
"grad_norm": 1.421875, |
|
"learning_rate": 1.0669947197689033e-07, |
|
"logits/chosen": -2.829591989517212, |
|
"logits/rejected": -2.78979754447937, |
|
"logps/chosen": -289.36322021484375, |
|
"logps/rejected": -266.07537841796875, |
|
"loss": 0.6777, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.016385111957788467, |
|
"rewards/margins": 0.0324719175696373, |
|
"rewards/rejected": -0.01608681119978428, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.7275582308296258, |
|
"grad_norm": 1.53125, |
|
"learning_rate": 1.048335603051291e-07, |
|
"logits/chosen": -2.811000347137451, |
|
"logits/rejected": -2.7975831031799316, |
|
"logps/chosen": -303.7996520996094, |
|
"logps/rejected": -278.1279296875, |
|
"loss": 0.6693, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 0.022211695089936256, |
|
"rewards/margins": 0.05009465292096138, |
|
"rewards/rejected": -0.027882959693670273, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.7301753467678618, |
|
"grad_norm": 2.515625, |
|
"learning_rate": 1.0297976998930663e-07, |
|
"logits/chosen": -2.853620767593384, |
|
"logits/rejected": -2.842625379562378, |
|
"logps/chosen": -290.62567138671875, |
|
"logps/rejected": -259.65020751953125, |
|
"loss": 0.6734, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.018552960827946663, |
|
"rewards/margins": 0.04151231050491333, |
|
"rewards/rejected": -0.022959351539611816, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.7327924627060979, |
|
"grad_norm": 1.421875, |
|
"learning_rate": 1.0113825582038077e-07, |
|
"logits/chosen": -2.854717493057251, |
|
"logits/rejected": -2.836151123046875, |
|
"logps/chosen": -279.61346435546875, |
|
"logps/rejected": -265.38531494140625, |
|
"loss": 0.679, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.020294269546866417, |
|
"rewards/margins": 0.02987518534064293, |
|
"rewards/rejected": -0.009580916725099087, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.7327924627060979, |
|
"eval_logits/chosen": -2.853910446166992, |
|
"eval_logits/rejected": -2.8271100521087646, |
|
"eval_logps/chosen": -280.93585205078125, |
|
"eval_logps/rejected": -262.9162292480469, |
|
"eval_loss": 0.6774327754974365, |
|
"eval_rewards/accuracies": 0.6850000023841858, |
|
"eval_rewards/chosen": 0.018376635387539864, |
|
"eval_rewards/margins": 0.0331551730632782, |
|
"eval_rewards/rejected": -0.014778541401028633, |
|
"eval_runtime": 623.7205, |
|
"eval_samples_per_second": 3.207, |
|
"eval_steps_per_second": 0.401, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.735409578644334, |
|
"grad_norm": 1.453125, |
|
"learning_rate": 9.930917156425475e-08, |
|
"logits/chosen": -2.86027193069458, |
|
"logits/rejected": -2.840989828109741, |
|
"logps/chosen": -278.8993225097656, |
|
"logps/rejected": -277.65216064453125, |
|
"loss": 0.6774, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.01882421225309372, |
|
"rewards/margins": 0.033416565507650375, |
|
"rewards/rejected": -0.01459235418587923, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.73802669458257, |
|
"grad_norm": 1.484375, |
|
"learning_rate": 9.749266994893754e-08, |
|
"logits/chosen": -2.8003342151641846, |
|
"logits/rejected": -2.7634270191192627, |
|
"logps/chosen": -253.88516235351562, |
|
"logps/rejected": -244.5758056640625, |
|
"loss": 0.6833, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.008167347870767117, |
|
"rewards/margins": 0.02078983187675476, |
|
"rewards/rejected": -0.012622484937310219, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.7406438105208061, |
|
"grad_norm": 1.265625, |
|
"learning_rate": 9.568890265179128e-08, |
|
"logits/chosen": -2.8219611644744873, |
|
"logits/rejected": -2.818441867828369, |
|
"logps/chosen": -277.968994140625, |
|
"logps/rejected": -250.047119140625, |
|
"loss": 0.6772, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.020925423130393028, |
|
"rewards/margins": 0.033755991607904434, |
|
"rewards/rejected": -0.012830562889575958, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.7432609264590422, |
|
"grad_norm": 1.265625, |
|
"learning_rate": 9.389802028686616e-08, |
|
"logits/chosen": -2.8413894176483154, |
|
"logits/rejected": -2.823989152908325, |
|
"logps/chosen": -277.80926513671875, |
|
"logps/rejected": -246.23593139648438, |
|
"loss": 0.6805, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.011136185377836227, |
|
"rewards/margins": 0.02661561407148838, |
|
"rewards/rejected": -0.015479430556297302, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.7458780423972782, |
|
"grad_norm": 1.2734375, |
|
"learning_rate": 9.212017239232426e-08, |
|
"logits/chosen": -2.831408977508545, |
|
"logits/rejected": -2.8223624229431152, |
|
"logps/chosen": -287.7478942871094, |
|
"logps/rejected": -267.5721435546875, |
|
"loss": 0.673, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.021497588604688644, |
|
"rewards/margins": 0.04216768592596054, |
|
"rewards/rejected": -0.020670095458626747, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.7484951583355143, |
|
"grad_norm": 1.609375, |
|
"learning_rate": 9.035550741795328e-08, |
|
"logits/chosen": -2.814898729324341, |
|
"logits/rejected": -2.8209142684936523, |
|
"logps/chosen": -271.9150085449219, |
|
"logps/rejected": -279.5975646972656, |
|
"loss": 0.6727, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.02707098424434662, |
|
"rewards/margins": 0.04284884035587311, |
|
"rewards/rejected": -0.01577785238623619, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.7511122742737504, |
|
"grad_norm": 1.2109375, |
|
"learning_rate": 8.860417271277065e-08, |
|
"logits/chosen": -2.8868813514709473, |
|
"logits/rejected": -2.88276743888855, |
|
"logps/chosen": -283.67864990234375, |
|
"logps/rejected": -275.3718566894531, |
|
"loss": 0.6819, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.018196921795606613, |
|
"rewards/margins": 0.02416098490357399, |
|
"rewards/rejected": -0.005964064504951239, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.7537293902119864, |
|
"grad_norm": 1.3359375, |
|
"learning_rate": 8.686631451272029e-08, |
|
"logits/chosen": -2.869019031524658, |
|
"logits/rejected": -2.84165096282959, |
|
"logps/chosen": -268.4243469238281, |
|
"logps/rejected": -247.5635986328125, |
|
"loss": 0.6799, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.0169739481061697, |
|
"rewards/margins": 0.028122667223215103, |
|
"rewards/rejected": -0.011148716323077679, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.7563465061502225, |
|
"grad_norm": 1.4453125, |
|
"learning_rate": 8.514207792846168e-08, |
|
"logits/chosen": -2.8492226600646973, |
|
"logits/rejected": -2.844165086746216, |
|
"logps/chosen": -265.9365539550781, |
|
"logps/rejected": -237.9735107421875, |
|
"loss": 0.6782, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.012092510238289833, |
|
"rewards/margins": 0.031133780255913734, |
|
"rewards/rejected": -0.0190412737429142, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.7589636220884585, |
|
"grad_norm": 1.34375, |
|
"learning_rate": 8.343160693325355e-08, |
|
"logits/chosen": -2.815880060195923, |
|
"logits/rejected": -2.804381847381592, |
|
"logps/chosen": -269.6540832519531, |
|
"logps/rejected": -268.2055358886719, |
|
"loss": 0.6782, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.017512865364551544, |
|
"rewards/margins": 0.03190717473626137, |
|
"rewards/rejected": -0.014394307509064674, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.7589636220884585, |
|
"eval_logits/chosen": -2.852909803390503, |
|
"eval_logits/rejected": -2.826012134552002, |
|
"eval_logps/chosen": -280.9681396484375, |
|
"eval_logps/rejected": -262.93359375, |
|
"eval_loss": 0.6775044202804565, |
|
"eval_rewards/accuracies": 0.684499979019165, |
|
"eval_rewards/chosen": 0.01805364154279232, |
|
"eval_rewards/margins": 0.033006127923727036, |
|
"eval_rewards/rejected": -0.014952489174902439, |
|
"eval_runtime": 622.9143, |
|
"eval_samples_per_second": 3.211, |
|
"eval_steps_per_second": 0.401, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.7615807380266946, |
|
"grad_norm": 1.09375, |
|
"learning_rate": 8.173504435093173e-08, |
|
"logits/chosen": -2.832644462585449, |
|
"logits/rejected": -2.7984976768493652, |
|
"logps/chosen": -263.4750061035156, |
|
"logps/rejected": -228.9986114501953, |
|
"loss": 0.674, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.018485140055418015, |
|
"rewards/margins": 0.040302351117134094, |
|
"rewards/rejected": -0.02181720733642578, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.7641978539649307, |
|
"grad_norm": 1.2890625, |
|
"learning_rate": 8.005253184398359e-08, |
|
"logits/chosen": -2.8306822776794434, |
|
"logits/rejected": -2.8159360885620117, |
|
"logps/chosen": -292.702880859375, |
|
"logps/rejected": -288.18695068359375, |
|
"loss": 0.6762, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.02734486199915409, |
|
"rewards/margins": 0.03592860698699951, |
|
"rewards/rejected": -0.008583742193877697, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.7668149699031667, |
|
"grad_norm": 1.21875, |
|
"learning_rate": 7.838420990171926e-08, |
|
"logits/chosen": -2.8607592582702637, |
|
"logits/rejected": -2.825814962387085, |
|
"logps/chosen": -286.3602600097656, |
|
"logps/rejected": -260.51641845703125, |
|
"loss": 0.6777, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.01990603655576706, |
|
"rewards/margins": 0.03237393498420715, |
|
"rewards/rejected": -0.012467900291085243, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.7694320858414028, |
|
"grad_norm": 1.2578125, |
|
"learning_rate": 7.673021782854083e-08, |
|
"logits/chosen": -2.780974864959717, |
|
"logits/rejected": -2.763579845428467, |
|
"logps/chosen": -284.2770690917969, |
|
"logps/rejected": -233.38668823242188, |
|
"loss": 0.6742, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.02143119089305401, |
|
"rewards/margins": 0.04003281518816948, |
|
"rewards/rejected": -0.01860162802040577, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.7720492017796389, |
|
"grad_norm": 1.234375, |
|
"learning_rate": 7.509069373231039e-08, |
|
"logits/chosen": -2.8137221336364746, |
|
"logits/rejected": -2.788684368133545, |
|
"logps/chosen": -266.8929138183594, |
|
"logps/rejected": -250.69326782226562, |
|
"loss": 0.6767, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.018960343673825264, |
|
"rewards/margins": 0.034537579864263535, |
|
"rewards/rejected": -0.015577234327793121, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.7746663177178749, |
|
"grad_norm": 1.3203125, |
|
"learning_rate": 7.346577451281821e-08, |
|
"logits/chosen": -2.823444366455078, |
|
"logits/rejected": -2.826974391937256, |
|
"logps/chosen": -279.7121276855469, |
|
"logps/rejected": -261.50518798828125, |
|
"loss": 0.6768, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.018806222826242447, |
|
"rewards/margins": 0.03476772829890251, |
|
"rewards/rejected": -0.015961505472660065, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.777283433656111, |
|
"grad_norm": 1.59375, |
|
"learning_rate": 7.185559585035136e-08, |
|
"logits/chosen": -2.84146785736084, |
|
"logits/rejected": -2.8069043159484863, |
|
"logps/chosen": -296.74932861328125, |
|
"logps/rejected": -284.635009765625, |
|
"loss": 0.6736, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.02409261278808117, |
|
"rewards/margins": 0.041189759969711304, |
|
"rewards/rejected": -0.017097145318984985, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.7799005495943471, |
|
"grad_norm": 1.5703125, |
|
"learning_rate": 7.026029219436502e-08, |
|
"logits/chosen": -2.816230535507202, |
|
"logits/rejected": -2.792483329772949, |
|
"logps/chosen": -270.294189453125, |
|
"logps/rejected": -262.2767333984375, |
|
"loss": 0.6756, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.01601272262632847, |
|
"rewards/margins": 0.03693497180938721, |
|
"rewards/rejected": -0.02092224732041359, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.7825176655325831, |
|
"grad_norm": 1.1875, |
|
"learning_rate": 6.867999675225522e-08, |
|
"logits/chosen": -2.864154815673828, |
|
"logits/rejected": -2.833688259124756, |
|
"logps/chosen": -245.67288208007812, |
|
"logps/rejected": -232.77743530273438, |
|
"loss": 0.6775, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.017845796421170235, |
|
"rewards/margins": 0.03295399993658066, |
|
"rewards/rejected": -0.015108207240700722, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.7851347814708192, |
|
"grad_norm": 1.203125, |
|
"learning_rate": 6.711484147823662e-08, |
|
"logits/chosen": -2.8105015754699707, |
|
"logits/rejected": -2.8049824237823486, |
|
"logps/chosen": -248.8085479736328, |
|
"logps/rejected": -257.3614807128906, |
|
"loss": 0.6784, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.017226997762918472, |
|
"rewards/margins": 0.03089422546327114, |
|
"rewards/rejected": -0.01366722583770752, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.7851347814708192, |
|
"eval_logits/chosen": -2.8550119400024414, |
|
"eval_logits/rejected": -2.8283438682556152, |
|
"eval_logps/chosen": -280.97314453125, |
|
"eval_logps/rejected": -262.9586486816406, |
|
"eval_loss": 0.67740797996521, |
|
"eval_rewards/accuracies": 0.6890000104904175, |
|
"eval_rewards/chosen": 0.018003566190600395, |
|
"eval_rewards/margins": 0.03320648893713951, |
|
"eval_rewards/rejected": -0.015202920883893967, |
|
"eval_runtime": 623.2623, |
|
"eval_samples_per_second": 3.209, |
|
"eval_steps_per_second": 0.401, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.7877518974090553, |
|
"grad_norm": 1.4609375, |
|
"learning_rate": 6.556495706232412e-08, |
|
"logits/chosen": -2.820091724395752, |
|
"logits/rejected": -2.819214105606079, |
|
"logps/chosen": -285.6153869628906, |
|
"logps/rejected": -269.1045227050781, |
|
"loss": 0.6751, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.017025303095579147, |
|
"rewards/margins": 0.03793289139866829, |
|
"rewards/rejected": -0.020907586440443993, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.7903690133472913, |
|
"grad_norm": 1.4375, |
|
"learning_rate": 6.403047291942057e-08, |
|
"logits/chosen": -2.7955071926116943, |
|
"logits/rejected": -2.757645606994629, |
|
"logps/chosen": -243.58535766601562, |
|
"logps/rejected": -218.78341674804688, |
|
"loss": 0.6802, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.010361788794398308, |
|
"rewards/margins": 0.02738323248922825, |
|
"rewards/rejected": -0.017021439969539642, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.7929861292855274, |
|
"grad_norm": 1.6328125, |
|
"learning_rate": 6.251151717851021e-08, |
|
"logits/chosen": -2.819065570831299, |
|
"logits/rejected": -2.8068315982818604, |
|
"logps/chosen": -249.41567993164062, |
|
"logps/rejected": -235.5353546142578, |
|
"loss": 0.681, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.016130346804857254, |
|
"rewards/margins": 0.025897834450006485, |
|
"rewards/rejected": -0.009767485782504082, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.7956032452237635, |
|
"grad_norm": 1.1328125, |
|
"learning_rate": 6.100821667196041e-08, |
|
"logits/chosen": -2.893716812133789, |
|
"logits/rejected": -2.835082530975342, |
|
"logps/chosen": -288.95574951171875, |
|
"logps/rejected": -224.43881225585938, |
|
"loss": 0.6735, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.01886625960469246, |
|
"rewards/margins": 0.04120669886469841, |
|
"rewards/rejected": -0.0223404411226511, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.7982203611619995, |
|
"grad_norm": 1.4140625, |
|
"learning_rate": 5.952069692493061e-08, |
|
"logits/chosen": -2.790799617767334, |
|
"logits/rejected": -2.7859864234924316, |
|
"logps/chosen": -243.0890350341797, |
|
"logps/rejected": -251.2998504638672, |
|
"loss": 0.6737, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.025469347834587097, |
|
"rewards/margins": 0.041153885424137115, |
|
"rewards/rejected": -0.01568453758955002, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.8008374771002356, |
|
"grad_norm": 1.578125, |
|
"learning_rate": 5.8049082144891794e-08, |
|
"logits/chosen": -2.7803027629852295, |
|
"logits/rejected": -2.7686164379119873, |
|
"logps/chosen": -278.40985107421875, |
|
"logps/rejected": -323.7679443359375, |
|
"loss": 0.6815, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.01774556189775467, |
|
"rewards/margins": 0.024822643026709557, |
|
"rewards/rejected": -0.007077082060277462, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.8034545930384716, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 5.659349521125459e-08, |
|
"logits/chosen": -2.9022018909454346, |
|
"logits/rejected": -2.901132822036743, |
|
"logps/chosen": -296.6135559082031, |
|
"logps/rejected": -276.8060607910156, |
|
"loss": 0.6787, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.020619522780179977, |
|
"rewards/margins": 0.030655449256300926, |
|
"rewards/rejected": -0.010035926476120949, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.8060717089767077, |
|
"grad_norm": 1.2578125, |
|
"learning_rate": 5.5154057665109e-08, |
|
"logits/chosen": -2.8536829948425293, |
|
"logits/rejected": -2.8322811126708984, |
|
"logps/chosen": -274.37506103515625, |
|
"logps/rejected": -250.5011749267578, |
|
"loss": 0.6774, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.017293009907007217, |
|
"rewards/margins": 0.03290316089987755, |
|
"rewards/rejected": -0.015610149130225182, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.8086888249149438, |
|
"grad_norm": 1.34375, |
|
"learning_rate": 5.3730889699075853e-08, |
|
"logits/chosen": -2.864671230316162, |
|
"logits/rejected": -2.833627700805664, |
|
"logps/chosen": -294.9132995605469, |
|
"logps/rejected": -240.95458984375, |
|
"loss": 0.6754, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.020536890253424644, |
|
"rewards/margins": 0.037192363291978836, |
|
"rewards/rejected": -0.016655471175909042, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.8113059408531798, |
|
"grad_norm": 1.34375, |
|
"learning_rate": 5.2324110147270893e-08, |
|
"logits/chosen": -2.837832450866699, |
|
"logits/rejected": -2.827129602432251, |
|
"logps/chosen": -296.73492431640625, |
|
"logps/rejected": -290.35296630859375, |
|
"loss": 0.6713, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.03090183436870575, |
|
"rewards/margins": 0.04617828503251076, |
|
"rewards/rejected": -0.015276448801159859, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.8113059408531798, |
|
"eval_logits/chosen": -2.8546648025512695, |
|
"eval_logits/rejected": -2.827969789505005, |
|
"eval_logps/chosen": -280.9596252441406, |
|
"eval_logps/rejected": -262.92376708984375, |
|
"eval_loss": 0.6775153279304504, |
|
"eval_rewards/accuracies": 0.6825000047683716, |
|
"eval_rewards/chosen": 0.01813914068043232, |
|
"eval_rewards/margins": 0.03299335017800331, |
|
"eval_rewards/rejected": -0.014854210428893566, |
|
"eval_runtime": 623.3449, |
|
"eval_samples_per_second": 3.208, |
|
"eval_steps_per_second": 0.401, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.8139230567914159, |
|
"grad_norm": 1.2578125, |
|
"learning_rate": 5.0933836475381795e-08, |
|
"logits/chosen": -2.8541502952575684, |
|
"logits/rejected": -2.819565534591675, |
|
"logps/chosen": -299.7816467285156, |
|
"logps/rejected": -286.51507568359375, |
|
"loss": 0.6744, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.02967965044081211, |
|
"rewards/margins": 0.039316095411777496, |
|
"rewards/rejected": -0.00963644403964281, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.816540172729652, |
|
"grad_norm": 1.8203125, |
|
"learning_rate": 4.956018477086005e-08, |
|
"logits/chosen": -2.8387677669525146, |
|
"logits/rejected": -2.8084654808044434, |
|
"logps/chosen": -289.3456115722656, |
|
"logps/rejected": -263.594970703125, |
|
"loss": 0.6773, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.019268421456217766, |
|
"rewards/margins": 0.03356018662452698, |
|
"rewards/rejected": -0.014291766099631786, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.819157288667888, |
|
"grad_norm": 1.203125, |
|
"learning_rate": 4.820326973322763e-08, |
|
"logits/chosen": -2.839634656906128, |
|
"logits/rejected": -2.8185315132141113, |
|
"logps/chosen": -266.79034423828125, |
|
"logps/rejected": -267.4413146972656, |
|
"loss": 0.6779, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.010893099941313267, |
|
"rewards/margins": 0.03200749307870865, |
|
"rewards/rejected": -0.021114394068717957, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.821774404606124, |
|
"grad_norm": 1.2578125, |
|
"learning_rate": 4.686320466449981e-08, |
|
"logits/chosen": -2.8295464515686035, |
|
"logits/rejected": -2.778109312057495, |
|
"logps/chosen": -256.5823059082031, |
|
"logps/rejected": -256.2238464355469, |
|
"loss": 0.6793, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.012616870924830437, |
|
"rewards/margins": 0.029145419597625732, |
|
"rewards/rejected": -0.016528548672795296, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.8243915205443602, |
|
"grad_norm": 1.3359375, |
|
"learning_rate": 4.554010145972417e-08, |
|
"logits/chosen": -2.8855738639831543, |
|
"logits/rejected": -2.836945056915283, |
|
"logps/chosen": -278.258544921875, |
|
"logps/rejected": -268.62969970703125, |
|
"loss": 0.6786, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.016889285296201706, |
|
"rewards/margins": 0.030887436121702194, |
|
"rewards/rejected": -0.013998152688145638, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.8270086364825961, |
|
"grad_norm": 1.25, |
|
"learning_rate": 4.423407059763745e-08, |
|
"logits/chosen": -2.8406424522399902, |
|
"logits/rejected": -2.825413227081299, |
|
"logps/chosen": -289.0135192871094, |
|
"logps/rejected": -282.2171630859375, |
|
"loss": 0.6774, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.020682137459516525, |
|
"rewards/margins": 0.0332643985748291, |
|
"rewards/rejected": -0.012582260183990002, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.8296257524208323, |
|
"grad_norm": 1.15625, |
|
"learning_rate": 4.294522113144078e-08, |
|
"logits/chosen": -2.791628360748291, |
|
"logits/rejected": -2.753540515899658, |
|
"logps/chosen": -284.7926025390625, |
|
"logps/rejected": -252.89364624023438, |
|
"loss": 0.6749, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 0.02161264792084694, |
|
"rewards/margins": 0.03834725171327591, |
|
"rewards/rejected": -0.01673460379242897, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.8322428683590684, |
|
"grad_norm": 1.140625, |
|
"learning_rate": 4.1673660679693804e-08, |
|
"logits/chosen": -2.8362486362457275, |
|
"logits/rejected": -2.8258681297302246, |
|
"logps/chosen": -235.7146759033203, |
|
"logps/rejected": -263.33929443359375, |
|
"loss": 0.6785, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.01654769480228424, |
|
"rewards/margins": 0.03069342114031315, |
|
"rewards/rejected": -0.014145726338028908, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.8348599842973043, |
|
"grad_norm": 1.4609375, |
|
"learning_rate": 4.041949541732825e-08, |
|
"logits/chosen": -2.846217632293701, |
|
"logits/rejected": -2.845409870147705, |
|
"logps/chosen": -278.4856872558594, |
|
"logps/rejected": -266.3340759277344, |
|
"loss": 0.678, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": 0.018012622371315956, |
|
"rewards/margins": 0.03179007023572922, |
|
"rewards/rejected": -0.01377745158970356, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.8374771002355405, |
|
"grad_norm": 2.15625, |
|
"learning_rate": 3.9182830066782605e-08, |
|
"logits/chosen": -2.809504747390747, |
|
"logits/rejected": -2.8133652210235596, |
|
"logps/chosen": -273.1795349121094, |
|
"logps/rejected": -288.18035888671875, |
|
"loss": 0.6774, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.016945619136095047, |
|
"rewards/margins": 0.03313397616147995, |
|
"rewards/rejected": -0.016188358888030052, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.8374771002355405, |
|
"eval_logits/chosen": -2.854266405105591, |
|
"eval_logits/rejected": -2.8275365829467773, |
|
"eval_logps/chosen": -280.958251953125, |
|
"eval_logps/rejected": -262.9411315917969, |
|
"eval_loss": 0.6774209141731262, |
|
"eval_rewards/accuracies": 0.6830000281333923, |
|
"eval_rewards/chosen": 0.018152602016925812, |
|
"eval_rewards/margins": 0.03318041190505028, |
|
"eval_rewards/rejected": -0.015027807094156742, |
|
"eval_runtime": 624.1731, |
|
"eval_samples_per_second": 3.204, |
|
"eval_steps_per_second": 0.401, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.8400942161737766, |
|
"grad_norm": 1.484375, |
|
"learning_rate": 3.79637678892577e-08, |
|
"logits/chosen": -2.8115928173065186, |
|
"logits/rejected": -2.8144474029541016, |
|
"logps/chosen": -290.44683837890625, |
|
"logps/rejected": -275.8350830078125, |
|
"loss": 0.6819, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.01646428182721138, |
|
"rewards/margins": 0.0238783098757267, |
|
"rewards/rejected": -0.007414024323225021, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.8427113321120125, |
|
"grad_norm": 1.203125, |
|
"learning_rate": 3.6762410676094645e-08, |
|
"logits/chosen": -2.822303533554077, |
|
"logits/rejected": -2.8179895877838135, |
|
"logps/chosen": -316.88623046875, |
|
"logps/rejected": -273.47967529296875, |
|
"loss": 0.6733, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.02506067231297493, |
|
"rewards/margins": 0.04178461804986, |
|
"rewards/rejected": -0.01672394946217537, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.8453284480502486, |
|
"grad_norm": 1.21875, |
|
"learning_rate": 3.557885874027497e-08, |
|
"logits/chosen": -2.8261001110076904, |
|
"logits/rejected": -2.814418077468872, |
|
"logps/chosen": -276.53558349609375, |
|
"logps/rejected": -266.6348571777344, |
|
"loss": 0.6794, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.018592124804854393, |
|
"rewards/margins": 0.029115628451108932, |
|
"rewards/rejected": -0.010523504577577114, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.8479455639884846, |
|
"grad_norm": 1.359375, |
|
"learning_rate": 3.441321090804469e-08, |
|
"logits/chosen": -2.8752944469451904, |
|
"logits/rejected": -2.841357469558716, |
|
"logps/chosen": -281.1075744628906, |
|
"logps/rejected": -243.8683319091797, |
|
"loss": 0.6785, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.01994512416422367, |
|
"rewards/margins": 0.030809426680207253, |
|
"rewards/rejected": -0.010864300653338432, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.8505626799267207, |
|
"grad_norm": 1.25, |
|
"learning_rate": 3.326556451066234e-08, |
|
"logits/chosen": -2.877654552459717, |
|
"logits/rejected": -2.847358465194702, |
|
"logps/chosen": -308.5765380859375, |
|
"logps/rejected": -283.89654541015625, |
|
"loss": 0.6741, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.02807055041193962, |
|
"rewards/margins": 0.04025264084339142, |
|
"rewards/rejected": -0.012182091362774372, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.8531797958649568, |
|
"grad_norm": 1.2578125, |
|
"learning_rate": 3.2136015376271946e-08, |
|
"logits/chosen": -2.8326189517974854, |
|
"logits/rejected": -2.801771640777588, |
|
"logps/chosen": -274.5975646972656, |
|
"logps/rejected": -257.2004699707031, |
|
"loss": 0.6812, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.010432440787553787, |
|
"rewards/margins": 0.02520870603621006, |
|
"rewards/rejected": -0.014776261523365974, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.8557969118031928, |
|
"grad_norm": 1.6640625, |
|
"learning_rate": 3.102465782190106e-08, |
|
"logits/chosen": -2.84391713142395, |
|
"logits/rejected": -2.8385825157165527, |
|
"logps/chosen": -264.6709899902344, |
|
"logps/rejected": -251.43215942382812, |
|
"loss": 0.6785, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.017527025192975998, |
|
"rewards/margins": 0.0312645398080349, |
|
"rewards/rejected": -0.01373751275241375, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.8584140277414289, |
|
"grad_norm": 1.28125, |
|
"learning_rate": 2.993158464558565e-08, |
|
"logits/chosen": -2.8273541927337646, |
|
"logits/rejected": -2.8198060989379883, |
|
"logps/chosen": -289.29791259765625, |
|
"logps/rejected": -293.8225402832031, |
|
"loss": 0.6809, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.02297678217291832, |
|
"rewards/margins": 0.026020046323537827, |
|
"rewards/rejected": -0.0030432622879743576, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.861031143679665, |
|
"grad_norm": 1.359375, |
|
"learning_rate": 2.8856887118621358e-08, |
|
"logits/chosen": -2.870941638946533, |
|
"logits/rejected": -2.8777921199798584, |
|
"logps/chosen": -274.54119873046875, |
|
"logps/rejected": -275.3123474121094, |
|
"loss": 0.6788, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.013969512656331062, |
|
"rewards/margins": 0.030474882572889328, |
|
"rewards/rejected": -0.016505368053913116, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.863648259617901, |
|
"grad_norm": 1.296875, |
|
"learning_rate": 2.7800654977942482e-08, |
|
"logits/chosen": -2.828477382659912, |
|
"logits/rejected": -2.7952821254730225, |
|
"logps/chosen": -273.37109375, |
|
"logps/rejected": -293.2996520996094, |
|
"loss": 0.6781, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.017902836203575134, |
|
"rewards/margins": 0.03182779997587204, |
|
"rewards/rejected": -0.013924960978329182, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.863648259617901, |
|
"eval_logits/chosen": -2.8559255599975586, |
|
"eval_logits/rejected": -2.8293371200561523, |
|
"eval_logps/chosen": -280.9559326171875, |
|
"eval_logps/rejected": -262.91455078125, |
|
"eval_loss": 0.6775384545326233, |
|
"eval_rewards/accuracies": 0.6809999942779541, |
|
"eval_rewards/chosen": 0.018175845965743065, |
|
"eval_rewards/margins": 0.03293789178133011, |
|
"eval_rewards/rejected": -0.014762048609554768, |
|
"eval_runtime": 623.0311, |
|
"eval_samples_per_second": 3.21, |
|
"eval_steps_per_second": 0.401, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.8662653755561371, |
|
"grad_norm": 1.0625, |
|
"learning_rate": 2.676297641862879e-08, |
|
"logits/chosen": -2.8453030586242676, |
|
"logits/rejected": -2.831481456756592, |
|
"logps/chosen": -240.8975830078125, |
|
"logps/rejected": -200.9988250732422, |
|
"loss": 0.6755, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.016030047088861465, |
|
"rewards/margins": 0.03695772588253021, |
|
"rewards/rejected": -0.020927678793668747, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.8688824914943732, |
|
"grad_norm": 1.3125, |
|
"learning_rate": 2.5743938086541352e-08, |
|
"logits/chosen": -2.8288021087646484, |
|
"logits/rejected": -2.802476167678833, |
|
"logps/chosen": -278.8105773925781, |
|
"logps/rejected": -255.1897430419922, |
|
"loss": 0.6772, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.013786676339805126, |
|
"rewards/margins": 0.03420311585068703, |
|
"rewards/rejected": -0.020416438579559326, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.8714996074326092, |
|
"grad_norm": 1.40625, |
|
"learning_rate": 2.474362507108757e-08, |
|
"logits/chosen": -2.8980019092559814, |
|
"logits/rejected": -2.8595707416534424, |
|
"logps/chosen": -289.9441833496094, |
|
"logps/rejected": -271.2764587402344, |
|
"loss": 0.6707, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.026317168027162552, |
|
"rewards/margins": 0.047435760498046875, |
|
"rewards/rejected": -0.02111859992146492, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.8741167233708453, |
|
"grad_norm": 1.40625, |
|
"learning_rate": 2.3762120898116495e-08, |
|
"logits/chosen": -2.849844455718994, |
|
"logits/rejected": -2.834979772567749, |
|
"logps/chosen": -287.1720886230469, |
|
"logps/rejected": -279.83941650390625, |
|
"loss": 0.6816, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.00895099900662899, |
|
"rewards/margins": 0.0245995931327343, |
|
"rewards/rejected": -0.01564859412610531, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.8767338393090814, |
|
"grad_norm": 1.546875, |
|
"learning_rate": 2.2799507522944044e-08, |
|
"logits/chosen": -2.7699649333953857, |
|
"logits/rejected": -2.7505178451538086, |
|
"logps/chosen": -284.6213073730469, |
|
"logps/rejected": -281.0083923339844, |
|
"loss": 0.6758, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.02020053006708622, |
|
"rewards/margins": 0.036288876086473465, |
|
"rewards/rejected": -0.016088349744677544, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.8793509552473174, |
|
"grad_norm": 1.4296875, |
|
"learning_rate": 2.1855865323510054e-08, |
|
"logits/chosen": -2.8084092140197754, |
|
"logits/rejected": -2.7662644386291504, |
|
"logps/chosen": -292.76629638671875, |
|
"logps/rejected": -290.1708679199219, |
|
"loss": 0.6738, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.022515032440423965, |
|
"rewards/margins": 0.04099477082490921, |
|
"rewards/rejected": -0.018479738384485245, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.8819680711855535, |
|
"grad_norm": 1.3125, |
|
"learning_rate": 2.0931273093666573e-08, |
|
"logits/chosen": -2.817469358444214, |
|
"logits/rejected": -2.792358875274658, |
|
"logps/chosen": -256.8840637207031, |
|
"logps/rejected": -239.3094024658203, |
|
"loss": 0.6749, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.013278042897582054, |
|
"rewards/margins": 0.03780357167124748, |
|
"rewards/rejected": -0.02452552691102028, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.8845851871237895, |
|
"grad_norm": 1.3671875, |
|
"learning_rate": 2.002580803659873e-08, |
|
"logits/chosen": -2.821927070617676, |
|
"logits/rejected": -2.779125213623047, |
|
"logps/chosen": -268.94305419921875, |
|
"logps/rejected": -259.77130126953125, |
|
"loss": 0.6793, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.011967052705585957, |
|
"rewards/margins": 0.02943551540374756, |
|
"rewards/rejected": -0.017468463629484177, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.8872023030620256, |
|
"grad_norm": 1.171875, |
|
"learning_rate": 1.9139545758378256e-08, |
|
"logits/chosen": -2.8498919010162354, |
|
"logits/rejected": -2.797402858734131, |
|
"logps/chosen": -284.4458923339844, |
|
"logps/rejected": -238.79989624023438, |
|
"loss": 0.6721, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": 0.028188347816467285, |
|
"rewards/margins": 0.04387947544455528, |
|
"rewards/rejected": -0.015691127628087997, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.8898194190002617, |
|
"grad_norm": 1.3671875, |
|
"learning_rate": 1.8272560261650277e-08, |
|
"logits/chosen": -2.8581271171569824, |
|
"logits/rejected": -2.828350305557251, |
|
"logps/chosen": -329.14361572265625, |
|
"logps/rejected": -273.26776123046875, |
|
"loss": 0.6733, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.02928735874593258, |
|
"rewards/margins": 0.04170341044664383, |
|
"rewards/rejected": -0.012416050769388676, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.8898194190002617, |
|
"eval_logits/chosen": -2.850832462310791, |
|
"eval_logits/rejected": -2.823747396469116, |
|
"eval_logps/chosen": -280.9770202636719, |
|
"eval_logps/rejected": -262.9403381347656, |
|
"eval_loss": 0.6775196194648743, |
|
"eval_rewards/accuracies": 0.6825000047683716, |
|
"eval_rewards/chosen": 0.017964746803045273, |
|
"eval_rewards/margins": 0.03298423811793327, |
|
"eval_rewards/rejected": -0.01501949317753315, |
|
"eval_runtime": 623.1437, |
|
"eval_samples_per_second": 3.21, |
|
"eval_steps_per_second": 0.401, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.8924365349384977, |
|
"grad_norm": 1.40625, |
|
"learning_rate": 1.742492393945427e-08, |
|
"logits/chosen": -2.830068826675415, |
|
"logits/rejected": -2.787055015563965, |
|
"logps/chosen": -295.30035400390625, |
|
"logps/rejected": -255.3282470703125, |
|
"loss": 0.6776, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.01680312678217888, |
|
"rewards/margins": 0.032698854804039, |
|
"rewards/rejected": -0.015895728021860123, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.8950536508767338, |
|
"grad_norm": 1.46875, |
|
"learning_rate": 1.6596707569179302e-08, |
|
"logits/chosen": -2.8651199340820312, |
|
"logits/rejected": -2.8412108421325684, |
|
"logps/chosen": -294.6651916503906, |
|
"logps/rejected": -264.3734130859375, |
|
"loss": 0.6762, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.022434063255786896, |
|
"rewards/margins": 0.035665739327669144, |
|
"rewards/rejected": -0.013231677003204823, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.8976707668149699, |
|
"grad_norm": 1.265625, |
|
"learning_rate": 1.5787980306653848e-08, |
|
"logits/chosen": -2.8364651203155518, |
|
"logits/rejected": -2.7913882732391357, |
|
"logps/chosen": -288.8866271972656, |
|
"logps/rejected": -276.93865966796875, |
|
"loss": 0.6742, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.024098176509141922, |
|
"rewards/margins": 0.03978481888771057, |
|
"rewards/rejected": -0.01568664237856865, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.9002878827532059, |
|
"grad_norm": 1.203125, |
|
"learning_rate": 1.499880968037165e-08, |
|
"logits/chosen": -2.8334012031555176, |
|
"logits/rejected": -2.8106682300567627, |
|
"logps/chosen": -267.6552734375, |
|
"logps/rejected": -231.9657745361328, |
|
"loss": 0.6767, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.02195141650736332, |
|
"rewards/margins": 0.03444907069206238, |
|
"rewards/rejected": -0.012497651390731335, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.902904998691442, |
|
"grad_norm": 1.2109375, |
|
"learning_rate": 1.4229261585852803e-08, |
|
"logits/chosen": -2.8546454906463623, |
|
"logits/rejected": -2.8432843685150146, |
|
"logps/chosen": -280.5713806152344, |
|
"logps/rejected": -257.9490966796875, |
|
"loss": 0.6755, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.022586923092603683, |
|
"rewards/margins": 0.037085022777318954, |
|
"rewards/rejected": -0.01449810154736042, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.9055221146296781, |
|
"grad_norm": 1.328125, |
|
"learning_rate": 1.3479400280141883e-08, |
|
"logits/chosen": -2.823387384414673, |
|
"logits/rejected": -2.8125298023223877, |
|
"logps/chosen": -262.6939697265625, |
|
"logps/rejected": -266.7622985839844, |
|
"loss": 0.6774, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.018851932138204575, |
|
"rewards/margins": 0.03330928832292557, |
|
"rewards/rejected": -0.014457357116043568, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.9081392305679141, |
|
"grad_norm": 1.203125, |
|
"learning_rate": 1.2749288376442042e-08, |
|
"logits/chosen": -2.839914083480835, |
|
"logits/rejected": -2.8061881065368652, |
|
"logps/chosen": -314.83599853515625, |
|
"logps/rejected": -256.56256103515625, |
|
"loss": 0.6731, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": 0.027168557047843933, |
|
"rewards/margins": 0.041992831975221634, |
|
"rewards/rejected": -0.01482427679002285, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.9107563465061502, |
|
"grad_norm": 1.3828125, |
|
"learning_rate": 1.2038986838887127e-08, |
|
"logits/chosen": -2.8704135417938232, |
|
"logits/rejected": -2.8492181301116943, |
|
"logps/chosen": -257.75701904296875, |
|
"logps/rejected": -257.2181091308594, |
|
"loss": 0.6845, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.006689242087304592, |
|
"rewards/margins": 0.019121108576655388, |
|
"rewards/rejected": -0.01243186742067337, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.9133734624443863, |
|
"grad_norm": 1.25, |
|
"learning_rate": 1.1348554977451131e-08, |
|
"logits/chosen": -2.879945993423462, |
|
"logits/rejected": -2.854792356491089, |
|
"logps/chosen": -299.6977233886719, |
|
"logps/rejected": -266.33148193359375, |
|
"loss": 0.6776, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.01960437372326851, |
|
"rewards/margins": 0.03308872506022453, |
|
"rewards/rejected": -0.013484349474310875, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.9159905783826223, |
|
"grad_norm": 1.2890625, |
|
"learning_rate": 1.06780504429958e-08, |
|
"logits/chosen": -2.860222578048706, |
|
"logits/rejected": -2.8316166400909424, |
|
"logps/chosen": -295.9383850097656, |
|
"logps/rejected": -253.4501495361328, |
|
"loss": 0.6739, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.025537196546792984, |
|
"rewards/margins": 0.040846120566129684, |
|
"rewards/rejected": -0.015308921225368977, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.9159905783826223, |
|
"eval_logits/chosen": -2.8575375080108643, |
|
"eval_logits/rejected": -2.8311452865600586, |
|
"eval_logps/chosen": -280.9686279296875, |
|
"eval_logps/rejected": -262.9413146972656, |
|
"eval_loss": 0.677466869354248, |
|
"eval_rewards/accuracies": 0.6850000023841858, |
|
"eval_rewards/chosen": 0.01804887317121029, |
|
"eval_rewards/margins": 0.033078454434871674, |
|
"eval_rewards/rejected": -0.015029575675725937, |
|
"eval_runtime": 623.0737, |
|
"eval_samples_per_second": 3.21, |
|
"eval_steps_per_second": 0.401, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.9186076943208584, |
|
"grad_norm": 2.078125, |
|
"learning_rate": 1.0027529222456754e-08, |
|
"logits/chosen": -2.80438232421875, |
|
"logits/rejected": -2.773073673248291, |
|
"logps/chosen": -268.601318359375, |
|
"logps/rejected": -252.92831420898438, |
|
"loss": 0.6728, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": 0.02063935436308384, |
|
"rewards/margins": 0.0424073152244091, |
|
"rewards/rejected": -0.021767962723970413, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.9212248102590945, |
|
"grad_norm": 1.2734375, |
|
"learning_rate": 9.397045634168766e-09, |
|
"logits/chosen": -2.8710060119628906, |
|
"logits/rejected": -2.8575310707092285, |
|
"logps/chosen": -283.4559020996094, |
|
"logps/rejected": -289.69219970703125, |
|
"loss": 0.6729, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.031010348349809647, |
|
"rewards/margins": 0.04308422654867172, |
|
"rewards/rejected": -0.012073880061507225, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.9238419261973305, |
|
"grad_norm": 1.3125, |
|
"learning_rate": 8.78665232332998e-09, |
|
"logits/chosen": -2.8076834678649902, |
|
"logits/rejected": -2.7891430854797363, |
|
"logps/chosen": -245.83383178710938, |
|
"logps/rejected": -245.38418579101562, |
|
"loss": 0.6793, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.013311171904206276, |
|
"rewards/margins": 0.028882578015327454, |
|
"rewards/rejected": -0.015571406111121178, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.9264590421355666, |
|
"grad_norm": 1.21875, |
|
"learning_rate": 8.196400257606206e-09, |
|
"logits/chosen": -2.8521595001220703, |
|
"logits/rejected": -2.810176134109497, |
|
"logps/chosen": -298.8065490722656, |
|
"logps/rejected": -297.49627685546875, |
|
"loss": 0.6753, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.018893834203481674, |
|
"rewards/margins": 0.037972934544086456, |
|
"rewards/rejected": -0.019079100340604782, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.9290761580738026, |
|
"grad_norm": 1.421875, |
|
"learning_rate": 7.626338722875075e-09, |
|
"logits/chosen": -2.8442888259887695, |
|
"logits/rejected": -2.8555784225463867, |
|
"logps/chosen": -271.51593017578125, |
|
"logps/rejected": -271.29620361328125, |
|
"loss": 0.679, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.014854473061859608, |
|
"rewards/margins": 0.02983902022242546, |
|
"rewards/rejected": -0.014984548091888428, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.9316932740120387, |
|
"grad_norm": 1.3203125, |
|
"learning_rate": 7.0765153191106875e-09, |
|
"logits/chosen": -2.8553731441497803, |
|
"logits/rejected": -2.8395111560821533, |
|
"logps/chosen": -269.2403564453125, |
|
"logps/rejected": -229.14599609375, |
|
"loss": 0.6757, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.017869068309664726, |
|
"rewards/margins": 0.03671371936798096, |
|
"rewards/rejected": -0.01884464919567108, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.9343103899502748, |
|
"grad_norm": 1.0, |
|
"learning_rate": 6.54697595640899e-09, |
|
"logits/chosen": -2.8470935821533203, |
|
"logits/rejected": -2.827101945877075, |
|
"logps/chosen": -307.4560241699219, |
|
"logps/rejected": -287.3361511230469, |
|
"loss": 0.6755, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.024625394493341446, |
|
"rewards/margins": 0.037567656487226486, |
|
"rewards/rejected": -0.01294226385653019, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.9369275058885108, |
|
"grad_norm": 1.2578125, |
|
"learning_rate": 6.037764851154425e-09, |
|
"logits/chosen": -2.817866802215576, |
|
"logits/rejected": -2.8075547218322754, |
|
"logps/chosen": -280.808837890625, |
|
"logps/rejected": -287.5721740722656, |
|
"loss": 0.6757, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.019267046824097633, |
|
"rewards/margins": 0.03673623502254486, |
|
"rewards/rejected": -0.017469191923737526, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.9395446218267469, |
|
"grad_norm": 1.171875, |
|
"learning_rate": 5.548924522327747e-09, |
|
"logits/chosen": -2.839773178100586, |
|
"logits/rejected": -2.8247604370117188, |
|
"logps/chosen": -277.45343017578125, |
|
"logps/rejected": -264.47119140625, |
|
"loss": 0.6772, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.015755945816636086, |
|
"rewards/margins": 0.03340662270784378, |
|
"rewards/rejected": -0.017650676891207695, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.942161737764983, |
|
"grad_norm": 1.34375, |
|
"learning_rate": 5.080495787955691e-09, |
|
"logits/chosen": -2.8103365898132324, |
|
"logits/rejected": -2.794466257095337, |
|
"logps/chosen": -242.8784637451172, |
|
"logps/rejected": -246.01123046875, |
|
"loss": 0.6807, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.014323743060231209, |
|
"rewards/margins": 0.02595413103699684, |
|
"rewards/rejected": -0.011630385182797909, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.942161737764983, |
|
"eval_logits/chosen": -2.8526551723480225, |
|
"eval_logits/rejected": -2.825742483139038, |
|
"eval_logps/chosen": -280.9523620605469, |
|
"eval_logps/rejected": -262.9205017089844, |
|
"eval_loss": 0.6774939298629761, |
|
"eval_rewards/accuracies": 0.6855000257492065, |
|
"eval_rewards/chosen": 0.01821131445467472, |
|
"eval_rewards/margins": 0.03303277865052223, |
|
"eval_rewards/rejected": -0.014821460470557213, |
|
"eval_runtime": 622.8509, |
|
"eval_samples_per_second": 3.211, |
|
"eval_steps_per_second": 0.401, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.944778853703219, |
|
"grad_norm": 2.9375, |
|
"learning_rate": 4.632517761702814e-09, |
|
"logits/chosen": -2.7846920490264893, |
|
"logits/rejected": -2.756865978240967, |
|
"logps/chosen": -257.6873779296875, |
|
"logps/rejected": -247.35317993164062, |
|
"loss": 0.6763, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.009808182716369629, |
|
"rewards/margins": 0.03539792820811272, |
|
"rewards/rejected": -0.025589745491743088, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.9473959696414551, |
|
"grad_norm": 1.3515625, |
|
"learning_rate": 4.205027849605358e-09, |
|
"logits/chosen": -2.8126158714294434, |
|
"logits/rejected": -2.7998046875, |
|
"logps/chosen": -264.48272705078125, |
|
"logps/rejected": -232.7639617919922, |
|
"loss": 0.6783, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.010889967903494835, |
|
"rewards/margins": 0.031071290373802185, |
|
"rewards/rejected": -0.020181316882371902, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.9500130855796912, |
|
"grad_norm": 1.34375, |
|
"learning_rate": 3.798061746947995e-09, |
|
"logits/chosen": -2.8637187480926514, |
|
"logits/rejected": -2.8383963108062744, |
|
"logps/chosen": -279.2807922363281, |
|
"logps/rejected": -244.65072631835938, |
|
"loss": 0.6745, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.01557912863790989, |
|
"rewards/margins": 0.03933199122548103, |
|
"rewards/rejected": -0.023752864450216293, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.9526302015179272, |
|
"grad_norm": 1.296875, |
|
"learning_rate": 3.411653435283157e-09, |
|
"logits/chosen": -2.8357815742492676, |
|
"logits/rejected": -2.8046395778656006, |
|
"logps/chosen": -287.30950927734375, |
|
"logps/rejected": -232.4551239013672, |
|
"loss": 0.6757, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.02017480693757534, |
|
"rewards/margins": 0.03672494366765022, |
|
"rewards/rejected": -0.016550134867429733, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.9552473174561633, |
|
"grad_norm": 1.390625, |
|
"learning_rate": 3.0458351795936698e-09, |
|
"logits/chosen": -2.8704733848571777, |
|
"logits/rejected": -2.8465213775634766, |
|
"logps/chosen": -264.4095764160156, |
|
"logps/rejected": -236.84811401367188, |
|
"loss": 0.6731, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.023878643289208412, |
|
"rewards/margins": 0.04207443445920944, |
|
"rewards/rejected": -0.01819578930735588, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.9578644333943994, |
|
"grad_norm": 1.5078125, |
|
"learning_rate": 2.700637525598598e-09, |
|
"logits/chosen": -2.8182005882263184, |
|
"logits/rejected": -2.823500156402588, |
|
"logps/chosen": -287.9983825683594, |
|
"logps/rejected": -288.73779296875, |
|
"loss": 0.681, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.018164271488785744, |
|
"rewards/margins": 0.02582050859928131, |
|
"rewards/rejected": -0.007656236179172993, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.9604815493326354, |
|
"grad_norm": 1.3515625, |
|
"learning_rate": 2.3760892972027324e-09, |
|
"logits/chosen": -2.886582612991333, |
|
"logits/rejected": -2.8624680042266846, |
|
"logps/chosen": -286.31439208984375, |
|
"logps/rejected": -253.89181518554688, |
|
"loss": 0.6793, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.013353118672966957, |
|
"rewards/margins": 0.029065540060400963, |
|
"rewards/rejected": -0.015712425112724304, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.9630986652708715, |
|
"grad_norm": 1.4765625, |
|
"learning_rate": 2.0722175940897645e-09, |
|
"logits/chosen": -2.8091981410980225, |
|
"logits/rejected": -2.827847719192505, |
|
"logps/chosen": -275.06439208984375, |
|
"logps/rejected": -267.3816833496094, |
|
"loss": 0.6769, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.014090280048549175, |
|
"rewards/margins": 0.03396814316511154, |
|
"rewards/rejected": -0.01987786404788494, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.9657157812091076, |
|
"grad_norm": 2.0, |
|
"learning_rate": 1.7890477894593748e-09, |
|
"logits/chosen": -2.8381717205047607, |
|
"logits/rejected": -2.8093409538269043, |
|
"logps/chosen": -335.9226379394531, |
|
"logps/rejected": -286.562744140625, |
|
"loss": 0.6684, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 0.035428646951913834, |
|
"rewards/margins": 0.05196043848991394, |
|
"rewards/rejected": -0.016531798988580704, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.9683328971473436, |
|
"grad_norm": 1.2890625, |
|
"learning_rate": 1.5266035279088708e-09, |
|
"logits/chosen": -2.7718958854675293, |
|
"logits/rejected": -2.762516498565674, |
|
"logps/chosen": -317.29156494140625, |
|
"logps/rejected": -293.4043884277344, |
|
"loss": 0.6731, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.02600114978849888, |
|
"rewards/margins": 0.042024485766887665, |
|
"rewards/rejected": -0.016023332253098488, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.9683328971473436, |
|
"eval_logits/chosen": -2.850998640060425, |
|
"eval_logits/rejected": -2.8239121437072754, |
|
"eval_logps/chosen": -280.9513854980469, |
|
"eval_logps/rejected": -262.9112854003906, |
|
"eval_loss": 0.6775330901145935, |
|
"eval_rewards/accuracies": 0.6834999918937683, |
|
"eval_rewards/chosen": 0.018221192061901093, |
|
"eval_rewards/margins": 0.03295028209686279, |
|
"eval_rewards/rejected": -0.014729092828929424, |
|
"eval_runtime": 623.4667, |
|
"eval_samples_per_second": 3.208, |
|
"eval_steps_per_second": 0.401, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.9709500130855797, |
|
"grad_norm": 1.5390625, |
|
"learning_rate": 1.2849067234584621e-09, |
|
"logits/chosen": -2.7960381507873535, |
|
"logits/rejected": -2.789794921875, |
|
"logps/chosen": -251.1592559814453, |
|
"logps/rejected": -244.37112426757812, |
|
"loss": 0.6784, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.014368300326168537, |
|
"rewards/margins": 0.031433962285518646, |
|
"rewards/rejected": -0.017065661028027534, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.9735671290238157, |
|
"grad_norm": 1.65625, |
|
"learning_rate": 1.0639775577218625e-09, |
|
"logits/chosen": -2.7958970069885254, |
|
"logits/rejected": -2.7415878772735596, |
|
"logps/chosen": -266.13140869140625, |
|
"logps/rejected": -232.8394775390625, |
|
"loss": 0.677, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.015364277176558971, |
|
"rewards/margins": 0.03415878862142563, |
|
"rewards/rejected": -0.018794508650898933, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.9761842449620518, |
|
"grad_norm": 1.578125, |
|
"learning_rate": 8.638344782207485e-10, |
|
"logits/chosen": -2.802359104156494, |
|
"logits/rejected": -2.7990007400512695, |
|
"logps/chosen": -271.896240234375, |
|
"logps/rejected": -248.9223175048828, |
|
"loss": 0.6758, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.018408995121717453, |
|
"rewards/margins": 0.036677196621894836, |
|
"rewards/rejected": -0.018268201500177383, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.9788013609002879, |
|
"grad_norm": 1.5859375, |
|
"learning_rate": 6.844941968447149e-10, |
|
"logits/chosen": -2.8409347534179688, |
|
"logits/rejected": -2.8161139488220215, |
|
"logps/chosen": -288.1478576660156, |
|
"logps/rejected": -280.65985107421875, |
|
"loss": 0.6694, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.027417827397584915, |
|
"rewards/margins": 0.049938250333070755, |
|
"rewards/rejected": -0.02252042479813099, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.9814184768385239, |
|
"grad_norm": 1.25, |
|
"learning_rate": 5.25971688455612e-10, |
|
"logits/chosen": -2.8641200065612793, |
|
"logits/rejected": -2.8417608737945557, |
|
"logps/chosen": -288.54437255859375, |
|
"logps/rejected": -287.37908935546875, |
|
"loss": 0.6733, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 0.0248104277998209, |
|
"rewards/margins": 0.04141296073794365, |
|
"rewards/rejected": -0.016602538526058197, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.98403559277676, |
|
"grad_norm": 1.3515625, |
|
"learning_rate": 3.882801896372967e-10, |
|
"logits/chosen": -2.8650496006011963, |
|
"logits/rejected": -2.8601956367492676, |
|
"logps/chosen": -280.47113037109375, |
|
"logps/rejected": -251.7021942138672, |
|
"loss": 0.6775, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.017177898436784744, |
|
"rewards/margins": 0.03319885581731796, |
|
"rewards/rejected": -0.016020962968468666, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.9866527087149961, |
|
"grad_norm": 1.2578125, |
|
"learning_rate": 2.714311975902661e-10, |
|
"logits/chosen": -2.8119492530822754, |
|
"logits/rejected": -2.7749814987182617, |
|
"logps/chosen": -303.2042541503906, |
|
"logps/rejected": -277.75457763671875, |
|
"loss": 0.6761, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.020443648099899292, |
|
"rewards/margins": 0.03567901626229286, |
|
"rewards/rejected": -0.015235371887683868, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.9892698246532321, |
|
"grad_norm": 1.4453125, |
|
"learning_rate": 1.754344691717591e-10, |
|
"logits/chosen": -2.8344626426696777, |
|
"logits/rejected": -2.811007022857666, |
|
"logps/chosen": -266.02685546875, |
|
"logps/rejected": -288.6179504394531, |
|
"loss": 0.6835, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.019811708480119705, |
|
"rewards/margins": 0.020612578839063644, |
|
"rewards/rejected": -0.0008008688455447555, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.9918869405914682, |
|
"grad_norm": 1.4609375, |
|
"learning_rate": 1.0029802008096333e-10, |
|
"logits/chosen": -2.843888521194458, |
|
"logits/rejected": -2.801081418991089, |
|
"logps/chosen": -288.82562255859375, |
|
"logps/rejected": -270.85028076171875, |
|
"loss": 0.6735, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.020014481619000435, |
|
"rewards/margins": 0.04117124527692795, |
|
"rewards/rejected": -0.021156763657927513, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.9945040565297043, |
|
"grad_norm": 1.296875, |
|
"learning_rate": 4.602812418974533e-11, |
|
"logits/chosen": -2.866516351699829, |
|
"logits/rejected": -2.8406145572662354, |
|
"logps/chosen": -301.0005798339844, |
|
"logps/rejected": -279.62908935546875, |
|
"loss": 0.675, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.02747102454304695, |
|
"rewards/margins": 0.03847536817193031, |
|
"rewards/rejected": -0.011004343628883362, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.9945040565297043, |
|
"eval_logits/chosen": -2.850416660308838, |
|
"eval_logits/rejected": -2.8232762813568115, |
|
"eval_logps/chosen": -280.95458984375, |
|
"eval_logps/rejected": -262.90020751953125, |
|
"eval_loss": 0.6776041388511658, |
|
"eval_rewards/accuracies": 0.6855000257492065, |
|
"eval_rewards/chosen": 0.018189024180173874, |
|
"eval_rewards/margins": 0.03280767798423767, |
|
"eval_rewards/rejected": -0.014618655666708946, |
|
"eval_runtime": 622.8726, |
|
"eval_samples_per_second": 3.211, |
|
"eval_steps_per_second": 0.401, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.9971211724679403, |
|
"grad_norm": 1.2890625, |
|
"learning_rate": 1.2629313018819309e-11, |
|
"logits/chosen": -2.8219265937805176, |
|
"logits/rejected": -2.7998881340026855, |
|
"logps/chosen": -272.62823486328125, |
|
"logps/rejected": -255.8968048095703, |
|
"loss": 0.676, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.015002429485321045, |
|
"rewards/margins": 0.03602247312664986, |
|
"rewards/rejected": -0.021020041778683662, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.9997382884061764, |
|
"grad_norm": 3.515625, |
|
"learning_rate": 1.0437535929996855e-13, |
|
"logits/chosen": -2.8465044498443604, |
|
"logits/rejected": -2.825206995010376, |
|
"logps/chosen": -305.00775146484375, |
|
"logps/rejected": -262.2654724121094, |
|
"loss": 0.6739, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.018057797104120255, |
|
"rewards/margins": 0.04028897359967232, |
|
"rewards/rejected": -0.02223118022084236, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 3821, |
|
"total_flos": 0.0, |
|
"train_loss": 0.680465580483626, |
|
"train_runtime": 64957.9706, |
|
"train_samples_per_second": 0.941, |
|
"train_steps_per_second": 0.059 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 3821, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|