|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.9765925925925925, |
|
"eval_steps": 500, |
|
"global_step": 315, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.047407407407407405, |
|
"grad_norm": 1915.3714599609375, |
|
"learning_rate": 7.8125e-06, |
|
"log_odds_chosen": 1.65981125831604, |
|
"log_odds_ratio": -11.16843032836914, |
|
"logps/chosen": -22.020946502685547, |
|
"logps/rejected": -23.68042755126953, |
|
"loss": 320.1571, |
|
"nll_loss": 8.666691780090332, |
|
"rewards/accuracies": 0.534375011920929, |
|
"rewards/chosen": -11.010473251342773, |
|
"rewards/margins": 0.8297405242919922, |
|
"rewards/rejected": -11.840213775634766, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.09481481481481481, |
|
"grad_norm": 2617.00927734375, |
|
"learning_rate": 1.5625e-05, |
|
"log_odds_chosen": 1.09341299533844, |
|
"log_odds_ratio": -8.355111122131348, |
|
"logps/chosen": -19.82636833190918, |
|
"logps/rejected": -20.919193267822266, |
|
"loss": 223.029, |
|
"nll_loss": 7.8865966796875, |
|
"rewards/accuracies": 0.528124988079071, |
|
"rewards/chosen": -9.91318416595459, |
|
"rewards/margins": 0.5464121699333191, |
|
"rewards/rejected": -10.459596633911133, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.14222222222222222, |
|
"grad_norm": 1221.8326416015625, |
|
"learning_rate": 2.34375e-05, |
|
"log_odds_chosen": 4.4873456954956055, |
|
"log_odds_ratio": -6.951984405517578, |
|
"logps/chosen": -18.489765167236328, |
|
"logps/rejected": -22.975828170776367, |
|
"loss": 226.6759, |
|
"nll_loss": 8.182887077331543, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -9.244882583618164, |
|
"rewards/margins": 2.2430315017700195, |
|
"rewards/rejected": -11.487914085388184, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.18962962962962962, |
|
"grad_norm": 2359.64111328125, |
|
"learning_rate": 3.125e-05, |
|
"log_odds_chosen": 0.6630983352661133, |
|
"log_odds_ratio": -8.002729415893555, |
|
"logps/chosen": -18.08315086364746, |
|
"logps/rejected": -18.74709129333496, |
|
"loss": 342.1809, |
|
"nll_loss": 8.18604564666748, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -9.04157543182373, |
|
"rewards/margins": 0.3319700062274933, |
|
"rewards/rejected": -9.37354564666748, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.23703703703703705, |
|
"grad_norm": 2270.639892578125, |
|
"learning_rate": 3.90625e-05, |
|
"log_odds_chosen": 4.7319722175598145, |
|
"log_odds_ratio": -6.239144802093506, |
|
"logps/chosen": -15.107877731323242, |
|
"logps/rejected": -19.83966636657715, |
|
"loss": 59.8623, |
|
"nll_loss": 6.319466590881348, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -7.553938865661621, |
|
"rewards/margins": 2.3658950328826904, |
|
"rewards/rejected": -9.919833183288574, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.28444444444444444, |
|
"grad_norm": 42054.76953125, |
|
"learning_rate": 4.6875e-05, |
|
"log_odds_chosen": 3.2522056102752686, |
|
"log_odds_ratio": -5.358423709869385, |
|
"logps/chosen": -19.751956939697266, |
|
"logps/rejected": -23.000202178955078, |
|
"loss": 210.8639, |
|
"nll_loss": 11.341104507446289, |
|
"rewards/accuracies": 0.6031249761581421, |
|
"rewards/chosen": -9.875978469848633, |
|
"rewards/margins": 1.6241226196289062, |
|
"rewards/rejected": -11.500101089477539, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.33185185185185184, |
|
"grad_norm": 1386.9453125, |
|
"learning_rate": 4.998613757348784e-05, |
|
"log_odds_chosen": 2.529534339904785, |
|
"log_odds_ratio": -4.246035575866699, |
|
"logps/chosen": -15.55242919921875, |
|
"logps/rejected": -18.077491760253906, |
|
"loss": 259.2666, |
|
"nll_loss": 11.016453742980957, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -7.776214599609375, |
|
"rewards/margins": 1.2625317573547363, |
|
"rewards/rejected": -9.038745880126953, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.37925925925925924, |
|
"grad_norm": 1015.165771484375, |
|
"learning_rate": 4.990147841143462e-05, |
|
"log_odds_chosen": 0.2810021936893463, |
|
"log_odds_ratio": -0.8303823471069336, |
|
"logps/chosen": -2.1894371509552, |
|
"logps/rejected": -2.4453959465026855, |
|
"loss": 75.6876, |
|
"nll_loss": 2.393183469772339, |
|
"rewards/accuracies": 0.590624988079071, |
|
"rewards/chosen": -1.0947185754776, |
|
"rewards/margins": 0.12797939777374268, |
|
"rewards/rejected": -1.2226979732513428, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.4266666666666667, |
|
"grad_norm": 1208.3837890625, |
|
"learning_rate": 4.97401218720448e-05, |
|
"log_odds_chosen": 0.16454455256462097, |
|
"log_odds_ratio": -1.0962440967559814, |
|
"logps/chosen": -2.4815239906311035, |
|
"logps/rejected": -2.6206681728363037, |
|
"loss": 61.1886, |
|
"nll_loss": 2.248060464859009, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -1.2407619953155518, |
|
"rewards/margins": 0.0695720762014389, |
|
"rewards/rejected": -1.3103340864181519, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.4740740740740741, |
|
"grad_norm": 4378.82275390625, |
|
"learning_rate": 4.9502564938797946e-05, |
|
"log_odds_chosen": 0.6479941010475159, |
|
"log_odds_ratio": -1.2961242198944092, |
|
"logps/chosen": -3.2210915088653564, |
|
"logps/rejected": -3.855274200439453, |
|
"loss": 7.3654, |
|
"nll_loss": 2.6344618797302246, |
|
"rewards/accuracies": 0.565625011920929, |
|
"rewards/chosen": -1.6105457544326782, |
|
"rewards/margins": 0.3170911371707916, |
|
"rewards/rejected": -1.9276371002197266, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.5214814814814814, |
|
"grad_norm": 1226.081787109375, |
|
"learning_rate": 4.918953929490768e-05, |
|
"log_odds_chosen": 1.278847098350525, |
|
"log_odds_ratio": -2.351318836212158, |
|
"logps/chosen": -5.5019121170043945, |
|
"logps/rejected": -6.7642693519592285, |
|
"loss": 69.5193, |
|
"nll_loss": 3.3521831035614014, |
|
"rewards/accuracies": 0.5718749761581421, |
|
"rewards/chosen": -2.7509560585021973, |
|
"rewards/margins": 0.6311787366867065, |
|
"rewards/rejected": -3.3821346759796143, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.5688888888888889, |
|
"grad_norm": 3815.735595703125, |
|
"learning_rate": 4.88020090697132e-05, |
|
"log_odds_chosen": 0.8591831922531128, |
|
"log_odds_ratio": -1.0965297222137451, |
|
"logps/chosen": -2.902388334274292, |
|
"logps/rejected": -3.7349720001220703, |
|
"loss": 24.2741, |
|
"nll_loss": 2.398529529571533, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -1.451194167137146, |
|
"rewards/margins": 0.41629156470298767, |
|
"rewards/rejected": -1.8674860000610352, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.6162962962962963, |
|
"grad_norm": 1931.8660888671875, |
|
"learning_rate": 4.834116786912897e-05, |
|
"log_odds_chosen": 0.31718695163726807, |
|
"log_odds_ratio": -1.2613378763198853, |
|
"logps/chosen": -2.781818389892578, |
|
"logps/rejected": -3.0610077381134033, |
|
"loss": 81.3175, |
|
"nll_loss": 2.3013808727264404, |
|
"rewards/accuracies": 0.5843750238418579, |
|
"rewards/chosen": -1.390909194946289, |
|
"rewards/margins": 0.13959458470344543, |
|
"rewards/rejected": -1.5305038690567017, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.6637037037037037, |
|
"grad_norm": 787.8285522460938, |
|
"learning_rate": 4.7808435099299045e-05, |
|
"log_odds_chosen": 0.5639177560806274, |
|
"log_odds_ratio": -0.8356220126152039, |
|
"logps/chosen": -2.251502752304077, |
|
"logps/rejected": -2.7904326915740967, |
|
"loss": 47.3395, |
|
"nll_loss": 2.168375015258789, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -1.1257513761520386, |
|
"rewards/margins": 0.2694648802280426, |
|
"rewards/rejected": -1.3952163457870483, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.7111111111111111, |
|
"grad_norm": 2055.1884765625, |
|
"learning_rate": 4.720545159477922e-05, |
|
"log_odds_chosen": 1.0343811511993408, |
|
"log_odds_ratio": -1.943116545677185, |
|
"logps/chosen": -4.0394768714904785, |
|
"logps/rejected": -5.053744792938232, |
|
"loss": 15.8044, |
|
"nll_loss": 2.568474292755127, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.0197384357452393, |
|
"rewards/margins": 0.5071338415145874, |
|
"rewards/rejected": -2.526872396469116, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.7585185185185185, |
|
"grad_norm": 3419.63525390625, |
|
"learning_rate": 4.653407456471222e-05, |
|
"log_odds_chosen": 1.538326621055603, |
|
"log_odds_ratio": -2.196194648742676, |
|
"logps/chosen": -5.185378074645996, |
|
"logps/rejected": -6.7065582275390625, |
|
"loss": 39.0928, |
|
"nll_loss": 2.9203972816467285, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.592689037322998, |
|
"rewards/margins": 0.7605901956558228, |
|
"rewards/rejected": -3.3532791137695312, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.8059259259259259, |
|
"grad_norm": 3812.28271484375, |
|
"learning_rate": 4.579637187256222e-05, |
|
"log_odds_chosen": 2.206240177154541, |
|
"log_odds_ratio": -2.366931438446045, |
|
"logps/chosen": -6.021973609924316, |
|
"logps/rejected": -8.210701942443848, |
|
"loss": 57.2016, |
|
"nll_loss": 2.832935333251953, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -3.010986804962158, |
|
"rewards/margins": 1.0943641662597656, |
|
"rewards/rejected": -4.105350971221924, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.8533333333333334, |
|
"grad_norm": 2624.8759765625, |
|
"learning_rate": 4.499461566702685e-05, |
|
"log_odds_chosen": 0.8822873830795288, |
|
"log_odds_ratio": -2.509356737136841, |
|
"logps/chosen": -4.698742389678955, |
|
"logps/rejected": -5.556033134460449, |
|
"loss": 103.7346, |
|
"nll_loss": 2.442578077316284, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.3493711948394775, |
|
"rewards/margins": 0.42864537239074707, |
|
"rewards/rejected": -2.7780165672302246, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.9007407407407407, |
|
"grad_norm": 12379.837890625, |
|
"learning_rate": 4.413127538374411e-05, |
|
"log_odds_chosen": 0.29022759199142456, |
|
"log_odds_ratio": -1.0547641515731812, |
|
"logps/chosen": -2.508532762527466, |
|
"logps/rejected": -2.7733073234558105, |
|
"loss": 61.9094, |
|
"nll_loss": 2.3397486209869385, |
|
"rewards/accuracies": 0.5718749761581421, |
|
"rewards/chosen": -1.254266381263733, |
|
"rewards/margins": 0.1323871910572052, |
|
"rewards/rejected": -1.3866536617279053, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.9481481481481482, |
|
"grad_norm": 2379.628173828125, |
|
"learning_rate": 4.320901013934887e-05, |
|
"log_odds_chosen": 0.09418745338916779, |
|
"log_odds_ratio": -1.3399364948272705, |
|
"logps/chosen": -2.7042553424835205, |
|
"logps/rejected": -2.785879373550415, |
|
"loss": 86.4669, |
|
"nll_loss": 2.2360591888427734, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -1.3521276712417603, |
|
"rewards/margins": 0.040812067687511444, |
|
"rewards/rejected": -1.3929396867752075, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.9955555555555555, |
|
"grad_norm": 1130.39892578125, |
|
"learning_rate": 4.223066054130568e-05, |
|
"log_odds_chosen": 0.4360111653804779, |
|
"log_odds_ratio": -0.8215211629867554, |
|
"logps/chosen": -2.013885498046875, |
|
"logps/rejected": -2.4152350425720215, |
|
"loss": 57.9738, |
|
"nll_loss": 2.038440704345703, |
|
"rewards/accuracies": 0.6031249761581421, |
|
"rewards/chosen": -1.0069427490234375, |
|
"rewards/margins": 0.2006748616695404, |
|
"rewards/rejected": -1.2076175212860107, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 1.037925925925926, |
|
"grad_norm": 3500.71533203125, |
|
"learning_rate": 4.1199239938743797e-05, |
|
"log_odds_chosen": 0.5743904709815979, |
|
"log_odds_ratio": -0.8129003047943115, |
|
"logps/chosen": -1.8548295497894287, |
|
"logps/rejected": -2.392976760864258, |
|
"loss": 30.7734, |
|
"nll_loss": 1.9196120500564575, |
|
"rewards/accuracies": 0.5769230723381042, |
|
"rewards/chosen": -0.9274147748947144, |
|
"rewards/margins": 0.2690735161304474, |
|
"rewards/rejected": -1.196488380432129, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.0853333333333333, |
|
"grad_norm": 7732.44091796875, |
|
"learning_rate": 4.0117925141242174e-05, |
|
"log_odds_chosen": 0.8173832893371582, |
|
"log_odds_ratio": -1.355067253112793, |
|
"logps/chosen": -3.0429067611694336, |
|
"logps/rejected": -3.800440549850464, |
|
"loss": 32.8992, |
|
"nll_loss": 2.336174726486206, |
|
"rewards/accuracies": 0.653124988079071, |
|
"rewards/chosen": -1.5214533805847168, |
|
"rewards/margins": 0.3787666857242584, |
|
"rewards/rejected": -1.900220274925232, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 1.1327407407407408, |
|
"grad_norm": 4230.470703125, |
|
"learning_rate": 3.899004663415084e-05, |
|
"log_odds_chosen": 1.6969165802001953, |
|
"log_odds_ratio": -1.324521541595459, |
|
"logps/chosen": -3.4614341259002686, |
|
"logps/rejected": -5.1248674392700195, |
|
"loss": -9.7343, |
|
"nll_loss": 2.7742135524749756, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.7307170629501343, |
|
"rewards/margins": 0.8317165374755859, |
|
"rewards/rejected": -2.5624337196350098, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.1801481481481482, |
|
"grad_norm": 3288.0673828125, |
|
"learning_rate": 3.781907832058587e-05, |
|
"log_odds_chosen": 1.6559861898422241, |
|
"log_odds_ratio": -1.8177845478057861, |
|
"logps/chosen": -4.351998329162598, |
|
"logps/rejected": -5.973184108734131, |
|
"loss": 18.5195, |
|
"nll_loss": 2.8550593852996826, |
|
"rewards/accuracies": 0.6468750238418579, |
|
"rewards/chosen": -2.175999164581299, |
|
"rewards/margins": 0.8105929493904114, |
|
"rewards/rejected": -2.9865920543670654, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.2275555555555555, |
|
"grad_norm": 30743.359375, |
|
"learning_rate": 3.660862682169282e-05, |
|
"log_odds_chosen": 0.7961785793304443, |
|
"log_odds_ratio": -2.717101573944092, |
|
"logps/chosen": -9.171316146850586, |
|
"logps/rejected": -9.938522338867188, |
|
"loss": 169.7847, |
|
"nll_loss": 5.80072546005249, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -4.585658073425293, |
|
"rewards/margins": 0.38360315561294556, |
|
"rewards/rejected": -4.969261169433594, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.274962962962963, |
|
"grad_norm": 33299.71484375, |
|
"learning_rate": 3.5362420368134356e-05, |
|
"log_odds_chosen": 2.2118756771087646, |
|
"log_odds_ratio": -4.363597869873047, |
|
"logps/chosen": -21.064800262451172, |
|
"logps/rejected": -23.25614356994629, |
|
"loss": 276.0252, |
|
"nll_loss": 12.389029502868652, |
|
"rewards/accuracies": 0.6156250238418579, |
|
"rewards/chosen": -10.532400131225586, |
|
"rewards/margins": 1.0956722497940063, |
|
"rewards/rejected": -11.628071784973145, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 1.3223703703703704, |
|
"grad_norm": 9129.3916015625, |
|
"learning_rate": 3.408429731701635e-05, |
|
"log_odds_chosen": -0.8955130577087402, |
|
"log_odds_ratio": -6.262269020080566, |
|
"logps/chosen": -13.717962265014648, |
|
"logps/rejected": -12.80627155303955, |
|
"loss": 237.298, |
|
"nll_loss": 6.492087364196777, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -6.858981132507324, |
|
"rewards/margins": -0.4558447003364563, |
|
"rewards/rejected": -6.403135776519775, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.3697777777777778, |
|
"grad_norm": 4406.74853515625, |
|
"learning_rate": 3.2778194329621104e-05, |
|
"log_odds_chosen": 2.508333206176758, |
|
"log_odds_ratio": -1.729018211364746, |
|
"logps/chosen": -4.257506847381592, |
|
"logps/rejected": -6.73601770401001, |
|
"loss": -21.3999, |
|
"nll_loss": 2.9763360023498535, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -2.128753423690796, |
|
"rewards/margins": 1.2392549514770508, |
|
"rewards/rejected": -3.368008852005005, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 1.417185185185185, |
|
"grad_norm": 3294.3505859375, |
|
"learning_rate": 3.144813424636031e-05, |
|
"log_odds_chosen": 0.41120272874832153, |
|
"log_odds_ratio": -2.209031581878662, |
|
"logps/chosen": -5.029679298400879, |
|
"logps/rejected": -5.439136981964111, |
|
"loss": 73.5667, |
|
"nll_loss": 3.1203856468200684, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -2.5148396492004395, |
|
"rewards/margins": 0.20472900569438934, |
|
"rewards/rejected": -2.7195684909820557, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.4645925925925927, |
|
"grad_norm": 2727.369140625, |
|
"learning_rate": 3.0098213696293542e-05, |
|
"log_odds_chosen": 1.2927907705307007, |
|
"log_odds_ratio": -1.953330636024475, |
|
"logps/chosen": -5.769057273864746, |
|
"logps/rejected": -7.043553829193115, |
|
"loss": 66.51, |
|
"nll_loss": 3.7798728942871094, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.884528636932373, |
|
"rewards/margins": 0.6372483968734741, |
|
"rewards/rejected": -3.5217769145965576, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 1.512, |
|
"grad_norm": 2540.416748046875, |
|
"learning_rate": 2.8732590479375165e-05, |
|
"log_odds_chosen": 1.328902006149292, |
|
"log_odds_ratio": -1.8827491998672485, |
|
"logps/chosen": -4.703896999359131, |
|
"logps/rejected": -6.023054599761963, |
|
"loss": 35.3034, |
|
"nll_loss": 3.127842664718628, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -2.3519484996795654, |
|
"rewards/margins": 0.6595786809921265, |
|
"rewards/rejected": -3.0115272998809814, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.5594074074074074, |
|
"grad_norm": 2457.10205078125, |
|
"learning_rate": 2.7355470760292956e-05, |
|
"log_odds_chosen": 0.4289638102054596, |
|
"log_odds_ratio": -1.7100918292999268, |
|
"logps/chosen": -3.7828564643859863, |
|
"logps/rejected": -4.20039701461792, |
|
"loss": 72.3472, |
|
"nll_loss": 2.577268123626709, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -1.8914282321929932, |
|
"rewards/margins": 0.20877020061016083, |
|
"rewards/rejected": -2.10019850730896, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 1.6068148148148147, |
|
"grad_norm": 2422.359130859375, |
|
"learning_rate": 2.597109611334169e-05, |
|
"log_odds_chosen": 0.3876183331012726, |
|
"log_odds_ratio": -1.1971830129623413, |
|
"logps/chosen": -2.719515800476074, |
|
"logps/rejected": -3.0870518684387207, |
|
"loss": 49.8125, |
|
"nll_loss": 2.2820823192596436, |
|
"rewards/accuracies": 0.5531250238418579, |
|
"rewards/chosen": -1.359757900238037, |
|
"rewards/margins": 0.1837681084871292, |
|
"rewards/rejected": -1.5435259342193604, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.6542222222222223, |
|
"grad_norm": 601.5615234375, |
|
"learning_rate": 2.458373045823404e-05, |
|
"log_odds_chosen": 0.5851010680198669, |
|
"log_odds_ratio": -0.9167743921279907, |
|
"logps/chosen": -2.2165403366088867, |
|
"logps/rejected": -2.7729554176330566, |
|
"loss": 50.6215, |
|
"nll_loss": 2.1689133644104004, |
|
"rewards/accuracies": 0.5843750238418579, |
|
"rewards/chosen": -1.1082701683044434, |
|
"rewards/margins": 0.2782076299190521, |
|
"rewards/rejected": -1.3864777088165283, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.7016296296296296, |
|
"grad_norm": 463.87890625, |
|
"learning_rate": 2.3197646927086697e-05, |
|
"log_odds_chosen": 0.9187017679214478, |
|
"log_odds_ratio": -1.155874490737915, |
|
"logps/chosen": -2.5220038890838623, |
|
"logps/rejected": -3.416661024093628, |
|
"loss": 9.5969, |
|
"nll_loss": 2.0762057304382324, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -1.2610019445419312, |
|
"rewards/margins": 0.4473283886909485, |
|
"rewards/rejected": -1.708330512046814, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.749037037037037, |
|
"grad_norm": 3273.33935546875, |
|
"learning_rate": 2.1817114703032176e-05, |
|
"log_odds_chosen": 1.7500969171524048, |
|
"log_odds_ratio": -1.275059461593628, |
|
"logps/chosen": -3.396794557571411, |
|
"logps/rejected": -5.1389851570129395, |
|
"loss": -60.3306, |
|
"nll_loss": 2.238281726837158, |
|
"rewards/accuracies": 0.5718749761581421, |
|
"rewards/chosen": -1.6983972787857056, |
|
"rewards/margins": 0.8710952997207642, |
|
"rewards/rejected": -2.5694925785064697, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 1.7964444444444445, |
|
"grad_norm": 1209.9384765625, |
|
"learning_rate": 2.0446385870993467e-05, |
|
"log_odds_chosen": 0.5481420755386353, |
|
"log_odds_ratio": -1.067756175994873, |
|
"logps/chosen": -2.3263049125671387, |
|
"logps/rejected": -2.851926803588867, |
|
"loss": 32.6693, |
|
"nll_loss": 2.037564754486084, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.1631524562835693, |
|
"rewards/margins": 0.26281076669692993, |
|
"rewards/rejected": -1.4259634017944336, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.8438518518518519, |
|
"grad_norm": 2658.087646484375, |
|
"learning_rate": 1.9089682321121834e-05, |
|
"log_odds_chosen": 1.4211509227752686, |
|
"log_odds_ratio": -0.9490826725959778, |
|
"logps/chosen": -2.6055521965026855, |
|
"logps/rejected": -3.9891953468322754, |
|
"loss": -18.5069, |
|
"nll_loss": 2.2056031227111816, |
|
"rewards/accuracies": 0.6031249761581421, |
|
"rewards/chosen": -1.3027760982513428, |
|
"rewards/margins": 0.6918215155601501, |
|
"rewards/rejected": -1.9945976734161377, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.8912592592592592, |
|
"grad_norm": 526.1814575195312, |
|
"learning_rate": 1.775118274523545e-05, |
|
"log_odds_chosen": 0.7827054858207703, |
|
"log_odds_ratio": -2.516727924346924, |
|
"logps/chosen": -4.788647651672363, |
|
"logps/rejected": -5.54547643661499, |
|
"loss": 63.2911, |
|
"nll_loss": 2.443591356277466, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.3943238258361816, |
|
"rewards/margins": 0.37841445207595825, |
|
"rewards/rejected": -2.772738218307495, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.9386666666666668, |
|
"grad_norm": 1198.92138671875, |
|
"learning_rate": 1.643500976631037e-05, |
|
"log_odds_chosen": 0.6334174275398254, |
|
"log_odds_ratio": -1.686605453491211, |
|
"logps/chosen": -3.5008976459503174, |
|
"logps/rejected": -4.093779563903809, |
|
"loss": 57.0721, |
|
"nll_loss": 2.26170015335083, |
|
"rewards/accuracies": 0.565625011920929, |
|
"rewards/chosen": -1.7504488229751587, |
|
"rewards/margins": 0.29644104838371277, |
|
"rewards/rejected": -2.0468897819519043, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 1.986074074074074, |
|
"grad_norm": 360.5643310546875, |
|
"learning_rate": 1.514521724066537e-05, |
|
"log_odds_chosen": 0.5420491099357605, |
|
"log_odds_ratio": -1.02804696559906, |
|
"logps/chosen": -2.0554840564727783, |
|
"logps/rejected": -2.5570180416107178, |
|
"loss": 26.4923, |
|
"nll_loss": 1.8558366298675537, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -1.0277420282363892, |
|
"rewards/margins": 0.25076690316200256, |
|
"rewards/rejected": -1.2785090208053589, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.0284444444444443, |
|
"grad_norm": 10238.283203125, |
|
"learning_rate": 1.3885777771950348e-05, |
|
"log_odds_chosen": 0.5111017823219299, |
|
"log_odds_ratio": -0.8272331953048706, |
|
"logps/chosen": -1.7190930843353271, |
|
"logps/rejected": -2.203624963760376, |
|
"loss": 29.5791, |
|
"nll_loss": 1.8271044492721558, |
|
"rewards/accuracies": 0.5524475574493408, |
|
"rewards/chosen": -0.8595465421676636, |
|
"rewards/margins": 0.24226588010787964, |
|
"rewards/rejected": -1.101812481880188, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 2.075851851851852, |
|
"grad_norm": 2326.157958984375, |
|
"learning_rate": 1.2660570475395683e-05, |
|
"log_odds_chosen": 0.5695599913597107, |
|
"log_odds_ratio": -0.9038535952568054, |
|
"logps/chosen": -1.8567044734954834, |
|
"logps/rejected": -2.3801803588867188, |
|
"loss": 42.0315, |
|
"nll_loss": 1.7851667404174805, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.9283522367477417, |
|
"rewards/margins": 0.26173779368400574, |
|
"rewards/rejected": -1.1900901794433594, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.1232592592592594, |
|
"grad_norm": 1981.0318603515625, |
|
"learning_rate": 1.1473369030008974e-05, |
|
"log_odds_chosen": 0.4965842664241791, |
|
"log_odds_ratio": -0.9662116169929504, |
|
"logps/chosen": -1.9954092502593994, |
|
"logps/rejected": -2.455885648727417, |
|
"loss": 24.2707, |
|
"nll_loss": 1.8060328960418701, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.9977046251296997, |
|
"rewards/margins": 0.2302381992340088, |
|
"rewards/rejected": -1.2279428243637085, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 2.1706666666666665, |
|
"grad_norm": 2371.164794921875, |
|
"learning_rate": 1.0327830055518842e-05, |
|
"log_odds_chosen": 0.8104255795478821, |
|
"log_odds_ratio": -0.9122349619865417, |
|
"logps/chosen": -1.928625464439392, |
|
"logps/rejected": -2.700075626373291, |
|
"loss": 0.4953, |
|
"nll_loss": 1.8198583126068115, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.964312732219696, |
|
"rewards/margins": 0.3857249617576599, |
|
"rewards/rejected": -1.3500378131866455, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.218074074074074, |
|
"grad_norm": 510.7680969238281, |
|
"learning_rate": 9.227481849865235e-06, |
|
"log_odds_chosen": 0.9049872159957886, |
|
"log_odds_ratio": -0.8861944079399109, |
|
"logps/chosen": -2.1324591636657715, |
|
"logps/rejected": -2.9944934844970703, |
|
"loss": 4.731, |
|
"nll_loss": 1.871519684791565, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -1.0662295818328857, |
|
"rewards/margins": 0.4310172200202942, |
|
"rewards/rejected": -1.4972467422485352, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 2.2654814814814817, |
|
"grad_norm": 2696.190185546875, |
|
"learning_rate": 8.175713521924978e-06, |
|
"log_odds_chosen": 0.4651837944984436, |
|
"log_odds_ratio": -1.2659015655517578, |
|
"logps/chosen": -2.480325937271118, |
|
"logps/rejected": -2.9123668670654297, |
|
"loss": 49.3961, |
|
"nll_loss": 1.8576265573501587, |
|
"rewards/accuracies": 0.5718749761581421, |
|
"rewards/chosen": -1.240162968635559, |
|
"rewards/margins": 0.21602031588554382, |
|
"rewards/rejected": -1.4561834335327148, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.3128888888888888, |
|
"grad_norm": 1326.971435546875, |
|
"learning_rate": 7.1757645529443665e-06, |
|
"log_odds_chosen": 1.1274998188018799, |
|
"log_odds_ratio": -0.9266605377197266, |
|
"logps/chosen": -2.0514588356018066, |
|
"logps/rejected": -3.127443790435791, |
|
"loss": -21.5027, |
|
"nll_loss": 1.8539222478866577, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.0257294178009033, |
|
"rewards/margins": 0.5379923582077026, |
|
"rewards/rejected": -1.5637218952178955, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 2.3602962962962963, |
|
"grad_norm": 4184.349609375, |
|
"learning_rate": 6.230714818829733e-06, |
|
"log_odds_chosen": 0.7846413850784302, |
|
"log_odds_ratio": -0.9509506225585938, |
|
"logps/chosen": -2.0468831062316895, |
|
"logps/rejected": -2.7701876163482666, |
|
"loss": 34.0259, |
|
"nll_loss": 1.8205235004425049, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.0234415531158447, |
|
"rewards/margins": 0.36165231466293335, |
|
"rewards/rejected": -1.3850938081741333, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.407703703703704, |
|
"grad_norm": 72418.359375, |
|
"learning_rate": 5.343475104027743e-06, |
|
"log_odds_chosen": 0.7129810452461243, |
|
"log_odds_ratio": -1.4154326915740967, |
|
"logps/chosen": -2.65124249458313, |
|
"logps/rejected": -3.2982776165008545, |
|
"loss": 19.2975, |
|
"nll_loss": 1.873997688293457, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.325621247291565, |
|
"rewards/margins": 0.32351773977279663, |
|
"rewards/rejected": -1.6491388082504272, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 2.455111111111111, |
|
"grad_norm": 6894.69677734375, |
|
"learning_rate": 4.516778136213037e-06, |
|
"log_odds_chosen": 1.0349304676055908, |
|
"log_odds_ratio": -1.0125768184661865, |
|
"logps/chosen": -2.1709659099578857, |
|
"logps/rejected": -3.1571593284606934, |
|
"loss": 5.3407, |
|
"nll_loss": 1.8703029155731201, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -1.0854829549789429, |
|
"rewards/margins": 0.4930966794490814, |
|
"rewards/rejected": -1.5785796642303467, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.5025185185185186, |
|
"grad_norm": 864.9271850585938, |
|
"learning_rate": 3.7531701693965554e-06, |
|
"log_odds_chosen": 0.46679940819740295, |
|
"log_odds_ratio": -1.316929817199707, |
|
"logps/chosen": -2.535597562789917, |
|
"logps/rejected": -2.952941656112671, |
|
"loss": 71.3618, |
|
"nll_loss": 1.8557851314544678, |
|
"rewards/accuracies": 0.590624988079071, |
|
"rewards/chosen": -1.2677987813949585, |
|
"rewards/margins": 0.20867201685905457, |
|
"rewards/rejected": -1.4764708280563354, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 2.549925925925926, |
|
"grad_norm": 3560.4990234375, |
|
"learning_rate": 3.055003141378948e-06, |
|
"log_odds_chosen": 1.4798504114151, |
|
"log_odds_ratio": -1.3126966953277588, |
|
"logps/chosen": -2.62119722366333, |
|
"logps/rejected": -4.038577079772949, |
|
"loss": -37.0258, |
|
"nll_loss": 1.893930435180664, |
|
"rewards/accuracies": 0.621874988079071, |
|
"rewards/chosen": -1.310598611831665, |
|
"rewards/margins": 0.7086899876594543, |
|
"rewards/rejected": -2.0192885398864746, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.5973333333333333, |
|
"grad_norm": 1490.2098388671875, |
|
"learning_rate": 2.424427429704365e-06, |
|
"log_odds_chosen": 0.9033193588256836, |
|
"log_odds_ratio": -1.1426560878753662, |
|
"logps/chosen": -2.5527491569519043, |
|
"logps/rejected": -3.397388458251953, |
|
"loss": 29.2458, |
|
"nll_loss": 1.9291893243789673, |
|
"rewards/accuracies": 0.6156250238418579, |
|
"rewards/chosen": -1.2763745784759521, |
|
"rewards/margins": 0.42231959104537964, |
|
"rewards/rejected": -1.6986942291259766, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 2.644740740740741, |
|
"grad_norm": 10416.1259765625, |
|
"learning_rate": 1.8633852284264508e-06, |
|
"log_odds_chosen": 0.7765440940856934, |
|
"log_odds_ratio": -1.2765750885009766, |
|
"logps/chosen": -2.5416433811187744, |
|
"logps/rejected": -3.276179552078247, |
|
"loss": 16.2592, |
|
"nll_loss": 1.885671615600586, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -1.2708216905593872, |
|
"rewards/margins": 0.3672682046890259, |
|
"rewards/rejected": -1.6380897760391235, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.6921481481481484, |
|
"grad_norm": 987.6349487304688, |
|
"learning_rate": 1.3736045660864034e-06, |
|
"log_odds_chosen": 1.4070631265640259, |
|
"log_odds_ratio": -0.9240388870239258, |
|
"logps/chosen": -2.1706430912017822, |
|
"logps/rejected": -3.544604778289795, |
|
"loss": -18.614, |
|
"nll_loss": 1.9531824588775635, |
|
"rewards/accuracies": 0.5718749761581421, |
|
"rewards/chosen": -1.0853215456008911, |
|
"rewards/margins": 0.6869809031486511, |
|
"rewards/rejected": -1.7723023891448975, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 2.7395555555555555, |
|
"grad_norm": 1785.149658203125, |
|
"learning_rate": 9.565939833279192e-07, |
|
"log_odds_chosen": 1.1818147897720337, |
|
"log_odds_ratio": -1.5213770866394043, |
|
"logps/chosen": -2.8310532569885254, |
|
"logps/rejected": -3.944000244140625, |
|
"loss": -20.8544, |
|
"nll_loss": 1.9278194904327393, |
|
"rewards/accuracies": 0.621874988079071, |
|
"rewards/chosen": -1.4155266284942627, |
|
"rewards/margins": 0.5564736127853394, |
|
"rewards/rejected": -1.9720001220703125, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.786962962962963, |
|
"grad_norm": 3843.544677734375, |
|
"learning_rate": 6.136378865420872e-07, |
|
"log_odds_chosen": 0.7062476277351379, |
|
"log_odds_ratio": -1.199745535850525, |
|
"logps/chosen": -2.419466495513916, |
|
"logps/rejected": -3.0821032524108887, |
|
"loss": 9.6821, |
|
"nll_loss": 1.8187296390533447, |
|
"rewards/accuracies": 0.5843750238418579, |
|
"rewards/chosen": -1.209733247756958, |
|
"rewards/margins": 0.3313182294368744, |
|
"rewards/rejected": -1.5410516262054443, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 2.83437037037037, |
|
"grad_norm": 1985.0562744140625, |
|
"learning_rate": 3.45792591853214e-07, |
|
"log_odds_chosen": 1.1894285678863525, |
|
"log_odds_ratio": -1.35343337059021, |
|
"logps/chosen": -2.6638786792755127, |
|
"logps/rejected": -3.7862019538879395, |
|
"loss": 5.4621, |
|
"nll_loss": 1.938586950302124, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.3319393396377563, |
|
"rewards/margins": 0.5611615777015686, |
|
"rewards/rejected": -1.8931009769439697, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.8817777777777778, |
|
"grad_norm": 1326.342529296875, |
|
"learning_rate": 1.538830716302092e-07, |
|
"log_odds_chosen": 2.1142306327819824, |
|
"log_odds_ratio": -0.9648601412773132, |
|
"logps/chosen": -2.3394112586975098, |
|
"logps/rejected": -4.382277011871338, |
|
"loss": -80.5969, |
|
"nll_loss": 1.8355882167816162, |
|
"rewards/accuracies": 0.659375011920929, |
|
"rewards/chosen": -1.1697056293487549, |
|
"rewards/margins": 1.0214331150054932, |
|
"rewards/rejected": -2.191138505935669, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 2.9291851851851853, |
|
"grad_norm": 3704.05322265625, |
|
"learning_rate": 3.8500413544415025e-08, |
|
"log_odds_chosen": 1.72466242313385, |
|
"log_odds_ratio": -1.052673578262329, |
|
"logps/chosen": -2.2251315116882324, |
|
"logps/rejected": -3.870131731033325, |
|
"loss": -35.5622, |
|
"nll_loss": 1.8819787502288818, |
|
"rewards/accuracies": 0.684374988079071, |
|
"rewards/chosen": -1.1125657558441162, |
|
"rewards/margins": 0.8225000500679016, |
|
"rewards/rejected": -1.9350658655166626, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.9765925925925925, |
|
"grad_norm": 492.51934814453125, |
|
"learning_rate": 0.0, |
|
"log_odds_chosen": 1.213081955909729, |
|
"log_odds_ratio": -1.0762965679168701, |
|
"logps/chosen": -2.213444232940674, |
|
"logps/rejected": -3.3574657440185547, |
|
"loss": -9.4288, |
|
"nll_loss": 1.8365122079849243, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.106722116470337, |
|
"rewards/margins": 0.5720106363296509, |
|
"rewards/rejected": -1.6787328720092773, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 2.9765925925925925, |
|
"step": 315, |
|
"total_flos": 0.0, |
|
"train_loss": 59.38279808892144, |
|
"train_runtime": 9551.9831, |
|
"train_samples_per_second": 2.12, |
|
"train_steps_per_second": 0.033 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 315, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|