|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.5052631578947369, |
|
"eval_steps": 10000, |
|
"global_step": 300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.003368421052631579, |
|
"grad_norm": 1021.0826362249041, |
|
"learning_rate": 8e-09, |
|
"logits/chosen": -6.098826885223389, |
|
"logits/rejected": -8.285457611083984, |
|
"logps/chosen": -1105.89208984375, |
|
"logps/rejected": -1840.68798828125, |
|
"loss": 0.6675, |
|
"nll_loss": 4.724418640136719, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.03404197469353676, |
|
"rewards/margins": 0.10088615119457245, |
|
"rewards/rejected": -0.06684418022632599, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.006736842105263158, |
|
"grad_norm": 775.2893008419502, |
|
"learning_rate": 1.6e-08, |
|
"logits/chosen": -6.404820919036865, |
|
"logits/rejected": -8.010771751403809, |
|
"logps/chosen": -1317.140625, |
|
"logps/rejected": -1826.8807373046875, |
|
"loss": 0.7069, |
|
"nll_loss": 4.841824054718018, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.024968720972537994, |
|
"rewards/margins": 0.0651191771030426, |
|
"rewards/rejected": -0.04015045240521431, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.010105263157894737, |
|
"grad_norm": 1152.6434834761353, |
|
"learning_rate": 2.3999999999999997e-08, |
|
"logits/chosen": -7.275630950927734, |
|
"logits/rejected": -7.599463939666748, |
|
"logps/chosen": -1705.133544921875, |
|
"logps/rejected": -1831.8782958984375, |
|
"loss": 0.7266, |
|
"nll_loss": 4.776337623596191, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.00952148623764515, |
|
"rewards/margins": -0.00301208533346653, |
|
"rewards/rejected": 0.01253356970846653, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.013473684210526317, |
|
"grad_norm": 1340.686158278663, |
|
"learning_rate": 3.2e-08, |
|
"logits/chosen": -7.0847063064575195, |
|
"logits/rejected": -7.663762092590332, |
|
"logps/chosen": -1612.9189453125, |
|
"logps/rejected": -1863.4921875, |
|
"loss": 0.6938, |
|
"nll_loss": 4.129978656768799, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": 0.05372562259435654, |
|
"rewards/margins": -0.047937583178281784, |
|
"rewards/rejected": 0.10166320949792862, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.016842105263157894, |
|
"grad_norm": 830.1962870309648, |
|
"learning_rate": 4e-08, |
|
"logits/chosen": -7.328038692474365, |
|
"logits/rejected": -7.968114852905273, |
|
"logps/chosen": -1624.5255126953125, |
|
"logps/rejected": -1856.871826171875, |
|
"loss": 0.7296, |
|
"nll_loss": 4.658226490020752, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": 0.03902282938361168, |
|
"rewards/margins": -0.01064453274011612, |
|
"rewards/rejected": 0.0496673583984375, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.020210526315789474, |
|
"grad_norm": 663.540527209906, |
|
"learning_rate": 4.799999999999999e-08, |
|
"logits/chosen": -7.396481513977051, |
|
"logits/rejected": -7.322075843811035, |
|
"logps/chosen": -1599.8494873046875, |
|
"logps/rejected": -1831.381103515625, |
|
"loss": 0.6782, |
|
"nll_loss": 4.17366361618042, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.10295410454273224, |
|
"rewards/margins": 0.06537018716335297, |
|
"rewards/rejected": -0.168324276804924, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.023578947368421053, |
|
"grad_norm": 1099.037021669425, |
|
"learning_rate": 5.6000000000000005e-08, |
|
"logits/chosen": -7.1363630294799805, |
|
"logits/rejected": -7.741977691650391, |
|
"logps/chosen": -1470.75537109375, |
|
"logps/rejected": -1858.2095947265625, |
|
"loss": 0.6512, |
|
"nll_loss": 4.299507141113281, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.2573639154434204, |
|
"rewards/margins": 0.10828553140163422, |
|
"rewards/rejected": -0.36564940214157104, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.026947368421052633, |
|
"grad_norm": 566.7332650815549, |
|
"learning_rate": 6.4e-08, |
|
"logits/chosen": -6.818307876586914, |
|
"logits/rejected": -7.9370036125183105, |
|
"logps/chosen": -1442.171875, |
|
"logps/rejected": -1850.56298828125, |
|
"loss": 0.6185, |
|
"nll_loss": 4.646788597106934, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.3849685788154602, |
|
"rewards/margins": 0.29212191700935364, |
|
"rewards/rejected": -0.6770904660224915, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.03031578947368421, |
|
"grad_norm": 743.3397608092816, |
|
"learning_rate": 7.2e-08, |
|
"logits/chosen": -7.3267436027526855, |
|
"logits/rejected": -7.727769374847412, |
|
"logps/chosen": -1639.974853515625, |
|
"logps/rejected": -1839.6697998046875, |
|
"loss": 0.5848, |
|
"nll_loss": 4.681390285491943, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.806835949420929, |
|
"rewards/margins": 0.1156768724322319, |
|
"rewards/rejected": -0.9225128889083862, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.03368421052631579, |
|
"grad_norm": 555.7381352759589, |
|
"learning_rate": 8e-08, |
|
"logits/chosen": -6.984310150146484, |
|
"logits/rejected": -7.973010540008545, |
|
"logps/chosen": -1658.0970458984375, |
|
"logps/rejected": -1865.5997314453125, |
|
"loss": 0.5718, |
|
"nll_loss": 4.629044532775879, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.5665146112442017, |
|
"rewards/margins": 0.23055119812488556, |
|
"rewards/rejected": -1.7970658540725708, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03705263157894737, |
|
"grad_norm": 537.2764069824767, |
|
"learning_rate": 8.8e-08, |
|
"logits/chosen": -7.248239040374756, |
|
"logits/rejected": -7.833005428314209, |
|
"logps/chosen": -1698.72998046875, |
|
"logps/rejected": -1851.3116455078125, |
|
"loss": 0.5141, |
|
"nll_loss": 4.778887748718262, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.040907382965088, |
|
"rewards/margins": 0.4460693299770355, |
|
"rewards/rejected": -2.4869766235351562, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.04042105263157895, |
|
"grad_norm": 427.6716943228766, |
|
"learning_rate": 9.599999999999999e-08, |
|
"logits/chosen": -6.897974967956543, |
|
"logits/rejected": -8.095926284790039, |
|
"logps/chosen": -1618.291748046875, |
|
"logps/rejected": -1899.2071533203125, |
|
"loss": 0.5628, |
|
"nll_loss": 4.640469074249268, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.707034111022949, |
|
"rewards/margins": 0.4983123540878296, |
|
"rewards/rejected": -3.2053468227386475, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.043789473684210524, |
|
"grad_norm": 502.48124965017354, |
|
"learning_rate": 1.04e-07, |
|
"logits/chosen": -6.930876731872559, |
|
"logits/rejected": -7.992618083953857, |
|
"logps/chosen": -1484.425048828125, |
|
"logps/rejected": -1923.680908203125, |
|
"loss": 0.5462, |
|
"nll_loss": 4.688654899597168, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -4.368743419647217, |
|
"rewards/margins": 1.1556202173233032, |
|
"rewards/rejected": -5.524363994598389, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.04715789473684211, |
|
"grad_norm": 383.7692991889789, |
|
"learning_rate": 1.1200000000000001e-07, |
|
"logits/chosen": -7.199338912963867, |
|
"logits/rejected": -7.51033353805542, |
|
"logps/chosen": -1769.854248046875, |
|
"logps/rejected": -1886.64501953125, |
|
"loss": 0.4697, |
|
"nll_loss": 4.6844096183776855, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -6.4538960456848145, |
|
"rewards/margins": 0.6539719104766846, |
|
"rewards/rejected": -7.107868194580078, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.05052631578947368, |
|
"grad_norm": 481.75715160752935, |
|
"learning_rate": 1.2e-07, |
|
"logits/chosen": -6.897581577301025, |
|
"logits/rejected": -7.895030498504639, |
|
"logps/chosen": -1615.928466796875, |
|
"logps/rejected": -1956.94287109375, |
|
"loss": 0.4974, |
|
"nll_loss": 4.612817764282227, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -6.480710029602051, |
|
"rewards/margins": 1.3521087169647217, |
|
"rewards/rejected": -7.832818984985352, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.053894736842105266, |
|
"grad_norm": 433.1144839993232, |
|
"learning_rate": 1.28e-07, |
|
"logits/chosen": -7.140986442565918, |
|
"logits/rejected": -7.891180515289307, |
|
"logps/chosen": -1722.1290283203125, |
|
"logps/rejected": -1961.19140625, |
|
"loss": 0.3723, |
|
"nll_loss": 4.612584590911865, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -7.3035502433776855, |
|
"rewards/margins": 1.1011128425598145, |
|
"rewards/rejected": -8.4046630859375, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.05726315789473684, |
|
"grad_norm": 460.5421567294356, |
|
"learning_rate": 1.36e-07, |
|
"logits/chosen": -5.8924641609191895, |
|
"logits/rejected": -7.968005657196045, |
|
"logps/chosen": -1063.8079833984375, |
|
"logps/rejected": -1961.5302734375, |
|
"loss": 0.3717, |
|
"nll_loss": 4.887433052062988, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -4.523468017578125, |
|
"rewards/margins": 4.804460525512695, |
|
"rewards/rejected": -9.327927589416504, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.06063157894736842, |
|
"grad_norm": 382.3217926367787, |
|
"learning_rate": 1.44e-07, |
|
"logits/chosen": -7.277335166931152, |
|
"logits/rejected": -8.038162231445312, |
|
"logps/chosen": -1612.2705078125, |
|
"logps/rejected": -1970.5556640625, |
|
"loss": 0.4893, |
|
"nll_loss": 4.758667469024658, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -8.101587295532227, |
|
"rewards/margins": 1.8269003629684448, |
|
"rewards/rejected": -9.928487777709961, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.064, |
|
"grad_norm": 327.81609268765317, |
|
"learning_rate": 1.5199999999999998e-07, |
|
"logits/chosen": -6.232708930969238, |
|
"logits/rejected": -7.9314703941345215, |
|
"logps/chosen": -1311.58984375, |
|
"logps/rejected": -1898.37353515625, |
|
"loss": 0.4384, |
|
"nll_loss": 4.995038986206055, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -7.171569347381592, |
|
"rewards/margins": 4.5418925285339355, |
|
"rewards/rejected": -11.713461875915527, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.06736842105263158, |
|
"grad_norm": 411.60155946011673, |
|
"learning_rate": 1.6e-07, |
|
"logits/chosen": -7.439314365386963, |
|
"logits/rejected": -7.884708404541016, |
|
"logps/chosen": -1801.27978515625, |
|
"logps/rejected": -2006.94921875, |
|
"loss": 0.3972, |
|
"nll_loss": 4.832527160644531, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -12.396756172180176, |
|
"rewards/margins": 1.0853164196014404, |
|
"rewards/rejected": -13.482072830200195, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.07073684210526315, |
|
"grad_norm": 353.27396194384323, |
|
"learning_rate": 1.68e-07, |
|
"logits/chosen": -6.52924108505249, |
|
"logits/rejected": -7.981770038604736, |
|
"logps/chosen": -1366.396484375, |
|
"logps/rejected": -2025.9642333984375, |
|
"loss": 0.5679, |
|
"nll_loss": 4.827330112457275, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -9.584479331970215, |
|
"rewards/margins": 5.323737621307373, |
|
"rewards/rejected": -14.90821647644043, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.07410526315789474, |
|
"grad_norm": 259.81954374623166, |
|
"learning_rate": 1.76e-07, |
|
"logits/chosen": -7.801054000854492, |
|
"logits/rejected": -7.639019966125488, |
|
"logps/chosen": -1827.47314453125, |
|
"logps/rejected": -1996.3192138671875, |
|
"loss": 0.3678, |
|
"nll_loss": 4.5667243003845215, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -14.310271263122559, |
|
"rewards/margins": 2.2248902320861816, |
|
"rewards/rejected": -16.5351619720459, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.07747368421052632, |
|
"grad_norm": 406.5448161027269, |
|
"learning_rate": 1.84e-07, |
|
"logits/chosen": -8.297408103942871, |
|
"logits/rejected": -7.499330043792725, |
|
"logps/chosen": -1998.0411376953125, |
|
"logps/rejected": -1991.69921875, |
|
"loss": 0.3749, |
|
"nll_loss": 4.313129425048828, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -16.31363296508789, |
|
"rewards/margins": 0.6383074522018433, |
|
"rewards/rejected": -16.951940536499023, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.0808421052631579, |
|
"grad_norm": 462.82095282625227, |
|
"learning_rate": 1.9199999999999997e-07, |
|
"logits/chosen": -7.885348320007324, |
|
"logits/rejected": -8.057937622070312, |
|
"logps/chosen": -1869.8287353515625, |
|
"logps/rejected": -1985.5797119140625, |
|
"loss": 0.4191, |
|
"nll_loss": 4.887444019317627, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -15.957789421081543, |
|
"rewards/margins": 1.4249342679977417, |
|
"rewards/rejected": -17.382722854614258, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.08421052631578947, |
|
"grad_norm": 612.0204500259774, |
|
"learning_rate": 2e-07, |
|
"logits/chosen": -5.61109733581543, |
|
"logits/rejected": -7.90653657913208, |
|
"logps/chosen": -1070.6937255859375, |
|
"logps/rejected": -2036.2265625, |
|
"loss": 0.3329, |
|
"nll_loss": 5.1021575927734375, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.67391586303711, |
|
"rewards/margins": 8.88609504699707, |
|
"rewards/rejected": -17.560009002685547, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.08757894736842105, |
|
"grad_norm": 450.4075931864382, |
|
"learning_rate": 1.9999330539070613e-07, |
|
"logits/chosen": -7.206405162811279, |
|
"logits/rejected": -8.117046356201172, |
|
"logps/chosen": -1794.2418212890625, |
|
"logps/rejected": -2010.4686279296875, |
|
"loss": 0.3961, |
|
"nll_loss": 5.458052158355713, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -15.731528282165527, |
|
"rewards/margins": 1.6168347597122192, |
|
"rewards/rejected": -17.348363876342773, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.09094736842105264, |
|
"grad_norm": 521.1752044658302, |
|
"learning_rate": 1.9997322245918037e-07, |
|
"logits/chosen": -7.272748947143555, |
|
"logits/rejected": -8.118557929992676, |
|
"logps/chosen": -1745.1031494140625, |
|
"logps/rejected": -2038.525390625, |
|
"loss": 0.4714, |
|
"nll_loss": 5.170174598693848, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -15.204635620117188, |
|
"rewards/margins": 2.791910409927368, |
|
"rewards/rejected": -17.996545791625977, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.09431578947368421, |
|
"grad_norm": 217.38647360461505, |
|
"learning_rate": 1.9993975389437037e-07, |
|
"logits/chosen": -6.186915397644043, |
|
"logits/rejected": -8.047518730163574, |
|
"logps/chosen": -1388.68603515625, |
|
"logps/rejected": -2048.745361328125, |
|
"loss": 0.2578, |
|
"nll_loss": 5.35891056060791, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -11.742466926574707, |
|
"rewards/margins": 7.281023025512695, |
|
"rewards/rejected": -19.023488998413086, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.09768421052631579, |
|
"grad_norm": 405.6486896009896, |
|
"learning_rate": 1.9989290417745539e-07, |
|
"logits/chosen": -6.964948654174805, |
|
"logits/rejected": -8.299407005310059, |
|
"logps/chosen": -1703.4879150390625, |
|
"logps/rejected": -2057.886962890625, |
|
"loss": 0.4002, |
|
"nll_loss": 5.260710716247559, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -14.706157684326172, |
|
"rewards/margins": 3.9658584594726562, |
|
"rewards/rejected": -18.672016143798828, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.10105263157894737, |
|
"grad_norm": 475.2633809268288, |
|
"learning_rate": 1.9983267958124644e-07, |
|
"logits/chosen": -6.648141384124756, |
|
"logits/rejected": -7.772933006286621, |
|
"logps/chosen": -1497.2598876953125, |
|
"logps/rejected": -2023.4912109375, |
|
"loss": 0.474, |
|
"nll_loss": 5.289524555206299, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -13.703365325927734, |
|
"rewards/margins": 5.52022647857666, |
|
"rewards/rejected": -19.22359275817871, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.10442105263157894, |
|
"grad_norm": 295.526199134696, |
|
"learning_rate": 1.9975908816934638e-07, |
|
"logits/chosen": -7.280607223510742, |
|
"logits/rejected": -7.836946964263916, |
|
"logps/chosen": -1767.7076416015625, |
|
"logps/rejected": -2027.5374755859375, |
|
"loss": 0.3311, |
|
"nll_loss": 4.935500144958496, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -16.52497100830078, |
|
"rewards/margins": 2.8025190830230713, |
|
"rewards/rejected": -19.327489852905273, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.10778947368421053, |
|
"grad_norm": 451.01621367751295, |
|
"learning_rate": 1.9967213979507017e-07, |
|
"logits/chosen": -7.747291564941406, |
|
"logits/rejected": -7.578754901885986, |
|
"logps/chosen": -1903.654052734375, |
|
"logps/rejected": -1995.362060546875, |
|
"loss": 0.2961, |
|
"nll_loss": 4.874569416046143, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -18.415132522583008, |
|
"rewards/margins": 1.7832107543945312, |
|
"rewards/rejected": -20.19834327697754, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.11115789473684211, |
|
"grad_norm": 391.98836079659304, |
|
"learning_rate": 1.995718461001257e-07, |
|
"logits/chosen": -6.887873649597168, |
|
"logits/rejected": -7.892035484313965, |
|
"logps/chosen": -1613.168701171875, |
|
"logps/rejected": -2109.185546875, |
|
"loss": 0.2514, |
|
"nll_loss": 4.979562282562256, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -15.48417854309082, |
|
"rewards/margins": 5.469597816467285, |
|
"rewards/rejected": -20.953777313232422, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.11452631578947368, |
|
"grad_norm": 357.9128298203349, |
|
"learning_rate": 1.9945822051305505e-07, |
|
"logits/chosen": -6.562501430511475, |
|
"logits/rejected": -8.093667984008789, |
|
"logps/chosen": -1624.415771484375, |
|
"logps/rejected": -2102.240966796875, |
|
"loss": 0.2549, |
|
"nll_loss": 5.153851509094238, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -15.476547241210938, |
|
"rewards/margins": 6.269686222076416, |
|
"rewards/rejected": -21.746234893798828, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.11789473684210526, |
|
"grad_norm": 324.2814050905479, |
|
"learning_rate": 1.9933127824743643e-07, |
|
"logits/chosen": -7.621331214904785, |
|
"logits/rejected": -7.522034645080566, |
|
"logps/chosen": -1808.6651611328125, |
|
"logps/rejected": -2016.3822021484375, |
|
"loss": 0.1968, |
|
"nll_loss": 5.224420070648193, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -17.95054817199707, |
|
"rewards/margins": 3.378469944000244, |
|
"rewards/rejected": -21.329015731811523, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.12126315789473684, |
|
"grad_norm": 507.77270997957345, |
|
"learning_rate": 1.9919103629984725e-07, |
|
"logits/chosen": -7.971831321716309, |
|
"logits/rejected": -7.918793201446533, |
|
"logps/chosen": -1930.361083984375, |
|
"logps/rejected": -2080.251953125, |
|
"loss": 0.3995, |
|
"nll_loss": 4.725856781005859, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -19.6121826171875, |
|
"rewards/margins": 1.6514708995819092, |
|
"rewards/rejected": -21.263654708862305, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.12463157894736843, |
|
"grad_norm": 405.2199441352807, |
|
"learning_rate": 1.9903751344758845e-07, |
|
"logits/chosen": -7.292391300201416, |
|
"logits/rejected": -8.180887222290039, |
|
"logps/chosen": -1775.805908203125, |
|
"logps/rejected": -2013.7442626953125, |
|
"loss": 0.4428, |
|
"nll_loss": 5.191011905670166, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -17.799779891967773, |
|
"rewards/margins": 3.329552173614502, |
|
"rewards/rejected": -21.129331588745117, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.128, |
|
"grad_norm": 240.9685453409843, |
|
"learning_rate": 1.9887073024617028e-07, |
|
"logits/chosen": -6.470153331756592, |
|
"logits/rejected": -7.746395587921143, |
|
"logps/chosen": -1489.3653564453125, |
|
"logps/rejected": -2056.020751953125, |
|
"loss": 0.2493, |
|
"nll_loss": 5.174279689788818, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -15.556097030639648, |
|
"rewards/margins": 6.754159927368164, |
|
"rewards/rejected": -22.310256958007812, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.13136842105263158, |
|
"grad_norm": 493.99746962914696, |
|
"learning_rate": 1.9869070902656017e-07, |
|
"logits/chosen": -5.889953136444092, |
|
"logits/rejected": -8.0224027633667, |
|
"logps/chosen": -1382.2691650390625, |
|
"logps/rejected": -2080.0107421875, |
|
"loss": 0.485, |
|
"nll_loss": 5.714942932128906, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -14.33112907409668, |
|
"rewards/margins": 7.287192344665527, |
|
"rewards/rejected": -21.61832046508789, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.13473684210526315, |
|
"grad_norm": 137.24435752290512, |
|
"learning_rate": 1.984974738921927e-07, |
|
"logits/chosen": -6.03630256652832, |
|
"logits/rejected": -8.135429382324219, |
|
"logps/chosen": -1310.54833984375, |
|
"logps/rejected": -2067.27490234375, |
|
"loss": 0.2926, |
|
"nll_loss": 5.408031463623047, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -13.000334739685059, |
|
"rewards/margins": 9.575756072998047, |
|
"rewards/rejected": -22.57608985900879, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.13810526315789473, |
|
"grad_norm": 332.6447170969351, |
|
"learning_rate": 1.982910507157424e-07, |
|
"logits/chosen": -7.524165630340576, |
|
"logits/rejected": -7.871054649353027, |
|
"logps/chosen": -1923.2384033203125, |
|
"logps/rejected": -2104.744384765625, |
|
"loss": 0.4062, |
|
"nll_loss": 4.740773677825928, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -20.172786712646484, |
|
"rewards/margins": 2.7083492279052734, |
|
"rewards/rejected": -22.881135940551758, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.1414736842105263, |
|
"grad_norm": 434.7785715165825, |
|
"learning_rate": 1.9807146713565955e-07, |
|
"logits/chosen": -7.366245746612549, |
|
"logits/rejected": -7.99586296081543, |
|
"logps/chosen": -1834.388916015625, |
|
"logps/rejected": -2039.752685546875, |
|
"loss": 0.4183, |
|
"nll_loss": 5.197725296020508, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -19.303865432739258, |
|
"rewards/margins": 3.0796072483062744, |
|
"rewards/rejected": -22.38347053527832, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.14484210526315788, |
|
"grad_norm": 394.456469398082, |
|
"learning_rate": 1.9783875255246973e-07, |
|
"logits/chosen": -7.369488716125488, |
|
"logits/rejected": -7.899564743041992, |
|
"logps/chosen": -2014.7353515625, |
|
"logps/rejected": -2079.622802734375, |
|
"loss": 0.435, |
|
"nll_loss": 5.506885051727295, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -22.415340423583984, |
|
"rewards/margins": 1.3391568660736084, |
|
"rewards/rejected": -23.75449562072754, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.1482105263157895, |
|
"grad_norm": 474.52038286042045, |
|
"learning_rate": 1.9759293812483712e-07, |
|
"logits/chosen": -6.315229892730713, |
|
"logits/rejected": -7.918384075164795, |
|
"logps/chosen": -1505.7802734375, |
|
"logps/rejected": -2153.10986328125, |
|
"loss": 0.518, |
|
"nll_loss": 5.55679988861084, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -16.11681365966797, |
|
"rewards/margins": 8.099884033203125, |
|
"rewards/rejected": -24.216697692871094, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.15157894736842106, |
|
"grad_norm": 463.6020653534667, |
|
"learning_rate": 1.973340567653928e-07, |
|
"logits/chosen": -7.8903656005859375, |
|
"logits/rejected": -7.871662616729736, |
|
"logps/chosen": -2007.9490966796875, |
|
"logps/rejected": -2079.7353515625, |
|
"loss": 0.3285, |
|
"nll_loss": 4.673924922943115, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -21.700756072998047, |
|
"rewards/margins": 2.0794034004211426, |
|
"rewards/rejected": -23.78015899658203, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.15494736842105264, |
|
"grad_norm": 469.1854810466752, |
|
"learning_rate": 1.9706214313632782e-07, |
|
"logits/chosen": -7.816376686096191, |
|
"logits/rejected": -7.4226765632629395, |
|
"logps/chosen": -1935.4766845703125, |
|
"logps/rejected": -2112.067138671875, |
|
"loss": 0.4765, |
|
"nll_loss": 4.788717746734619, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -21.18560791015625, |
|
"rewards/margins": 3.7070724964141846, |
|
"rewards/rejected": -24.892681121826172, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.15831578947368422, |
|
"grad_norm": 203.68587948112653, |
|
"learning_rate": 1.9677723364475236e-07, |
|
"logits/chosen": -7.06157112121582, |
|
"logits/rejected": -8.203742027282715, |
|
"logps/chosen": -1748.9493408203125, |
|
"logps/rejected": -2103.68212890625, |
|
"loss": 0.2824, |
|
"nll_loss": 5.13286828994751, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -18.6251220703125, |
|
"rewards/margins": 5.7565789222717285, |
|
"rewards/rejected": -24.381698608398438, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.1616842105263158, |
|
"grad_norm": 454.0753654405841, |
|
"learning_rate": 1.9647936643782106e-07, |
|
"logits/chosen": -7.063906669616699, |
|
"logits/rejected": -7.930819511413574, |
|
"logps/chosen": -1851.276611328125, |
|
"logps/rejected": -2139.397216796875, |
|
"loss": 0.4842, |
|
"nll_loss": 4.915232181549072, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -20.60812759399414, |
|
"rewards/margins": 3.6515331268310547, |
|
"rewards/rejected": -24.259662628173828, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.16505263157894737, |
|
"grad_norm": 290.76223910181324, |
|
"learning_rate": 1.961685813976253e-07, |
|
"logits/chosen": -7.524170875549316, |
|
"logits/rejected": -8.006389617919922, |
|
"logps/chosen": -1945.13623046875, |
|
"logps/rejected": -2088.755126953125, |
|
"loss": 0.3862, |
|
"nll_loss": 4.894111633300781, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -22.57034683227539, |
|
"rewards/margins": 2.1726748943328857, |
|
"rewards/rejected": -24.74302101135254, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.16842105263157894, |
|
"grad_norm": 245.46799212168173, |
|
"learning_rate": 1.9584492013585354e-07, |
|
"logits/chosen": -7.0596513748168945, |
|
"logits/rejected": -7.93139123916626, |
|
"logps/chosen": -1557.81787109375, |
|
"logps/rejected": -2036.077880859375, |
|
"loss": 0.192, |
|
"nll_loss": 5.297558784484863, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -16.414541244506836, |
|
"rewards/margins": 8.017242431640625, |
|
"rewards/rejected": -24.431781768798828, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.17178947368421052, |
|
"grad_norm": 404.1873598654466, |
|
"learning_rate": 1.955084259882195e-07, |
|
"logits/chosen": -5.742030143737793, |
|
"logits/rejected": -7.751094818115234, |
|
"logps/chosen": -1235.095947265625, |
|
"logps/rejected": -2130.07080078125, |
|
"loss": 0.3401, |
|
"nll_loss": 5.310590744018555, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -13.479273796081543, |
|
"rewards/margins": 12.036299705505371, |
|
"rewards/rejected": -25.515575408935547, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.1751578947368421, |
|
"grad_norm": 116.3442830030767, |
|
"learning_rate": 1.9515914400866017e-07, |
|
"logits/chosen": -7.13689661026001, |
|
"logits/rejected": -7.9674553871154785, |
|
"logps/chosen": -1809.794677734375, |
|
"logps/rejected": -2110.8828125, |
|
"loss": 0.2132, |
|
"nll_loss": 5.315598487854004, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -21.165102005004883, |
|
"rewards/margins": 4.648441791534424, |
|
"rewards/rejected": -25.81354522705078, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.17852631578947367, |
|
"grad_norm": 290.204628775111, |
|
"learning_rate": 1.9479712096330334e-07, |
|
"logits/chosen": -7.378734111785889, |
|
"logits/rejected": -7.894670486450195, |
|
"logps/chosen": -1803.5947265625, |
|
"logps/rejected": -2066.1181640625, |
|
"loss": 0.3482, |
|
"nll_loss": 5.543490409851074, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -20.527360916137695, |
|
"rewards/margins": 5.067646026611328, |
|
"rewards/rejected": -25.595006942749023, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.18189473684210528, |
|
"grad_norm": 361.78273867060045, |
|
"learning_rate": 1.944224053242058e-07, |
|
"logits/chosen": -7.142321586608887, |
|
"logits/rejected": -7.959826946258545, |
|
"logps/chosen": -1579.7891845703125, |
|
"logps/rejected": -2068.02197265625, |
|
"loss": 0.2804, |
|
"nll_loss": 4.833759307861328, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -18.271129608154297, |
|
"rewards/margins": 7.227394104003906, |
|
"rewards/rejected": -25.498523712158203, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.18526315789473685, |
|
"grad_norm": 235.0435714261294, |
|
"learning_rate": 1.9403504726286367e-07, |
|
"logits/chosen": -5.593753337860107, |
|
"logits/rejected": -8.2781400680542, |
|
"logps/chosen": -1415.476806640625, |
|
"logps/rejected": -2148.1337890625, |
|
"loss": 0.259, |
|
"nll_loss": 4.7400031089782715, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -16.4371337890625, |
|
"rewards/margins": 9.744308471679688, |
|
"rewards/rejected": -26.18144416809082, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.18863157894736843, |
|
"grad_norm": 347.2912929912007, |
|
"learning_rate": 1.9363509864349436e-07, |
|
"logits/chosen": -6.396492004394531, |
|
"logits/rejected": -8.129210472106934, |
|
"logps/chosen": -1428.15185546875, |
|
"logps/rejected": -2139.29931640625, |
|
"loss": 0.1951, |
|
"nll_loss": 5.208731174468994, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -17.481124877929688, |
|
"rewards/margins": 9.886039733886719, |
|
"rewards/rejected": -27.367164611816406, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.192, |
|
"grad_norm": 443.2420860827636, |
|
"learning_rate": 1.9322261301609284e-07, |
|
"logits/chosen": -7.289253234863281, |
|
"logits/rejected": -8.456584930419922, |
|
"logps/chosen": -1952.02685546875, |
|
"logps/rejected": -2108.48876953125, |
|
"loss": 0.4061, |
|
"nll_loss": 5.445844650268555, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -24.95429229736328, |
|
"rewards/margins": 2.7193362712860107, |
|
"rewards/rejected": -27.673629760742188, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.19536842105263158, |
|
"grad_norm": 322.7074429727334, |
|
"learning_rate": 1.927976456092614e-07, |
|
"logits/chosen": -7.254305362701416, |
|
"logits/rejected": -8.170782089233398, |
|
"logps/chosen": -1910.8201904296875, |
|
"logps/rejected": -2137.612548828125, |
|
"loss": 0.3789, |
|
"nll_loss": 5.32393217086792, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -23.50674819946289, |
|
"rewards/margins": 3.4448273181915283, |
|
"rewards/rejected": -26.951576232910156, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.19873684210526316, |
|
"grad_norm": 230.28383471544416, |
|
"learning_rate": 1.9236025332281506e-07, |
|
"logits/chosen": -6.197360515594482, |
|
"logits/rejected": -8.328180313110352, |
|
"logps/chosen": -1564.211669921875, |
|
"logps/rejected": -2164.58056640625, |
|
"loss": 0.1884, |
|
"nll_loss": 5.403533458709717, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -18.896282196044922, |
|
"rewards/margins": 8.365097999572754, |
|
"rewards/rejected": -27.261381149291992, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.20210526315789473, |
|
"grad_norm": 375.89177298791736, |
|
"learning_rate": 1.9191049472016313e-07, |
|
"logits/chosen": -7.113523006439209, |
|
"logits/rejected": -7.964775562286377, |
|
"logps/chosen": -1672.96923828125, |
|
"logps/rejected": -2136.83642578125, |
|
"loss": 0.2439, |
|
"nll_loss": 4.739628791809082, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -20.55191993713379, |
|
"rewards/margins": 5.649931907653809, |
|
"rewards/rejected": -26.20184898376465, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.2054736842105263, |
|
"grad_norm": 323.0978953815701, |
|
"learning_rate": 1.9144843002046805e-07, |
|
"logits/chosen": -6.809453964233398, |
|
"logits/rejected": -8.00958251953125, |
|
"logps/chosen": -1658.2227783203125, |
|
"logps/rejected": -2084.864990234375, |
|
"loss": 0.5958, |
|
"nll_loss": 5.457843780517578, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -20.677547454833984, |
|
"rewards/margins": 6.124557018280029, |
|
"rewards/rejected": -26.802101135253906, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.20884210526315788, |
|
"grad_norm": 258.6436979707799, |
|
"learning_rate": 1.9097412109058243e-07, |
|
"logits/chosen": -7.692294597625732, |
|
"logits/rejected": -8.052206993103027, |
|
"logps/chosen": -1873.5733642578125, |
|
"logps/rejected": -2030.232177734375, |
|
"loss": 0.4711, |
|
"nll_loss": 5.26876163482666, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -22.60447120666504, |
|
"rewards/margins": 3.8998751640319824, |
|
"rewards/rejected": -26.50434684753418, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.21221052631578946, |
|
"grad_norm": 372.3923082583742, |
|
"learning_rate": 1.9048763143676575e-07, |
|
"logits/chosen": -7.407994747161865, |
|
"logits/rejected": -7.782034397125244, |
|
"logps/chosen": -1862.9501953125, |
|
"logps/rejected": -2138.9091796875, |
|
"loss": 0.1584, |
|
"nll_loss": 5.26196813583374, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -21.54726791381836, |
|
"rewards/margins": 5.700539588928223, |
|
"rewards/rejected": -27.247806549072266, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.21557894736842106, |
|
"grad_norm": 176.97835981766247, |
|
"learning_rate": 1.8998902619618113e-07, |
|
"logits/chosen": -6.254648208618164, |
|
"logits/rejected": -8.410988807678223, |
|
"logps/chosen": -1320.6219482421875, |
|
"logps/rejected": -2096.9287109375, |
|
"loss": 0.2115, |
|
"nll_loss": 5.321068286895752, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -15.538348197937012, |
|
"rewards/margins": 11.237432479858398, |
|
"rewards/rejected": -26.775779724121094, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.21894736842105264, |
|
"grad_norm": 547.3377613503384, |
|
"learning_rate": 1.8947837212817413e-07, |
|
"logits/chosen": -6.936631202697754, |
|
"logits/rejected": -8.201074600219727, |
|
"logps/chosen": -1785.872314453125, |
|
"logps/rejected": -2129.34375, |
|
"loss": 0.3615, |
|
"nll_loss": 5.191522598266602, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -21.109575271606445, |
|
"rewards/margins": 5.265467643737793, |
|
"rewards/rejected": -26.37504005432129, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.22231578947368422, |
|
"grad_norm": 353.0941136935024, |
|
"learning_rate": 1.8895573760533412e-07, |
|
"logits/chosen": -7.306629180908203, |
|
"logits/rejected": -7.669928073883057, |
|
"logps/chosen": -1730.935546875, |
|
"logps/rejected": -2167.73095703125, |
|
"loss": 0.293, |
|
"nll_loss": 4.846038818359375, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -20.911489486694336, |
|
"rewards/margins": 6.074740409851074, |
|
"rewards/rejected": -26.986228942871094, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.2256842105263158, |
|
"grad_norm": 212.13304713669675, |
|
"learning_rate": 1.884211926043398e-07, |
|
"logits/chosen": -6.941915035247803, |
|
"logits/rejected": -8.13010311126709, |
|
"logps/chosen": -1714.9908447265625, |
|
"logps/rejected": -2106.353515625, |
|
"loss": 0.1782, |
|
"nll_loss": 5.203182220458984, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -20.565366744995117, |
|
"rewards/margins": 5.9251708984375, |
|
"rewards/rejected": -26.490537643432617, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.22905263157894737, |
|
"grad_norm": 279.1889760986275, |
|
"learning_rate": 1.8787480869658978e-07, |
|
"logits/chosen": -7.706783294677734, |
|
"logits/rejected": -7.740918159484863, |
|
"logps/chosen": -2039.910888671875, |
|
"logps/rejected": -2072.723876953125, |
|
"loss": 0.2864, |
|
"nll_loss": 4.911942958831787, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -23.88863754272461, |
|
"rewards/margins": 1.29465651512146, |
|
"rewards/rejected": -25.183292388916016, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.23242105263157894, |
|
"grad_norm": 550.1062681428017, |
|
"learning_rate": 1.8731665903861985e-07, |
|
"logits/chosen": -6.753871917724609, |
|
"logits/rejected": -8.267376899719238, |
|
"logps/chosen": -1722.739990234375, |
|
"logps/rejected": -2134.793212890625, |
|
"loss": 0.2628, |
|
"nll_loss": 5.156876087188721, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -19.4663143157959, |
|
"rewards/margins": 6.938652038574219, |
|
"rewards/rejected": -26.404966354370117, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.23578947368421052, |
|
"grad_norm": 387.2010540404042, |
|
"learning_rate": 1.8674681836230768e-07, |
|
"logits/chosen": -7.421821117401123, |
|
"logits/rejected": -8.06303882598877, |
|
"logps/chosen": -1804.21875, |
|
"logps/rejected": -2085.41943359375, |
|
"loss": 0.4242, |
|
"nll_loss": 4.941443920135498, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -21.495800018310547, |
|
"rewards/margins": 4.65245246887207, |
|
"rewards/rejected": -26.148252487182617, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.2391578947368421, |
|
"grad_norm": 319.34935073129554, |
|
"learning_rate": 1.8616536296486708e-07, |
|
"logits/chosen": -6.959980487823486, |
|
"logits/rejected": -8.008804321289062, |
|
"logps/chosen": -1645.2440185546875, |
|
"logps/rejected": -2129.713623046875, |
|
"loss": 0.2486, |
|
"nll_loss": 5.249794960021973, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -19.205081939697266, |
|
"rewards/margins": 6.898474216461182, |
|
"rewards/rejected": -26.103557586669922, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.24252631578947367, |
|
"grad_norm": 179.51700063120762, |
|
"learning_rate": 1.855723706986322e-07, |
|
"logits/chosen": -6.863515377044678, |
|
"logits/rejected": -8.17676067352295, |
|
"logps/chosen": -1595.7620849609375, |
|
"logps/rejected": -2139.49951171875, |
|
"loss": 0.2893, |
|
"nll_loss": 5.239679336547852, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -19.65725326538086, |
|
"rewards/margins": 7.443434238433838, |
|
"rewards/rejected": -27.10068702697754, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.24589473684210525, |
|
"grad_norm": 275.3682193899652, |
|
"learning_rate": 1.8496792096063377e-07, |
|
"logits/chosen": -6.194632530212402, |
|
"logits/rejected": -8.473308563232422, |
|
"logps/chosen": -1469.7490234375, |
|
"logps/rejected": -2113.530029296875, |
|
"loss": 0.2238, |
|
"nll_loss": 5.825500965118408, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -18.392675399780273, |
|
"rewards/margins": 9.299560546875, |
|
"rewards/rejected": -27.692235946655273, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.24926315789473685, |
|
"grad_norm": 323.66578296523716, |
|
"learning_rate": 1.8435209468196847e-07, |
|
"logits/chosen": -7.083681106567383, |
|
"logits/rejected": -8.149633407592773, |
|
"logps/chosen": -1779.7303466796875, |
|
"logps/rejected": -2133.3251953125, |
|
"loss": 0.5144, |
|
"nll_loss": 4.9437971115112305, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -22.35186767578125, |
|
"rewards/margins": 6.203129291534424, |
|
"rewards/rejected": -28.55499839782715, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.25263157894736843, |
|
"grad_norm": 461.451311082762, |
|
"learning_rate": 1.8372497431696285e-07, |
|
"logits/chosen": -7.620912551879883, |
|
"logits/rejected": -8.02045726776123, |
|
"logps/chosen": -1918.67724609375, |
|
"logps/rejected": -2131.48193359375, |
|
"loss": 0.4635, |
|
"nll_loss": 4.790881633758545, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -24.607433319091797, |
|
"rewards/margins": 3.646448850631714, |
|
"rewards/rejected": -28.25388526916504, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.256, |
|
"grad_norm": 405.566848682187, |
|
"learning_rate": 1.830866438321334e-07, |
|
"logits/chosen": -7.111250877380371, |
|
"logits/rejected": -8.096766471862793, |
|
"logps/chosen": -1748.1270751953125, |
|
"logps/rejected": -2097.0595703125, |
|
"loss": 0.4518, |
|
"nll_loss": 5.408980846405029, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -23.680131912231445, |
|
"rewards/margins": 5.77828311920166, |
|
"rewards/rejected": -29.458415985107422, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.2593684210526316, |
|
"grad_norm": 298.617818561619, |
|
"learning_rate": 1.8243718869494405e-07, |
|
"logits/chosen": -7.182644844055176, |
|
"logits/rejected": -8.200958251953125, |
|
"logps/chosen": -1616.265625, |
|
"logps/rejected": -2086.645751953125, |
|
"loss": 0.2766, |
|
"nll_loss": 5.574889183044434, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -21.693281173706055, |
|
"rewards/margins": 7.690347671508789, |
|
"rewards/rejected": -29.38362693786621, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.26273684210526316, |
|
"grad_norm": 297.68208713323685, |
|
"learning_rate": 1.8177669586236274e-07, |
|
"logits/chosen": -7.177743434906006, |
|
"logits/rejected": -8.266185760498047, |
|
"logps/chosen": -1737.14892578125, |
|
"logps/rejected": -2121.933837890625, |
|
"loss": 0.1476, |
|
"nll_loss": 5.227602481842041, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -23.120718002319336, |
|
"rewards/margins": 7.246493816375732, |
|
"rewards/rejected": -30.367212295532227, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.26610526315789473, |
|
"grad_norm": 321.45477422959516, |
|
"learning_rate": 1.811052537692186e-07, |
|
"logits/chosen": -7.373076915740967, |
|
"logits/rejected": -8.062271118164062, |
|
"logps/chosen": -1772.658447265625, |
|
"logps/rejected": -2124.108642578125, |
|
"loss": 0.1706, |
|
"nll_loss": 5.48671817779541, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -22.967531204223633, |
|
"rewards/margins": 7.354568004608154, |
|
"rewards/rejected": -30.322099685668945, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.2694736842105263, |
|
"grad_norm": 265.3611042051943, |
|
"learning_rate": 1.8042295231636113e-07, |
|
"logits/chosen": -6.719966888427734, |
|
"logits/rejected": -8.064167976379395, |
|
"logps/chosen": -1674.589111328125, |
|
"logps/rejected": -2176.885009765625, |
|
"loss": 0.3049, |
|
"nll_loss": 5.396821022033691, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -22.00328254699707, |
|
"rewards/margins": 8.764394760131836, |
|
"rewards/rejected": -30.767677307128906, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.2728421052631579, |
|
"grad_norm": 442.39075275710724, |
|
"learning_rate": 1.7972988285862333e-07, |
|
"logits/chosen": -7.613976001739502, |
|
"logits/rejected": -8.213547706604004, |
|
"logps/chosen": -2117.5400390625, |
|
"logps/rejected": -2208.39599609375, |
|
"loss": 0.2877, |
|
"nll_loss": 5.046686172485352, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -28.775413513183594, |
|
"rewards/margins": 1.5463881492614746, |
|
"rewards/rejected": -30.32179832458496, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.27621052631578946, |
|
"grad_norm": 232.11714950375813, |
|
"learning_rate": 1.7902613819258983e-07, |
|
"logits/chosen": -6.078405857086182, |
|
"logits/rejected": -8.073712348937988, |
|
"logps/chosen": -1325.989990234375, |
|
"logps/rejected": -2109.148681640625, |
|
"loss": 0.1362, |
|
"nll_loss": 5.590985298156738, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -17.27791976928711, |
|
"rewards/margins": 13.148313522338867, |
|
"rewards/rejected": -30.426233291625977, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.27957894736842104, |
|
"grad_norm": 178.08792377292178, |
|
"learning_rate": 1.7831181254417226e-07, |
|
"logits/chosen": -7.393200397491455, |
|
"logits/rejected": -7.933506965637207, |
|
"logps/chosen": -1770.2550048828125, |
|
"logps/rejected": -2183.81884765625, |
|
"loss": 0.1839, |
|
"nll_loss": 4.8053669929504395, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -22.983407974243164, |
|
"rewards/margins": 7.7809247970581055, |
|
"rewards/rejected": -30.764333724975586, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.2829473684210526, |
|
"grad_norm": 422.36016546899873, |
|
"learning_rate": 1.7758700155599316e-07, |
|
"logits/chosen": -6.5516037940979, |
|
"logits/rejected": -7.964224815368652, |
|
"logps/chosen": -1520.2445068359375, |
|
"logps/rejected": -2135.012451171875, |
|
"loss": 0.1957, |
|
"nll_loss": 5.540028095245361, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -20.019882202148438, |
|
"rewards/margins": 11.298989295959473, |
|
"rewards/rejected": -31.318870544433594, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.2863157894736842, |
|
"grad_norm": 496.76355668466664, |
|
"learning_rate": 1.7685180227458002e-07, |
|
"logits/chosen": -7.281660556793213, |
|
"logits/rejected": -8.243330955505371, |
|
"logps/chosen": -1699.278564453125, |
|
"logps/rejected": -2149.191650390625, |
|
"loss": 0.4263, |
|
"nll_loss": 5.4164533615112305, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -24.19361686706543, |
|
"rewards/margins": 6.679977893829346, |
|
"rewards/rejected": -30.873594284057617, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.28968421052631577, |
|
"grad_norm": 368.2965007979305, |
|
"learning_rate": 1.7610631313737172e-07, |
|
"logits/chosen": -7.080427646636963, |
|
"logits/rejected": -8.119704246520996, |
|
"logps/chosen": -1611.391357421875, |
|
"logps/rejected": -2172.8203125, |
|
"loss": 0.3694, |
|
"nll_loss": 5.258100509643555, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -22.043684005737305, |
|
"rewards/margins": 9.61347770690918, |
|
"rewards/rejected": -31.657163619995117, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.29305263157894734, |
|
"grad_norm": 670.6700241176729, |
|
"learning_rate": 1.753506339595384e-07, |
|
"logits/chosen": -7.517750263214111, |
|
"logits/rejected": -7.757349967956543, |
|
"logps/chosen": -1787.9560546875, |
|
"logps/rejected": -2129.67822265625, |
|
"loss": 0.298, |
|
"nll_loss": 5.433893203735352, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -25.235340118408203, |
|
"rewards/margins": 6.193837642669678, |
|
"rewards/rejected": -31.42917823791504, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.296421052631579, |
|
"grad_norm": 410.62113772917246, |
|
"learning_rate": 1.7458486592061701e-07, |
|
"logits/chosen": -6.625953674316406, |
|
"logits/rejected": -8.243332862854004, |
|
"logps/chosen": -1637.386474609375, |
|
"logps/rejected": -2249.2197265625, |
|
"loss": 0.3263, |
|
"nll_loss": 5.135756969451904, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -22.93773651123047, |
|
"rewards/margins": 10.093299865722656, |
|
"rewards/rejected": -33.03103256225586, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.29978947368421055, |
|
"grad_norm": 518.14733064062, |
|
"learning_rate": 1.7380911155096407e-07, |
|
"logits/chosen": -6.312992572784424, |
|
"logits/rejected": -7.9758687019348145, |
|
"logps/chosen": -1488.8402099609375, |
|
"logps/rejected": -2148.6962890625, |
|
"loss": 0.591, |
|
"nll_loss": 5.555506706237793, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -20.87957763671875, |
|
"rewards/margins": 10.946573257446289, |
|
"rewards/rejected": -31.82615089416504, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.3031578947368421, |
|
"grad_norm": 230.12282725753218, |
|
"learning_rate": 1.7302347471802795e-07, |
|
"logits/chosen": -6.5913238525390625, |
|
"logits/rejected": -8.363908767700195, |
|
"logps/chosen": -1680.2467041015625, |
|
"logps/rejected": -2223.9873046875, |
|
"loss": 0.3578, |
|
"nll_loss": 5.693438529968262, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -23.647768020629883, |
|
"rewards/margins": 8.622623443603516, |
|
"rewards/rejected": -32.27039337158203, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.3065263157894737, |
|
"grad_norm": 290.90448926580694, |
|
"learning_rate": 1.7222806061244146e-07, |
|
"logits/chosen": -6.316551208496094, |
|
"logits/rejected": -8.457919120788574, |
|
"logps/chosen": -1597.56103515625, |
|
"logps/rejected": -2268.66259765625, |
|
"loss": 0.1266, |
|
"nll_loss": 5.54366397857666, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -22.09076499938965, |
|
"rewards/margins": 9.565703392028809, |
|
"rewards/rejected": -31.656469345092773, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.3098947368421053, |
|
"grad_norm": 219.15996405308613, |
|
"learning_rate": 1.7142297573393788e-07, |
|
"logits/chosen": -7.972108840942383, |
|
"logits/rejected": -8.042094230651855, |
|
"logps/chosen": -1998.596923828125, |
|
"logps/rejected": -2172.12255859375, |
|
"loss": 0.3072, |
|
"nll_loss": 5.00222110748291, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -28.76752281188965, |
|
"rewards/margins": 3.3450047969818115, |
|
"rewards/rejected": -32.112525939941406, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.31326315789473685, |
|
"grad_norm": 322.85198814627114, |
|
"learning_rate": 1.7060832787709138e-07, |
|
"logits/chosen": -6.03144645690918, |
|
"logits/rejected": -8.47148323059082, |
|
"logps/chosen": -1274.5035400390625, |
|
"logps/rejected": -2136.2109375, |
|
"loss": 0.3631, |
|
"nll_loss": 5.840561866760254, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -17.418729782104492, |
|
"rewards/margins": 15.012275695800781, |
|
"rewards/rejected": -32.43100357055664, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.31663157894736843, |
|
"grad_norm": 513.7476885592771, |
|
"learning_rate": 1.697842261168843e-07, |
|
"logits/chosen": -7.334516525268555, |
|
"logits/rejected": -8.225534439086914, |
|
"logps/chosen": -1927.53515625, |
|
"logps/rejected": -2148.278564453125, |
|
"loss": 0.3361, |
|
"nll_loss": 5.305893898010254, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -26.597455978393555, |
|
"rewards/margins": 5.804078102111816, |
|
"rewards/rejected": -32.40153503417969, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 163.682762913509, |
|
"learning_rate": 1.6895078079410266e-07, |
|
"logits/chosen": -5.538216590881348, |
|
"logits/rejected": -8.494758605957031, |
|
"logps/chosen": -1233.1563720703125, |
|
"logps/rejected": -2203.232177734375, |
|
"loss": 0.179, |
|
"nll_loss": 5.9964189529418945, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -17.165884017944336, |
|
"rewards/margins": 15.676169395446777, |
|
"rewards/rejected": -32.84205627441406, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.3233684210526316, |
|
"grad_norm": 308.2898266039604, |
|
"learning_rate": 1.6810810350056258e-07, |
|
"logits/chosen": -6.971872329711914, |
|
"logits/rejected": -8.188929557800293, |
|
"logps/chosen": -1851.5859375, |
|
"logps/rejected": -2194.031982421875, |
|
"loss": 0.4115, |
|
"nll_loss": 5.3788743019104, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -26.422870635986328, |
|
"rewards/margins": 6.5016984939575195, |
|
"rewards/rejected": -32.92456817626953, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.32673684210526316, |
|
"grad_norm": 412.5565491541735, |
|
"learning_rate": 1.672563070641688e-07, |
|
"logits/chosen": -7.640994548797607, |
|
"logits/rejected": -8.178326606750488, |
|
"logps/chosen": -2078.18115234375, |
|
"logps/rejected": -2178.9091796875, |
|
"loss": 0.2875, |
|
"nll_loss": 5.410080432891846, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -30.432947158813477, |
|
"rewards/margins": 2.4267215728759766, |
|
"rewards/rejected": -32.85966873168945, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.33010526315789473, |
|
"grad_norm": 469.2498668087184, |
|
"learning_rate": 1.6639550553380816e-07, |
|
"logits/chosen": -7.088956356048584, |
|
"logits/rejected": -8.258764266967773, |
|
"logps/chosen": -1782.86376953125, |
|
"logps/rejected": -2211.70703125, |
|
"loss": 0.369, |
|
"nll_loss": 5.0121073722839355, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -26.950332641601562, |
|
"rewards/margins": 7.496822357177734, |
|
"rewards/rejected": -34.44715118408203, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.3334736842105263, |
|
"grad_norm": 236.59088336990078, |
|
"learning_rate": 1.6552581416407916e-07, |
|
"logits/chosen": -8.196027755737305, |
|
"logits/rejected": -7.7416181564331055, |
|
"logps/chosen": -2154.635498046875, |
|
"logps/rejected": -2205.575927734375, |
|
"loss": 0.3022, |
|
"nll_loss": 4.687681198120117, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -32.5702018737793, |
|
"rewards/margins": 2.931349515914917, |
|
"rewards/rejected": -35.501556396484375, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.3368421052631579, |
|
"grad_norm": 413.82963735092727, |
|
"learning_rate": 1.6464734939986035e-07, |
|
"logits/chosen": -7.169932842254639, |
|
"logits/rejected": -8.24282455444336, |
|
"logps/chosen": -1997.083984375, |
|
"logps/rejected": -2223.7470703125, |
|
"loss": 0.3572, |
|
"nll_loss": 5.281017303466797, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -30.46786117553711, |
|
"rewards/margins": 5.10554313659668, |
|
"rewards/rejected": -35.573402404785156, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.34021052631578946, |
|
"grad_norm": 415.7728496426865, |
|
"learning_rate": 1.637602288607192e-07, |
|
"logits/chosen": -6.219871520996094, |
|
"logits/rejected": -8.299065589904785, |
|
"logps/chosen": -1463.1075439453125, |
|
"logps/rejected": -2223.69775390625, |
|
"loss": 0.2563, |
|
"nll_loss": 5.272424697875977, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -20.643104553222656, |
|
"rewards/margins": 13.95057487487793, |
|
"rewards/rejected": -34.59368133544922, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.34357894736842104, |
|
"grad_norm": 422.85323621204554, |
|
"learning_rate": 1.6286457132516383e-07, |
|
"logits/chosen": -6.83880615234375, |
|
"logits/rejected": -8.385416030883789, |
|
"logps/chosen": -1577.858154296875, |
|
"logps/rejected": -2194.265869140625, |
|
"loss": 0.3599, |
|
"nll_loss": 5.622811317443848, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -22.948638916015625, |
|
"rewards/margins": 11.672065734863281, |
|
"rewards/rejected": -34.620704650878906, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.3469473684210526, |
|
"grad_norm": 490.11145097314346, |
|
"learning_rate": 1.6196049671473953e-07, |
|
"logits/chosen": -7.446739196777344, |
|
"logits/rejected": -8.425362586975098, |
|
"logps/chosen": -1768.897705078125, |
|
"logps/rejected": -2202.29443359375, |
|
"loss": 0.2982, |
|
"nll_loss": 5.6991496086120605, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -28.04994773864746, |
|
"rewards/margins": 6.368475914001465, |
|
"rewards/rejected": -34.41842269897461, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.3503157894736842, |
|
"grad_norm": 530.9413220874789, |
|
"learning_rate": 1.61048126077972e-07, |
|
"logits/chosen": -7.343864440917969, |
|
"logits/rejected": -8.190047264099121, |
|
"logps/chosen": -1754.0421142578125, |
|
"logps/rejected": -2148.49072265625, |
|
"loss": 0.2674, |
|
"nll_loss": 5.666429042816162, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -25.846054077148438, |
|
"rewards/margins": 8.071301460266113, |
|
"rewards/rejected": -33.917354583740234, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.35368421052631577, |
|
"grad_norm": 166.06683989436303, |
|
"learning_rate": 1.6012758157416018e-07, |
|
"logits/chosen": -7.329439640045166, |
|
"logits/rejected": -7.938960075378418, |
|
"logps/chosen": -1708.5814208984375, |
|
"logps/rejected": -2173.199462890625, |
|
"loss": 0.1976, |
|
"nll_loss": 5.0503830909729, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -24.269054412841797, |
|
"rewards/margins": 9.374040603637695, |
|
"rewards/rejected": -33.643096923828125, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.35705263157894734, |
|
"grad_norm": 525.5297922097282, |
|
"learning_rate": 1.5919898645701988e-07, |
|
"logits/chosen": -6.739994049072266, |
|
"logits/rejected": -8.293057441711426, |
|
"logps/chosen": -1717.248046875, |
|
"logps/rejected": -2190.51171875, |
|
"loss": 0.4755, |
|
"nll_loss": 4.879263877868652, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -24.138900756835938, |
|
"rewards/margins": 8.57326602935791, |
|
"rewards/rejected": -32.71216583251953, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.3604210526315789, |
|
"grad_norm": 548.5346673183018, |
|
"learning_rate": 1.5826246505818112e-07, |
|
"logits/chosen": -6.99255895614624, |
|
"logits/rejected": -8.398011207580566, |
|
"logps/chosen": -1611.18701171875, |
|
"logps/rejected": -2188.08642578125, |
|
"loss": 0.432, |
|
"nll_loss": 5.333250999450684, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -23.24384880065918, |
|
"rewards/margins": 10.111581802368164, |
|
"rewards/rejected": -33.355430603027344, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.36378947368421055, |
|
"grad_norm": 185.93773109229338, |
|
"learning_rate": 1.573181427705411e-07, |
|
"logits/chosen": -7.210558891296387, |
|
"logits/rejected": -8.003637313842773, |
|
"logps/chosen": -1898.2894287109375, |
|
"logps/rejected": -2259.249755859375, |
|
"loss": 0.1156, |
|
"nll_loss": 5.2748212814331055, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -27.62005043029785, |
|
"rewards/margins": 6.235610485076904, |
|
"rewards/rejected": -33.85565948486328, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.3671578947368421, |
|
"grad_norm": 298.31090901524203, |
|
"learning_rate": 1.5636614603147512e-07, |
|
"logits/chosen": -7.3358025550842285, |
|
"logits/rejected": -8.385098457336426, |
|
"logps/chosen": -1998.4923095703125, |
|
"logps/rejected": -2187.198486328125, |
|
"loss": 0.2525, |
|
"nll_loss": 5.315785884857178, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -29.09065818786621, |
|
"rewards/margins": 3.6531078815460205, |
|
"rewards/rejected": -32.74376678466797, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.3705263157894737, |
|
"grad_norm": 377.3450841073725, |
|
"learning_rate": 1.5540660230590748e-07, |
|
"logits/chosen": -6.510415077209473, |
|
"logits/rejected": -8.296943664550781, |
|
"logps/chosen": -1665.1639404296875, |
|
"logps/rejected": -2214.30078125, |
|
"loss": 0.438, |
|
"nll_loss": 5.418496131896973, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -23.495925903320312, |
|
"rewards/margins": 10.108709335327148, |
|
"rewards/rejected": -33.604637145996094, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.3738947368421053, |
|
"grad_norm": 443.30578117705613, |
|
"learning_rate": 1.5443964006924507e-07, |
|
"logits/chosen": -6.681422710418701, |
|
"logits/rejected": -8.464709281921387, |
|
"logps/chosen": -1588.643310546875, |
|
"logps/rejected": -2177.72705078125, |
|
"loss": 0.2768, |
|
"nll_loss": 6.007861137390137, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -23.003562927246094, |
|
"rewards/margins": 10.166757583618164, |
|
"rewards/rejected": -33.17032241821289, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.37726315789473686, |
|
"grad_norm": 163.54164941613496, |
|
"learning_rate": 1.534653887901754e-07, |
|
"logits/chosen": -5.698173999786377, |
|
"logits/rejected": -8.45270824432373, |
|
"logps/chosen": -1357.1298828125, |
|
"logps/rejected": -2201.384765625, |
|
"loss": 0.1406, |
|
"nll_loss": 5.912946701049805, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -17.897022247314453, |
|
"rewards/margins": 14.996172904968262, |
|
"rewards/rejected": -32.8931999206543, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.38063157894736843, |
|
"grad_norm": 319.17016425411697, |
|
"learning_rate": 1.5248397891333183e-07, |
|
"logits/chosen": -6.813119888305664, |
|
"logits/rejected": -8.12168025970459, |
|
"logps/chosen": -1691.3763427734375, |
|
"logps/rejected": -2194.62646484375, |
|
"loss": 0.2993, |
|
"nll_loss": 5.037168025970459, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -24.049442291259766, |
|
"rewards/margins": 8.878725051879883, |
|
"rewards/rejected": -32.928165435791016, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.384, |
|
"grad_norm": 284.69578061502773, |
|
"learning_rate": 1.51495541841828e-07, |
|
"logits/chosen": -6.420520305633545, |
|
"logits/rejected": -8.212552070617676, |
|
"logps/chosen": -1653.12890625, |
|
"logps/rejected": -2208.427734375, |
|
"loss": 0.2744, |
|
"nll_loss": 5.3444905281066895, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -23.33785629272461, |
|
"rewards/margins": 9.556280136108398, |
|
"rewards/rejected": -32.894134521484375, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.3873684210526316, |
|
"grad_norm": 284.69191132696415, |
|
"learning_rate": 1.5050020991966403e-07, |
|
"logits/chosen": -6.635988712310791, |
|
"logits/rejected": -7.83236837387085, |
|
"logps/chosen": -1533.177978515625, |
|
"logps/rejected": -2170.71044921875, |
|
"loss": 0.1905, |
|
"nll_loss": 5.389209270477295, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -21.042217254638672, |
|
"rewards/margins": 11.956886291503906, |
|
"rewards/rejected": -32.99910354614258, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.39073684210526316, |
|
"grad_norm": 733.6966760862421, |
|
"learning_rate": 1.4949811641400668e-07, |
|
"logits/chosen": -7.570094108581543, |
|
"logits/rejected": -7.502762794494629, |
|
"logps/chosen": -1841.1671142578125, |
|
"logps/rejected": -2136.026611328125, |
|
"loss": 0.5221, |
|
"nll_loss": 4.983242988586426, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -25.91825294494629, |
|
"rewards/margins": 6.6433634757995605, |
|
"rewards/rejected": -32.561614990234375, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.39410526315789474, |
|
"grad_norm": 188.5369451526676, |
|
"learning_rate": 1.484893954973458e-07, |
|
"logits/chosen": -6.149730205535889, |
|
"logits/rejected": -8.597711563110352, |
|
"logps/chosen": -1442.3626708984375, |
|
"logps/rejected": -2134.160888671875, |
|
"loss": 0.129, |
|
"nll_loss": 5.606182098388672, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -18.86325454711914, |
|
"rewards/margins": 13.016372680664062, |
|
"rewards/rejected": -31.879629135131836, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.3974736842105263, |
|
"grad_norm": 305.89732243842866, |
|
"learning_rate": 1.4747418222952995e-07, |
|
"logits/chosen": -6.757414817810059, |
|
"logits/rejected": -8.115114212036133, |
|
"logps/chosen": -1697.4263916015625, |
|
"logps/rejected": -2184.919921875, |
|
"loss": 0.2413, |
|
"nll_loss": 5.372623920440674, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -22.677663803100586, |
|
"rewards/margins": 8.713125228881836, |
|
"rewards/rejected": -31.39078712463379, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.4008421052631579, |
|
"grad_norm": 341.4679249212142, |
|
"learning_rate": 1.4645261253968259e-07, |
|
"logits/chosen": -6.988116264343262, |
|
"logits/rejected": -8.155961036682129, |
|
"logps/chosen": -1654.0662841796875, |
|
"logps/rejected": -2215.248291015625, |
|
"loss": 0.2575, |
|
"nll_loss": 5.236739158630371, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -21.976306915283203, |
|
"rewards/margins": 10.24085807800293, |
|
"rewards/rejected": -32.2171630859375, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.40421052631578946, |
|
"grad_norm": 132.8280211518827, |
|
"learning_rate": 1.454248232080026e-07, |
|
"logits/chosen": -7.433531761169434, |
|
"logits/rejected": -8.119049072265625, |
|
"logps/chosen": -1924.980712890625, |
|
"logps/rejected": -2134.167724609375, |
|
"loss": 0.2044, |
|
"nll_loss": 5.498749256134033, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -26.4937801361084, |
|
"rewards/margins": 5.2426652908325195, |
|
"rewards/rejected": -31.736446380615234, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.40757894736842104, |
|
"grad_norm": 340.2818569766834, |
|
"learning_rate": 1.4439095184745024e-07, |
|
"logits/chosen": -6.053187370300293, |
|
"logits/rejected": -8.49085807800293, |
|
"logps/chosen": -1365.32568359375, |
|
"logps/rejected": -2177.580078125, |
|
"loss": 0.271, |
|
"nll_loss": 5.570743083953857, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -18.466915130615234, |
|
"rewards/margins": 12.624629974365234, |
|
"rewards/rejected": -31.09154510498047, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.4109473684210526, |
|
"grad_norm": 214.41734275706338, |
|
"learning_rate": 1.4335113688532182e-07, |
|
"logits/chosen": -6.56601619720459, |
|
"logits/rejected": -8.04720401763916, |
|
"logps/chosen": -1554.8018798828125, |
|
"logps/rejected": -2144.083251953125, |
|
"loss": 0.138, |
|
"nll_loss": 5.4142374992370605, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -20.857574462890625, |
|
"rewards/margins": 11.797863006591797, |
|
"rewards/rejected": -32.655433654785156, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.4143157894736842, |
|
"grad_norm": 337.35948463767846, |
|
"learning_rate": 1.423055175447155e-07, |
|
"logits/chosen": -6.994697570800781, |
|
"logits/rejected": -7.979827880859375, |
|
"logps/chosen": -1652.507568359375, |
|
"logps/rejected": -2181.640625, |
|
"loss": 0.329, |
|
"nll_loss": 5.518115043640137, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -23.70856475830078, |
|
"rewards/margins": 8.838756561279297, |
|
"rewards/rejected": -32.54732131958008, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.41768421052631577, |
|
"grad_norm": 274.454131527474, |
|
"learning_rate": 1.4125423382589048e-07, |
|
"logits/chosen": -6.993724346160889, |
|
"logits/rejected": -8.257303237915039, |
|
"logps/chosen": -1779.2265625, |
|
"logps/rejected": -2149.052490234375, |
|
"loss": 0.2965, |
|
"nll_loss": 5.484703063964844, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -24.876686096191406, |
|
"rewards/margins": 7.389163970947266, |
|
"rewards/rejected": -32.26585006713867, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.42105263157894735, |
|
"grad_norm": 352.35818290838654, |
|
"learning_rate": 1.401974264875218e-07, |
|
"logits/chosen": -7.193760871887207, |
|
"logits/rejected": -8.0586519241333, |
|
"logps/chosen": -1831.6397705078125, |
|
"logps/rejected": -2227.03857421875, |
|
"loss": 0.2662, |
|
"nll_loss": 5.117783546447754, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -25.96319007873535, |
|
"rewards/margins": 6.454355239868164, |
|
"rewards/rejected": -32.41754150390625, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.4244210526315789, |
|
"grad_norm": 255.0171256504159, |
|
"learning_rate": 1.391352370278541e-07, |
|
"logits/chosen": -6.700958728790283, |
|
"logits/rejected": -8.45335865020752, |
|
"logps/chosen": -1634.654541015625, |
|
"logps/rejected": -2221.049072265625, |
|
"loss": 0.2647, |
|
"nll_loss": 6.0273308753967285, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -23.497007369995117, |
|
"rewards/margins": 10.51921558380127, |
|
"rewards/rejected": -34.0162239074707, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.42778947368421055, |
|
"grad_norm": 481.3772072138439, |
|
"learning_rate": 1.3806780766575588e-07, |
|
"logits/chosen": -7.795599460601807, |
|
"logits/rejected": -8.443253517150879, |
|
"logps/chosen": -1971.610107421875, |
|
"logps/rejected": -2192.519287109375, |
|
"loss": 0.203, |
|
"nll_loss": 5.141743183135986, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -28.521339416503906, |
|
"rewards/margins": 5.977480888366699, |
|
"rewards/rejected": -34.49882125854492, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.43115789473684213, |
|
"grad_norm": 451.03866242263535, |
|
"learning_rate": 1.3699528132167776e-07, |
|
"logits/chosen": -7.792696475982666, |
|
"logits/rejected": -7.79255485534668, |
|
"logps/chosen": -1888.9193115234375, |
|
"logps/rejected": -2135.4482421875, |
|
"loss": 0.3684, |
|
"nll_loss": 4.749884128570557, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -26.909269332885742, |
|
"rewards/margins": 6.3855719566345215, |
|
"rewards/rejected": -33.29484176635742, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.4345263157894737, |
|
"grad_norm": 242.47130951413612, |
|
"learning_rate": 1.3591780159851627e-07, |
|
"logits/chosen": -6.771208763122559, |
|
"logits/rejected": -8.258780479431152, |
|
"logps/chosen": -1740.8980712890625, |
|
"logps/rejected": -2198.968994140625, |
|
"loss": 0.1598, |
|
"nll_loss": 5.290096759796143, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -24.707876205444336, |
|
"rewards/margins": 9.988024711608887, |
|
"rewards/rejected": -34.695899963378906, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.4378947368421053, |
|
"grad_norm": 499.1497771210561, |
|
"learning_rate": 1.3483551276238686e-07, |
|
"logits/chosen": -6.881796836853027, |
|
"logits/rejected": -8.311073303222656, |
|
"logps/chosen": -1801.54443359375, |
|
"logps/rejected": -2261.04150390625, |
|
"loss": 0.5297, |
|
"nll_loss": 4.906632900238037, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -25.078868865966797, |
|
"rewards/margins": 9.138972282409668, |
|
"rewards/rejected": -34.217838287353516, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.44126315789473686, |
|
"grad_norm": 340.10309787989286, |
|
"learning_rate": 1.3374855972330756e-07, |
|
"logits/chosen": -7.035615921020508, |
|
"logits/rejected": -8.248941421508789, |
|
"logps/chosen": -1872.8837890625, |
|
"logps/rejected": -2217.878173828125, |
|
"loss": 0.306, |
|
"nll_loss": 5.385906219482422, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -26.4267520904541, |
|
"rewards/margins": 7.073578357696533, |
|
"rewards/rejected": -33.500328063964844, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.44463157894736843, |
|
"grad_norm": 225.67805816583055, |
|
"learning_rate": 1.3265708801579666e-07, |
|
"logits/chosen": -6.937837600708008, |
|
"logits/rejected": -8.376426696777344, |
|
"logps/chosen": -1765.403076171875, |
|
"logps/rejected": -2198.61767578125, |
|
"loss": 0.1877, |
|
"nll_loss": 5.633481502532959, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -25.389394760131836, |
|
"rewards/margins": 7.419838905334473, |
|
"rewards/rejected": -32.80923080444336, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.448, |
|
"grad_norm": 91.96492729058454, |
|
"learning_rate": 1.3156124377938697e-07, |
|
"logits/chosen": -7.8583831787109375, |
|
"logits/rejected": -8.153332710266113, |
|
"logps/chosen": -2142.6328125, |
|
"logps/rejected": -2167.61376953125, |
|
"loss": 0.2784, |
|
"nll_loss": 4.982174396514893, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -30.6492919921875, |
|
"rewards/margins": 2.7910072803497314, |
|
"rewards/rejected": -33.4402961730957, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.4513684210526316, |
|
"grad_norm": 199.4223395992786, |
|
"learning_rate": 1.3046117373905865e-07, |
|
"logits/chosen": -6.933570861816406, |
|
"logits/rejected": -8.099403381347656, |
|
"logps/chosen": -1728.806396484375, |
|
"logps/rejected": -2209.177734375, |
|
"loss": 0.0685, |
|
"nll_loss": 5.4580488204956055, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -23.95172882080078, |
|
"rewards/margins": 10.292177200317383, |
|
"rewards/rejected": -34.2439079284668, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.45473684210526316, |
|
"grad_norm": 216.84232462782722, |
|
"learning_rate": 1.2935702518559397e-07, |
|
"logits/chosen": -7.183603763580322, |
|
"logits/rejected": -8.098438262939453, |
|
"logps/chosen": -1903.0699462890625, |
|
"logps/rejected": -2204.51904296875, |
|
"loss": 0.2503, |
|
"nll_loss": 5.397114276885986, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -27.064674377441406, |
|
"rewards/margins": 5.608869552612305, |
|
"rewards/rejected": -32.673545837402344, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.45810526315789474, |
|
"grad_norm": 380.9095200963084, |
|
"learning_rate": 1.2824894595585636e-07, |
|
"logits/chosen": -6.462231636047363, |
|
"logits/rejected": -8.369811058044434, |
|
"logps/chosen": -1741.0838623046875, |
|
"logps/rejected": -2199.78466796875, |
|
"loss": 0.2068, |
|
"nll_loss": 6.009423732757568, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -26.056018829345703, |
|
"rewards/margins": 7.744605541229248, |
|
"rewards/rejected": -33.80062484741211, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.4614736842105263, |
|
"grad_norm": 137.60298699973777, |
|
"learning_rate": 1.27137084412996e-07, |
|
"logits/chosen": -6.126290321350098, |
|
"logits/rejected": -8.357515335083008, |
|
"logps/chosen": -1254.0537109375, |
|
"logps/rejected": -2182.109130859375, |
|
"loss": 0.1443, |
|
"nll_loss": 5.386259078979492, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -17.421083450317383, |
|
"rewards/margins": 17.08145523071289, |
|
"rewards/rejected": -34.50253677368164, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.4648421052631579, |
|
"grad_norm": 238.72881941389062, |
|
"learning_rate": 1.260215894265852e-07, |
|
"logits/chosen": -6.577013969421387, |
|
"logits/rejected": -8.490443229675293, |
|
"logps/chosen": -1655.7412109375, |
|
"logps/rejected": -2204.900390625, |
|
"loss": 0.2314, |
|
"nll_loss": 5.632746696472168, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -23.928146362304688, |
|
"rewards/margins": 11.161284446716309, |
|
"rewards/rejected": -35.08943176269531, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.46821052631578947, |
|
"grad_norm": 205.0849481007809, |
|
"learning_rate": 1.2490261035268613e-07, |
|
"logits/chosen": -7.492171287536621, |
|
"logits/rejected": -8.131207466125488, |
|
"logps/chosen": -1958.902099609375, |
|
"logps/rejected": -2155.704345703125, |
|
"loss": 0.1155, |
|
"nll_loss": 5.272936820983887, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -30.05508804321289, |
|
"rewards/margins": 5.22835111618042, |
|
"rewards/rejected": -35.2834358215332, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.47157894736842104, |
|
"grad_norm": 226.262655032818, |
|
"learning_rate": 1.2378029701385286e-07, |
|
"logits/chosen": -6.374780178070068, |
|
"logits/rejected": -8.473326683044434, |
|
"logps/chosen": -1493.491455078125, |
|
"logps/rejected": -2233.69140625, |
|
"loss": 0.1602, |
|
"nll_loss": 5.51650333404541, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -22.54718017578125, |
|
"rewards/margins": 13.27789306640625, |
|
"rewards/rejected": -35.8250732421875, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.4749473684210526, |
|
"grad_norm": 304.14594820990135, |
|
"learning_rate": 1.2265479967907158e-07, |
|
"logits/chosen": -7.050374507904053, |
|
"logits/rejected": -8.2347412109375, |
|
"logps/chosen": -1850.6962890625, |
|
"logps/rejected": -2199.18798828125, |
|
"loss": 0.3842, |
|
"nll_loss": 5.64528226852417, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -28.668458938598633, |
|
"rewards/margins": 7.283104419708252, |
|
"rewards/rejected": -35.95156478881836, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.4783157894736842, |
|
"grad_norm": 250.05279911200753, |
|
"learning_rate": 1.2152626904364064e-07, |
|
"logits/chosen": -6.778600215911865, |
|
"logits/rejected": -8.34770679473877, |
|
"logps/chosen": -1604.2724609375, |
|
"logps/rejected": -2214.350830078125, |
|
"loss": 0.1214, |
|
"nll_loss": 5.6927690505981445, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -24.976463317871094, |
|
"rewards/margins": 11.123945236206055, |
|
"rewards/rejected": -36.10041046142578, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.48168421052631577, |
|
"grad_norm": 49.88924745592731, |
|
"learning_rate": 1.2039485620899367e-07, |
|
"logits/chosen": -7.990779876708984, |
|
"logits/rejected": -7.396949291229248, |
|
"logps/chosen": -2097.546142578125, |
|
"logps/rejected": -2177.14501953125, |
|
"loss": 0.092, |
|
"nll_loss": 5.315004348754883, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -33.4441032409668, |
|
"rewards/margins": 4.195204734802246, |
|
"rewards/rejected": -37.63930892944336, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.48505263157894735, |
|
"grad_norm": 298.7045075195552, |
|
"learning_rate": 1.1926071266246824e-07, |
|
"logits/chosen": -7.262898921966553, |
|
"logits/rejected": -7.93293571472168, |
|
"logps/chosen": -1817.0556640625, |
|
"logps/rejected": -2241.826904296875, |
|
"loss": 0.378, |
|
"nll_loss": 5.347080230712891, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -29.16703224182129, |
|
"rewards/margins": 7.644922733306885, |
|
"rewards/rejected": -36.81195831298828, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.4884210526315789, |
|
"grad_norm": 305.83167858317097, |
|
"learning_rate": 1.1812399025702289e-07, |
|
"logits/chosen": -7.386429309844971, |
|
"logits/rejected": -7.993374347686768, |
|
"logps/chosen": -1947.408935546875, |
|
"logps/rejected": -2193.298583984375, |
|
"loss": 0.2834, |
|
"nll_loss": 5.327097415924072, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -30.927370071411133, |
|
"rewards/margins": 5.908793926239014, |
|
"rewards/rejected": -36.83616256713867, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.4917894736842105, |
|
"grad_norm": 457.2760039079243, |
|
"learning_rate": 1.1698484119090518e-07, |
|
"logits/chosen": -7.0947346687316895, |
|
"logits/rejected": -8.351741790771484, |
|
"logps/chosen": -1785.537353515625, |
|
"logps/rejected": -2246.34912109375, |
|
"loss": 0.2948, |
|
"nll_loss": 5.337271213531494, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -28.525087356567383, |
|
"rewards/margins": 9.390640258789062, |
|
"rewards/rejected": -37.91572952270508, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.49515789473684213, |
|
"grad_norm": 296.42125851564697, |
|
"learning_rate": 1.1584341798727364e-07, |
|
"logits/chosen": -6.732482433319092, |
|
"logits/rejected": -8.335723876953125, |
|
"logps/chosen": -1633.8388671875, |
|
"logps/rejected": -2277.510498046875, |
|
"loss": 0.2649, |
|
"nll_loss": 5.798310279846191, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -25.833433151245117, |
|
"rewards/margins": 11.791748046875, |
|
"rewards/rejected": -37.62518310546875, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.4985263157894737, |
|
"grad_norm": 439.27146717605046, |
|
"learning_rate": 1.1469987347377601e-07, |
|
"logits/chosen": -7.6690354347229, |
|
"logits/rejected": -7.856517791748047, |
|
"logps/chosen": -2050.155029296875, |
|
"logps/rejected": -2208.57666015625, |
|
"loss": 0.3627, |
|
"nll_loss": 5.227299213409424, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -33.6103515625, |
|
"rewards/margins": 2.915879249572754, |
|
"rewards/rejected": -36.52622985839844, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.5018947368421053, |
|
"grad_norm": 318.9522275161282, |
|
"learning_rate": 1.1355436076208687e-07, |
|
"logits/chosen": -7.616082668304443, |
|
"logits/rejected": -8.118494033813477, |
|
"logps/chosen": -1956.5233154296875, |
|
"logps/rejected": -2223.263916015625, |
|
"loss": 0.2973, |
|
"nll_loss": 5.363955497741699, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -32.195335388183594, |
|
"rewards/margins": 5.070850849151611, |
|
"rewards/rejected": -37.26618576049805, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.5052631578947369, |
|
"grad_norm": 141.04978897095168, |
|
"learning_rate": 1.124070332274071e-07, |
|
"logits/chosen": -7.996880054473877, |
|
"logits/rejected": -8.085912704467773, |
|
"logps/chosen": -2062.050537109375, |
|
"logps/rejected": -2186.66162109375, |
|
"loss": 0.0746, |
|
"nll_loss": 5.05142879486084, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -33.29553985595703, |
|
"rewards/margins": 4.88584041595459, |
|
"rewards/rejected": -38.18138122558594, |
|
"step": 300 |
|
} |
|
], |
|
"logging_steps": 2, |
|
"max_steps": 593, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|