{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.998691442030882, |
|
"eval_steps": 500, |
|
"global_step": 477, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.010468463752944255, |
|
"grad_norm": 10.21879193399377, |
|
"learning_rate": 6.25e-08, |
|
"logits/chosen": -0.49816179275512695, |
|
"logits/rejected": -0.5135830640792847, |
|
"logps/chosen": -1.1745846271514893, |
|
"logps/rejected": -1.3595685958862305, |
|
"loss": -0.035, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.1745846271514893, |
|
"rewards/margins": 0.18498393893241882, |
|
"rewards/rejected": -1.3595685958862305, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.02093692750588851, |
|
"grad_norm": 2.7101486132785926, |
|
"learning_rate": 1.25e-07, |
|
"logits/chosen": -0.5216172933578491, |
|
"logits/rejected": -0.4984247088432312, |
|
"logps/chosen": -1.1580203771591187, |
|
"logps/rejected": -1.262268304824829, |
|
"loss": -0.0307, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -1.1580203771591187, |
|
"rewards/margins": 0.1042477935552597, |
|
"rewards/rejected": -1.262268304824829, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.031405391258832765, |
|
"grad_norm": 5.679326865922718, |
|
"learning_rate": 1.875e-07, |
|
"logits/chosen": -0.46298742294311523, |
|
"logits/rejected": -0.45184773206710815, |
|
"logps/chosen": -1.1014564037322998, |
|
"logps/rejected": -1.357534408569336, |
|
"loss": -0.0466, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.1014564037322998, |
|
"rewards/margins": 0.2560780644416809, |
|
"rewards/rejected": -1.357534408569336, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.04187385501177702, |
|
"grad_norm": 3.747855502948124, |
|
"learning_rate": 2.5e-07, |
|
"logits/chosen": -0.4439946711063385, |
|
"logits/rejected": -0.45637279748916626, |
|
"logps/chosen": -1.143970251083374, |
|
"logps/rejected": -1.249586582183838, |
|
"loss": -0.0343, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.143970251083374, |
|
"rewards/margins": 0.10561631619930267, |
|
"rewards/rejected": -1.249586582183838, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05234231876472128, |
|
"grad_norm": 4.753383333287764, |
|
"learning_rate": 3.125e-07, |
|
"logits/chosen": -0.5120896697044373, |
|
"logits/rejected": -0.4878992438316345, |
|
"logps/chosen": -1.1282974481582642, |
|
"logps/rejected": -1.20094895362854, |
|
"loss": -0.0405, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -1.1282974481582642, |
|
"rewards/margins": 0.07265140116214752, |
|
"rewards/rejected": -1.20094895362854, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.06281078251766553, |
|
"grad_norm": 5.921390795122043, |
|
"learning_rate": 3.75e-07, |
|
"logits/chosen": -0.5101591348648071, |
|
"logits/rejected": -0.5148754715919495, |
|
"logps/chosen": -1.0519568920135498, |
|
"logps/rejected": -1.1866623163223267, |
|
"loss": -0.0316, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -1.0519568920135498, |
|
"rewards/margins": 0.13470537960529327, |
|
"rewards/rejected": -1.1866623163223267, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07327924627060979, |
|
"grad_norm": 2.925534488741588, |
|
"learning_rate": 4.3749999999999994e-07, |
|
"logits/chosen": -0.523992657661438, |
|
"logits/rejected": -0.4819146990776062, |
|
"logps/chosen": -1.0300133228302002, |
|
"logps/rejected": -1.1558997631072998, |
|
"loss": -0.0438, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -1.0300133228302002, |
|
"rewards/margins": 0.12588649988174438, |
|
"rewards/rejected": -1.1558997631072998, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.08374771002355404, |
|
"grad_norm": 2.41534372661926, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": -0.5426082611083984, |
|
"logits/rejected": -0.5616889595985413, |
|
"logps/chosen": -0.9607571363449097, |
|
"logps/rejected": -1.2749502658843994, |
|
"loss": -0.0597, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.9607571363449097, |
|
"rewards/margins": 0.31419312953948975, |
|
"rewards/rejected": -1.2749502658843994, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0942161737764983, |
|
"grad_norm": 3.5651412454593747, |
|
"learning_rate": 5.625e-07, |
|
"logits/chosen": -0.5252939462661743, |
|
"logits/rejected": -0.5452172160148621, |
|
"logps/chosen": -0.9730573892593384, |
|
"logps/rejected": -1.1473325490951538, |
|
"loss": -0.0559, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.9730573892593384, |
|
"rewards/margins": 0.174275204539299, |
|
"rewards/rejected": -1.1473325490951538, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.10468463752944256, |
|
"grad_norm": 4.149376436833766, |
|
"learning_rate": 5.999678242522831e-07, |
|
"logits/chosen": -0.5748304128646851, |
|
"logits/rejected": -0.6049160957336426, |
|
"logps/chosen": -0.9965342283248901, |
|
"logps/rejected": -1.2540613412857056, |
|
"loss": -0.0464, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.9965342283248901, |
|
"rewards/margins": 0.25752708315849304, |
|
"rewards/rejected": -1.2540613412857056, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.11515310128238682, |
|
"grad_norm": 2.9691689665037373, |
|
"learning_rate": 5.996059263493219e-07, |
|
"logits/chosen": -0.5686520338058472, |
|
"logits/rejected": -0.5740174055099487, |
|
"logps/chosen": -0.9575145840644836, |
|
"logps/rejected": -1.1497360467910767, |
|
"loss": -0.051, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.9575145840644836, |
|
"rewards/margins": 0.1922214925289154, |
|
"rewards/rejected": -1.1497360467910767, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.12562156503533106, |
|
"grad_norm": 2.6253711163069537, |
|
"learning_rate": 5.988423976115163e-07, |
|
"logits/chosen": -0.6203932762145996, |
|
"logits/rejected": -0.633256196975708, |
|
"logps/chosen": -1.0102077722549438, |
|
"logps/rejected": -1.1339367628097534, |
|
"loss": -0.062, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.0102077722549438, |
|
"rewards/margins": 0.12372910976409912, |
|
"rewards/rejected": -1.1339367628097534, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.1360900287882753, |
|
"grad_norm": 6.143760308041501, |
|
"learning_rate": 5.976782615723061e-07, |
|
"logits/chosen": -0.5896092653274536, |
|
"logits/rejected": -0.5990695357322693, |
|
"logps/chosen": -0.9534575343132019, |
|
"logps/rejected": -1.3051955699920654, |
|
"loss": -0.0542, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.9534575343132019, |
|
"rewards/margins": 0.3517380654811859, |
|
"rewards/rejected": -1.3051955699920654, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.14655849254121958, |
|
"grad_norm": 2.425535094463044, |
|
"learning_rate": 5.961150787913738e-07, |
|
"logits/chosen": -0.5713956952095032, |
|
"logits/rejected": -0.5825516581535339, |
|
"logps/chosen": -0.9968436360359192, |
|
"logps/rejected": -1.1801879405975342, |
|
"loss": -0.0566, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.9968436360359192, |
|
"rewards/margins": 0.18334420025348663, |
|
"rewards/rejected": -1.1801879405975342, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.15702695629416383, |
|
"grad_norm": 3.936791088850445, |
|
"learning_rate": 5.941549447626671e-07, |
|
"logits/chosen": -0.5951550006866455, |
|
"logits/rejected": -0.601603090763092, |
|
"logps/chosen": -0.9617071151733398, |
|
"logps/rejected": -1.2262237071990967, |
|
"loss": -0.0623, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.9617071151733398, |
|
"rewards/margins": 0.2645165026187897, |
|
"rewards/rejected": -1.2262237071990967, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.16749542004710807, |
|
"grad_norm": 3.1037840831495154, |
|
"learning_rate": 5.918004871053251e-07, |
|
"logits/chosen": -0.6256829500198364, |
|
"logits/rejected": -0.6605373620986938, |
|
"logps/chosen": -0.9740772247314453, |
|
"logps/rejected": -1.2311257123947144, |
|
"loss": -0.0545, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.9740772247314453, |
|
"rewards/margins": 0.25704842805862427, |
|
"rewards/rejected": -1.2311257123947144, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.17796388380005235, |
|
"grad_norm": 3.517502431455176, |
|
"learning_rate": 5.890548620412763e-07, |
|
"logits/chosen": -0.6996882557868958, |
|
"logits/rejected": -0.6915110349655151, |
|
"logps/chosen": -0.9287127256393433, |
|
"logps/rejected": -1.1633974313735962, |
|
"loss": -0.0628, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.9287127256393433, |
|
"rewards/margins": 0.23468473553657532, |
|
"rewards/rejected": -1.1633974313735962, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.1884323475529966, |
|
"grad_norm": 3.017999402299648, |
|
"learning_rate": 5.859217501642258e-07, |
|
"logits/chosen": -0.6648964285850525, |
|
"logits/rejected": -0.6696600914001465, |
|
"logps/chosen": -1.020616888999939, |
|
"logps/rejected": -1.240464210510254, |
|
"loss": -0.0681, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.020616888999939, |
|
"rewards/margins": 0.2198474109172821, |
|
"rewards/rejected": -1.240464210510254, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.19890081130594087, |
|
"grad_norm": 4.972742617412184, |
|
"learning_rate": 5.824053515057091e-07, |
|
"logits/chosen": -0.6936327815055847, |
|
"logits/rejected": -0.6650416254997253, |
|
"logps/chosen": -1.010335922241211, |
|
"logps/rejected": -1.212616205215454, |
|
"loss": -0.0554, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -1.010335922241211, |
|
"rewards/margins": 0.20228025317192078, |
|
"rewards/rejected": -1.212616205215454, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.2093692750588851, |
|
"grad_norm": 4.305605640056122, |
|
"learning_rate": 5.785103799048218e-07, |
|
"logits/chosen": -0.6884914636611938, |
|
"logits/rejected": -0.7089473009109497, |
|
"logps/chosen": -1.0421329736709595, |
|
"logps/rejected": -1.3643444776535034, |
|
"loss": -0.0577, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.0421329736709595, |
|
"rewards/margins": 0.32221153378486633, |
|
"rewards/rejected": -1.3643444776535034, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.21983773881182936, |
|
"grad_norm": 4.401763969089556, |
|
"learning_rate": 5.742420566891749e-07, |
|
"logits/chosen": -0.6878229379653931, |
|
"logits/rejected": -0.6928119659423828, |
|
"logps/chosen": -0.9930433034896851, |
|
"logps/rejected": -1.3710923194885254, |
|
"loss": -0.0667, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.9930433034896851, |
|
"rewards/margins": 0.3780490458011627, |
|
"rewards/rejected": -1.3710923194885254, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.23030620256477363, |
|
"grad_norm": 6.081502646176958, |
|
"learning_rate": 5.696061036755478e-07, |
|
"logits/chosen": -0.7217090725898743, |
|
"logits/rejected": -0.7287099361419678, |
|
"logps/chosen": -1.03863525390625, |
|
"logps/rejected": -1.384284257888794, |
|
"loss": -0.0744, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.03863525390625, |
|
"rewards/margins": 0.3456490635871887, |
|
"rewards/rejected": -1.384284257888794, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.24077466631771788, |
|
"grad_norm": 4.443555154250914, |
|
"learning_rate": 5.64608735499618e-07, |
|
"logits/chosen": -0.6417717337608337, |
|
"logits/rejected": -0.6641663312911987, |
|
"logps/chosen": -1.0144702196121216, |
|
"logps/rejected": -1.4784486293792725, |
|
"loss": -0.0904, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.0144702196121216, |
|
"rewards/margins": 0.46397843956947327, |
|
"rewards/rejected": -1.4784486293792725, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.2512431300706621, |
|
"grad_norm": 4.21476483098945, |
|
"learning_rate": 5.592566512850545e-07, |
|
"logits/chosen": -0.6441653966903687, |
|
"logits/rejected": -0.6482559442520142, |
|
"logps/chosen": -1.126665711402893, |
|
"logps/rejected": -1.395385980606079, |
|
"loss": -0.0685, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.126665711402893, |
|
"rewards/margins": 0.2687203884124756, |
|
"rewards/rejected": -1.395385980606079, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.26171159382360637, |
|
"grad_norm": 5.871972559220826, |
|
"learning_rate": 5.535570256631384e-07, |
|
"logits/chosen": -0.6881023645401001, |
|
"logits/rejected": -0.6507277488708496, |
|
"logps/chosen": -1.030970811843872, |
|
"logps/rejected": -1.3154019117355347, |
|
"loss": -0.0735, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -1.030970811843872, |
|
"rewards/margins": 0.28443121910095215, |
|
"rewards/rejected": -1.3154019117355347, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.2721800575765506, |
|
"grad_norm": 3.8967984948431176, |
|
"learning_rate": 5.475174991549528e-07, |
|
"logits/chosen": -0.6212294697761536, |
|
"logits/rejected": -0.6462045907974243, |
|
"logps/chosen": -1.055466651916504, |
|
"logps/rejected": -1.2383654117584229, |
|
"loss": -0.0712, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.055466651916504, |
|
"rewards/margins": 0.18289880454540253, |
|
"rewards/rejected": -1.2383654117584229, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.2826485213294949, |
|
"grad_norm": 4.954091803088075, |
|
"learning_rate": 5.411461679290317e-07, |
|
"logits/chosen": -0.6356550455093384, |
|
"logits/rejected": -0.6022456884384155, |
|
"logps/chosen": -1.1107438802719116, |
|
"logps/rejected": -1.643451452255249, |
|
"loss": -0.0826, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1107438802719116, |
|
"rewards/margins": 0.532707691192627, |
|
"rewards/rejected": -1.643451452255249, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.29311698508243916, |
|
"grad_norm": 6.396288923391441, |
|
"learning_rate": 5.34451572948201e-07, |
|
"logits/chosen": -0.5937852263450623, |
|
"logits/rejected": -0.5691717863082886, |
|
"logps/chosen": -1.0965867042541504, |
|
"logps/rejected": -1.5603969097137451, |
|
"loss": -0.0915, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.0965867042541504, |
|
"rewards/margins": 0.4638102948665619, |
|
"rewards/rejected": -1.5603969097137451, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.3035854488353834, |
|
"grad_norm": 4.065147326048241, |
|
"learning_rate": 5.274426885201582e-07, |
|
"logits/chosen": -0.5843586921691895, |
|
"logits/rejected": -0.5910850763320923, |
|
"logps/chosen": -1.1310163736343384, |
|
"logps/rejected": -1.4976770877838135, |
|
"loss": -0.0849, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.1310163736343384, |
|
"rewards/margins": 0.3666609227657318, |
|
"rewards/rejected": -1.4976770877838135, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.31405391258832765, |
|
"grad_norm": 4.014586941737209, |
|
"learning_rate": 5.201289102671411e-07, |
|
"logits/chosen": -0.546478271484375, |
|
"logits/rejected": -0.5195995569229126, |
|
"logps/chosen": -1.0407493114471436, |
|
"logps/rejected": -1.436926245689392, |
|
"loss": -0.0898, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.0407493114471436, |
|
"rewards/margins": 0.3961769640445709, |
|
"rewards/rejected": -1.436926245689392, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3245223763412719, |
|
"grad_norm": 6.577906404260762, |
|
"learning_rate": 5.12520042530811e-07, |
|
"logits/chosen": -0.5657233595848083, |
|
"logits/rejected": -0.5251718163490295, |
|
"logps/chosen": -1.1016614437103271, |
|
"logps/rejected": -1.4624617099761963, |
|
"loss": -0.0923, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.1016614437103271, |
|
"rewards/margins": 0.36080020666122437, |
|
"rewards/rejected": -1.4624617099761963, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.33499084009421615, |
|
"grad_norm": 5.297524478469823, |
|
"learning_rate": 5.046262852292346e-07, |
|
"logits/chosen": -0.4731084704399109, |
|
"logits/rejected": -0.4319906234741211, |
|
"logps/chosen": -1.1443315744400024, |
|
"logps/rejected": -1.5339367389678955, |
|
"loss": -0.0887, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.1443315744400024, |
|
"rewards/margins": 0.38960522413253784, |
|
"rewards/rejected": -1.5339367389678955, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.34545930384716045, |
|
"grad_norm": 6.8937105755776065, |
|
"learning_rate": 4.964582201835856e-07, |
|
"logits/chosen": -0.4270719885826111, |
|
"logits/rejected": -0.3794442415237427, |
|
"logps/chosen": -1.1176317930221558, |
|
"logps/rejected": -1.639172911643982, |
|
"loss": -0.0965, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.1176317930221558, |
|
"rewards/margins": 0.5215411186218262, |
|
"rewards/rejected": -1.639172911643982, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.3559277676001047, |
|
"grad_norm": 4.460892002893491, |
|
"learning_rate": 4.880267969328908e-07, |
|
"logits/chosen": -0.32646840810775757, |
|
"logits/rejected": -0.22941944003105164, |
|
"logps/chosen": -1.2465507984161377, |
|
"logps/rejected": -1.7467823028564453, |
|
"loss": -0.0851, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.2465507984161377, |
|
"rewards/margins": 0.5002316236495972, |
|
"rewards/rejected": -1.7467823028564453, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.36639623135304894, |
|
"grad_norm": 10.536194204055317, |
|
"learning_rate": 4.793433180558423e-07, |
|
"logits/chosen": -0.2930789887905121, |
|
"logits/rejected": -0.15167564153671265, |
|
"logps/chosen": -1.1869597434997559, |
|
"logps/rejected": -1.7165695428848267, |
|
"loss": -0.088, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.1869597434997559, |
|
"rewards/margins": 0.5296097993850708, |
|
"rewards/rejected": -1.7165695428848267, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.3768646951059932, |
|
"grad_norm": 14.792822735814111, |
|
"learning_rate": 4.704194240193467e-07, |
|
"logits/chosen": -0.14493033289909363, |
|
"logits/rejected": -0.08077652007341385, |
|
"logps/chosen": -1.1866719722747803, |
|
"logps/rejected": -1.724832534790039, |
|
"loss": -0.0932, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.1866719722747803, |
|
"rewards/margins": 0.5381606221199036, |
|
"rewards/rejected": -1.724832534790039, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.38733315885893743, |
|
"grad_norm": 5.012043699075584, |
|
"learning_rate": 4.6126707757412686e-07, |
|
"logits/chosen": -0.19216454029083252, |
|
"logits/rejected": -0.06508170068264008, |
|
"logps/chosen": -1.2008949518203735, |
|
"logps/rejected": -2.0086560249328613, |
|
"loss": -0.1086, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.2008949518203735, |
|
"rewards/margins": 0.807761013507843, |
|
"rewards/rejected": -2.0086560249328613, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.39780162261188173, |
|
"grad_norm": 6.108194509753534, |
|
"learning_rate": 4.5189854771829086e-07, |
|
"logits/chosen": -0.14359740912914276, |
|
"logits/rejected": -0.04716472700238228, |
|
"logps/chosen": -1.2239508628845215, |
|
"logps/rejected": -1.738565444946289, |
|
"loss": -0.0845, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.2239508628845215, |
|
"rewards/margins": 0.5146147012710571, |
|
"rewards/rejected": -1.738565444946289, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.408270086364826, |
|
"grad_norm": 12.413660235408443, |
|
"learning_rate": 4.4232639325036807e-07, |
|
"logits/chosen": -0.16371646523475647, |
|
"logits/rejected": -0.08154813945293427, |
|
"logps/chosen": -1.3641008138656616, |
|
"logps/rejected": -1.8681023120880127, |
|
"loss": -0.0883, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.3641008138656616, |
|
"rewards/margins": 0.5040014386177063, |
|
"rewards/rejected": -1.8681023120880127, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.4187385501177702, |
|
"grad_norm": 5.664592254947892, |
|
"learning_rate": 4.32563445933859e-07, |
|
"logits/chosen": -0.3630138039588928, |
|
"logits/rejected": -0.31867215037345886, |
|
"logps/chosen": -1.2496740818023682, |
|
"logps/rejected": -1.7695592641830444, |
|
"loss": -0.0991, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.2496740818023682, |
|
"rewards/margins": 0.5198851823806763, |
|
"rewards/rejected": -1.7695592641830444, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.42920701387071447, |
|
"grad_norm": 8.615954171040675, |
|
"learning_rate": 4.226227932958664e-07, |
|
"logits/chosen": -0.2460884302854538, |
|
"logits/rejected": -0.1599723994731903, |
|
"logps/chosen": -1.0948972702026367, |
|
"logps/rejected": -1.7847169637680054, |
|
"loss": -0.1097, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.0948972702026367, |
|
"rewards/margins": 0.6898195743560791, |
|
"rewards/rejected": -1.7847169637680054, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.4396754776236587, |
|
"grad_norm": 15.728835997323346, |
|
"learning_rate": 4.1251776108286854e-07, |
|
"logits/chosen": -0.16430199146270752, |
|
"logits/rejected": -0.09520161151885986, |
|
"logps/chosen": -1.3307850360870361, |
|
"logps/rejected": -1.7031431198120117, |
|
"loss": -0.0888, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.3307850360870361, |
|
"rewards/margins": 0.37235820293426514, |
|
"rewards/rejected": -1.7031431198120117, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.45014394137660296, |
|
"grad_norm": 9.404924688577674, |
|
"learning_rate": 4.022618953971514e-07, |
|
"logits/chosen": -0.11717267334461212, |
|
"logits/rejected": -0.0059516532346606255, |
|
"logps/chosen": -1.2097961902618408, |
|
"logps/rejected": -1.8957335948944092, |
|
"loss": -0.1077, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.2097961902618408, |
|
"rewards/margins": 0.6859374046325684, |
|
"rewards/rejected": -1.8957335948944092, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.46061240512954726, |
|
"grad_norm": 7.3279364906626485, |
|
"learning_rate": 3.918689445378477e-07, |
|
"logits/chosen": -0.09480677545070648, |
|
"logits/rejected": 0.13137385249137878, |
|
"logps/chosen": -1.2646963596343994, |
|
"logps/rejected": -1.9554264545440674, |
|
"loss": -0.0804, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.2646963596343994, |
|
"rewards/margins": 0.690730094909668, |
|
"rewards/rejected": -1.9554264545440674, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.4710808688824915, |
|
"grad_norm": 5.249390449834869, |
|
"learning_rate": 3.813528405709251e-07, |
|
"logits/chosen": 0.017690464854240417, |
|
"logits/rejected": 0.24274654686450958, |
|
"logps/chosen": -1.2406516075134277, |
|
"logps/rejected": -2.0346226692199707, |
|
"loss": -0.0963, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.2406516075134277, |
|
"rewards/margins": 0.7939712405204773, |
|
"rewards/rejected": -2.0346226692199707, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.48154933263543576, |
|
"grad_norm": 5.840384716420158, |
|
"learning_rate": 3.707276806528282e-07, |
|
"logits/chosen": -0.034771956503391266, |
|
"logits/rejected": 0.25075453519821167, |
|
"logps/chosen": -1.307732105255127, |
|
"logps/rejected": -2.3069005012512207, |
|
"loss": -0.1106, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.307732105255127, |
|
"rewards/margins": 0.9991682171821594, |
|
"rewards/rejected": -2.3069005012512207, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.49201779638838, |
|
"grad_norm": 5.708169285500334, |
|
"learning_rate": 3.6000770813281334e-07, |
|
"logits/chosen": -0.04531233012676239, |
|
"logits/rejected": 0.1216268315911293, |
|
"logps/chosen": -1.0852802991867065, |
|
"logps/rejected": -1.7644765377044678, |
|
"loss": -0.1067, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.0852802991867065, |
|
"rewards/margins": 0.6791960597038269, |
|
"rewards/rejected": -1.7644765377044678, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.5024862601413242, |
|
"grad_norm": 5.2328738831340615, |
|
"learning_rate": 3.4920729345930654e-07, |
|
"logits/chosen": -0.15298782289028168, |
|
"logits/rejected": -0.021981507539749146, |
|
"logps/chosen": -1.181814432144165, |
|
"logps/rejected": -1.84041428565979, |
|
"loss": -0.0907, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.181814432144165, |
|
"rewards/margins": 0.6585996747016907, |
|
"rewards/rejected": -1.84041428565979, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5129547238942685, |
|
"grad_norm": 4.454620572722302, |
|
"learning_rate": 3.383409149158814e-07, |
|
"logits/chosen": -0.14406219124794006, |
|
"logits/rejected": 0.021218538284301758, |
|
"logps/chosen": -1.3196625709533691, |
|
"logps/rejected": -1.8837369680404663, |
|
"loss": -0.1005, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.3196625709533691, |
|
"rewards/margins": 0.5640743970870972, |
|
"rewards/rejected": -1.8837369680404663, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.5234231876472127, |
|
"grad_norm": 16.864698084247127, |
|
"learning_rate": 3.2742313921268035e-07, |
|
"logits/chosen": -0.04993357136845589, |
|
"logits/rejected": 0.09763963520526886, |
|
"logps/chosen": -1.1896789073944092, |
|
"logps/rejected": -1.9447057247161865, |
|
"loss": -0.1026, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.1896789073944092, |
|
"rewards/margins": 0.7550268769264221, |
|
"rewards/rejected": -1.9447057247161865, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.533891651400157, |
|
"grad_norm": 4.006389891258429, |
|
"learning_rate": 3.1646860195929825e-07, |
|
"logits/chosen": 0.054258793592453, |
|
"logits/rejected": 0.3202013075351715, |
|
"logps/chosen": -1.37346613407135, |
|
"logps/rejected": -2.1130452156066895, |
|
"loss": -0.1166, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.37346613407135, |
|
"rewards/margins": 0.7395793199539185, |
|
"rewards/rejected": -2.1130452156066895, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.5443601151531012, |
|
"grad_norm": 11.296019715540536, |
|
"learning_rate": 3.054919880453032e-07, |
|
"logits/chosen": 0.08209206908941269, |
|
"logits/rejected": 0.305607408285141, |
|
"logps/chosen": -1.1226253509521484, |
|
"logps/rejected": -2.1423118114471436, |
|
"loss": -0.1085, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.1226253509521484, |
|
"rewards/margins": 1.0196868181228638, |
|
"rewards/rejected": -2.1423118114471436, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5548285789060455, |
|
"grad_norm": 12.257070737010752, |
|
"learning_rate": 2.9450801195469686e-07, |
|
"logits/chosen": 0.13827550411224365, |
|
"logits/rejected": 0.33571916818618774, |
|
"logps/chosen": -1.2839715480804443, |
|
"logps/rejected": -1.8086509704589844, |
|
"loss": -0.1095, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.2839715480804443, |
|
"rewards/margins": 0.5246793031692505, |
|
"rewards/rejected": -1.8086509704589844, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.5652970426589898, |
|
"grad_norm": 7.650172024925882, |
|
"learning_rate": 2.835313980407017e-07, |
|
"logits/chosen": 0.2891995906829834, |
|
"logits/rejected": 0.4797445237636566, |
|
"logps/chosen": -1.3987174034118652, |
|
"logps/rejected": -1.9334684610366821, |
|
"loss": -0.0939, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.3987174034118652, |
|
"rewards/margins": 0.5347510576248169, |
|
"rewards/rejected": -1.9334684610366821, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.575765506411934, |
|
"grad_norm": 9.750902438039956, |
|
"learning_rate": 2.7257686078731973e-07, |
|
"logits/chosen": 0.3225773572921753, |
|
"logits/rejected": 0.6765245795249939, |
|
"logps/chosen": -1.2831330299377441, |
|
"logps/rejected": -2.208591938018799, |
|
"loss": -0.111, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.2831330299377441, |
|
"rewards/margins": 0.9254589080810547, |
|
"rewards/rejected": -2.208591938018799, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.5862339701648783, |
|
"grad_norm": 7.363369128681095, |
|
"learning_rate": 2.6165908508411857e-07, |
|
"logits/chosen": 0.29373425245285034, |
|
"logits/rejected": 0.5836144685745239, |
|
"logps/chosen": -1.2897415161132812, |
|
"logps/rejected": -1.9501053094863892, |
|
"loss": -0.1031, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.2897415161132812, |
|
"rewards/margins": 0.6603637933731079, |
|
"rewards/rejected": -1.9501053094863892, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.5967024339178225, |
|
"grad_norm": 13.183162679494803, |
|
"learning_rate": 2.5079270654069354e-07, |
|
"logits/chosen": 0.18902722001075745, |
|
"logits/rejected": 0.29711785912513733, |
|
"logps/chosen": -1.2677323818206787, |
|
"logps/rejected": -2.1915152072906494, |
|
"loss": -0.1109, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.2677323818206787, |
|
"rewards/margins": 0.9237826466560364, |
|
"rewards/rejected": -2.1915152072906494, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.6071708976707668, |
|
"grad_norm": 6.687429291622309, |
|
"learning_rate": 2.399922918671867e-07, |
|
"logits/chosen": 0.022776653990149498, |
|
"logits/rejected": 0.2391856163740158, |
|
"logps/chosen": -1.3181854486465454, |
|
"logps/rejected": -2.3116376399993896, |
|
"loss": -0.116, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.3181854486465454, |
|
"rewards/margins": 0.993452250957489, |
|
"rewards/rejected": -2.3116376399993896, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6176393614237111, |
|
"grad_norm": 5.281502633271325, |
|
"learning_rate": 2.2927231934717176e-07, |
|
"logits/chosen": -0.0016543983947485685, |
|
"logits/rejected": 0.17889878153800964, |
|
"logps/chosen": -1.3387067317962646, |
|
"logps/rejected": -2.2106659412384033, |
|
"loss": -0.1028, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.3387067317962646, |
|
"rewards/margins": 0.8719590902328491, |
|
"rewards/rejected": -2.2106659412384033, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.6281078251766553, |
|
"grad_norm": 7.373204926178573, |
|
"learning_rate": 2.1864715942907487e-07, |
|
"logits/chosen": -0.007572178728878498, |
|
"logits/rejected": 0.14625847339630127, |
|
"logps/chosen": -1.4602124691009521, |
|
"logps/rejected": -2.1073670387268066, |
|
"loss": -0.1046, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.4602124691009521, |
|
"rewards/margins": 0.6471549272537231, |
|
"rewards/rejected": -2.1073670387268066, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6385762889295996, |
|
"grad_norm": 4.736107509318446, |
|
"learning_rate": 2.081310554621522e-07, |
|
"logits/chosen": 0.19010517001152039, |
|
"logits/rejected": 0.3840242624282837, |
|
"logps/chosen": -1.4273982048034668, |
|
"logps/rejected": -2.3235268592834473, |
|
"loss": -0.1246, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.4273982048034668, |
|
"rewards/margins": 0.89612877368927, |
|
"rewards/rejected": -2.3235268592834473, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.6490447526825438, |
|
"grad_norm": 23.409368660389333, |
|
"learning_rate": 1.9773810460284862e-07, |
|
"logits/chosen": 0.2859404981136322, |
|
"logits/rejected": 0.3260739743709564, |
|
"logps/chosen": -1.2994334697723389, |
|
"logps/rejected": -2.061152935028076, |
|
"loss": -0.1079, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.2994334697723389, |
|
"rewards/margins": 0.7617196440696716, |
|
"rewards/rejected": -2.061152935028076, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.6595132164354881, |
|
"grad_norm": 22.542628144583606, |
|
"learning_rate": 1.874822389171314e-07, |
|
"logits/chosen": 0.22070816159248352, |
|
"logits/rejected": 0.5137720704078674, |
|
"logps/chosen": -1.2794992923736572, |
|
"logps/rejected": -2.3934812545776367, |
|
"loss": -0.1217, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.2794992923736572, |
|
"rewards/margins": 1.11398184299469, |
|
"rewards/rejected": -2.3934812545776367, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.6699816801884323, |
|
"grad_norm": 9.283703258871379, |
|
"learning_rate": 1.7737720670413356e-07, |
|
"logits/chosen": 0.24648892879486084, |
|
"logits/rejected": 0.33991724252700806, |
|
"logps/chosen": -1.365140676498413, |
|
"logps/rejected": -2.166224241256714, |
|
"loss": -0.1155, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.365140676498413, |
|
"rewards/margins": 0.8010835647583008, |
|
"rewards/rejected": -2.166224241256714, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.6804501439413766, |
|
"grad_norm": 7.959464499237016, |
|
"learning_rate": 1.6743655406614095e-07, |
|
"logits/chosen": 0.14846596121788025, |
|
"logits/rejected": 0.3873682916164398, |
|
"logps/chosen": -1.3291183710098267, |
|
"logps/rejected": -2.1744306087493896, |
|
"loss": -0.1209, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.3291183710098267, |
|
"rewards/margins": 0.845312237739563, |
|
"rewards/rejected": -2.1744306087493896, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.6909186076943209, |
|
"grad_norm": 10.067079168168558, |
|
"learning_rate": 1.5767360674963198e-07, |
|
"logits/chosen": 0.08330237120389938, |
|
"logits/rejected": 0.29010123014450073, |
|
"logps/chosen": -1.225388765335083, |
|
"logps/rejected": -1.9910337924957275, |
|
"loss": -0.1019, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.225388765335083, |
|
"rewards/margins": 0.7656451463699341, |
|
"rewards/rejected": -1.9910337924957275, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7013870714472651, |
|
"grad_norm": 9.133946690623846, |
|
"learning_rate": 1.4810145228170922e-07, |
|
"logits/chosen": -0.036666952073574066, |
|
"logits/rejected": 0.15409226715564728, |
|
"logps/chosen": -1.2429730892181396, |
|
"logps/rejected": -1.8629066944122314, |
|
"loss": -0.0986, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.2429730892181396, |
|
"rewards/margins": 0.6199334859848022, |
|
"rewards/rejected": -1.8629066944122314, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.7118555352002094, |
|
"grad_norm": 10.242157594508342, |
|
"learning_rate": 1.3873292242587306e-07, |
|
"logits/chosen": 0.028749173507094383, |
|
"logits/rejected": 0.23107752203941345, |
|
"logps/chosen": -1.430831789970398, |
|
"logps/rejected": -2.205451488494873, |
|
"loss": -0.1094, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.430831789970398, |
|
"rewards/margins": 0.7746195197105408, |
|
"rewards/rejected": -2.205451488494873, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.7223239989531536, |
|
"grad_norm": 5.706648979502026, |
|
"learning_rate": 1.295805759806533e-07, |
|
"logits/chosen": 0.007116416003555059, |
|
"logits/rejected": 0.24577870965003967, |
|
"logps/chosen": -1.297646164894104, |
|
"logps/rejected": -2.2263598442077637, |
|
"loss": -0.1067, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.297646164894104, |
|
"rewards/margins": 0.9287137985229492, |
|
"rewards/rejected": -2.2263598442077637, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.7327924627060979, |
|
"grad_norm": 7.792589886443301, |
|
"learning_rate": 1.2065668194415777e-07, |
|
"logits/chosen": 0.1685815453529358, |
|
"logits/rejected": 0.2779741585254669, |
|
"logps/chosen": -1.3478834629058838, |
|
"logps/rejected": -2.127234935760498, |
|
"loss": -0.1044, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.3478834629058838, |
|
"rewards/margins": 0.7793514132499695, |
|
"rewards/rejected": -2.127234935760498, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7432609264590422, |
|
"grad_norm": 8.782679171257993, |
|
"learning_rate": 1.1197320306710923e-07, |
|
"logits/chosen": 0.1377880573272705, |
|
"logits/rejected": 0.32959434390068054, |
|
"logps/chosen": -1.1817530393600464, |
|
"logps/rejected": -2.1875174045562744, |
|
"loss": -0.1184, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.1817530393600464, |
|
"rewards/margins": 1.0057642459869385, |
|
"rewards/rejected": -2.1875174045562744, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.7537293902119864, |
|
"grad_norm": 6.862511018163533, |
|
"learning_rate": 1.035417798164145e-07, |
|
"logits/chosen": 0.01301775686442852, |
|
"logits/rejected": 0.2229972630739212, |
|
"logps/chosen": -1.1256128549575806, |
|
"logps/rejected": -1.9575172662734985, |
|
"loss": -0.1212, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.1256128549575806, |
|
"rewards/margins": 0.8319045901298523, |
|
"rewards/rejected": -1.9575172662734985, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.7641978539649307, |
|
"grad_norm": 9.505229420957962, |
|
"learning_rate": 9.537371477076535e-08, |
|
"logits/chosen": 0.11367920786142349, |
|
"logits/rejected": 0.356434166431427, |
|
"logps/chosen": -1.4002251625061035, |
|
"logps/rejected": -2.392739772796631, |
|
"loss": -0.1187, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.4002251625061035, |
|
"rewards/margins": 0.9925147294998169, |
|
"rewards/rejected": -2.392739772796631, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.7746663177178749, |
|
"grad_norm": 10.965226669329015, |
|
"learning_rate": 8.747995746918898e-08, |
|
"logits/chosen": 0.20804345607757568, |
|
"logits/rejected": 0.45443806052207947, |
|
"logps/chosen": -1.363114356994629, |
|
"logps/rejected": -2.378051280975342, |
|
"loss": -0.1203, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.363114356994629, |
|
"rewards/margins": 1.0149368047714233, |
|
"rewards/rejected": -2.378051280975342, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.7851347814708192, |
|
"grad_norm": 6.16813334899156, |
|
"learning_rate": 7.987108973285888e-08, |
|
"logits/chosen": 0.26107341051101685, |
|
"logits/rejected": 0.22848454117774963, |
|
"logps/chosen": -1.387460708618164, |
|
"logps/rejected": -2.197563886642456, |
|
"loss": -0.1101, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.387460708618164, |
|
"rewards/margins": 0.8101032972335815, |
|
"rewards/rejected": -2.197563886642456, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.7956032452237635, |
|
"grad_norm": 12.918343537826871, |
|
"learning_rate": 7.255731147984174e-08, |
|
"logits/chosen": 0.22713570296764374, |
|
"logits/rejected": 0.3289734423160553, |
|
"logps/chosen": -1.4456373453140259, |
|
"logps/rejected": -2.084005832672119, |
|
"loss": -0.117, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.4456373453140259, |
|
"rewards/margins": 0.638368546962738, |
|
"rewards/rejected": -2.084005832672119, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8060717089767077, |
|
"grad_norm": 6.060272813011622, |
|
"learning_rate": 6.554842705179898e-08, |
|
"logits/chosen": 0.12520398199558258, |
|
"logits/rejected": 0.29297417402267456, |
|
"logps/chosen": -1.1945627927780151, |
|
"logps/rejected": -2.0077576637268066, |
|
"loss": -0.1195, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.1945627927780151, |
|
"rewards/margins": 0.8131949305534363, |
|
"rewards/rejected": -2.0077576637268066, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.816540172729652, |
|
"grad_norm": 17.713346450689524, |
|
"learning_rate": 5.885383207096832e-08, |
|
"logits/chosen": 0.13393369317054749, |
|
"logits/rejected": 0.3967200517654419, |
|
"logps/chosen": -1.2682616710662842, |
|
"logps/rejected": -2.303426504135132, |
|
"loss": -0.1115, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.2682616710662842, |
|
"rewards/margins": 1.0351645946502686, |
|
"rewards/rejected": -2.303426504135132, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.8270086364825961, |
|
"grad_norm": 6.0463426455293385, |
|
"learning_rate": 5.2482500845047165e-08, |
|
"logits/chosen": 0.09920735657215118, |
|
"logits/rejected": 0.4265053868293762, |
|
"logps/chosen": -1.2478829622268677, |
|
"logps/rejected": -2.2664952278137207, |
|
"loss": -0.1059, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.2478829622268677, |
|
"rewards/margins": 1.018612027168274, |
|
"rewards/rejected": -2.2664952278137207, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.8374771002355405, |
|
"grad_norm": 7.849951661296689, |
|
"learning_rate": 4.644297433686162e-08, |
|
"logits/chosen": 0.20306222140789032, |
|
"logits/rejected": 0.37799519300460815, |
|
"logps/chosen": -1.167974591255188, |
|
"logps/rejected": -2.061584949493408, |
|
"loss": -0.1211, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.167974591255188, |
|
"rewards/margins": 0.8936103582382202, |
|
"rewards/rejected": -2.061584949493408, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8479455639884846, |
|
"grad_norm": 9.305371166707902, |
|
"learning_rate": 4.074334871494558e-08, |
|
"logits/chosen": 0.12849920988082886, |
|
"logits/rejected": 0.27528905868530273, |
|
"logps/chosen": -1.4915854930877686, |
|
"logps/rejected": -2.3600451946258545, |
|
"loss": -0.0987, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.4915854930877686, |
|
"rewards/margins": 0.8684597015380859, |
|
"rewards/rejected": -2.3600451946258545, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.8584140277414289, |
|
"grad_norm": 7.721976646915507, |
|
"learning_rate": 3.5391264500382e-08, |
|
"logits/chosen": 0.15187588334083557, |
|
"logits/rejected": 0.3307904601097107, |
|
"logps/chosen": -1.2111918926239014, |
|
"logps/rejected": -2.036677837371826, |
|
"loss": -0.1121, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.2111918926239014, |
|
"rewards/margins": 0.8254860043525696, |
|
"rewards/rejected": -2.036677837371826, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.8688824914943732, |
|
"grad_norm": 7.610516456597608, |
|
"learning_rate": 3.0393896324452226e-08, |
|
"logits/chosen": 0.14874114096164703, |
|
"logits/rejected": 0.44959506392478943, |
|
"logps/chosen": -1.3082196712493896, |
|
"logps/rejected": -2.282323122024536, |
|
"loss": -0.1227, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.3082196712493896, |
|
"rewards/margins": 0.9741032719612122, |
|
"rewards/rejected": -2.282323122024536, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.8793509552473174, |
|
"grad_norm": 7.385339190408951, |
|
"learning_rate": 2.5757943310825026e-08, |
|
"logits/chosen": 0.149868905544281, |
|
"logits/rejected": 0.3381766676902771, |
|
"logps/chosen": -1.2985490560531616, |
|
"logps/rejected": -2.2531745433807373, |
|
"loss": -0.1037, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.2985490560531616, |
|
"rewards/margins": 0.9546254873275757, |
|
"rewards/rejected": -2.2531745433807373, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.8898194190002617, |
|
"grad_norm": 5.355483863860493, |
|
"learning_rate": 2.148962009517823e-08, |
|
"logits/chosen": 0.22056706249713898, |
|
"logits/rejected": 0.38606297969818115, |
|
"logps/chosen": -1.380305528640747, |
|
"logps/rejected": -2.1738598346710205, |
|
"loss": -0.1192, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.380305528640747, |
|
"rewards/margins": 0.793554425239563, |
|
"rewards/rejected": -2.1738598346710205, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.9002878827532059, |
|
"grad_norm": 10.975418852072519, |
|
"learning_rate": 1.759464849429082e-08, |
|
"logits/chosen": 0.17666544020175934, |
|
"logits/rejected": 0.34687134623527527, |
|
"logps/chosen": -1.3843985795974731, |
|
"logps/rejected": -2.1382744312286377, |
|
"loss": -0.1132, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.3843985795974731, |
|
"rewards/margins": 0.7538760304450989, |
|
"rewards/rejected": -2.1382744312286377, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9107563465061502, |
|
"grad_norm": 8.94416140345287, |
|
"learning_rate": 1.4078249835774169e-08, |
|
"logits/chosen": 0.16215559840202332, |
|
"logits/rejected": 0.30989065766334534, |
|
"logps/chosen": -1.3915765285491943, |
|
"logps/rejected": -2.4042232036590576, |
|
"loss": -0.1263, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.3915765285491943, |
|
"rewards/margins": 1.0126469135284424, |
|
"rewards/rejected": -2.4042232036590576, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.9212248102590945, |
|
"grad_norm": 9.337983704389515, |
|
"learning_rate": 1.0945137958723705e-08, |
|
"logits/chosen": 0.33620771765708923, |
|
"logits/rejected": 0.4521760940551758, |
|
"logps/chosen": -1.2900922298431396, |
|
"logps/rejected": -2.0487289428710938, |
|
"loss": -0.1176, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.2900922298431396, |
|
"rewards/margins": 0.7586369514465332, |
|
"rewards/rejected": -2.0487289428710938, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.9316932740120387, |
|
"grad_norm": 7.908523894571034, |
|
"learning_rate": 8.19951289467482e-09, |
|
"logits/chosen": 0.16882726550102234, |
|
"logits/rejected": 0.34132689237594604, |
|
"logps/chosen": -1.2635581493377686, |
|
"logps/rejected": -2.0731050968170166, |
|
"loss": -0.1061, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.2635581493377686, |
|
"rewards/margins": 0.809546947479248, |
|
"rewards/rejected": -2.0731050968170166, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.942161737764983, |
|
"grad_norm": 4.689735375861538, |
|
"learning_rate": 5.84505523733293e-09, |
|
"logits/chosen": 0.29893386363983154, |
|
"logits/rejected": 0.4135954976081848, |
|
"logps/chosen": -1.2849633693695068, |
|
"logps/rejected": -2.084097385406494, |
|
"loss": -0.1116, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.2849633693695068, |
|
"rewards/margins": 0.7991340160369873, |
|
"rewards/rejected": -2.084097385406494, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.9526302015179272, |
|
"grad_norm": 10.72581336853298, |
|
"learning_rate": 3.8849212086261466e-09, |
|
"logits/chosen": 0.24026727676391602, |
|
"logits/rejected": 0.4279538691043854, |
|
"logps/chosen": -1.4815961122512817, |
|
"logps/rejected": -2.078247547149658, |
|
"loss": -0.111, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.4815961122512817, |
|
"rewards/margins": 0.596651554107666, |
|
"rewards/rejected": -2.078247547149658, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.9630986652708715, |
|
"grad_norm": 8.569087978528602, |
|
"learning_rate": 2.3217384276938756e-09, |
|
"logits/chosen": 0.3192306160926819, |
|
"logits/rejected": 0.4579745829105377, |
|
"logps/chosen": -1.2749390602111816, |
|
"logps/rejected": -2.1251206398010254, |
|
"loss": -0.1167, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.2749390602111816, |
|
"rewards/margins": 0.8501815795898438, |
|
"rewards/rejected": -2.1251206398010254, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.9735671290238157, |
|
"grad_norm": 8.07831527929665, |
|
"learning_rate": 1.1576023884836472e-09, |
|
"logits/chosen": 0.14747342467308044, |
|
"logits/rejected": 0.4368831515312195, |
|
"logps/chosen": -1.3681727647781372, |
|
"logps/rejected": -2.154388427734375, |
|
"loss": -0.1145, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.3681727647781372, |
|
"rewards/margins": 0.7862156629562378, |
|
"rewards/rejected": -2.154388427734375, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.98403559277676, |
|
"grad_norm": 10.228166489996983, |
|
"learning_rate": 3.940736506780395e-10, |
|
"logits/chosen": 0.13486842811107635, |
|
"logits/rejected": 0.33838891983032227, |
|
"logps/chosen": -1.2538591623306274, |
|
"logps/rejected": -1.9874036312103271, |
|
"loss": -0.1067, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.2538591623306274, |
|
"rewards/margins": 0.7335445284843445, |
|
"rewards/rejected": -1.9874036312103271, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.9945040565297043, |
|
"grad_norm": 21.73443269439748, |
|
"learning_rate": 3.2175747716822744e-11, |
|
"logits/chosen": 0.1331544667482376, |
|
"logits/rejected": 0.45593690872192383, |
|
"logps/chosen": -1.3638273477554321, |
|
"logps/rejected": -2.1485755443573, |
|
"loss": -0.0976, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.3638273477554321, |
|
"rewards/margins": 0.7847483158111572, |
|
"rewards/rejected": -2.1485755443573, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.998691442030882, |
|
"step": 477, |
|
"total_flos": 0.0, |
|
"train_loss": -0.09237800735347676, |
|
"train_runtime": 17432.5225, |
|
"train_samples_per_second": 3.507, |
|
"train_steps_per_second": 0.027 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 477, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |