{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.998691442030882, "eval_steps": 500, "global_step": 477, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.010468463752944255, "grad_norm": 10.21879193399377, "learning_rate": 6.25e-08, "logits/chosen": -0.49816179275512695, "logits/rejected": -0.5135830640792847, "logps/chosen": -1.1745846271514893, "logps/rejected": -1.3595685958862305, "loss": -0.035, "rewards/accuracies": 0.5625, "rewards/chosen": -1.1745846271514893, "rewards/margins": 0.18498393893241882, "rewards/rejected": -1.3595685958862305, "step": 5 }, { "epoch": 0.02093692750588851, "grad_norm": 2.7101486132785926, "learning_rate": 1.25e-07, "logits/chosen": -0.5216172933578491, "logits/rejected": -0.4984247088432312, "logps/chosen": -1.1580203771591187, "logps/rejected": -1.262268304824829, "loss": -0.0307, "rewards/accuracies": 0.5062500238418579, "rewards/chosen": -1.1580203771591187, "rewards/margins": 0.1042477935552597, "rewards/rejected": -1.262268304824829, "step": 10 }, { "epoch": 0.031405391258832765, "grad_norm": 5.679326865922718, "learning_rate": 1.875e-07, "logits/chosen": -0.46298742294311523, "logits/rejected": -0.45184773206710815, "logps/chosen": -1.1014564037322998, "logps/rejected": -1.357534408569336, "loss": -0.0466, "rewards/accuracies": 0.65625, "rewards/chosen": -1.1014564037322998, "rewards/margins": 0.2560780644416809, "rewards/rejected": -1.357534408569336, "step": 15 }, { "epoch": 0.04187385501177702, "grad_norm": 3.747855502948124, "learning_rate": 2.5e-07, "logits/chosen": -0.4439946711063385, "logits/rejected": -0.45637279748916626, "logps/chosen": -1.143970251083374, "logps/rejected": -1.249586582183838, "loss": -0.0343, "rewards/accuracies": 0.5625, "rewards/chosen": -1.143970251083374, "rewards/margins": 0.10561631619930267, "rewards/rejected": -1.249586582183838, "step": 20 }, { "epoch": 0.05234231876472128, "grad_norm": 4.753383333287764, "learning_rate": 3.125e-07, "logits/chosen": -0.5120896697044373, "logits/rejected": -0.4878992438316345, "logps/chosen": -1.1282974481582642, "logps/rejected": -1.20094895362854, "loss": -0.0405, "rewards/accuracies": 0.53125, "rewards/chosen": -1.1282974481582642, "rewards/margins": 0.07265140116214752, "rewards/rejected": -1.20094895362854, "step": 25 }, { "epoch": 0.06281078251766553, "grad_norm": 5.921390795122043, "learning_rate": 3.75e-07, "logits/chosen": -0.5101591348648071, "logits/rejected": -0.5148754715919495, "logps/chosen": -1.0519568920135498, "logps/rejected": -1.1866623163223267, "loss": -0.0316, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -1.0519568920135498, "rewards/margins": 0.13470537960529327, "rewards/rejected": -1.1866623163223267, "step": 30 }, { "epoch": 0.07327924627060979, "grad_norm": 2.925534488741588, "learning_rate": 4.3749999999999994e-07, "logits/chosen": -0.523992657661438, "logits/rejected": -0.4819146990776062, "logps/chosen": -1.0300133228302002, "logps/rejected": -1.1558997631072998, "loss": -0.0438, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -1.0300133228302002, "rewards/margins": 0.12588649988174438, "rewards/rejected": -1.1558997631072998, "step": 35 }, { "epoch": 0.08374771002355404, "grad_norm": 2.41534372661926, "learning_rate": 5e-07, "logits/chosen": -0.5426082611083984, "logits/rejected": -0.5616889595985413, "logps/chosen": -0.9607571363449097, "logps/rejected": -1.2749502658843994, "loss": -0.0597, "rewards/accuracies": 0.65625, "rewards/chosen": -0.9607571363449097, "rewards/margins": 0.31419312953948975, "rewards/rejected": -1.2749502658843994, "step": 40 }, { "epoch": 0.0942161737764983, "grad_norm": 3.5651412454593747, "learning_rate": 5.625e-07, "logits/chosen": -0.5252939462661743, "logits/rejected": -0.5452172160148621, "logps/chosen": -0.9730573892593384, "logps/rejected": -1.1473325490951538, "loss": -0.0559, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.9730573892593384, "rewards/margins": 0.174275204539299, "rewards/rejected": -1.1473325490951538, "step": 45 }, { "epoch": 0.10468463752944256, "grad_norm": 4.149376436833766, "learning_rate": 5.999678242522831e-07, "logits/chosen": -0.5748304128646851, "logits/rejected": -0.6049160957336426, "logps/chosen": -0.9965342283248901, "logps/rejected": -1.2540613412857056, "loss": -0.0464, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.9965342283248901, "rewards/margins": 0.25752708315849304, "rewards/rejected": -1.2540613412857056, "step": 50 }, { "epoch": 0.11515310128238682, "grad_norm": 2.9691689665037373, "learning_rate": 5.996059263493219e-07, "logits/chosen": -0.5686520338058472, "logits/rejected": -0.5740174055099487, "logps/chosen": -0.9575145840644836, "logps/rejected": -1.1497360467910767, "loss": -0.051, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.9575145840644836, "rewards/margins": 0.1922214925289154, "rewards/rejected": -1.1497360467910767, "step": 55 }, { "epoch": 0.12562156503533106, "grad_norm": 2.6253711163069537, "learning_rate": 5.988423976115163e-07, "logits/chosen": -0.6203932762145996, "logits/rejected": -0.633256196975708, "logps/chosen": -1.0102077722549438, "logps/rejected": -1.1339367628097534, "loss": -0.062, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.0102077722549438, "rewards/margins": 0.12372910976409912, "rewards/rejected": -1.1339367628097534, "step": 60 }, { "epoch": 0.1360900287882753, "grad_norm": 6.143760308041501, "learning_rate": 5.976782615723061e-07, "logits/chosen": -0.5896092653274536, "logits/rejected": -0.5990695357322693, "logps/chosen": -0.9534575343132019, "logps/rejected": -1.3051955699920654, "loss": -0.0542, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.9534575343132019, "rewards/margins": 0.3517380654811859, "rewards/rejected": -1.3051955699920654, "step": 65 }, { "epoch": 0.14655849254121958, "grad_norm": 2.425535094463044, "learning_rate": 5.961150787913738e-07, "logits/chosen": -0.5713956952095032, "logits/rejected": -0.5825516581535339, "logps/chosen": -0.9968436360359192, "logps/rejected": -1.1801879405975342, "loss": -0.0566, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.9968436360359192, "rewards/margins": 0.18334420025348663, "rewards/rejected": -1.1801879405975342, "step": 70 }, { "epoch": 0.15702695629416383, "grad_norm": 3.936791088850445, "learning_rate": 5.941549447626671e-07, "logits/chosen": -0.5951550006866455, "logits/rejected": -0.601603090763092, "logps/chosen": -0.9617071151733398, "logps/rejected": -1.2262237071990967, "loss": -0.0623, "rewards/accuracies": 0.59375, "rewards/chosen": -0.9617071151733398, "rewards/margins": 0.2645165026187897, "rewards/rejected": -1.2262237071990967, "step": 75 }, { "epoch": 0.16749542004710807, "grad_norm": 3.1037840831495154, "learning_rate": 5.918004871053251e-07, "logits/chosen": -0.6256829500198364, "logits/rejected": -0.6605373620986938, "logps/chosen": -0.9740772247314453, "logps/rejected": -1.2311257123947144, "loss": -0.0545, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.9740772247314453, "rewards/margins": 0.25704842805862427, "rewards/rejected": -1.2311257123947144, "step": 80 }, { "epoch": 0.17796388380005235, "grad_norm": 3.517502431455176, "learning_rate": 5.890548620412763e-07, "logits/chosen": -0.6996882557868958, "logits/rejected": -0.6915110349655151, "logps/chosen": -0.9287127256393433, "logps/rejected": -1.1633974313735962, "loss": -0.0628, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.9287127256393433, "rewards/margins": 0.23468473553657532, "rewards/rejected": -1.1633974313735962, "step": 85 }, { "epoch": 0.1884323475529966, "grad_norm": 3.017999402299648, "learning_rate": 5.859217501642258e-07, "logits/chosen": -0.6648964285850525, "logits/rejected": -0.6696600914001465, "logps/chosen": -1.020616888999939, "logps/rejected": -1.240464210510254, "loss": -0.0681, "rewards/accuracies": 0.625, "rewards/chosen": -1.020616888999939, "rewards/margins": 0.2198474109172821, "rewards/rejected": -1.240464210510254, "step": 90 }, { "epoch": 0.19890081130594087, "grad_norm": 4.972742617412184, "learning_rate": 5.824053515057091e-07, "logits/chosen": -0.6936327815055847, "logits/rejected": -0.6650416254997253, "logps/chosen": -1.010335922241211, "logps/rejected": -1.212616205215454, "loss": -0.0554, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -1.010335922241211, "rewards/margins": 0.20228025317192078, "rewards/rejected": -1.212616205215454, "step": 95 }, { "epoch": 0.2093692750588851, "grad_norm": 4.305605640056122, "learning_rate": 5.785103799048218e-07, "logits/chosen": -0.6884914636611938, "logits/rejected": -0.7089473009109497, "logps/chosen": -1.0421329736709595, "logps/rejected": -1.3643444776535034, "loss": -0.0577, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -1.0421329736709595, "rewards/margins": 0.32221153378486633, "rewards/rejected": -1.3643444776535034, "step": 100 }, { "epoch": 0.21983773881182936, "grad_norm": 4.401763969089556, "learning_rate": 5.742420566891749e-07, "logits/chosen": -0.6878229379653931, "logits/rejected": -0.6928119659423828, "logps/chosen": -0.9930433034896851, "logps/rejected": -1.3710923194885254, "loss": -0.0667, "rewards/accuracies": 0.59375, "rewards/chosen": -0.9930433034896851, "rewards/margins": 0.3780490458011627, "rewards/rejected": -1.3710923194885254, "step": 105 }, { "epoch": 0.23030620256477363, "grad_norm": 6.081502646176958, "learning_rate": 5.696061036755478e-07, "logits/chosen": -0.7217090725898743, "logits/rejected": -0.7287099361419678, "logps/chosen": -1.03863525390625, "logps/rejected": -1.384284257888794, "loss": -0.0744, "rewards/accuracies": 0.65625, "rewards/chosen": -1.03863525390625, "rewards/margins": 0.3456490635871887, "rewards/rejected": -1.384284257888794, "step": 110 }, { "epoch": 0.24077466631771788, "grad_norm": 4.443555154250914, "learning_rate": 5.64608735499618e-07, "logits/chosen": -0.6417717337608337, "logits/rejected": -0.6641663312911987, "logps/chosen": -1.0144702196121216, "logps/rejected": -1.4784486293792725, "loss": -0.0904, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.0144702196121216, "rewards/margins": 0.46397843956947327, "rewards/rejected": -1.4784486293792725, "step": 115 }, { "epoch": 0.2512431300706621, "grad_norm": 4.21476483098945, "learning_rate": 5.592566512850545e-07, "logits/chosen": -0.6441653966903687, "logits/rejected": -0.6482559442520142, "logps/chosen": -1.126665711402893, "logps/rejected": -1.395385980606079, "loss": -0.0685, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.126665711402893, "rewards/margins": 0.2687203884124756, "rewards/rejected": -1.395385980606079, "step": 120 }, { "epoch": 0.26171159382360637, "grad_norm": 5.871972559220826, "learning_rate": 5.535570256631384e-07, "logits/chosen": -0.6881023645401001, "logits/rejected": -0.6507277488708496, "logps/chosen": -1.030970811843872, "logps/rejected": -1.3154019117355347, "loss": -0.0735, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -1.030970811843872, "rewards/margins": 0.28443121910095215, "rewards/rejected": -1.3154019117355347, "step": 125 }, { "epoch": 0.2721800575765506, "grad_norm": 3.8967984948431176, "learning_rate": 5.475174991549528e-07, "logits/chosen": -0.6212294697761536, "logits/rejected": -0.6462045907974243, "logps/chosen": -1.055466651916504, "logps/rejected": -1.2383654117584229, "loss": -0.0712, "rewards/accuracies": 0.59375, "rewards/chosen": -1.055466651916504, "rewards/margins": 0.18289880454540253, "rewards/rejected": -1.2383654117584229, "step": 130 }, { "epoch": 0.2826485213294949, "grad_norm": 4.954091803088075, "learning_rate": 5.411461679290317e-07, "logits/chosen": -0.6356550455093384, "logits/rejected": -0.6022456884384155, "logps/chosen": -1.1107438802719116, "logps/rejected": -1.643451452255249, "loss": -0.0826, "rewards/accuracies": 0.75, "rewards/chosen": -1.1107438802719116, "rewards/margins": 0.532707691192627, "rewards/rejected": -1.643451452255249, "step": 135 }, { "epoch": 0.29311698508243916, "grad_norm": 6.396288923391441, "learning_rate": 5.34451572948201e-07, "logits/chosen": -0.5937852263450623, "logits/rejected": -0.5691717863082886, "logps/chosen": -1.0965867042541504, "logps/rejected": -1.5603969097137451, "loss": -0.0915, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.0965867042541504, "rewards/margins": 0.4638102948665619, "rewards/rejected": -1.5603969097137451, "step": 140 }, { "epoch": 0.3035854488353834, "grad_norm": 4.065147326048241, "learning_rate": 5.274426885201582e-07, "logits/chosen": -0.5843586921691895, "logits/rejected": -0.5910850763320923, "logps/chosen": -1.1310163736343384, "logps/rejected": -1.4976770877838135, "loss": -0.0849, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.1310163736343384, "rewards/margins": 0.3666609227657318, "rewards/rejected": -1.4976770877838135, "step": 145 }, { "epoch": 0.31405391258832765, "grad_norm": 4.014586941737209, "learning_rate": 5.201289102671411e-07, "logits/chosen": -0.546478271484375, "logits/rejected": -0.5195995569229126, "logps/chosen": -1.0407493114471436, "logps/rejected": -1.436926245689392, "loss": -0.0898, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.0407493114471436, "rewards/margins": 0.3961769640445709, "rewards/rejected": -1.436926245689392, "step": 150 }, { "epoch": 0.3245223763412719, "grad_norm": 6.577906404260762, "learning_rate": 5.12520042530811e-07, "logits/chosen": -0.5657233595848083, "logits/rejected": -0.5251718163490295, "logps/chosen": -1.1016614437103271, "logps/rejected": -1.4624617099761963, "loss": -0.0923, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -1.1016614437103271, "rewards/margins": 0.36080020666122437, "rewards/rejected": -1.4624617099761963, "step": 155 }, { "epoch": 0.33499084009421615, "grad_norm": 5.297524478469823, "learning_rate": 5.046262852292346e-07, "logits/chosen": -0.4731084704399109, "logits/rejected": -0.4319906234741211, "logps/chosen": -1.1443315744400024, "logps/rejected": -1.5339367389678955, "loss": -0.0887, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -1.1443315744400024, "rewards/margins": 0.38960522413253784, "rewards/rejected": -1.5339367389678955, "step": 160 }, { "epoch": 0.34545930384716045, "grad_norm": 6.8937105755776065, "learning_rate": 4.964582201835856e-07, "logits/chosen": -0.4270719885826111, "logits/rejected": -0.3794442415237427, "logps/chosen": -1.1176317930221558, "logps/rejected": -1.639172911643982, "loss": -0.0965, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.1176317930221558, "rewards/margins": 0.5215411186218262, "rewards/rejected": -1.639172911643982, "step": 165 }, { "epoch": 0.3559277676001047, "grad_norm": 4.460892002893491, "learning_rate": 4.880267969328908e-07, "logits/chosen": -0.32646840810775757, "logits/rejected": -0.22941944003105164, "logps/chosen": -1.2465507984161377, "logps/rejected": -1.7467823028564453, "loss": -0.0851, "rewards/accuracies": 0.65625, "rewards/chosen": -1.2465507984161377, "rewards/margins": 0.5002316236495972, "rewards/rejected": -1.7467823028564453, "step": 170 }, { "epoch": 0.36639623135304894, "grad_norm": 10.536194204055317, "learning_rate": 4.793433180558423e-07, "logits/chosen": -0.2930789887905121, "logits/rejected": -0.15167564153671265, "logps/chosen": -1.1869597434997559, "logps/rejected": -1.7165695428848267, "loss": -0.088, "rewards/accuracies": 0.65625, "rewards/chosen": -1.1869597434997559, "rewards/margins": 0.5296097993850708, "rewards/rejected": -1.7165695428848267, "step": 175 }, { "epoch": 0.3768646951059932, "grad_norm": 14.792822735814111, "learning_rate": 4.704194240193467e-07, "logits/chosen": -0.14493033289909363, "logits/rejected": -0.08077652007341385, "logps/chosen": -1.1866719722747803, "logps/rejected": -1.724832534790039, "loss": -0.0932, "rewards/accuracies": 0.625, "rewards/chosen": -1.1866719722747803, "rewards/margins": 0.5381606221199036, "rewards/rejected": -1.724832534790039, "step": 180 }, { "epoch": 0.38733315885893743, "grad_norm": 5.012043699075584, "learning_rate": 4.6126707757412686e-07, "logits/chosen": -0.19216454029083252, "logits/rejected": -0.06508170068264008, "logps/chosen": -1.2008949518203735, "logps/rejected": -2.0086560249328613, "loss": -0.1086, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.2008949518203735, "rewards/margins": 0.807761013507843, "rewards/rejected": -2.0086560249328613, "step": 185 }, { "epoch": 0.39780162261188173, "grad_norm": 6.108194509753534, "learning_rate": 4.5189854771829086e-07, "logits/chosen": -0.14359740912914276, "logits/rejected": -0.04716472700238228, "logps/chosen": -1.2239508628845215, "logps/rejected": -1.738565444946289, "loss": -0.0845, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -1.2239508628845215, "rewards/margins": 0.5146147012710571, "rewards/rejected": -1.738565444946289, "step": 190 }, { "epoch": 0.408270086364826, "grad_norm": 12.413660235408443, "learning_rate": 4.4232639325036807e-07, "logits/chosen": -0.16371646523475647, "logits/rejected": -0.08154813945293427, "logps/chosen": -1.3641008138656616, "logps/rejected": -1.8681023120880127, "loss": -0.0883, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -1.3641008138656616, "rewards/margins": 0.5040014386177063, "rewards/rejected": -1.8681023120880127, "step": 195 }, { "epoch": 0.4187385501177702, "grad_norm": 5.664592254947892, "learning_rate": 4.32563445933859e-07, "logits/chosen": -0.3630138039588928, "logits/rejected": -0.31867215037345886, "logps/chosen": -1.2496740818023682, "logps/rejected": -1.7695592641830444, "loss": -0.0991, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -1.2496740818023682, "rewards/margins": 0.5198851823806763, "rewards/rejected": -1.7695592641830444, "step": 200 }, { "epoch": 0.42920701387071447, "grad_norm": 8.615954171040675, "learning_rate": 4.226227932958664e-07, "logits/chosen": -0.2460884302854538, "logits/rejected": -0.1599723994731903, "logps/chosen": -1.0948972702026367, "logps/rejected": -1.7847169637680054, "loss": -0.1097, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.0948972702026367, "rewards/margins": 0.6898195743560791, "rewards/rejected": -1.7847169637680054, "step": 205 }, { "epoch": 0.4396754776236587, "grad_norm": 15.728835997323346, "learning_rate": 4.1251776108286854e-07, "logits/chosen": -0.16430199146270752, "logits/rejected": -0.09520161151885986, "logps/chosen": -1.3307850360870361, "logps/rejected": -1.7031431198120117, "loss": -0.0888, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -1.3307850360870361, "rewards/margins": 0.37235820293426514, "rewards/rejected": -1.7031431198120117, "step": 210 }, { "epoch": 0.45014394137660296, "grad_norm": 9.404924688577674, "learning_rate": 4.022618953971514e-07, "logits/chosen": -0.11717267334461212, "logits/rejected": -0.0059516532346606255, "logps/chosen": -1.2097961902618408, "logps/rejected": -1.8957335948944092, "loss": -0.1077, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -1.2097961902618408, "rewards/margins": 0.6859374046325684, "rewards/rejected": -1.8957335948944092, "step": 215 }, { "epoch": 0.46061240512954726, "grad_norm": 7.3279364906626485, "learning_rate": 3.918689445378477e-07, "logits/chosen": -0.09480677545070648, "logits/rejected": 0.13137385249137878, "logps/chosen": -1.2646963596343994, "logps/rejected": -1.9554264545440674, "loss": -0.0804, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -1.2646963596343994, "rewards/margins": 0.690730094909668, "rewards/rejected": -1.9554264545440674, "step": 220 }, { "epoch": 0.4710808688824915, "grad_norm": 5.249390449834869, "learning_rate": 3.813528405709251e-07, "logits/chosen": 0.017690464854240417, "logits/rejected": 0.24274654686450958, "logps/chosen": -1.2406516075134277, "logps/rejected": -2.0346226692199707, "loss": -0.0963, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.2406516075134277, "rewards/margins": 0.7939712405204773, "rewards/rejected": -2.0346226692199707, "step": 225 }, { "epoch": 0.48154933263543576, "grad_norm": 5.840384716420158, "learning_rate": 3.707276806528282e-07, "logits/chosen": -0.034771956503391266, "logits/rejected": 0.25075453519821167, "logps/chosen": -1.307732105255127, "logps/rejected": -2.3069005012512207, "loss": -0.1106, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -1.307732105255127, "rewards/margins": 0.9991682171821594, "rewards/rejected": -2.3069005012512207, "step": 230 }, { "epoch": 0.49201779638838, "grad_norm": 5.708169285500334, "learning_rate": 3.6000770813281334e-07, "logits/chosen": -0.04531233012676239, "logits/rejected": 0.1216268315911293, "logps/chosen": -1.0852802991867065, "logps/rejected": -1.7644765377044678, "loss": -0.1067, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -1.0852802991867065, "rewards/margins": 0.6791960597038269, "rewards/rejected": -1.7644765377044678, "step": 235 }, { "epoch": 0.5024862601413242, "grad_norm": 5.2328738831340615, "learning_rate": 3.4920729345930654e-07, "logits/chosen": -0.15298782289028168, "logits/rejected": -0.021981507539749146, "logps/chosen": -1.181814432144165, "logps/rejected": -1.84041428565979, "loss": -0.0907, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -1.181814432144165, "rewards/margins": 0.6585996747016907, "rewards/rejected": -1.84041428565979, "step": 240 }, { "epoch": 0.5129547238942685, "grad_norm": 4.454620572722302, "learning_rate": 3.383409149158814e-07, "logits/chosen": -0.14406219124794006, "logits/rejected": 0.021218538284301758, "logps/chosen": -1.3196625709533691, "logps/rejected": -1.8837369680404663, "loss": -0.1005, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.3196625709533691, "rewards/margins": 0.5640743970870972, "rewards/rejected": -1.8837369680404663, "step": 245 }, { "epoch": 0.5234231876472127, "grad_norm": 16.864698084247127, "learning_rate": 3.2742313921268035e-07, "logits/chosen": -0.04993357136845589, "logits/rejected": 0.09763963520526886, "logps/chosen": -1.1896789073944092, "logps/rejected": -1.9447057247161865, "loss": -0.1026, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -1.1896789073944092, "rewards/margins": 0.7550268769264221, "rewards/rejected": -1.9447057247161865, "step": 250 }, { "epoch": 0.533891651400157, "grad_norm": 4.006389891258429, "learning_rate": 3.1646860195929825e-07, "logits/chosen": 0.054258793592453, "logits/rejected": 0.3202013075351715, "logps/chosen": -1.37346613407135, "logps/rejected": -2.1130452156066895, "loss": -0.1166, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.37346613407135, "rewards/margins": 0.7395793199539185, "rewards/rejected": -2.1130452156066895, "step": 255 }, { "epoch": 0.5443601151531012, "grad_norm": 11.296019715540536, "learning_rate": 3.054919880453032e-07, "logits/chosen": 0.08209206908941269, "logits/rejected": 0.305607408285141, "logps/chosen": -1.1226253509521484, "logps/rejected": -2.1423118114471436, "loss": -0.1085, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.1226253509521484, "rewards/margins": 1.0196868181228638, "rewards/rejected": -2.1423118114471436, "step": 260 }, { "epoch": 0.5548285789060455, "grad_norm": 12.257070737010752, "learning_rate": 2.9450801195469686e-07, "logits/chosen": 0.13827550411224365, "logits/rejected": 0.33571916818618774, "logps/chosen": -1.2839715480804443, "logps/rejected": -1.8086509704589844, "loss": -0.1095, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.2839715480804443, "rewards/margins": 0.5246793031692505, "rewards/rejected": -1.8086509704589844, "step": 265 }, { "epoch": 0.5652970426589898, "grad_norm": 7.650172024925882, "learning_rate": 2.835313980407017e-07, "logits/chosen": 0.2891995906829834, "logits/rejected": 0.4797445237636566, "logps/chosen": -1.3987174034118652, "logps/rejected": -1.9334684610366821, "loss": -0.0939, "rewards/accuracies": 0.6875, "rewards/chosen": -1.3987174034118652, "rewards/margins": 0.5347510576248169, "rewards/rejected": -1.9334684610366821, "step": 270 }, { "epoch": 0.575765506411934, "grad_norm": 9.750902438039956, "learning_rate": 2.7257686078731973e-07, "logits/chosen": 0.3225773572921753, "logits/rejected": 0.6765245795249939, "logps/chosen": -1.2831330299377441, "logps/rejected": -2.208591938018799, "loss": -0.111, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -1.2831330299377441, "rewards/margins": 0.9254589080810547, "rewards/rejected": -2.208591938018799, "step": 275 }, { "epoch": 0.5862339701648783, "grad_norm": 7.363369128681095, "learning_rate": 2.6165908508411857e-07, "logits/chosen": 0.29373425245285034, "logits/rejected": 0.5836144685745239, "logps/chosen": -1.2897415161132812, "logps/rejected": -1.9501053094863892, "loss": -0.1031, "rewards/accuracies": 0.6875, "rewards/chosen": -1.2897415161132812, "rewards/margins": 0.6603637933731079, "rewards/rejected": -1.9501053094863892, "step": 280 }, { "epoch": 0.5967024339178225, "grad_norm": 13.183162679494803, "learning_rate": 2.5079270654069354e-07, "logits/chosen": 0.18902722001075745, "logits/rejected": 0.29711785912513733, "logps/chosen": -1.2677323818206787, "logps/rejected": -2.1915152072906494, "loss": -0.1109, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.2677323818206787, "rewards/margins": 0.9237826466560364, "rewards/rejected": -2.1915152072906494, "step": 285 }, { "epoch": 0.6071708976707668, "grad_norm": 6.687429291622309, "learning_rate": 2.399922918671867e-07, "logits/chosen": 0.022776653990149498, "logits/rejected": 0.2391856163740158, "logps/chosen": -1.3181854486465454, "logps/rejected": -2.3116376399993896, "loss": -0.116, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -1.3181854486465454, "rewards/margins": 0.993452250957489, "rewards/rejected": -2.3116376399993896, "step": 290 }, { "epoch": 0.6176393614237111, "grad_norm": 5.281502633271325, "learning_rate": 2.2927231934717176e-07, "logits/chosen": -0.0016543983947485685, "logits/rejected": 0.17889878153800964, "logps/chosen": -1.3387067317962646, "logps/rejected": -2.2106659412384033, "loss": -0.1028, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -1.3387067317962646, "rewards/margins": 0.8719590902328491, "rewards/rejected": -2.2106659412384033, "step": 295 }, { "epoch": 0.6281078251766553, "grad_norm": 7.373204926178573, "learning_rate": 2.1864715942907487e-07, "logits/chosen": -0.007572178728878498, "logits/rejected": 0.14625847339630127, "logps/chosen": -1.4602124691009521, "logps/rejected": -2.1073670387268066, "loss": -0.1046, "rewards/accuracies": 0.65625, "rewards/chosen": -1.4602124691009521, "rewards/margins": 0.6471549272537231, "rewards/rejected": -2.1073670387268066, "step": 300 }, { "epoch": 0.6385762889295996, "grad_norm": 4.736107509318446, "learning_rate": 2.081310554621522e-07, "logits/chosen": 0.19010517001152039, "logits/rejected": 0.3840242624282837, "logps/chosen": -1.4273982048034668, "logps/rejected": -2.3235268592834473, "loss": -0.1246, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -1.4273982048034668, "rewards/margins": 0.89612877368927, "rewards/rejected": -2.3235268592834473, "step": 305 }, { "epoch": 0.6490447526825438, "grad_norm": 23.409368660389333, "learning_rate": 1.9773810460284862e-07, "logits/chosen": 0.2859404981136322, "logits/rejected": 0.3260739743709564, "logps/chosen": -1.2994334697723389, "logps/rejected": -2.061152935028076, "loss": -0.1079, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -1.2994334697723389, "rewards/margins": 0.7617196440696716, "rewards/rejected": -2.061152935028076, "step": 310 }, { "epoch": 0.6595132164354881, "grad_norm": 22.542628144583606, "learning_rate": 1.874822389171314e-07, "logits/chosen": 0.22070816159248352, "logits/rejected": 0.5137720704078674, "logps/chosen": -1.2794992923736572, "logps/rejected": -2.3934812545776367, "loss": -0.1217, "rewards/accuracies": 0.75, "rewards/chosen": -1.2794992923736572, "rewards/margins": 1.11398184299469, "rewards/rejected": -2.3934812545776367, "step": 315 }, { "epoch": 0.6699816801884323, "grad_norm": 9.283703258871379, "learning_rate": 1.7737720670413356e-07, "logits/chosen": 0.24648892879486084, "logits/rejected": 0.33991724252700806, "logps/chosen": -1.365140676498413, "logps/rejected": -2.166224241256714, "loss": -0.1155, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.365140676498413, "rewards/margins": 0.8010835647583008, "rewards/rejected": -2.166224241256714, "step": 320 }, { "epoch": 0.6804501439413766, "grad_norm": 7.959464499237016, "learning_rate": 1.6743655406614095e-07, "logits/chosen": 0.14846596121788025, "logits/rejected": 0.3873682916164398, "logps/chosen": -1.3291183710098267, "logps/rejected": -2.1744306087493896, "loss": -0.1209, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.3291183710098267, "rewards/margins": 0.845312237739563, "rewards/rejected": -2.1744306087493896, "step": 325 }, { "epoch": 0.6909186076943209, "grad_norm": 10.067079168168558, "learning_rate": 1.5767360674963198e-07, "logits/chosen": 0.08330237120389938, "logits/rejected": 0.29010123014450073, "logps/chosen": -1.225388765335083, "logps/rejected": -1.9910337924957275, "loss": -0.1019, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -1.225388765335083, "rewards/margins": 0.7656451463699341, "rewards/rejected": -1.9910337924957275, "step": 330 }, { "epoch": 0.7013870714472651, "grad_norm": 9.133946690623846, "learning_rate": 1.4810145228170922e-07, "logits/chosen": -0.036666952073574066, "logits/rejected": 0.15409226715564728, "logps/chosen": -1.2429730892181396, "logps/rejected": -1.8629066944122314, "loss": -0.0986, "rewards/accuracies": 0.65625, "rewards/chosen": -1.2429730892181396, "rewards/margins": 0.6199334859848022, "rewards/rejected": -1.8629066944122314, "step": 335 }, { "epoch": 0.7118555352002094, "grad_norm": 10.242157594508342, "learning_rate": 1.3873292242587306e-07, "logits/chosen": 0.028749173507094383, "logits/rejected": 0.23107752203941345, "logps/chosen": -1.430831789970398, "logps/rejected": -2.205451488494873, "loss": -0.1094, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -1.430831789970398, "rewards/margins": 0.7746195197105408, "rewards/rejected": -2.205451488494873, "step": 340 }, { "epoch": 0.7223239989531536, "grad_norm": 5.706648979502026, "learning_rate": 1.295805759806533e-07, "logits/chosen": 0.007116416003555059, "logits/rejected": 0.24577870965003967, "logps/chosen": -1.297646164894104, "logps/rejected": -2.2263598442077637, "loss": -0.1067, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.297646164894104, "rewards/margins": 0.9287137985229492, "rewards/rejected": -2.2263598442077637, "step": 345 }, { "epoch": 0.7327924627060979, "grad_norm": 7.792589886443301, "learning_rate": 1.2065668194415777e-07, "logits/chosen": 0.1685815453529358, "logits/rejected": 0.2779741585254669, "logps/chosen": -1.3478834629058838, "logps/rejected": -2.127234935760498, "loss": -0.1044, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.3478834629058838, "rewards/margins": 0.7793514132499695, "rewards/rejected": -2.127234935760498, "step": 350 }, { "epoch": 0.7432609264590422, "grad_norm": 8.782679171257993, "learning_rate": 1.1197320306710923e-07, "logits/chosen": 0.1377880573272705, "logits/rejected": 0.32959434390068054, "logps/chosen": -1.1817530393600464, "logps/rejected": -2.1875174045562744, "loss": -0.1184, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.1817530393600464, "rewards/margins": 1.0057642459869385, "rewards/rejected": -2.1875174045562744, "step": 355 }, { "epoch": 0.7537293902119864, "grad_norm": 6.862511018163533, "learning_rate": 1.035417798164145e-07, "logits/chosen": 0.01301775686442852, "logits/rejected": 0.2229972630739212, "logps/chosen": -1.1256128549575806, "logps/rejected": -1.9575172662734985, "loss": -0.1212, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.1256128549575806, "rewards/margins": 0.8319045901298523, "rewards/rejected": -1.9575172662734985, "step": 360 }, { "epoch": 0.7641978539649307, "grad_norm": 9.505229420957962, "learning_rate": 9.537371477076535e-08, "logits/chosen": 0.11367920786142349, "logits/rejected": 0.356434166431427, "logps/chosen": -1.4002251625061035, "logps/rejected": -2.392739772796631, "loss": -0.1187, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.4002251625061035, "rewards/margins": 0.9925147294998169, "rewards/rejected": -2.392739772796631, "step": 365 }, { "epoch": 0.7746663177178749, "grad_norm": 10.965226669329015, "learning_rate": 8.747995746918898e-08, "logits/chosen": 0.20804345607757568, "logits/rejected": 0.45443806052207947, "logps/chosen": -1.363114356994629, "logps/rejected": -2.378051280975342, "loss": -0.1203, "rewards/accuracies": 0.6875, "rewards/chosen": -1.363114356994629, "rewards/margins": 1.0149368047714233, "rewards/rejected": -2.378051280975342, "step": 370 }, { "epoch": 0.7851347814708192, "grad_norm": 6.16813334899156, "learning_rate": 7.987108973285888e-08, "logits/chosen": 0.26107341051101685, "logits/rejected": 0.22848454117774963, "logps/chosen": -1.387460708618164, "logps/rejected": -2.197563886642456, "loss": -0.1101, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.387460708618164, "rewards/margins": 0.8101032972335815, "rewards/rejected": -2.197563886642456, "step": 375 }, { "epoch": 0.7956032452237635, "grad_norm": 12.918343537826871, "learning_rate": 7.255731147984174e-08, "logits/chosen": 0.22713570296764374, "logits/rejected": 0.3289734423160553, "logps/chosen": -1.4456373453140259, "logps/rejected": -2.084005832672119, "loss": -0.117, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -1.4456373453140259, "rewards/margins": 0.638368546962738, "rewards/rejected": -2.084005832672119, "step": 380 }, { "epoch": 0.8060717089767077, "grad_norm": 6.060272813011622, "learning_rate": 6.554842705179898e-08, "logits/chosen": 0.12520398199558258, "logits/rejected": 0.29297417402267456, "logps/chosen": -1.1945627927780151, "logps/rejected": -2.0077576637268066, "loss": -0.1195, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.1945627927780151, "rewards/margins": 0.8131949305534363, "rewards/rejected": -2.0077576637268066, "step": 385 }, { "epoch": 0.816540172729652, "grad_norm": 17.713346450689524, "learning_rate": 5.885383207096832e-08, "logits/chosen": 0.13393369317054749, "logits/rejected": 0.3967200517654419, "logps/chosen": -1.2682616710662842, "logps/rejected": -2.303426504135132, "loss": -0.1115, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -1.2682616710662842, "rewards/margins": 1.0351645946502686, "rewards/rejected": -2.303426504135132, "step": 390 }, { "epoch": 0.8270086364825961, "grad_norm": 6.0463426455293385, "learning_rate": 5.2482500845047165e-08, "logits/chosen": 0.09920735657215118, "logits/rejected": 0.4265053868293762, "logps/chosen": -1.2478829622268677, "logps/rejected": -2.2664952278137207, "loss": -0.1059, "rewards/accuracies": 0.71875, "rewards/chosen": -1.2478829622268677, "rewards/margins": 1.018612027168274, "rewards/rejected": -2.2664952278137207, "step": 395 }, { "epoch": 0.8374771002355405, "grad_norm": 7.849951661296689, "learning_rate": 4.644297433686162e-08, "logits/chosen": 0.20306222140789032, "logits/rejected": 0.37799519300460815, "logps/chosen": -1.167974591255188, "logps/rejected": -2.061584949493408, "loss": -0.1211, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -1.167974591255188, "rewards/margins": 0.8936103582382202, "rewards/rejected": -2.061584949493408, "step": 400 }, { "epoch": 0.8479455639884846, "grad_norm": 9.305371166707902, "learning_rate": 4.074334871494558e-08, "logits/chosen": 0.12849920988082886, "logits/rejected": 0.27528905868530273, "logps/chosen": -1.4915854930877686, "logps/rejected": -2.3600451946258545, "loss": -0.0987, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -1.4915854930877686, "rewards/margins": 0.8684597015380859, "rewards/rejected": -2.3600451946258545, "step": 405 }, { "epoch": 0.8584140277414289, "grad_norm": 7.721976646915507, "learning_rate": 3.5391264500382e-08, "logits/chosen": 0.15187588334083557, "logits/rejected": 0.3307904601097107, "logps/chosen": -1.2111918926239014, "logps/rejected": -2.036677837371826, "loss": -0.1121, "rewards/accuracies": 0.625, "rewards/chosen": -1.2111918926239014, "rewards/margins": 0.8254860043525696, "rewards/rejected": -2.036677837371826, "step": 410 }, { "epoch": 0.8688824914943732, "grad_norm": 7.610516456597608, "learning_rate": 3.0393896324452226e-08, "logits/chosen": 0.14874114096164703, "logits/rejected": 0.44959506392478943, "logps/chosen": -1.3082196712493896, "logps/rejected": -2.282323122024536, "loss": -0.1227, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.3082196712493896, "rewards/margins": 0.9741032719612122, "rewards/rejected": -2.282323122024536, "step": 415 }, { "epoch": 0.8793509552473174, "grad_norm": 7.385339190408951, "learning_rate": 2.5757943310825026e-08, "logits/chosen": 0.149868905544281, "logits/rejected": 0.3381766676902771, "logps/chosen": -1.2985490560531616, "logps/rejected": -2.2531745433807373, "loss": -0.1037, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -1.2985490560531616, "rewards/margins": 0.9546254873275757, "rewards/rejected": -2.2531745433807373, "step": 420 }, { "epoch": 0.8898194190002617, "grad_norm": 5.355483863860493, "learning_rate": 2.148962009517823e-08, "logits/chosen": 0.22056706249713898, "logits/rejected": 0.38606297969818115, "logps/chosen": -1.380305528640747, "logps/rejected": -2.1738598346710205, "loss": -0.1192, "rewards/accuracies": 0.625, "rewards/chosen": -1.380305528640747, "rewards/margins": 0.793554425239563, "rewards/rejected": -2.1738598346710205, "step": 425 }, { "epoch": 0.9002878827532059, "grad_norm": 10.975418852072519, "learning_rate": 1.759464849429082e-08, "logits/chosen": 0.17666544020175934, "logits/rejected": 0.34687134623527527, "logps/chosen": -1.3843985795974731, "logps/rejected": -2.1382744312286377, "loss": -0.1132, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.3843985795974731, "rewards/margins": 0.7538760304450989, "rewards/rejected": -2.1382744312286377, "step": 430 }, { "epoch": 0.9107563465061502, "grad_norm": 8.94416140345287, "learning_rate": 1.4078249835774169e-08, "logits/chosen": 0.16215559840202332, "logits/rejected": 0.30989065766334534, "logps/chosen": -1.3915765285491943, "logps/rejected": -2.4042232036590576, "loss": -0.1263, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.3915765285491943, "rewards/margins": 1.0126469135284424, "rewards/rejected": -2.4042232036590576, "step": 435 }, { "epoch": 0.9212248102590945, "grad_norm": 9.337983704389515, "learning_rate": 1.0945137958723705e-08, "logits/chosen": 0.33620771765708923, "logits/rejected": 0.4521760940551758, "logps/chosen": -1.2900922298431396, "logps/rejected": -2.0487289428710938, "loss": -0.1176, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.2900922298431396, "rewards/margins": 0.7586369514465332, "rewards/rejected": -2.0487289428710938, "step": 440 }, { "epoch": 0.9316932740120387, "grad_norm": 7.908523894571034, "learning_rate": 8.19951289467482e-09, "logits/chosen": 0.16882726550102234, "logits/rejected": 0.34132689237594604, "logps/chosen": -1.2635581493377686, "logps/rejected": -2.0731050968170166, "loss": -0.1061, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -1.2635581493377686, "rewards/margins": 0.809546947479248, "rewards/rejected": -2.0731050968170166, "step": 445 }, { "epoch": 0.942161737764983, "grad_norm": 4.689735375861538, "learning_rate": 5.84505523733293e-09, "logits/chosen": 0.29893386363983154, "logits/rejected": 0.4135954976081848, "logps/chosen": -1.2849633693695068, "logps/rejected": -2.084097385406494, "loss": -0.1116, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -1.2849633693695068, "rewards/margins": 0.7991340160369873, "rewards/rejected": -2.084097385406494, "step": 450 }, { "epoch": 0.9526302015179272, "grad_norm": 10.72581336853298, "learning_rate": 3.8849212086261466e-09, "logits/chosen": 0.24026727676391602, "logits/rejected": 0.4279538691043854, "logps/chosen": -1.4815961122512817, "logps/rejected": -2.078247547149658, "loss": -0.111, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.4815961122512817, "rewards/margins": 0.596651554107666, "rewards/rejected": -2.078247547149658, "step": 455 }, { "epoch": 0.9630986652708715, "grad_norm": 8.569087978528602, "learning_rate": 2.3217384276938756e-09, "logits/chosen": 0.3192306160926819, "logits/rejected": 0.4579745829105377, "logps/chosen": -1.2749390602111816, "logps/rejected": -2.1251206398010254, "loss": -0.1167, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.2749390602111816, "rewards/margins": 0.8501815795898438, "rewards/rejected": -2.1251206398010254, "step": 460 }, { "epoch": 0.9735671290238157, "grad_norm": 8.07831527929665, "learning_rate": 1.1576023884836472e-09, "logits/chosen": 0.14747342467308044, "logits/rejected": 0.4368831515312195, "logps/chosen": -1.3681727647781372, "logps/rejected": -2.154388427734375, "loss": -0.1145, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -1.3681727647781372, "rewards/margins": 0.7862156629562378, "rewards/rejected": -2.154388427734375, "step": 465 }, { "epoch": 0.98403559277676, "grad_norm": 10.228166489996983, "learning_rate": 3.940736506780395e-10, "logits/chosen": 0.13486842811107635, "logits/rejected": 0.33838891983032227, "logps/chosen": -1.2538591623306274, "logps/rejected": -1.9874036312103271, "loss": -0.1067, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -1.2538591623306274, "rewards/margins": 0.7335445284843445, "rewards/rejected": -1.9874036312103271, "step": 470 }, { "epoch": 0.9945040565297043, "grad_norm": 21.73443269439748, "learning_rate": 3.2175747716822744e-11, "logits/chosen": 0.1331544667482376, "logits/rejected": 0.45593690872192383, "logps/chosen": -1.3638273477554321, "logps/rejected": -2.1485755443573, "loss": -0.0976, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.3638273477554321, "rewards/margins": 0.7847483158111572, "rewards/rejected": -2.1485755443573, "step": 475 }, { "epoch": 0.998691442030882, "step": 477, "total_flos": 0.0, "train_loss": -0.09237800735347676, "train_runtime": 17432.5225, "train_samples_per_second": 3.507, "train_steps_per_second": 0.027 } ], "logging_steps": 5, "max_steps": 477, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }