{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9982631930527722, "eval_steps": 400, "global_step": 467, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01068804275217101, "grad_norm": 7.822805657905783, "learning_rate": 6.382978723404255e-08, "logits/chosen": 0.06214674562215805, "logits/rejected": 0.03797388821840286, "logps/chosen": -0.2699491083621979, "logps/rejected": -0.26826155185699463, "loss": 1.2748, "rewards/accuracies": 0.4937500059604645, "rewards/chosen": -0.2699491083621979, "rewards/margins": -0.0016875670989975333, "rewards/rejected": -0.26826155185699463, "step": 5 }, { "epoch": 0.02137608550434202, "grad_norm": 5.0967725327137074, "learning_rate": 1.276595744680851e-07, "logits/chosen": -0.010526341386139393, "logits/rejected": -0.012353870086371899, "logps/chosen": -0.2696549892425537, "logps/rejected": -0.2676162123680115, "loss": 1.2725, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -0.2696549892425537, "rewards/margins": -0.0020388036500662565, "rewards/rejected": -0.2676162123680115, "step": 10 }, { "epoch": 0.03206412825651302, "grad_norm": 6.6390016305878055, "learning_rate": 1.9148936170212767e-07, "logits/chosen": 0.0009885445469990373, "logits/rejected": 0.00387256289832294, "logps/chosen": -0.2789618670940399, "logps/rejected": -0.2836909592151642, "loss": 1.2796, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.2789618670940399, "rewards/margins": 0.00472906231880188, "rewards/rejected": -0.2836909592151642, "step": 15 }, { "epoch": 0.04275217100868404, "grad_norm": 9.068174455913743, "learning_rate": 2.553191489361702e-07, "logits/chosen": -0.06325958669185638, "logits/rejected": -0.06925094127655029, "logps/chosen": -0.2819739878177643, "logps/rejected": -0.2899174988269806, "loss": 1.2759, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.2819739878177643, "rewards/margins": 0.007943493314087391, "rewards/rejected": -0.2899174988269806, "step": 20 }, { "epoch": 0.053440213760855046, "grad_norm": 4.867685031446897, "learning_rate": 3.1914893617021275e-07, "logits/chosen": -0.06868849694728851, "logits/rejected": -0.04817543178796768, "logps/chosen": -0.25565916299819946, "logps/rejected": -0.2749556303024292, "loss": 1.2567, "rewards/accuracies": 0.5625, "rewards/chosen": -0.25565916299819946, "rewards/margins": 0.01929648406803608, "rewards/rejected": -0.2749556303024292, "step": 25 }, { "epoch": 0.06412825651302605, "grad_norm": 4.806810486248379, "learning_rate": 3.8297872340425535e-07, "logits/chosen": -0.014168953523039818, "logits/rejected": -0.00634436309337616, "logps/chosen": -0.2789873480796814, "logps/rejected": -0.2939203977584839, "loss": 1.2769, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.2789873480796814, "rewards/margins": 0.01493304967880249, "rewards/rejected": -0.2939203977584839, "step": 30 }, { "epoch": 0.07481629926519706, "grad_norm": 7.01406965287447, "learning_rate": 4.4680851063829783e-07, "logits/chosen": -0.029415354132652283, "logits/rejected": -0.009010488167405128, "logps/chosen": -0.2785240411758423, "logps/rejected": -0.29580387473106384, "loss": 1.2752, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -0.2785240411758423, "rewards/margins": 0.01727980747818947, "rewards/rejected": -0.29580387473106384, "step": 35 }, { "epoch": 0.08550434201736808, "grad_norm": 5.373513139182376, "learning_rate": 5.106382978723404e-07, "logits/chosen": -0.06608792394399643, "logits/rejected": -0.07190172374248505, "logps/chosen": -0.26092082262039185, "logps/rejected": -0.2700851261615753, "loss": 1.2674, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -0.26092082262039185, "rewards/margins": 0.009164294227957726, "rewards/rejected": -0.2700851261615753, "step": 40 }, { "epoch": 0.09619238476953908, "grad_norm": 5.609168218812681, "learning_rate": 5.74468085106383e-07, "logits/chosen": -0.05338377505540848, "logits/rejected": -0.01094720046967268, "logps/chosen": -0.2855256199836731, "logps/rejected": -0.28623315691947937, "loss": 1.2738, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.2855256199836731, "rewards/margins": 0.0007075363537296653, "rewards/rejected": -0.28623315691947937, "step": 45 }, { "epoch": 0.10688042752171009, "grad_norm": 4.336678776172025, "learning_rate": 5.999244704827519e-07, "logits/chosen": -0.017125016078352928, "logits/rejected": -0.004308671224862337, "logps/chosen": -0.28561219573020935, "logps/rejected": -0.299736350774765, "loss": 1.2688, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.28561219573020935, "rewards/margins": 0.014124127104878426, "rewards/rejected": -0.299736350774765, "step": 50 }, { "epoch": 0.11756847027388109, "grad_norm": 5.882470821439722, "learning_rate": 5.994630389303205e-07, "logits/chosen": 0.0162811242043972, "logits/rejected": -0.004544490482658148, "logps/chosen": -0.2731076776981354, "logps/rejected": -0.2809983193874359, "loss": 1.2631, "rewards/accuracies": 0.5, "rewards/chosen": -0.2731076776981354, "rewards/margins": 0.007890653796494007, "rewards/rejected": -0.2809983193874359, "step": 55 }, { "epoch": 0.1282565130260521, "grad_norm": 6.092605492824151, "learning_rate": 5.985827812395378e-07, "logits/chosen": -0.03923701494932175, "logits/rejected": -0.07081723213195801, "logps/chosen": -0.2858438491821289, "logps/rejected": -0.31485337018966675, "loss": 1.2628, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.2858438491821289, "rewards/margins": 0.029009530320763588, "rewards/rejected": -0.31485337018966675, "step": 60 }, { "epoch": 0.13894455577822312, "grad_norm": 5.246427419034069, "learning_rate": 5.972849285303804e-07, "logits/chosen": -0.024546677246689796, "logits/rejected": 0.03360120207071304, "logps/chosen": -0.29182225465774536, "logps/rejected": -0.31506821513175964, "loss": 1.2705, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.29182225465774536, "rewards/margins": 0.023245956748723984, "rewards/rejected": -0.31506821513175964, "step": 65 }, { "epoch": 0.14963259853039412, "grad_norm": 5.0996411858772115, "learning_rate": 5.955712959672177e-07, "logits/chosen": -0.016444489359855652, "logits/rejected": -0.020679041743278503, "logps/chosen": -0.29391151666641235, "logps/rejected": -0.3471246361732483, "loss": 1.2543, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.29391151666641235, "rewards/margins": 0.05321308970451355, "rewards/rejected": -0.3471246361732483, "step": 70 }, { "epoch": 0.16032064128256512, "grad_norm": 4.491905521876928, "learning_rate": 5.934442802201417e-07, "logits/chosen": 0.06254759430885315, "logits/rejected": 0.10311929881572723, "logps/chosen": -0.3031434714794159, "logps/rejected": -0.3355598449707031, "loss": 1.2679, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.3031434714794159, "rewards/margins": 0.03241636976599693, "rewards/rejected": -0.3355598449707031, "step": 75 }, { "epoch": 0.17100868403473615, "grad_norm": 6.478010906716982, "learning_rate": 5.909068561130061e-07, "logits/chosen": -0.01297207735478878, "logits/rejected": -0.004632393829524517, "logps/chosen": -0.29228898882865906, "logps/rejected": -0.322248637676239, "loss": 1.2618, "rewards/accuracies": 0.53125, "rewards/chosen": -0.29228898882865906, "rewards/margins": 0.02995964325964451, "rewards/rejected": -0.322248637676239, "step": 80 }, { "epoch": 0.18169672678690715, "grad_norm": 4.650638031490373, "learning_rate": 5.879625724628667e-07, "logits/chosen": 0.005947749130427837, "logits/rejected": 0.021510040387511253, "logps/chosen": -0.2952747642993927, "logps/rejected": -0.3337419927120209, "loss": 1.257, "rewards/accuracies": 0.5, "rewards/chosen": -0.2952747642993927, "rewards/margins": 0.03846726939082146, "rewards/rejected": -0.3337419927120209, "step": 85 }, { "epoch": 0.19238476953907815, "grad_norm": 7.530524241094077, "learning_rate": 5.846155471166399e-07, "logits/chosen": 0.015343578532338142, "logits/rejected": 0.03540420904755592, "logps/chosen": -0.3116888105869293, "logps/rejected": -0.3682340085506439, "loss": 1.253, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.3116888105869293, "rewards/margins": 0.05654525011777878, "rewards/rejected": -0.3682340085506439, "step": 90 }, { "epoch": 0.20307281229124916, "grad_norm": 6.23873980938112, "learning_rate": 5.808704611919212e-07, "logits/chosen": 0.0079043535515666, "logits/rejected": -0.009995353408157825, "logps/chosen": -0.30725741386413574, "logps/rejected": -0.3175857663154602, "loss": 1.2597, "rewards/accuracies": 0.48124998807907104, "rewards/chosen": -0.30725741386413574, "rewards/margins": 0.010328322649002075, "rewards/rejected": -0.3175857663154602, "step": 95 }, { "epoch": 0.21376085504342018, "grad_norm": 5.495931507709334, "learning_rate": 5.767325525300187e-07, "logits/chosen": 0.012924237176775932, "logits/rejected": 0.015158179216086864, "logps/chosen": -0.30597418546676636, "logps/rejected": -0.3576403558254242, "loss": 1.2572, "rewards/accuracies": 0.4937500059604645, "rewards/chosen": -0.30597418546676636, "rewards/margins": 0.051666177809238434, "rewards/rejected": -0.3576403558254242, "step": 100 }, { "epoch": 0.22444889779559118, "grad_norm": 8.704066182889123, "learning_rate": 5.722076083703594e-07, "logits/chosen": -0.011864040978252888, "logits/rejected": -0.015826348215341568, "logps/chosen": -0.2861265540122986, "logps/rejected": -0.3439098000526428, "loss": 1.2455, "rewards/accuracies": 0.5062500238418579, "rewards/chosen": -0.2861265540122986, "rewards/margins": 0.057783275842666626, "rewards/rejected": -0.3439098000526428, "step": 105 }, { "epoch": 0.23513694054776219, "grad_norm": 6.081543266921472, "learning_rate": 5.673019572565103e-07, "logits/chosen": -0.024934740737080574, "logits/rejected": -0.036910589784383774, "logps/chosen": -0.29488444328308105, "logps/rejected": -0.3499029576778412, "loss": 1.2384, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.29488444328308105, "rewards/margins": 0.05501857399940491, "rewards/rejected": -0.3499029576778412, "step": 110 }, { "epoch": 0.2458249832999332, "grad_norm": 5.083503334201838, "learning_rate": 5.620224601851389e-07, "logits/chosen": 0.0035224161110818386, "logits/rejected": 0.001966515090316534, "logps/chosen": -0.30457058548927307, "logps/rejected": -0.35604608058929443, "loss": 1.2561, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.30457058548927307, "rewards/margins": 0.051475513726472855, "rewards/rejected": -0.35604608058929443, "step": 115 }, { "epoch": 0.2565130260521042, "grad_norm": 7.099362190379442, "learning_rate": 5.563765010102885e-07, "logits/chosen": -0.06543167680501938, "logits/rejected": -0.0410967655479908, "logps/chosen": -0.3293083906173706, "logps/rejected": -0.3675723075866699, "loss": 1.2552, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.3293083906173706, "rewards/margins": 0.03826391324400902, "rewards/rejected": -0.3675723075866699, "step": 120 }, { "epoch": 0.26720106880427524, "grad_norm": 5.147990697882454, "learning_rate": 5.503719761163907e-07, "logits/chosen": -0.10343233495950699, "logits/rejected": -0.08113230764865875, "logps/chosen": -0.2962001860141754, "logps/rejected": -0.35733163356781006, "loss": 1.234, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.2962001860141754, "rewards/margins": 0.06113145500421524, "rewards/rejected": -0.35733163356781006, "step": 125 }, { "epoch": 0.27788911155644624, "grad_norm": 5.070905426510523, "learning_rate": 5.440172833744582e-07, "logits/chosen": -0.059284817427396774, "logits/rejected": -0.020249750465154648, "logps/chosen": -0.3295074701309204, "logps/rejected": -0.37299367785453796, "loss": 1.2644, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.3295074701309204, "rewards/margins": 0.04348624125123024, "rewards/rejected": -0.37299367785453796, "step": 130 }, { "epoch": 0.28857715430861725, "grad_norm": 8.434154523088012, "learning_rate": 5.373213103969024e-07, "logits/chosen": -0.09271787106990814, "logits/rejected": -0.0978003442287445, "logps/chosen": -0.3175578713417053, "logps/rejected": -0.3870469629764557, "loss": 1.2466, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.3175578713417053, "rewards/margins": 0.06948906183242798, "rewards/rejected": -0.3870469629764557, "step": 135 }, { "epoch": 0.29926519706078825, "grad_norm": 8.050573635697841, "learning_rate": 5.302934221074033e-07, "logits/chosen": -0.18472157418727875, "logits/rejected": -0.18296249210834503, "logps/chosen": -0.35015708208084106, "logps/rejected": -0.418283075094223, "loss": 1.2553, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.35015708208084106, "rewards/margins": 0.06812603026628494, "rewards/rejected": -0.418283075094223, "step": 140 }, { "epoch": 0.30995323981295925, "grad_norm": 5.537790942631876, "learning_rate": 5.229434476432182e-07, "logits/chosen": -0.04427188262343407, "logits/rejected": -0.07002754509449005, "logps/chosen": -0.3104066252708435, "logps/rejected": -0.36531931161880493, "loss": 1.2393, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.3104066252708435, "rewards/margins": 0.05491270869970322, "rewards/rejected": -0.36531931161880493, "step": 145 }, { "epoch": 0.32064128256513025, "grad_norm": 13.46796536991619, "learning_rate": 5.152816666082435e-07, "logits/chosen": -0.09154470264911652, "logits/rejected": -0.10489149391651154, "logps/chosen": -0.3233293890953064, "logps/rejected": -0.42510905861854553, "loss": 1.2451, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.3233293890953064, "rewards/margins": 0.10177962481975555, "rewards/rejected": -0.42510905861854553, "step": 150 }, { "epoch": 0.33132932531730125, "grad_norm": 7.7500923534994985, "learning_rate": 5.073187946960594e-07, "logits/chosen": -0.08783230930566788, "logits/rejected": -0.08713527768850327, "logps/chosen": -0.3179479241371155, "logps/rejected": -0.3679467737674713, "loss": 1.2488, "rewards/accuracies": 0.4625000059604645, "rewards/chosen": -0.3179479241371155, "rewards/margins": 0.04999883845448494, "rewards/rejected": -0.3679467737674713, "step": 155 }, { "epoch": 0.3420173680694723, "grad_norm": 7.025909512719848, "learning_rate": 4.990659687030634e-07, "logits/chosen": -0.1076837033033371, "logits/rejected": -0.08046683669090271, "logps/chosen": -0.31321102380752563, "logps/rejected": -0.3762710690498352, "loss": 1.2529, "rewards/accuracies": 0.5625, "rewards/chosen": -0.31321102380752563, "rewards/margins": 0.06306007504463196, "rewards/rejected": -0.3762710690498352, "step": 160 }, { "epoch": 0.3527054108216433, "grad_norm": 8.01239629928041, "learning_rate": 4.905347309526536e-07, "logits/chosen": -0.06136934086680412, "logits/rejected": -0.07382142543792725, "logps/chosen": -0.29660579562187195, "logps/rejected": -0.37172654271125793, "loss": 1.2427, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.29660579562187195, "rewards/margins": 0.07512073218822479, "rewards/rejected": -0.37172654271125793, "step": 165 }, { "epoch": 0.3633934535738143, "grad_norm": 5.642901073511358, "learning_rate": 4.817370131522459e-07, "logits/chosen": -0.0563310906291008, "logits/rejected": -0.030183713883161545, "logps/chosen": -0.31852108240127563, "logps/rejected": -0.39116546511650085, "loss": 1.2442, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.31852108240127563, "rewards/margins": 0.07264441251754761, "rewards/rejected": -0.39116546511650085, "step": 170 }, { "epoch": 0.3740814963259853, "grad_norm": 5.485147120245967, "learning_rate": 4.7268511970570207e-07, "logits/chosen": -0.08929944038391113, "logits/rejected": -0.08389794826507568, "logps/chosen": -0.30848273634910583, "logps/rejected": -0.3659656345844269, "loss": 1.2505, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.30848273634910583, "rewards/margins": 0.05748288705945015, "rewards/rejected": -0.3659656345844269, "step": 175 }, { "epoch": 0.3847695390781563, "grad_norm": 7.991274503438784, "learning_rate": 4.6339171050450815e-07, "logits/chosen": -0.10145304352045059, "logits/rejected": -0.09222683310508728, "logps/chosen": -0.31844446063041687, "logps/rejected": -0.3609256148338318, "loss": 1.2546, "rewards/accuracies": 0.48124998807907104, "rewards/chosen": -0.31844446063041687, "rewards/margins": 0.042481135576963425, "rewards/rejected": -0.3609256148338318, "step": 180 }, { "epoch": 0.3954575818303273, "grad_norm": 5.832967942513168, "learning_rate": 4.5386978322177184e-07, "logits/chosen": -0.051486529409885406, "logits/rejected": -0.07657450437545776, "logps/chosen": -0.3131783604621887, "logps/rejected": -0.35496917366981506, "loss": 1.2627, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.3131783604621887, "rewards/margins": 0.04179079458117485, "rewards/rejected": -0.35496917366981506, "step": 185 }, { "epoch": 0.4061456245824983, "grad_norm": 8.218974322062975, "learning_rate": 4.4413265513380134e-07, "logits/chosen": -0.08528328686952591, "logits/rejected": -0.0627092644572258, "logps/chosen": -0.3054826855659485, "logps/rejected": -0.37131738662719727, "loss": 1.2456, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -0.3054826855659485, "rewards/margins": 0.06583467870950699, "rewards/rejected": -0.37131738662719727, "step": 190 }, { "epoch": 0.4168336673346693, "grad_norm": 8.281751397477004, "learning_rate": 4.3419394449468975e-07, "logits/chosen": -0.0632157102227211, "logits/rejected": -0.039062272757291794, "logps/chosen": -0.3401602506637573, "logps/rejected": -0.42965516448020935, "loss": 1.2393, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.3401602506637573, "rewards/margins": 0.08949492126703262, "rewards/rejected": -0.42965516448020935, "step": 195 }, { "epoch": 0.42752171008684037, "grad_norm": 5.682547699952222, "learning_rate": 4.2406755148995617e-07, "logits/chosen": -0.036120522767305374, "logits/rejected": -0.00437445193529129, "logps/chosen": -0.30982089042663574, "logps/rejected": -0.3825121223926544, "loss": 1.2418, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.30982089042663574, "rewards/margins": 0.07269121706485748, "rewards/rejected": -0.3825121223926544, "step": 200 }, { "epoch": 0.43820975283901137, "grad_norm": 5.711897059557612, "learning_rate": 4.1376763879587855e-07, "logits/chosen": -0.08326585590839386, "logits/rejected": -0.12235681712627411, "logps/chosen": -0.34261685609817505, "logps/rejected": -0.4019942283630371, "loss": 1.2571, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.34261685609817505, "rewards/margins": 0.05937739089131355, "rewards/rejected": -0.4019942283630371, "step": 205 }, { "epoch": 0.44889779559118237, "grad_norm": 6.6702507208013895, "learning_rate": 4.0330861177171046e-07, "logits/chosen": -0.09191317856311798, "logits/rejected": -0.07536768168210983, "logps/chosen": -0.3210485577583313, "logps/rejected": -0.39176544547080994, "loss": 1.247, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.3210485577583313, "rewards/margins": 0.07071693241596222, "rewards/rejected": -0.39176544547080994, "step": 210 }, { "epoch": 0.45958583834335337, "grad_norm": 6.330049251313848, "learning_rate": 3.927050983124842e-07, "logits/chosen": -0.025531485676765442, "logits/rejected": -0.07240410149097443, "logps/chosen": -0.29885441064834595, "logps/rejected": -0.391807496547699, "loss": 1.238, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.29885441064834595, "rewards/margins": 0.09295307099819183, "rewards/rejected": -0.391807496547699, "step": 215 }, { "epoch": 0.47027388109552437, "grad_norm": 6.737595050647263, "learning_rate": 3.8197192839057603e-07, "logits/chosen": -0.1071164608001709, "logits/rejected": -0.12290854752063751, "logps/chosen": -0.30930382013320923, "logps/rejected": -0.4362809658050537, "loss": 1.2382, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.30930382013320923, "rewards/margins": 0.12697716057300568, "rewards/rejected": -0.4362809658050537, "step": 220 }, { "epoch": 0.48096192384769537, "grad_norm": 8.559735052947849, "learning_rate": 3.7112411331464923e-07, "logits/chosen": -0.02524995245039463, "logits/rejected": -0.030149292200803757, "logps/chosen": -0.3149697184562683, "logps/rejected": -0.3956434428691864, "loss": 1.2272, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.3149697184562683, "rewards/margins": 0.0806737095117569, "rewards/rejected": -0.3956434428691864, "step": 225 }, { "epoch": 0.4916499665998664, "grad_norm": 6.480287157306168, "learning_rate": 3.601768247349818e-07, "logits/chosen": -0.03261668235063553, "logits/rejected": -0.08516497910022736, "logps/chosen": -0.3169209659099579, "logps/rejected": -0.3873901069164276, "loss": 1.2404, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.3169209659099579, "rewards/margins": 0.07046912610530853, "rewards/rejected": -0.3873901069164276, "step": 230 }, { "epoch": 0.5023380093520374, "grad_norm": 8.225368883810985, "learning_rate": 3.491453734245413e-07, "logits/chosen": -0.06573788821697235, "logits/rejected": -0.0159236378967762, "logps/chosen": -0.3394278287887573, "logps/rejected": -0.4536859393119812, "loss": 1.2409, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.3394278287887573, "rewards/margins": 0.11425812542438507, "rewards/rejected": -0.4536859393119812, "step": 235 }, { "epoch": 0.5130260521042084, "grad_norm": 6.0501816929552135, "learning_rate": 3.3804518786548455e-07, "logits/chosen": -0.09407626837491989, "logits/rejected": -0.07616542279720306, "logps/chosen": -0.3101692199707031, "logps/rejected": -0.428670734167099, "loss": 1.2453, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.3101692199707031, "rewards/margins": 0.11850155889987946, "rewards/rejected": -0.428670734167099, "step": 240 }, { "epoch": 0.5237140948563794, "grad_norm": 5.475578816065332, "learning_rate": 3.2689179267103006e-07, "logits/chosen": -0.1301025003194809, "logits/rejected": -0.12063749134540558, "logps/chosen": -0.3209839463233948, "logps/rejected": -0.3626781404018402, "loss": 1.2299, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.3209839463233948, "rewards/margins": 0.04169422388076782, "rewards/rejected": -0.3626781404018402, "step": 245 }, { "epoch": 0.5344021376085505, "grad_norm": 7.848337245875008, "learning_rate": 3.1570078687288317e-07, "logits/chosen": -0.07203061133623123, "logits/rejected": -0.07748202979564667, "logps/chosen": -0.34228605031967163, "logps/rejected": -0.46370163559913635, "loss": 1.2274, "rewards/accuracies": 0.53125, "rewards/chosen": -0.34228605031967163, "rewards/margins": 0.12141555547714233, "rewards/rejected": -0.46370163559913635, "step": 250 }, { "epoch": 0.5450901803607214, "grad_norm": 8.463852280152306, "learning_rate": 3.0448782210457906e-07, "logits/chosen": -0.07813692837953568, "logits/rejected": -0.07056453824043274, "logps/chosen": -0.3696078360080719, "logps/rejected": -0.4517177939414978, "loss": 1.2467, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.3696078360080719, "rewards/margins": 0.08210990577936172, "rewards/rejected": -0.4517177939414978, "step": 255 }, { "epoch": 0.5557782231128925, "grad_norm": 8.673679370712454, "learning_rate": 2.932685807112585e-07, "logits/chosen": -0.13425521552562714, "logits/rejected": -0.13180285692214966, "logps/chosen": -0.3235534727573395, "logps/rejected": -0.4138403534889221, "loss": 1.2392, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.3235534727573395, "rewards/margins": 0.09028687328100204, "rewards/rejected": -0.4138403534889221, "step": 260 }, { "epoch": 0.5664662658650634, "grad_norm": 14.773817206579576, "learning_rate": 2.8205875381648974e-07, "logits/chosen": -0.10961911827325821, "logits/rejected": -0.10981354862451553, "logps/chosen": -0.31433889269828796, "logps/rejected": -0.40348243713378906, "loss": 1.2448, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.31433889269828796, "rewards/margins": 0.08914351463317871, "rewards/rejected": -0.40348243713378906, "step": 265 }, { "epoch": 0.5771543086172345, "grad_norm": 10.511806133556902, "learning_rate": 2.708740193768135e-07, "logits/chosen": -0.08152172714471817, "logits/rejected": -0.07667910307645798, "logps/chosen": -0.33281245827674866, "logps/rejected": -0.4943714141845703, "loss": 1.2277, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.33281245827674866, "rewards/margins": 0.16155894100666046, "rewards/rejected": -0.4943714141845703, "step": 270 }, { "epoch": 0.5878423513694054, "grad_norm": 5.764021925950384, "learning_rate": 2.597300202547034e-07, "logits/chosen": -0.0671951025724411, "logits/rejected": -0.09161119163036346, "logps/chosen": -0.3234054148197174, "logps/rejected": -0.36444562673568726, "loss": 1.2404, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.3234054148197174, "rewards/margins": 0.04104021191596985, "rewards/rejected": -0.36444562673568726, "step": 275 }, { "epoch": 0.5985303941215765, "grad_norm": 6.254810248978168, "learning_rate": 2.4864234234060747e-07, "logits/chosen": -0.1233711987733841, "logits/rejected": -0.10507211834192276, "logps/chosen": -0.326472669839859, "logps/rejected": -0.4079364836215973, "loss": 1.2346, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.326472669839859, "rewards/margins": 0.08146381378173828, "rewards/rejected": -0.4079364836215973, "step": 280 }, { "epoch": 0.6092184368737475, "grad_norm": 8.145566662679744, "learning_rate": 2.3762649275467223e-07, "logits/chosen": -0.11552796512842178, "logits/rejected": -0.12833945453166962, "logps/chosen": -0.3270297944545746, "logps/rejected": -0.40440672636032104, "loss": 1.2498, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.3270297944545746, "rewards/margins": 0.07737687975168228, "rewards/rejected": -0.40440672636032104, "step": 285 }, { "epoch": 0.6199064796259185, "grad_norm": 9.531079036404222, "learning_rate": 2.2669787815863174e-07, "logits/chosen": -0.03034001588821411, "logits/rejected": -0.0528348907828331, "logps/chosen": -0.3205064833164215, "logps/rejected": -0.4226464629173279, "loss": 1.2523, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.3205064833164215, "rewards/margins": 0.10214000940322876, "rewards/rejected": -0.4226464629173279, "step": 290 }, { "epoch": 0.6305945223780896, "grad_norm": 5.413246554100629, "learning_rate": 2.1587178320819919e-07, "logits/chosen": -0.060756783932447433, "logits/rejected": -0.0011257051955908537, "logps/chosen": -0.27187207341194153, "logps/rejected": -0.3691639006137848, "loss": 1.2341, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.27187207341194153, "rewards/margins": 0.09729186445474625, "rewards/rejected": -0.3691639006137848, "step": 295 }, { "epoch": 0.6412825651302605, "grad_norm": 12.664849670753055, "learning_rate": 2.0516334917609277e-07, "logits/chosen": -0.10225675255060196, "logits/rejected": -0.04843712970614433, "logps/chosen": -0.3377472758293152, "logps/rejected": -0.5128234028816223, "loss": 1.2347, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.3377472758293152, "rewards/margins": 0.17507611215114594, "rewards/rejected": -0.5128234028816223, "step": 300 }, { "epoch": 0.6519706078824316, "grad_norm": 7.8017671377878015, "learning_rate": 1.9458755277559716e-07, "logits/chosen": -0.11494015157222748, "logits/rejected": -0.10972355306148529, "logps/chosen": -0.3163761496543884, "logps/rejected": -0.4071407914161682, "loss": 1.2405, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.3163761496543884, "rewards/margins": 0.09076462686061859, "rewards/rejected": -0.4071407914161682, "step": 305 }, { "epoch": 0.6626586506346025, "grad_norm": 6.034077165873482, "learning_rate": 1.8415918521427613e-07, "logits/chosen": -0.1821509748697281, "logits/rejected": -0.19082587957382202, "logps/chosen": -0.31090688705444336, "logps/rejected": -0.3758618235588074, "loss": 1.2455, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.31090688705444336, "rewards/margins": 0.06495492160320282, "rewards/rejected": -0.3758618235588074, "step": 310 }, { "epoch": 0.6733466933867736, "grad_norm": 10.235181182403219, "learning_rate": 1.7389283150713038e-07, "logits/chosen": -0.1251331865787506, "logits/rejected": -0.11890840530395508, "logps/chosen": -0.3588525056838989, "logps/rejected": -0.425645649433136, "loss": 1.2543, "rewards/accuracies": 0.48124998807907104, "rewards/chosen": -0.3588525056838989, "rewards/margins": 0.06679315119981766, "rewards/rejected": -0.425645649433136, "step": 315 }, { "epoch": 0.6840347361389446, "grad_norm": 9.723759702295695, "learning_rate": 1.6380285007813597e-07, "logits/chosen": -0.1123957484960556, "logits/rejected": -0.1257510930299759, "logps/chosen": -0.3145357668399811, "logps/rejected": -0.3408251404762268, "loss": 1.2609, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -0.3145357668399811, "rewards/margins": 0.026289362460374832, "rewards/rejected": -0.3408251404762268, "step": 320 }, { "epoch": 0.6947227788911156, "grad_norm": 7.286383815847668, "learning_rate": 1.539033526786898e-07, "logits/chosen": -0.1374741941690445, "logits/rejected": -0.11429701000452042, "logps/chosen": -0.32331573963165283, "logps/rejected": -0.4659709930419922, "loss": 1.2423, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.32331573963165283, "rewards/margins": 0.14265525341033936, "rewards/rejected": -0.4659709930419922, "step": 325 }, { "epoch": 0.7054108216432866, "grad_norm": 5.495961241803029, "learning_rate": 1.4420818465104924e-07, "logits/chosen": -0.1799645572900772, "logits/rejected": -0.17759008705615997, "logps/chosen": -0.30779215693473816, "logps/rejected": -0.3636534810066223, "loss": 1.2328, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.30779215693473816, "rewards/margins": 0.05586131289601326, "rewards/rejected": -0.3636534810066223, "step": 330 }, { "epoch": 0.7160988643954576, "grad_norm": 6.732402042819091, "learning_rate": 1.3473090556436928e-07, "logits/chosen": -0.09900529682636261, "logits/rejected": -0.11673985421657562, "logps/chosen": -0.32374444603919983, "logps/rejected": -0.42279139161109924, "loss": 1.2482, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.32374444603919983, "rewards/margins": 0.0990469679236412, "rewards/rejected": -0.42279139161109924, "step": 335 }, { "epoch": 0.7267869071476286, "grad_norm": 6.164055292584424, "learning_rate": 1.2548477025041833e-07, "logits/chosen": -0.17352089285850525, "logits/rejected": -0.15723419189453125, "logps/chosen": -0.31769293546676636, "logps/rejected": -0.4277707040309906, "loss": 1.2469, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.31769293546676636, "rewards/margins": 0.11007778346538544, "rewards/rejected": -0.4277707040309906, "step": 340 }, { "epoch": 0.7374749498997996, "grad_norm": 7.386683624949419, "learning_rate": 1.1648271026549805e-07, "logits/chosen": -0.16501447558403015, "logits/rejected": -0.17034907639026642, "logps/chosen": -0.3037567734718323, "logps/rejected": -0.4147283136844635, "loss": 1.235, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.3037567734718323, "rewards/margins": 0.11097153276205063, "rewards/rejected": -0.4147283136844635, "step": 345 }, { "epoch": 0.7481629926519706, "grad_norm": 8.63440141406496, "learning_rate": 1.0773731580449275e-07, "logits/chosen": -0.0861009806394577, "logits/rejected": -0.10058856010437012, "logps/chosen": -0.3289971947669983, "logps/rejected": -0.43288707733154297, "loss": 1.2271, "rewards/accuracies": 0.59375, "rewards/chosen": -0.3289971947669983, "rewards/margins": 0.10388988256454468, "rewards/rejected": -0.43288707733154297, "step": 350 }, { "epoch": 0.7588510354041417, "grad_norm": 5.491722459194082, "learning_rate": 9.926081809234262e-08, "logits/chosen": -0.1492873877286911, "logits/rejected": -0.14633427560329437, "logps/chosen": -0.3535214960575104, "logps/rejected": -0.5062969923019409, "loss": 1.2331, "rewards/accuracies": 0.5625, "rewards/chosen": -0.3535214960575104, "rewards/margins": 0.15277548134326935, "rewards/rejected": -0.5062969923019409, "step": 355 }, { "epoch": 0.7695390781563126, "grad_norm": 16.598441497777067, "learning_rate": 9.106507227756998e-08, "logits/chosen": -0.10592007637023926, "logits/rejected": -0.1149587631225586, "logps/chosen": -0.355294406414032, "logps/rejected": -0.41237178444862366, "loss": 1.2541, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.355294406414032, "rewards/margins": 0.057077307254076004, "rewards/rejected": -0.41237178444862366, "step": 360 }, { "epoch": 0.7802271209084837, "grad_norm": 9.266627411766367, "learning_rate": 8.316154085178256e-08, "logits/chosen": -0.1599133014678955, "logits/rejected": -0.16612327098846436, "logps/chosen": -0.3537140488624573, "logps/rejected": -0.45664018392562866, "loss": 1.2415, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.3537140488624573, "rewards/margins": 0.10292615741491318, "rewards/rejected": -0.45664018392562866, "step": 365 }, { "epoch": 0.7909151636606546, "grad_norm": 7.428406479509353, "learning_rate": 7.55612776183419e-08, "logits/chosen": -0.10595826804637909, "logits/rejected": -0.09110520780086517, "logps/chosen": -0.35074084997177124, "logps/rejected": -0.40927591919898987, "loss": 1.2357, "rewards/accuracies": 0.59375, "rewards/chosen": -0.35074084997177124, "rewards/margins": 0.05853506922721863, "rewards/rejected": -0.40927591919898987, "step": 370 }, { "epoch": 0.8016032064128257, "grad_norm": 8.282266775931964, "learning_rate": 6.827491223262017e-08, "logits/chosen": -0.14613883197307587, "logits/rejected": -0.1305559277534485, "logps/chosen": -0.33350640535354614, "logps/rejected": -0.3941604495048523, "loss": 1.2317, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.33350640535354614, "rewards/margins": 0.06065405532717705, "rewards/rejected": -0.3941604495048523, "step": 375 }, { "epoch": 0.8122912491649966, "grad_norm": 7.257639288893235, "learning_rate": 6.131263533546572e-08, "logits/chosen": -0.13168776035308838, "logits/rejected": -0.13572274148464203, "logps/chosen": -0.32469362020492554, "logps/rejected": -0.4485169053077698, "loss": 1.2319, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.32469362020492554, "rewards/margins": 0.12382327020168304, "rewards/rejected": -0.4485169053077698, "step": 380 }, { "epoch": 0.8229792919171677, "grad_norm": 8.794582511790273, "learning_rate": 5.468418430067059e-08, "logits/chosen": -0.13690440356731415, "logits/rejected": -0.11999843269586563, "logps/chosen": -0.3403404653072357, "logps/rejected": -0.41022801399230957, "loss": 1.2458, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.3403404653072357, "rewards/margins": 0.06988750398159027, "rewards/rejected": -0.41022801399230957, "step": 385 }, { "epoch": 0.8336673346693386, "grad_norm": 6.914453118608116, "learning_rate": 4.839882961637282e-08, "logits/chosen": -0.14087721705436707, "logits/rejected": -0.11817269027233124, "logps/chosen": -0.32691115140914917, "logps/rejected": -0.39570215344429016, "loss": 1.2453, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.32691115140914917, "rewards/margins": 0.0687909945845604, "rewards/rejected": -0.39570215344429016, "step": 390 }, { "epoch": 0.8443553774215097, "grad_norm": 7.401358248565427, "learning_rate": 4.2465361919440165e-08, "logits/chosen": -0.17381078004837036, "logits/rejected": -0.1655048429965973, "logps/chosen": -0.32772788405418396, "logps/rejected": -0.36842280626296997, "loss": 1.2419, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -0.32772788405418396, "rewards/margins": 0.040694937109947205, "rewards/rejected": -0.36842280626296997, "step": 395 }, { "epoch": 0.8550434201736807, "grad_norm": 6.499476994247458, "learning_rate": 3.6892079700970036e-08, "logits/chosen": -0.19311991333961487, "logits/rejected": -0.1830570548772812, "logps/chosen": -0.34295058250427246, "logps/rejected": -0.37818074226379395, "loss": 1.2439, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.34295058250427246, "rewards/margins": 0.035230137407779694, "rewards/rejected": -0.37818074226379395, "step": 400 }, { "epoch": 0.8550434201736807, "eval_logits/chosen": 0.01532436441630125, "eval_logits/rejected": 0.005963262636214495, "eval_logps/chosen": -0.3365793526172638, "eval_logps/rejected": -0.40151944756507874, "eval_loss": 1.2415482997894287, "eval_rewards/accuracies": 0.5873983502388, "eval_rewards/chosen": -0.3365793526172638, "eval_rewards/margins": 0.06494008004665375, "eval_rewards/rejected": -0.40151944756507874, "eval_runtime": 427.7978, "eval_samples_per_second": 4.584, "eval_steps_per_second": 0.288, "step": 400 }, { "epoch": 0.8657314629258517, "grad_norm": 8.655152423101507, "learning_rate": 3.1686777700099e-08, "logits/chosen": -0.11514046043157578, "logits/rejected": -0.16686634719371796, "logps/chosen": -0.3493112325668335, "logps/rejected": -0.37683025002479553, "loss": 1.245, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.3493112325668335, "rewards/margins": 0.027519047260284424, "rewards/rejected": -0.37683025002479553, "step": 405 }, { "epoch": 0.8764195056780227, "grad_norm": 8.013257979238155, "learning_rate": 2.685673600235524e-08, "logits/chosen": -0.10536377131938934, "logits/rejected": -0.1320020854473114, "logps/chosen": -0.3498873710632324, "logps/rejected": -0.39043301343917847, "loss": 1.2607, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.3498873710632324, "rewards/margins": 0.04054565355181694, "rewards/rejected": -0.39043301343917847, "step": 410 }, { "epoch": 0.8871075484301937, "grad_norm": 12.61073944196688, "learning_rate": 2.2408709857800988e-08, "logits/chosen": -0.12514375150203705, "logits/rejected": -0.10413704812526703, "logps/chosen": -0.2854083478450775, "logps/rejected": -0.3893025517463684, "loss": 1.233, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.2854083478450775, "rewards/margins": 0.1038941740989685, "rewards/rejected": -0.3893025517463684, "step": 415 }, { "epoch": 0.8977955911823647, "grad_norm": 5.802098857336539, "learning_rate": 1.8348920233204167e-08, "logits/chosen": -0.08317883312702179, "logits/rejected": -0.06567595899105072, "logps/chosen": -0.3321346044540405, "logps/rejected": -0.4815450608730316, "loss": 1.2393, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.3321346044540405, "rewards/margins": 0.14941047132015228, "rewards/rejected": -0.4815450608730316, "step": 420 }, { "epoch": 0.9084836339345357, "grad_norm": 5.211099670695838, "learning_rate": 1.468304511145394e-08, "logits/chosen": -0.02274451218545437, "logits/rejected": -0.0685218870639801, "logps/chosen": -0.3114772439002991, "logps/rejected": -0.4261551797389984, "loss": 1.2356, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.3114772439002991, "rewards/margins": 0.11467792093753815, "rewards/rejected": -0.4261551797389984, "step": 425 }, { "epoch": 0.9191716766867067, "grad_norm": 5.865346057497481, "learning_rate": 1.1416211550388222e-08, "logits/chosen": -0.10939434915781021, "logits/rejected": -0.09104075282812119, "logps/chosen": -0.29765018820762634, "logps/rejected": -0.3629956841468811, "loss": 1.2343, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.29765018820762634, "rewards/margins": 0.06534545123577118, "rewards/rejected": -0.3629956841468811, "step": 430 }, { "epoch": 0.9298597194388778, "grad_norm": 7.151775664454521, "learning_rate": 8.552988512139748e-09, "logits/chosen": -0.09702922403812408, "logits/rejected": -0.11269289255142212, "logps/chosen": -0.3232804238796234, "logps/rejected": -0.4417162835597992, "loss": 1.2366, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.3232804238796234, "rewards/margins": 0.11843589693307877, "rewards/rejected": -0.4417162835597992, "step": 435 }, { "epoch": 0.9405477621910487, "grad_norm": 19.71341285143898, "learning_rate": 6.097380473029356e-09, "logits/chosen": -0.13407650589942932, "logits/rejected": -0.1464676707983017, "logps/chosen": -0.33517464995384216, "logps/rejected": -0.3925517201423645, "loss": 1.2475, "rewards/accuracies": 0.53125, "rewards/chosen": -0.33517464995384216, "rewards/margins": 0.05737708881497383, "rewards/rejected": -0.3925517201423645, "step": 440 }, { "epoch": 0.9512358049432198, "grad_norm": 7.374118674662329, "learning_rate": 4.052821822943597e-09, "logits/chosen": -0.04786144942045212, "logits/rejected": -0.04718126356601715, "logps/chosen": -0.3143005967140198, "logps/rejected": -0.38420677185058594, "loss": 1.258, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.3143005967140198, "rewards/margins": 0.06990616768598557, "rewards/rejected": -0.38420677185058594, "step": 445 }, { "epoch": 0.9619238476953907, "grad_norm": 7.332271422792315, "learning_rate": 2.4221720620301368e-09, "logits/chosen": -0.10571523010730743, "logits/rejected": -0.0989978164434433, "logps/chosen": -0.321834921836853, "logps/rejected": -0.40633755922317505, "loss": 1.2275, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.321834921836853, "rewards/margins": 0.08450265228748322, "rewards/rejected": -0.40633755922317505, "step": 450 }, { "epoch": 0.9726118904475618, "grad_norm": 7.032752985388992, "learning_rate": 1.2077118014282794e-09, "logits/chosen": -0.06323617696762085, "logits/rejected": -0.029714446514844894, "logps/chosen": -0.3421580493450165, "logps/rejected": -0.4160069525241852, "loss": 1.2498, "rewards/accuracies": 0.59375, "rewards/chosen": -0.3421580493450165, "rewards/margins": 0.0738489031791687, "rewards/rejected": -0.4160069525241852, "step": 455 }, { "epoch": 0.9832999331997327, "grad_norm": 6.513587335089515, "learning_rate": 4.1113957362785e-10, "logits/chosen": -0.05799049139022827, "logits/rejected": -0.08265287429094315, "logps/chosen": -0.3295963406562805, "logps/rejected": -0.37454092502593994, "loss": 1.2494, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.3295963406562805, "rewards/margins": 0.04494457319378853, "rewards/rejected": -0.37454092502593994, "step": 460 }, { "epoch": 0.9939879759519038, "grad_norm": 9.565280879421035, "learning_rate": 3.3569456917970085e-11, "logits/chosen": -0.030880967155098915, "logits/rejected": -0.05265098810195923, "logps/chosen": -0.31436887383461, "logps/rejected": -0.407276451587677, "loss": 1.2427, "rewards/accuracies": 0.5625, "rewards/chosen": -0.31436887383461, "rewards/margins": 0.0929076224565506, "rewards/rejected": -0.407276451587677, "step": 465 }, { "epoch": 0.9982631930527722, "step": 467, "total_flos": 0.0, "train_loss": 1.2480301234145237, "train_runtime": 21322.7535, "train_samples_per_second": 2.808, "train_steps_per_second": 0.022 } ], "logging_steps": 5, "max_steps": 467, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }