{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.998691442030882, "eval_steps": 500, "global_step": 477, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.010468463752944255, "grad_norm": 11.38107582220126, "learning_rate": 5.208333333333333e-08, "logits/chosen": -2.770763397216797, "logits/rejected": -2.728346824645996, "logps/chosen": -1.0277923345565796, "logps/rejected": -1.1729481220245361, "loss": 0.8182, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -1.0277923345565796, "rewards/margins": 0.145155668258667, "rewards/rejected": -1.1729481220245361, "step": 5 }, { "epoch": 0.02093692750588851, "grad_norm": 10.468333488013238, "learning_rate": 1.0416666666666667e-07, "logits/chosen": -2.7509100437164307, "logits/rejected": -2.72518253326416, "logps/chosen": -0.995405375957489, "logps/rejected": -1.071641206741333, "loss": 0.8182, "rewards/accuracies": 0.5, "rewards/chosen": -0.995405375957489, "rewards/margins": 0.07623584568500519, "rewards/rejected": -1.071641206741333, "step": 10 }, { "epoch": 0.031405391258832765, "grad_norm": 10.493977523397158, "learning_rate": 1.5624999999999999e-07, "logits/chosen": -2.7306554317474365, "logits/rejected": -2.697004795074463, "logps/chosen": -0.9894905090332031, "logps/rejected": -1.1477015018463135, "loss": 0.8176, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.9894905090332031, "rewards/margins": 0.1582109034061432, "rewards/rejected": -1.1477015018463135, "step": 15 }, { "epoch": 0.04187385501177702, "grad_norm": 9.724221169965997, "learning_rate": 2.0833333333333333e-07, "logits/chosen": -2.803373336791992, "logits/rejected": -2.709907293319702, "logps/chosen": -1.023125410079956, "logps/rejected": -1.1238459348678589, "loss": 0.8147, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -1.023125410079956, "rewards/margins": 0.10072048753499985, "rewards/rejected": -1.1238459348678589, "step": 20 }, { "epoch": 0.05234231876472128, "grad_norm": 10.045674241744983, "learning_rate": 2.604166666666667e-07, "logits/chosen": -2.799170970916748, "logits/rejected": -2.775965929031372, "logps/chosen": -1.030667781829834, "logps/rejected": -1.0879056453704834, "loss": 0.8112, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -1.030667781829834, "rewards/margins": 0.05723772197961807, "rewards/rejected": -1.0879056453704834, "step": 25 }, { "epoch": 0.06281078251766553, "grad_norm": 10.423264900774234, "learning_rate": 3.1249999999999997e-07, "logits/chosen": -2.7731029987335205, "logits/rejected": -2.7313244342803955, "logps/chosen": -0.9983895421028137, "logps/rejected": -1.1039111614227295, "loss": 0.8086, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.9983895421028137, "rewards/margins": 0.10552169382572174, "rewards/rejected": -1.1039111614227295, "step": 30 }, { "epoch": 0.07327924627060979, "grad_norm": 10.239422638364212, "learning_rate": 3.645833333333333e-07, "logits/chosen": -2.777191162109375, "logits/rejected": -2.6907501220703125, "logps/chosen": -1.053971290588379, "logps/rejected": -1.1872998476028442, "loss": 0.7974, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -1.053971290588379, "rewards/margins": 0.13332852721214294, "rewards/rejected": -1.1872998476028442, "step": 35 }, { "epoch": 0.08374771002355404, "grad_norm": 10.873743845098522, "learning_rate": 4.1666666666666667e-07, "logits/chosen": -2.797991991043091, "logits/rejected": -2.7014565467834473, "logps/chosen": -0.9985781908035278, "logps/rejected": -1.2822500467300415, "loss": 0.7923, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.9985781908035278, "rewards/margins": 0.28367188572883606, "rewards/rejected": -1.2822500467300415, "step": 40 }, { "epoch": 0.0942161737764983, "grad_norm": 9.5351641443286, "learning_rate": 4.6874999999999996e-07, "logits/chosen": -2.8105621337890625, "logits/rejected": -2.7648262977600098, "logps/chosen": -0.9561222195625305, "logps/rejected": -1.1234276294708252, "loss": 0.7892, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.9561222195625305, "rewards/margins": 0.1673053801059723, "rewards/rejected": -1.1234276294708252, "step": 45 }, { "epoch": 0.10468463752944256, "grad_norm": 15.905860590814207, "learning_rate": 4.999731868769026e-07, "logits/chosen": -2.7718915939331055, "logits/rejected": -2.704052686691284, "logps/chosen": -1.0114643573760986, "logps/rejected": -1.2787563800811768, "loss": 0.7887, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -1.0114643573760986, "rewards/margins": 0.26729193329811096, "rewards/rejected": -1.2787563800811768, "step": 50 }, { "epoch": 0.11515310128238682, "grad_norm": 10.647312199476977, "learning_rate": 4.996716052911017e-07, "logits/chosen": -2.7726762294769287, "logits/rejected": -2.7395455837249756, "logps/chosen": -0.9699283838272095, "logps/rejected": -1.193861484527588, "loss": 0.7891, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.9699283838272095, "rewards/margins": 0.2239331752061844, "rewards/rejected": -1.193861484527588, "step": 55 }, { "epoch": 0.12562156503533106, "grad_norm": 10.393691121493097, "learning_rate": 4.990353313429303e-07, "logits/chosen": -2.8305859565734863, "logits/rejected": -2.7853503227233887, "logps/chosen": -0.9936162233352661, "logps/rejected": -1.1538995504379272, "loss": 0.7809, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.9936162233352661, "rewards/margins": 0.16028328239917755, "rewards/rejected": -1.1538995504379272, "step": 60 }, { "epoch": 0.1360900287882753, "grad_norm": 10.006212720391833, "learning_rate": 4.980652179769217e-07, "logits/chosen": -2.821927309036255, "logits/rejected": -2.7109711170196533, "logps/chosen": -1.0194857120513916, "logps/rejected": -1.4160047769546509, "loss": 0.7711, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -1.0194857120513916, "rewards/margins": 0.3965190351009369, "rewards/rejected": -1.4160047769546509, "step": 65 }, { "epoch": 0.14655849254121958, "grad_norm": 12.427363846728545, "learning_rate": 4.967625656594781e-07, "logits/chosen": -2.7151401042938232, "logits/rejected": -2.6647896766662598, "logps/chosen": -1.061846375465393, "logps/rejected": -1.2517098188400269, "loss": 0.7746, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -1.061846375465393, "rewards/margins": 0.18986348807811737, "rewards/rejected": -1.2517098188400269, "step": 70 }, { "epoch": 0.15702695629416383, "grad_norm": 15.040518143943558, "learning_rate": 4.951291206355559e-07, "logits/chosen": -2.7983217239379883, "logits/rejected": -2.7329821586608887, "logps/chosen": -0.967555820941925, "logps/rejected": -1.2309954166412354, "loss": 0.7653, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.967555820941925, "rewards/margins": 0.2634395658969879, "rewards/rejected": -1.2309954166412354, "step": 75 }, { "epoch": 0.16749542004710807, "grad_norm": 13.367533047557702, "learning_rate": 4.93167072587771e-07, "logits/chosen": -2.779024362564087, "logits/rejected": -2.7217113971710205, "logps/chosen": -1.0280585289001465, "logps/rejected": -1.3247190713882446, "loss": 0.7769, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -1.0280585289001465, "rewards/margins": 0.29666048288345337, "rewards/rejected": -1.3247190713882446, "step": 80 }, { "epoch": 0.17796388380005235, "grad_norm": 12.448866220441356, "learning_rate": 4.908790517010636e-07, "logits/chosen": -2.792797565460205, "logits/rejected": -2.742084503173828, "logps/chosen": -0.9986704587936401, "logps/rejected": -1.251503586769104, "loss": 0.7761, "rewards/accuracies": 0.59375, "rewards/chosen": -0.9986704587936401, "rewards/margins": 0.2528330981731415, "rewards/rejected": -1.251503586769104, "step": 85 }, { "epoch": 0.1884323475529966, "grad_norm": 10.949313193375856, "learning_rate": 4.882681251368548e-07, "logits/chosen": -2.753514289855957, "logits/rejected": -2.728050470352173, "logps/chosen": -1.0043939352035522, "logps/rejected": -1.2518372535705566, "loss": 0.7618, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.0043939352035522, "rewards/margins": 0.24744322896003723, "rewards/rejected": -1.2518372535705566, "step": 90 }, { "epoch": 0.19890081130594087, "grad_norm": 13.130451787621045, "learning_rate": 4.853377929214243e-07, "logits/chosen": -2.811157703399658, "logits/rejected": -2.7097816467285156, "logps/chosen": -1.0308064222335815, "logps/rejected": -1.2379958629608154, "loss": 0.7623, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -1.0308064222335815, "rewards/margins": 0.20718936622142792, "rewards/rejected": -1.2379958629608154, "step": 95 }, { "epoch": 0.2093692750588851, "grad_norm": 13.323653880946075, "learning_rate": 4.820919832540181e-07, "logits/chosen": -2.7652459144592285, "logits/rejected": -2.669363260269165, "logps/chosen": -1.0318419933319092, "logps/rejected": -1.3162591457366943, "loss": 0.7648, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -1.0318419933319092, "rewards/margins": 0.28441694378852844, "rewards/rejected": -1.3162591457366943, "step": 100 }, { "epoch": 0.21983773881182936, "grad_norm": 14.04623446067068, "learning_rate": 4.785350472409791e-07, "logits/chosen": -2.7531397342681885, "logits/rejected": -2.7293217182159424, "logps/chosen": -0.9516981840133667, "logps/rejected": -1.297178864479065, "loss": 0.7509, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.9516981840133667, "rewards/margins": 0.34548065066337585, "rewards/rejected": -1.297178864479065, "step": 105 }, { "epoch": 0.23030620256477363, "grad_norm": 13.599390080064117, "learning_rate": 4.7467175306295647e-07, "logits/chosen": -2.800642728805542, "logits/rejected": -2.7362637519836426, "logps/chosen": -1.0258657932281494, "logps/rejected": -1.3415501117706299, "loss": 0.7602, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.0258657932281494, "rewards/margins": 0.31568431854248047, "rewards/rejected": -1.3415501117706299, "step": 110 }, { "epoch": 0.24077466631771788, "grad_norm": 11.166851777914701, "learning_rate": 4.70507279583015e-07, "logits/chosen": -2.7502248287200928, "logits/rejected": -2.6826751232147217, "logps/chosen": -0.9817054867744446, "logps/rejected": -1.369450569152832, "loss": 0.7574, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.9817054867744446, "rewards/margins": 0.3877450227737427, "rewards/rejected": -1.369450569152832, "step": 115 }, { "epoch": 0.2512431300706621, "grad_norm": 11.347114772747197, "learning_rate": 4.6604720940421207e-07, "logits/chosen": -2.761990547180176, "logits/rejected": -2.7169580459594727, "logps/chosen": -1.0401480197906494, "logps/rejected": -1.3125249147415161, "loss": 0.7548, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -1.0401480197906494, "rewards/margins": 0.2723769247531891, "rewards/rejected": -1.3125249147415161, "step": 120 }, { "epoch": 0.26171159382360637, "grad_norm": 11.52046180991134, "learning_rate": 4.612975213859487e-07, "logits/chosen": -2.806786060333252, "logits/rejected": -2.7883572578430176, "logps/chosen": -1.0292778015136719, "logps/rejected": -1.2358990907669067, "loss": 0.7635, "rewards/accuracies": 0.59375, "rewards/chosen": -1.0292778015136719, "rewards/margins": 0.2066211998462677, "rewards/rejected": -1.2358990907669067, "step": 125 }, { "epoch": 0.2721800575765506, "grad_norm": 13.331336972954334, "learning_rate": 4.5626458262912735e-07, "logits/chosen": -2.7795305252075195, "logits/rejected": -2.7438769340515137, "logps/chosen": -0.9998070001602173, "logps/rejected": -1.1660974025726318, "loss": 0.7593, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.9998070001602173, "rewards/margins": 0.1662905514240265, "rewards/rejected": -1.1660974025726318, "step": 130 }, { "epoch": 0.2826485213294949, "grad_norm": 17.992436930119467, "learning_rate": 4.5095513994085974e-07, "logits/chosen": -2.7764856815338135, "logits/rejected": -2.6815366744995117, "logps/chosen": -1.0169318914413452, "logps/rejected": -1.509103775024414, "loss": 0.7517, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.0169318914413452, "rewards/margins": 0.4921717643737793, "rewards/rejected": -1.509103775024414, "step": 135 }, { "epoch": 0.29311698508243916, "grad_norm": 12.279405154425808, "learning_rate": 4.453763107901675e-07, "logits/chosen": -2.7898383140563965, "logits/rejected": -2.7315587997436523, "logps/chosen": -0.9953171014785767, "logps/rejected": -1.354308843612671, "loss": 0.7381, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.9953171014785767, "rewards/margins": 0.3589917719364166, "rewards/rejected": -1.354308843612671, "step": 140 }, { "epoch": 0.3035854488353834, "grad_norm": 11.881664766821807, "learning_rate": 4.395355737667985e-07, "logits/chosen": -2.7770204544067383, "logits/rejected": -2.7200214862823486, "logps/chosen": -0.9782532453536987, "logps/rejected": -1.2849363088607788, "loss": 0.757, "rewards/accuracies": 0.65625, "rewards/chosen": -0.9782532453536987, "rewards/margins": 0.30668309330940247, "rewards/rejected": -1.2849363088607788, "step": 145 }, { "epoch": 0.31405391258832765, "grad_norm": 11.86886950333677, "learning_rate": 4.3344075855595097e-07, "logits/chosen": -2.8294339179992676, "logits/rejected": -2.8064193725585938, "logps/chosen": -0.9476372003555298, "logps/rejected": -1.2449183464050293, "loss": 0.7298, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.9476372003555298, "rewards/margins": 0.29728126525878906, "rewards/rejected": -1.2449183464050293, "step": 150 }, { "epoch": 0.3245223763412719, "grad_norm": 14.182562821077749, "learning_rate": 4.271000354423425e-07, "logits/chosen": -2.7843871116638184, "logits/rejected": -2.7540178298950195, "logps/chosen": -1.0450012683868408, "logps/rejected": -1.2890008687973022, "loss": 0.7419, "rewards/accuracies": 0.625, "rewards/chosen": -1.0450012683868408, "rewards/margins": 0.24399971961975098, "rewards/rejected": -1.2890008687973022, "step": 155 }, { "epoch": 0.33499084009421615, "grad_norm": 14.568182153638816, "learning_rate": 4.2052190435769554e-07, "logits/chosen": -2.7708890438079834, "logits/rejected": -2.7408313751220703, "logps/chosen": -1.0297720432281494, "logps/rejected": -1.3061676025390625, "loss": 0.746, "rewards/accuracies": 0.625, "rewards/chosen": -1.0297720432281494, "rewards/margins": 0.2763958275318146, "rewards/rejected": -1.3061676025390625, "step": 160 }, { "epoch": 0.34545930384716045, "grad_norm": 13.952356966322911, "learning_rate": 4.137151834863213e-07, "logits/chosen": -2.772400140762329, "logits/rejected": -2.6952953338623047, "logps/chosen": -0.9658434987068176, "logps/rejected": -1.2914047241210938, "loss": 0.7413, "rewards/accuracies": 0.625, "rewards/chosen": -0.9658434987068176, "rewards/margins": 0.32556113600730896, "rewards/rejected": -1.2914047241210938, "step": 165 }, { "epoch": 0.3559277676001047, "grad_norm": 13.933907932744528, "learning_rate": 4.0668899744407567e-07, "logits/chosen": -2.7683300971984863, "logits/rejected": -2.7026619911193848, "logps/chosen": -1.0619356632232666, "logps/rejected": -1.3435611724853516, "loss": 0.747, "rewards/accuracies": 0.625, "rewards/chosen": -1.0619356632232666, "rewards/margins": 0.2816254794597626, "rewards/rejected": -1.3435611724853516, "step": 170 }, { "epoch": 0.36639623135304894, "grad_norm": 12.172184718827962, "learning_rate": 3.994527650465352e-07, "logits/chosen": -2.762418270111084, "logits/rejected": -2.6775519847869873, "logps/chosen": -1.0495750904083252, "logps/rejected": -1.331768274307251, "loss": 0.755, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -1.0495750904083252, "rewards/margins": 0.2821928858757019, "rewards/rejected": -1.331768274307251, "step": 175 }, { "epoch": 0.3768646951059932, "grad_norm": 14.269164632788968, "learning_rate": 3.920161866827889e-07, "logits/chosen": -2.758896589279175, "logits/rejected": -2.742147922515869, "logps/chosen": -1.0251743793487549, "logps/rejected": -1.3645293712615967, "loss": 0.7455, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.0251743793487549, "rewards/margins": 0.3393550515174866, "rewards/rejected": -1.3645293712615967, "step": 180 }, { "epoch": 0.38733315885893743, "grad_norm": 12.197693303841156, "learning_rate": 3.8438923131177237e-07, "logits/chosen": -2.766483783721924, "logits/rejected": -2.678823471069336, "logps/chosen": -1.0455878973007202, "logps/rejected": -1.4074945449829102, "loss": 0.7358, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.0455878973007202, "rewards/margins": 0.36190664768218994, "rewards/rejected": -1.4074945449829102, "step": 185 }, { "epoch": 0.39780162261188173, "grad_norm": 16.35735790830286, "learning_rate": 3.765821230985757e-07, "logits/chosen": -2.7736458778381348, "logits/rejected": -2.7361159324645996, "logps/chosen": -1.034018635749817, "logps/rejected": -1.3103978633880615, "loss": 0.7575, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -1.034018635749817, "rewards/margins": 0.2763793170452118, "rewards/rejected": -1.3103978633880615, "step": 190 }, { "epoch": 0.408270086364826, "grad_norm": 14.111583126579113, "learning_rate": 3.6860532770864005e-07, "logits/chosen": -2.6895153522491455, "logits/rejected": -2.662205696105957, "logps/chosen": -1.0923384428024292, "logps/rejected": -1.3151925802230835, "loss": 0.7386, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.0923384428024292, "rewards/margins": 0.22285406291484833, "rewards/rejected": -1.3151925802230835, "step": 195 }, { "epoch": 0.4187385501177702, "grad_norm": 14.076396554619945, "learning_rate": 3.604695382782159e-07, "logits/chosen": -2.730325222015381, "logits/rejected": -2.7146542072296143, "logps/chosen": -1.0586004257202148, "logps/rejected": -1.3197696208953857, "loss": 0.7445, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.0586004257202148, "rewards/margins": 0.2611694037914276, "rewards/rejected": -1.3197696208953857, "step": 200 }, { "epoch": 0.42920701387071447, "grad_norm": 13.245710220481957, "learning_rate": 3.5218566107988867e-07, "logits/chosen": -2.6548683643341064, "logits/rejected": -2.587470054626465, "logps/chosen": -1.0069193840026855, "logps/rejected": -1.2861696481704712, "loss": 0.7362, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -1.0069193840026855, "rewards/margins": 0.27925023436546326, "rewards/rejected": -1.2861696481704712, "step": 205 }, { "epoch": 0.4396754776236587, "grad_norm": 14.845756718974505, "learning_rate": 3.4376480090239047e-07, "logits/chosen": -2.6589674949645996, "logits/rejected": -2.6112613677978516, "logps/chosen": -1.043101191520691, "logps/rejected": -1.2650586366653442, "loss": 0.7466, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -1.043101191520691, "rewards/margins": 0.22195737063884735, "rewards/rejected": -1.2650586366653442, "step": 210 }, { "epoch": 0.45014394137660296, "grad_norm": 11.043203195018044, "learning_rate": 3.3521824616429284e-07, "logits/chosen": -2.68113374710083, "logits/rejected": -2.6553902626037598, "logps/chosen": -0.9405063390731812, "logps/rejected": -1.3108428716659546, "loss": 0.7378, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.9405063390731812, "rewards/margins": 0.3703363537788391, "rewards/rejected": -1.3108428716659546, "step": 215 }, { "epoch": 0.46061240512954726, "grad_norm": 16.749924929142505, "learning_rate": 3.265574537815398e-07, "logits/chosen": -2.714653968811035, "logits/rejected": -2.6374471187591553, "logps/chosen": -1.0079431533813477, "logps/rejected": -1.37057626247406, "loss": 0.735, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -1.0079431533813477, "rewards/margins": 0.3626330494880676, "rewards/rejected": -1.37057626247406, "step": 220 }, { "epoch": 0.4710808688824915, "grad_norm": 14.441344062450174, "learning_rate": 3.1779403380910425e-07, "logits/chosen": -2.705112934112549, "logits/rejected": -2.5974721908569336, "logps/chosen": -0.9732646942138672, "logps/rejected": -1.3816461563110352, "loss": 0.7356, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.9732646942138672, "rewards/margins": 0.40838152170181274, "rewards/rejected": -1.3816461563110352, "step": 225 }, { "epoch": 0.48154933263543576, "grad_norm": 12.617003125685786, "learning_rate": 3.0893973387735683e-07, "logits/chosen": -2.7150943279266357, "logits/rejected": -2.6219162940979004, "logps/chosen": -0.9865689277648926, "logps/rejected": -1.4115015268325806, "loss": 0.7293, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.9865689277648926, "rewards/margins": 0.42493247985839844, "rewards/rejected": -1.4115015268325806, "step": 230 }, { "epoch": 0.49201779638838, "grad_norm": 15.76783159424914, "learning_rate": 3.000064234440111e-07, "logits/chosen": -2.7245564460754395, "logits/rejected": -2.667783260345459, "logps/chosen": -0.9892565608024597, "logps/rejected": -1.3223989009857178, "loss": 0.7336, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.9892565608024597, "rewards/margins": 0.33314234018325806, "rewards/rejected": -1.3223989009857178, "step": 235 }, { "epoch": 0.5024862601413242, "grad_norm": 12.168750234398605, "learning_rate": 2.910060778827554e-07, "logits/chosen": -2.669553756713867, "logits/rejected": -2.6276214122772217, "logps/chosen": -1.0081182718276978, "logps/rejected": -1.3357713222503662, "loss": 0.7291, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -1.0081182718276978, "rewards/margins": 0.32765328884124756, "rewards/rejected": -1.3357713222503662, "step": 240 }, { "epoch": 0.5129547238942685, "grad_norm": 10.888223535723034, "learning_rate": 2.8195076242990116e-07, "logits/chosen": -2.7039177417755127, "logits/rejected": -2.6095385551452637, "logps/chosen": -1.057512879371643, "logps/rejected": -1.33600652217865, "loss": 0.7378, "rewards/accuracies": 0.625, "rewards/chosen": -1.057512879371643, "rewards/margins": 0.2784937620162964, "rewards/rejected": -1.33600652217865, "step": 245 }, { "epoch": 0.5234231876472127, "grad_norm": 13.48556259525564, "learning_rate": 2.7285261601056697e-07, "logits/chosen": -2.603414535522461, "logits/rejected": -2.5493083000183105, "logps/chosen": -1.0165168046951294, "logps/rejected": -1.3705979585647583, "loss": 0.7501, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.0165168046951294, "rewards/margins": 0.35408109426498413, "rewards/rejected": -1.3705979585647583, "step": 250 }, { "epoch": 0.533891651400157, "grad_norm": 11.25238225575941, "learning_rate": 2.6372383496608186e-07, "logits/chosen": -2.643340587615967, "logits/rejected": -2.536959171295166, "logps/chosen": -1.0472781658172607, "logps/rejected": -1.4470490217208862, "loss": 0.721, "rewards/accuracies": 0.65625, "rewards/chosen": -1.0472781658172607, "rewards/margins": 0.39977091550827026, "rewards/rejected": -1.4470490217208862, "step": 255 }, { "epoch": 0.5443601151531012, "grad_norm": 11.565809494898701, "learning_rate": 2.5457665670441937e-07, "logits/chosen": -2.609039545059204, "logits/rejected": -2.502159595489502, "logps/chosen": -0.9671168327331543, "logps/rejected": -1.4019583463668823, "loss": 0.7349, "rewards/accuracies": 0.65625, "rewards/chosen": -0.9671168327331543, "rewards/margins": 0.4348415434360504, "rewards/rejected": -1.4019583463668823, "step": 260 }, { "epoch": 0.5548285789060455, "grad_norm": 12.453073847695425, "learning_rate": 2.454233432955807e-07, "logits/chosen": -2.649348735809326, "logits/rejected": -2.5835018157958984, "logps/chosen": -0.9769356846809387, "logps/rejected": -1.1988904476165771, "loss": 0.7297, "rewards/accuracies": 0.65625, "rewards/chosen": -0.9769356846809387, "rewards/margins": 0.22195477783679962, "rewards/rejected": -1.1988904476165771, "step": 265 }, { "epoch": 0.5652970426589898, "grad_norm": 14.98293097042728, "learning_rate": 2.3627616503391812e-07, "logits/chosen": -2.6596057415008545, "logits/rejected": -2.6029915809631348, "logps/chosen": -1.060955286026001, "logps/rejected": -1.3076387643814087, "loss": 0.7388, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -1.060955286026001, "rewards/margins": 0.24668343365192413, "rewards/rejected": -1.3076387643814087, "step": 270 }, { "epoch": 0.575765506411934, "grad_norm": 11.623532564015099, "learning_rate": 2.2714738398943308e-07, "logits/chosen": -2.7393107414245605, "logits/rejected": -2.627927303314209, "logps/chosen": -1.0123107433319092, "logps/rejected": -1.4212400913238525, "loss": 0.7318, "rewards/accuracies": 0.65625, "rewards/chosen": -1.0123107433319092, "rewards/margins": 0.4089292585849762, "rewards/rejected": -1.4212400913238525, "step": 275 }, { "epoch": 0.5862339701648783, "grad_norm": 12.263997829347408, "learning_rate": 2.1804923757009882e-07, "logits/chosen": -2.769882917404175, "logits/rejected": -2.697937488555908, "logps/chosen": -1.003604531288147, "logps/rejected": -1.2558648586273193, "loss": 0.7371, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -1.003604531288147, "rewards/margins": 0.2522605061531067, "rewards/rejected": -1.2558648586273193, "step": 280 }, { "epoch": 0.5967024339178225, "grad_norm": 11.924728394634029, "learning_rate": 2.089939221172446e-07, "logits/chosen": -2.7121453285217285, "logits/rejected": -2.7105841636657715, "logps/chosen": -1.0530743598937988, "logps/rejected": -1.3655306100845337, "loss": 0.7323, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.0530743598937988, "rewards/margins": 0.31245604157447815, "rewards/rejected": -1.3655306100845337, "step": 285 }, { "epoch": 0.6071708976707668, "grad_norm": 11.28678386558285, "learning_rate": 1.9999357655598891e-07, "logits/chosen": -2.758227825164795, "logits/rejected": -2.711308002471924, "logps/chosen": -1.0129145383834839, "logps/rejected": -1.386401891708374, "loss": 0.7258, "rewards/accuracies": 0.6875, "rewards/chosen": -1.0129145383834839, "rewards/margins": 0.3734874725341797, "rewards/rejected": -1.386401891708374, "step": 290 }, { "epoch": 0.6176393614237111, "grad_norm": 13.706657289324387, "learning_rate": 1.9106026612264315e-07, "logits/chosen": -2.7800045013427734, "logits/rejected": -2.690302610397339, "logps/chosen": -1.016311526298523, "logps/rejected": -1.3633009195327759, "loss": 0.7376, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -1.016311526298523, "rewards/margins": 0.34698933362960815, "rewards/rejected": -1.3633009195327759, "step": 295 }, { "epoch": 0.6281078251766553, "grad_norm": 13.79182727729131, "learning_rate": 1.8220596619089573e-07, "logits/chosen": -2.680269718170166, "logits/rejected": -2.629142999649048, "logps/chosen": -0.9930880665779114, "logps/rejected": -1.2967426776885986, "loss": 0.7295, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.9930880665779114, "rewards/margins": 0.30365443229675293, "rewards/rejected": -1.2967426776885986, "step": 300 }, { "epoch": 0.6385762889295996, "grad_norm": 14.457437233341306, "learning_rate": 1.7344254621846017e-07, "logits/chosen": -2.6520402431488037, "logits/rejected": -2.6048717498779297, "logps/chosen": -1.0168180465698242, "logps/rejected": -1.415621042251587, "loss": 0.7162, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.0168180465698242, "rewards/margins": 0.3988030254840851, "rewards/rejected": -1.415621042251587, "step": 305 }, { "epoch": 0.6490447526825438, "grad_norm": 12.793548808779951, "learning_rate": 1.647817538357072e-07, "logits/chosen": -2.6753201484680176, "logits/rejected": -2.6719937324523926, "logps/chosen": -0.9765653610229492, "logps/rejected": -1.3679265975952148, "loss": 0.7149, "rewards/accuracies": 0.6875, "rewards/chosen": -0.9765653610229492, "rewards/margins": 0.3913612961769104, "rewards/rejected": -1.3679265975952148, "step": 310 }, { "epoch": 0.6595132164354881, "grad_norm": 13.647697652174228, "learning_rate": 1.562351990976095e-07, "logits/chosen": -2.707498550415039, "logits/rejected": -2.652143955230713, "logps/chosen": -0.9738147854804993, "logps/rejected": -1.4322879314422607, "loss": 0.7255, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.9738147854804993, "rewards/margins": 0.4584731161594391, "rewards/rejected": -1.4322879314422607, "step": 315 }, { "epoch": 0.6699816801884323, "grad_norm": 15.43709393377155, "learning_rate": 1.478143389201113e-07, "logits/chosen": -2.699500322341919, "logits/rejected": -2.677785634994507, "logps/chosen": -1.0496845245361328, "logps/rejected": -1.3157514333724976, "loss": 0.7299, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -1.0496845245361328, "rewards/margins": 0.2660670876502991, "rewards/rejected": -1.3157514333724976, "step": 320 }, { "epoch": 0.6804501439413766, "grad_norm": 13.720816152844446, "learning_rate": 1.3953046172178413e-07, "logits/chosen": -2.720409631729126, "logits/rejected": -2.635681629180908, "logps/chosen": -1.0098615884780884, "logps/rejected": -1.4135617017745972, "loss": 0.7208, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.0098615884780884, "rewards/margins": 0.4037002921104431, "rewards/rejected": -1.4135617017745972, "step": 325 }, { "epoch": 0.6909186076943209, "grad_norm": 15.415691985906774, "learning_rate": 1.3139467229135998e-07, "logits/chosen": -2.691079616546631, "logits/rejected": -2.6406595706939697, "logps/chosen": -0.9770712852478027, "logps/rejected": -1.2339041233062744, "loss": 0.727, "rewards/accuracies": 0.625, "rewards/chosen": -0.9770712852478027, "rewards/margins": 0.2568328380584717, "rewards/rejected": -1.2339041233062744, "step": 330 }, { "epoch": 0.7013870714472651, "grad_norm": 13.11840303428364, "learning_rate": 1.2341787690142435e-07, "logits/chosen": -2.797006845474243, "logits/rejected": -2.7196357250213623, "logps/chosen": -0.968543529510498, "logps/rejected": -1.316467046737671, "loss": 0.7386, "rewards/accuracies": 0.6875, "rewards/chosen": -0.968543529510498, "rewards/margins": 0.3479234576225281, "rewards/rejected": -1.316467046737671, "step": 335 }, { "epoch": 0.7118555352002094, "grad_norm": 13.776626930672093, "learning_rate": 1.1561076868822755e-07, "logits/chosen": -2.7232143878936768, "logits/rejected": -2.646807909011841, "logps/chosen": -1.1491429805755615, "logps/rejected": -1.4109306335449219, "loss": 0.7332, "rewards/accuracies": 0.6875, "rewards/chosen": -1.1491429805755615, "rewards/margins": 0.26178762316703796, "rewards/rejected": -1.4109306335449219, "step": 340 }, { "epoch": 0.7223239989531536, "grad_norm": 31.3699616400003, "learning_rate": 1.0798381331721107e-07, "logits/chosen": -2.74650502204895, "logits/rejected": -2.7001473903656006, "logps/chosen": -1.0154855251312256, "logps/rejected": -1.3520514965057373, "loss": 0.7316, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -1.0154855251312256, "rewards/margins": 0.3365658223628998, "rewards/rejected": -1.3520514965057373, "step": 345 }, { "epoch": 0.7327924627060979, "grad_norm": 13.496272269070083, "learning_rate": 1.0054723495346482e-07, "logits/chosen": -2.704589366912842, "logits/rejected": -2.6791272163391113, "logps/chosen": -1.0462541580200195, "logps/rejected": -1.3184092044830322, "loss": 0.7317, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -1.0462541580200195, "rewards/margins": 0.2721550166606903, "rewards/rejected": -1.3184092044830322, "step": 350 }, { "epoch": 0.7432609264590422, "grad_norm": 14.124668961739028, "learning_rate": 9.331100255592436e-08, "logits/chosen": -2.708498477935791, "logits/rejected": -2.6446452140808105, "logps/chosen": -0.9414374232292175, "logps/rejected": -1.36155104637146, "loss": 0.7205, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.9414374232292175, "rewards/margins": 0.4201137125492096, "rewards/rejected": -1.36155104637146, "step": 355 }, { "epoch": 0.7537293902119864, "grad_norm": 13.457043890907538, "learning_rate": 8.628481651367875e-08, "logits/chosen": -2.7404441833496094, "logits/rejected": -2.67539644241333, "logps/chosen": -0.9412056803703308, "logps/rejected": -1.2558085918426514, "loss": 0.7175, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.9412056803703308, "rewards/margins": 0.3146027624607086, "rewards/rejected": -1.2558085918426514, "step": 360 }, { "epoch": 0.7641978539649307, "grad_norm": 15.132184147715591, "learning_rate": 7.947809564230445e-08, "logits/chosen": -2.6669039726257324, "logits/rejected": -2.5819497108459473, "logps/chosen": -1.0663950443267822, "logps/rejected": -1.4138139486312866, "loss": 0.733, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -1.0663950443267822, "rewards/margins": 0.3474189043045044, "rewards/rejected": -1.4138139486312866, "step": 365 }, { "epoch": 0.7746663177178749, "grad_norm": 14.332198948063045, "learning_rate": 7.289996455765748e-08, "logits/chosen": -2.723731517791748, "logits/rejected": -2.6572673320770264, "logps/chosen": -1.0250599384307861, "logps/rejected": -1.424739122390747, "loss": 0.7222, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.0250599384307861, "rewards/margins": 0.3996792137622833, "rewards/rejected": -1.424739122390747, "step": 370 }, { "epoch": 0.7851347814708192, "grad_norm": 14.596381600862914, "learning_rate": 6.655924144404906e-08, "logits/chosen": -2.6905336380004883, "logits/rejected": -2.7106149196624756, "logps/chosen": -1.0149867534637451, "logps/rejected": -1.3051953315734863, "loss": 0.7202, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.0149867534637451, "rewards/margins": 0.29020851850509644, "rewards/rejected": -1.3051953315734863, "step": 375 }, { "epoch": 0.7956032452237635, "grad_norm": 14.603719819531335, "learning_rate": 6.046442623320145e-08, "logits/chosen": -2.6890244483947754, "logits/rejected": -2.642037868499756, "logps/chosen": -1.0287978649139404, "logps/rejected": -1.3297768831253052, "loss": 0.7174, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.0287978649139404, "rewards/margins": 0.3009788990020752, "rewards/rejected": -1.3297768831253052, "step": 380 }, { "epoch": 0.8060717089767077, "grad_norm": 13.110971433594608, "learning_rate": 5.4623689209832484e-08, "logits/chosen": -2.711418628692627, "logits/rejected": -2.630286931991577, "logps/chosen": -0.9913849830627441, "logps/rejected": -1.3187940120697021, "loss": 0.7269, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.9913849830627441, "rewards/margins": 0.32740893959999084, "rewards/rejected": -1.3187940120697021, "step": 385 }, { "epoch": 0.816540172729652, "grad_norm": 13.54888816829319, "learning_rate": 4.904486005914027e-08, "logits/chosen": -2.7440168857574463, "logits/rejected": -2.6783242225646973, "logps/chosen": -0.9728143811225891, "logps/rejected": -1.401568055152893, "loss": 0.721, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.9728143811225891, "rewards/margins": 0.4287536144256592, "rewards/rejected": -1.401568055152893, "step": 390 }, { "epoch": 0.8270086364825961, "grad_norm": 14.140732605295996, "learning_rate": 4.373541737087263e-08, "logits/chosen": -2.678018569946289, "logits/rejected": -2.5986359119415283, "logps/chosen": -0.9800823330879211, "logps/rejected": -1.3363488912582397, "loss": 0.7204, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.9800823330879211, "rewards/margins": 0.35626673698425293, "rewards/rejected": -1.3363488912582397, "step": 395 }, { "epoch": 0.8374771002355405, "grad_norm": 15.5816112649507, "learning_rate": 3.8702478614051345e-08, "logits/chosen": -2.6373329162597656, "logits/rejected": -2.564797878265381, "logps/chosen": -0.9701173901557922, "logps/rejected": -1.2960200309753418, "loss": 0.7308, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.9701173901557922, "rewards/margins": 0.3259026110172272, "rewards/rejected": -1.2960200309753418, "step": 400 }, { "epoch": 0.8479455639884846, "grad_norm": 17.784476132865002, "learning_rate": 3.3952790595787986e-08, "logits/chosen": -2.7012085914611816, "logits/rejected": -2.6638095378875732, "logps/chosen": -1.0386940240859985, "logps/rejected": -1.3863780498504639, "loss": 0.7304, "rewards/accuracies": 0.625, "rewards/chosen": -1.0386940240859985, "rewards/margins": 0.3476840555667877, "rewards/rejected": -1.3863780498504639, "step": 405 }, { "epoch": 0.8584140277414289, "grad_norm": 15.06603652432609, "learning_rate": 2.9492720416985e-08, "logits/chosen": -2.7128243446350098, "logits/rejected": -2.665844678878784, "logps/chosen": -0.945576548576355, "logps/rejected": -1.2531321048736572, "loss": 0.7286, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.945576548576355, "rewards/margins": 0.30755552649497986, "rewards/rejected": -1.2531321048736572, "step": 410 }, { "epoch": 0.8688824914943732, "grad_norm": 14.428983993595216, "learning_rate": 2.5328246937043525e-08, "logits/chosen": -2.7359042167663574, "logits/rejected": -2.667357921600342, "logps/chosen": -0.9915645718574524, "logps/rejected": -1.3631106615066528, "loss": 0.7099, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.9915645718574524, "rewards/margins": 0.37154603004455566, "rewards/rejected": -1.3631106615066528, "step": 415 }, { "epoch": 0.8793509552473174, "grad_norm": 14.884029872245316, "learning_rate": 2.1464952759020856e-08, "logits/chosen": -2.6930131912231445, "logits/rejected": -2.6401591300964355, "logps/chosen": -0.9694199562072754, "logps/rejected": -1.325496792793274, "loss": 0.7333, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.9694199562072754, "rewards/margins": 0.3560766577720642, "rewards/rejected": -1.325496792793274, "step": 420 }, { "epoch": 0.8898194190002617, "grad_norm": 12.827005672064265, "learning_rate": 1.7908016745981856e-08, "logits/chosen": -2.656219482421875, "logits/rejected": -2.6254589557647705, "logps/chosen": -1.0963706970214844, "logps/rejected": -1.3683406114578247, "loss": 0.7122, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.0963706970214844, "rewards/margins": 0.27196991443634033, "rewards/rejected": -1.3683406114578247, "step": 425 }, { "epoch": 0.9002878827532059, "grad_norm": 15.198602172266689, "learning_rate": 1.4662207078575684e-08, "logits/chosen": -2.6591544151306152, "logits/rejected": -2.595102548599243, "logps/chosen": -1.0449292659759521, "logps/rejected": -1.3946082592010498, "loss": 0.7107, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -1.0449292659759521, "rewards/margins": 0.3496789336204529, "rewards/rejected": -1.3946082592010498, "step": 430 }, { "epoch": 0.9107563465061502, "grad_norm": 18.68711614034724, "learning_rate": 1.1731874863145142e-08, "logits/chosen": -2.7090446949005127, "logits/rejected": -2.670319080352783, "logps/chosen": -1.0166494846343994, "logps/rejected": -1.443265438079834, "loss": 0.7283, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -1.0166494846343994, "rewards/margins": 0.4266158938407898, "rewards/rejected": -1.443265438079834, "step": 435 }, { "epoch": 0.9212248102590945, "grad_norm": 15.09060995607935, "learning_rate": 9.12094829893642e-09, "logits/chosen": -2.65128755569458, "logits/rejected": -2.6307120323181152, "logps/chosen": -1.0401701927185059, "logps/rejected": -1.3651132583618164, "loss": 0.7233, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.0401701927185059, "rewards/margins": 0.32494306564331055, "rewards/rejected": -1.3651132583618164, "step": 440 }, { "epoch": 0.9316932740120387, "grad_norm": 15.446357779543456, "learning_rate": 6.832927412229017e-09, "logits/chosen": -2.7183310985565186, "logits/rejected": -2.6503405570983887, "logps/chosen": -1.0531015396118164, "logps/rejected": -1.3475028276443481, "loss": 0.7386, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -1.0531015396118164, "rewards/margins": 0.2944013178348541, "rewards/rejected": -1.3475028276443481, "step": 445 }, { "epoch": 0.942161737764983, "grad_norm": 13.258009995423004, "learning_rate": 4.8708793644441086e-09, "logits/chosen": -2.6066839694976807, "logits/rejected": -2.5763983726501465, "logps/chosen": -1.0170929431915283, "logps/rejected": -1.3344027996063232, "loss": 0.7111, "rewards/accuracies": 0.65625, "rewards/chosen": -1.0170929431915283, "rewards/margins": 0.3173098564147949, "rewards/rejected": -1.3344027996063232, "step": 450 }, { "epoch": 0.9526302015179272, "grad_norm": 15.54617819240941, "learning_rate": 3.2374343405217884e-09, "logits/chosen": -2.6832032203674316, "logits/rejected": -2.6095805168151855, "logps/chosen": -1.0661931037902832, "logps/rejected": -1.3039623498916626, "loss": 0.7273, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -1.0661931037902832, "rewards/margins": 0.23776927590370178, "rewards/rejected": -1.3039623498916626, "step": 455 }, { "epoch": 0.9630986652708715, "grad_norm": 14.258536237289112, "learning_rate": 1.9347820230782295e-09, "logits/chosen": -2.6628944873809814, "logits/rejected": -2.628173351287842, "logps/chosen": -0.9801522493362427, "logps/rejected": -1.3730151653289795, "loss": 0.7228, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.9801522493362427, "rewards/margins": 0.3928629755973816, "rewards/rejected": -1.3730151653289795, "step": 460 }, { "epoch": 0.9735671290238157, "grad_norm": 15.020648407629507, "learning_rate": 9.64668657069706e-10, "logits/chosen": -2.720750570297241, "logits/rejected": -2.642735004425049, "logps/chosen": -1.056501030921936, "logps/rejected": -1.426608681678772, "loss": 0.7332, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.056501030921936, "rewards/margins": 0.37010759115219116, "rewards/rejected": -1.426608681678772, "step": 465 }, { "epoch": 0.98403559277676, "grad_norm": 13.93963936089262, "learning_rate": 3.2839470889836627e-10, "logits/chosen": -2.701134443283081, "logits/rejected": -2.6342315673828125, "logps/chosen": -0.9712923169136047, "logps/rejected": -1.2896690368652344, "loss": 0.7383, "rewards/accuracies": 0.65625, "rewards/chosen": -0.9712923169136047, "rewards/margins": 0.318376749753952, "rewards/rejected": -1.2896690368652344, "step": 470 }, { "epoch": 0.9945040565297043, "grad_norm": 13.208264465733016, "learning_rate": 2.6813123097352287e-11, "logits/chosen": -2.7540841102600098, "logits/rejected": -2.649541139602661, "logps/chosen": -1.0263760089874268, "logps/rejected": -1.3381690979003906, "loss": 0.7172, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -1.0263760089874268, "rewards/margins": 0.31179291009902954, "rewards/rejected": -1.3381690979003906, "step": 475 }, { "epoch": 0.998691442030882, "step": 477, "total_flos": 0.0, "train_loss": 0.7450811588039438, "train_runtime": 7556.1724, "train_samples_per_second": 8.091, "train_steps_per_second": 0.063 } ], "logging_steps": 5, "max_steps": 477, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000000, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }