{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9987943737441393,
  "eval_steps": 400,
  "global_step": 466,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.010716677829872739,
      "grad_norm": 38.81959429763923,
      "learning_rate": 6.382978723404255e-08,
      "logits/chosen": -2.397952079772949,
      "logits/rejected": -2.391846179962158,
      "logps/chosen": -0.5666699409484863,
      "logps/rejected": -0.5553711652755737,
      "loss": 1.5469,
      "rewards/accuracies": 0.4375,
      "rewards/chosen": -0.5666699409484863,
      "rewards/margins": -0.01129874400794506,
      "rewards/rejected": -0.5553711652755737,
      "step": 5
    },
    {
      "epoch": 0.021433355659745478,
      "grad_norm": 17.957819802244767,
      "learning_rate": 1.276595744680851e-07,
      "logits/chosen": -2.402738571166992,
      "logits/rejected": -2.3730971813201904,
      "logps/chosen": -0.5517541766166687,
      "logps/rejected": -0.5785264372825623,
      "loss": 1.5538,
      "rewards/accuracies": 0.5249999761581421,
      "rewards/chosen": -0.5517541766166687,
      "rewards/margins": 0.026772266253829002,
      "rewards/rejected": -0.5785264372825623,
      "step": 10
    },
    {
      "epoch": 0.032150033489618215,
      "grad_norm": 17.06492283094742,
      "learning_rate": 1.9148936170212767e-07,
      "logits/chosen": -2.4437928199768066,
      "logits/rejected": -2.449697732925415,
      "logps/chosen": -0.5636163353919983,
      "logps/rejected": -0.5669411420822144,
      "loss": 1.5619,
      "rewards/accuracies": 0.5249999761581421,
      "rewards/chosen": -0.5636163353919983,
      "rewards/margins": 0.00332476943731308,
      "rewards/rejected": -0.5669411420822144,
      "step": 15
    },
    {
      "epoch": 0.042866711319490956,
      "grad_norm": 17.478232600769196,
      "learning_rate": 2.553191489361702e-07,
      "logits/chosen": -2.383941650390625,
      "logits/rejected": -2.3943183422088623,
      "logps/chosen": -0.5459321737289429,
      "logps/rejected": -0.5427771806716919,
      "loss": 1.5322,
      "rewards/accuracies": 0.5249999761581421,
      "rewards/chosen": -0.5459321737289429,
      "rewards/margins": -0.00315500283613801,
      "rewards/rejected": -0.5427771806716919,
      "step": 20
    },
    {
      "epoch": 0.0535833891493637,
      "grad_norm": 14.134950451452564,
      "learning_rate": 3.1914893617021275e-07,
      "logits/chosen": -2.2786340713500977,
      "logits/rejected": -2.2805464267730713,
      "logps/chosen": -0.5260549783706665,
      "logps/rejected": -0.5430394411087036,
      "loss": 1.5298,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": -0.5260549783706665,
      "rewards/margins": 0.016984451562166214,
      "rewards/rejected": -0.5430394411087036,
      "step": 25
    },
    {
      "epoch": 0.06430006697923643,
      "grad_norm": 19.57863908597214,
      "learning_rate": 3.8297872340425535e-07,
      "logits/chosen": -2.3897128105163574,
      "logits/rejected": -2.4030909538269043,
      "logps/chosen": -0.5465933680534363,
      "logps/rejected": -0.5372768640518188,
      "loss": 1.5509,
      "rewards/accuracies": 0.46875,
      "rewards/chosen": -0.5465933680534363,
      "rewards/margins": -0.009316539391875267,
      "rewards/rejected": -0.5372768640518188,
      "step": 30
    },
    {
      "epoch": 0.07501674480910918,
      "grad_norm": 24.218016837268095,
      "learning_rate": 4.4680851063829783e-07,
      "logits/chosen": -2.453273296356201,
      "logits/rejected": -2.424668788909912,
      "logps/chosen": -0.5341351628303528,
      "logps/rejected": -0.5890725255012512,
      "loss": 1.5479,
      "rewards/accuracies": 0.5249999761581421,
      "rewards/chosen": -0.5341351628303528,
      "rewards/margins": 0.054937295615673065,
      "rewards/rejected": -0.5890725255012512,
      "step": 35
    },
    {
      "epoch": 0.08573342263898191,
      "grad_norm": 20.81509422651472,
      "learning_rate": 5.106382978723404e-07,
      "logits/chosen": -2.3677382469177246,
      "logits/rejected": -2.3493103981018066,
      "logps/chosen": -0.565592885017395,
      "logps/rejected": -0.5375810861587524,
      "loss": 1.5573,
      "rewards/accuracies": 0.4312500059604645,
      "rewards/chosen": -0.565592885017395,
      "rewards/margins": -0.02801181748509407,
      "rewards/rejected": -0.5375810861587524,
      "step": 40
    },
    {
      "epoch": 0.09645010046885466,
      "grad_norm": 17.50647386551691,
      "learning_rate": 5.74468085106383e-07,
      "logits/chosen": -2.3343653678894043,
      "logits/rejected": -2.32906436920166,
      "logps/chosen": -0.5402032732963562,
      "logps/rejected": -0.5591766238212585,
      "loss": 1.5421,
      "rewards/accuracies": 0.5562499761581421,
      "rewards/chosen": -0.5402032732963562,
      "rewards/margins": 0.018973344936966896,
      "rewards/rejected": -0.5591766238212585,
      "step": 45
    },
    {
      "epoch": 0.1071667782987274,
      "grad_norm": 16.375037037224466,
      "learning_rate": 5.999241095449976e-07,
      "logits/chosen": -2.3641974925994873,
      "logits/rejected": -2.3596482276916504,
      "logps/chosen": -0.5401940941810608,
      "logps/rejected": -0.525315523147583,
      "loss": 1.5284,
      "rewards/accuracies": 0.45625001192092896,
      "rewards/chosen": -0.5401940941810608,
      "rewards/margins": -0.01487857848405838,
      "rewards/rejected": -0.525315523147583,
      "step": 50
    },
    {
      "epoch": 0.11788345612860013,
      "grad_norm": 26.218018133925373,
      "learning_rate": 5.994604735812144e-07,
      "logits/chosen": -2.4210665225982666,
      "logits/rejected": -2.424318790435791,
      "logps/chosen": -0.565641462802887,
      "logps/rejected": -0.5864871740341187,
      "loss": 1.5392,
      "rewards/accuracies": 0.543749988079071,
      "rewards/chosen": -0.565641462802887,
      "rewards/margins": 0.020845741033554077,
      "rewards/rejected": -0.5864871740341187,
      "step": 55
    },
    {
      "epoch": 0.12860013395847286,
      "grad_norm": 42.6456644243847,
      "learning_rate": 5.985760137627685e-07,
      "logits/chosen": -2.325913906097412,
      "logits/rejected": -2.3350510597229004,
      "logps/chosen": -0.4915548861026764,
      "logps/rejected": -0.5130532383918762,
      "loss": 1.5405,
      "rewards/accuracies": 0.5687500238418579,
      "rewards/chosen": -0.4915548861026764,
      "rewards/margins": 0.02149834856390953,
      "rewards/rejected": -0.5130532383918762,
      "step": 60
    },
    {
      "epoch": 0.13931681178834562,
      "grad_norm": 17.49728999516173,
      "learning_rate": 5.972719729975655e-07,
      "logits/chosen": -2.3687386512756348,
      "logits/rejected": -2.3732752799987793,
      "logps/chosen": -0.5264291167259216,
      "logps/rejected": -0.5606903433799744,
      "loss": 1.5441,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": -0.5264291167259216,
      "rewards/margins": 0.03426120802760124,
      "rewards/rejected": -0.5606903433799744,
      "step": 65
    },
    {
      "epoch": 0.15003348961821836,
      "grad_norm": 19.04975638080615,
      "learning_rate": 5.955501838194784e-07,
      "logits/chosen": -2.2692012786865234,
      "logits/rejected": -2.2734649181365967,
      "logps/chosen": -0.5329629778862,
      "logps/rejected": -0.585782527923584,
      "loss": 1.5335,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": -0.5329629778862,
      "rewards/margins": 0.05281956121325493,
      "rewards/rejected": -0.585782527923584,
      "step": 70
    },
    {
      "epoch": 0.1607501674480911,
      "grad_norm": 16.46150164067359,
      "learning_rate": 5.934130658131361e-07,
      "logits/chosen": -2.3084473609924316,
      "logits/rejected": -2.303145408630371,
      "logps/chosen": -0.4908691346645355,
      "logps/rejected": -0.5239783525466919,
      "loss": 1.5327,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": -0.4908691346645355,
      "rewards/margins": 0.03310922160744667,
      "rewards/rejected": -0.5239783525466919,
      "step": 75
    },
    {
      "epoch": 0.17146684527796383,
      "grad_norm": 21.613971984342516,
      "learning_rate": 5.908636222137454e-07,
      "logits/chosen": -2.291396141052246,
      "logits/rejected": -2.3133578300476074,
      "logps/chosen": -0.48883646726608276,
      "logps/rejected": -0.5628662109375,
      "loss": 1.5301,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -0.48883646726608276,
      "rewards/margins": 0.07402969151735306,
      "rewards/rejected": -0.5628662109375,
      "step": 80
    },
    {
      "epoch": 0.18218352310783656,
      "grad_norm": 22.2008339670987,
      "learning_rate": 5.879054356867243e-07,
      "logits/chosen": -2.328059673309326,
      "logits/rejected": -2.3216350078582764,
      "logps/chosen": -0.5081610679626465,
      "logps/rejected": -0.5643308162689209,
      "loss": 1.5335,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": -0.5081610679626465,
      "rewards/margins": 0.05616975575685501,
      "rewards/rejected": -0.5643308162689209,
      "step": 85
    },
    {
      "epoch": 0.19290020093770932,
      "grad_norm": 19.64419890416597,
      "learning_rate": 5.84542663293077e-07,
      "logits/chosen": -2.272433280944824,
      "logits/rejected": -2.2766337394714355,
      "logps/chosen": -0.5117042660713196,
      "logps/rejected": -0.558184802532196,
      "loss": 1.5348,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": -0.5117042660713196,
      "rewards/margins": 0.046480584889650345,
      "rewards/rejected": -0.558184802532196,
      "step": 90
    },
    {
      "epoch": 0.20361687876758205,
      "grad_norm": 17.22227696113398,
      "learning_rate": 5.807800306475876e-07,
      "logits/chosen": -2.3275113105773926,
      "logits/rejected": -2.3395214080810547,
      "logps/chosen": -0.6069667935371399,
      "logps/rejected": -0.6348728537559509,
      "loss": 1.5217,
      "rewards/accuracies": 0.550000011920929,
      "rewards/chosen": -0.6069667935371399,
      "rewards/margins": 0.027906125411391258,
      "rewards/rejected": -0.6348728537559509,
      "step": 95
    },
    {
      "epoch": 0.2143335565974548,
      "grad_norm": 24.396288528469587,
      "learning_rate": 5.766228252780373e-07,
      "logits/chosen": -2.368147373199463,
      "logits/rejected": -2.377194881439209,
      "logps/chosen": -0.5941327214241028,
      "logps/rejected": -0.6152836680412292,
      "loss": 1.5435,
      "rewards/accuracies": 0.5562499761581421,
      "rewards/chosen": -0.5941327214241028,
      "rewards/margins": 0.0211509857326746,
      "rewards/rejected": -0.6152836680412292,
      "step": 100
    },
    {
      "epoch": 0.22505023442732752,
      "grad_norm": 16.23048110894587,
      "learning_rate": 5.720768891947834e-07,
      "logits/chosen": -2.3831636905670166,
      "logits/rejected": -2.383808135986328,
      "logps/chosen": -0.5236924886703491,
      "logps/rejected": -0.5740348100662231,
      "loss": 1.517,
      "rewards/accuracies": 0.550000011920929,
      "rewards/chosen": -0.5236924886703491,
      "rewards/margins": 0.05034228041768074,
      "rewards/rejected": -0.5740348100662231,
      "step": 105
    },
    {
      "epoch": 0.23576691225720026,
      "grad_norm": 22.95053096013821,
      "learning_rate": 5.671486106811365e-07,
      "logits/chosen": -2.4293274879455566,
      "logits/rejected": -2.4386584758758545,
      "logps/chosen": -0.5232604146003723,
      "logps/rejected": -0.5748019218444824,
      "loss": 1.54,
      "rewards/accuracies": 0.643750011920929,
      "rewards/chosen": -0.5232604146003723,
      "rewards/margins": 0.051541589200496674,
      "rewards/rejected": -0.5748019218444824,
      "step": 110
    },
    {
      "epoch": 0.24648359008707302,
      "grad_norm": 30.43281833851228,
      "learning_rate": 5.618449153160763e-07,
      "logits/chosen": -2.521904945373535,
      "logits/rejected": -2.5165414810180664,
      "logps/chosen": -0.5098231434822083,
      "logps/rejected": -0.5272140502929688,
      "loss": 1.5652,
      "rewards/accuracies": 0.4937500059604645,
      "rewards/chosen": -0.5098231434822083,
      "rewards/margins": 0.01739095151424408,
      "rewards/rejected": -0.5272140502929688,
      "step": 115
    },
    {
      "epoch": 0.2572002679169457,
      "grad_norm": 18.218447276406668,
      "learning_rate": 5.56173256241918e-07,
      "logits/chosen": -2.6067259311676025,
      "logits/rejected": -2.594320774078369,
      "logps/chosen": -0.5535318851470947,
      "logps/rejected": -0.5515246987342834,
      "loss": 1.553,
      "rewards/accuracies": 0.5,
      "rewards/chosen": -0.5535318851470947,
      "rewards/margins": -0.002007170347496867,
      "rewards/rejected": -0.5515246987342834,
      "step": 120
    },
    {
      "epoch": 0.2679169457468185,
      "grad_norm": 19.047294066362046,
      "learning_rate": 5.501416036906106e-07,
      "logits/chosen": -2.599743366241455,
      "logits/rejected": -2.602095603942871,
      "logps/chosen": -0.5660097599029541,
      "logps/rejected": -0.5823434591293335,
      "loss": 1.5462,
      "rewards/accuracies": 0.5375000238418579,
      "rewards/chosen": -0.5660097599029541,
      "rewards/margins": 0.01633365824818611,
      "rewards/rejected": -0.5823434591293335,
      "step": 125
    },
    {
      "epoch": 0.27863362357669125,
      "grad_norm": 25.91707976400477,
      "learning_rate": 5.437584337833803e-07,
      "logits/chosen": -2.6727051734924316,
      "logits/rejected": -2.6547203063964844,
      "logps/chosen": -0.5484704375267029,
      "logps/rejected": -0.5791813731193542,
      "loss": 1.5217,
      "rewards/accuracies": 0.581250011920929,
      "rewards/chosen": -0.5484704375267029,
      "rewards/margins": 0.030710989609360695,
      "rewards/rejected": -0.5791813731193542,
      "step": 130
    },
    {
      "epoch": 0.289350301406564,
      "grad_norm": 24.930436449789845,
      "learning_rate": 5.370327166194635e-07,
      "logits/chosen": -2.6759390830993652,
      "logits/rejected": -2.688563346862793,
      "logps/chosen": -0.5326634049415588,
      "logps/rejected": -0.5721167325973511,
      "loss": 1.5358,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": -0.5326634049415588,
      "rewards/margins": 0.03945332020521164,
      "rewards/rejected": -0.5721167325973511,
      "step": 135
    },
    {
      "epoch": 0.3000669792364367,
      "grad_norm": 25.343122236521907,
      "learning_rate": 5.299739036706635e-07,
      "logits/chosen": -2.6917319297790527,
      "logits/rejected": -2.6787917613983154,
      "logps/chosen": -0.5328460931777954,
      "logps/rejected": -0.5668941736221313,
      "loss": 1.5162,
      "rewards/accuracies": 0.53125,
      "rewards/chosen": -0.5328460931777954,
      "rewards/margins": 0.034048013389110565,
      "rewards/rejected": -0.5668941736221313,
      "step": 140
    },
    {
      "epoch": 0.31078365706630945,
      "grad_norm": 20.469732566491704,
      "learning_rate": 5.225919144994487e-07,
      "logits/chosen": -2.7843101024627686,
      "logits/rejected": -2.7595150470733643,
      "logps/chosen": -0.5282408595085144,
      "logps/rejected": -0.5839791893959045,
      "loss": 1.5106,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": -0.5282408595085144,
      "rewards/margins": 0.05573834106326103,
      "rewards/rejected": -0.5839791893959045,
      "step": 145
    },
    {
      "epoch": 0.3215003348961822,
      "grad_norm": 22.910568604755916,
      "learning_rate": 5.148971228192543e-07,
      "logits/chosen": -2.769007921218872,
      "logits/rejected": -2.7604851722717285,
      "logps/chosen": -0.5120500326156616,
      "logps/rejected": -0.5518966913223267,
      "loss": 1.5387,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": -0.5120500326156616,
      "rewards/margins": 0.03984668105840683,
      "rewards/rejected": -0.5518966913223267,
      "step": 150
    },
    {
      "epoch": 0.3322170127260549,
      "grad_norm": 21.1001003359098,
      "learning_rate": 5.069003419165781e-07,
      "logits/chosen": -2.8498682975769043,
      "logits/rejected": -2.8591020107269287,
      "logps/chosen": -0.5589969754219055,
      "logps/rejected": -0.5984258651733398,
      "loss": 1.5176,
      "rewards/accuracies": 0.5687500238418579,
      "rewards/chosen": -0.5589969754219055,
      "rewards/margins": 0.039428871124982834,
      "rewards/rejected": -0.5984258651733398,
      "step": 155
    },
    {
      "epoch": 0.34293369055592765,
      "grad_norm": 26.259271435648458,
      "learning_rate": 4.986128094553569e-07,
      "logits/chosen": -2.8450496196746826,
      "logits/rejected": -2.8269691467285156,
      "logps/chosen": -0.5768808126449585,
      "logps/rejected": -0.6496576070785522,
      "loss": 1.5296,
      "rewards/accuracies": 0.543749988079071,
      "rewards/chosen": -0.5768808126449585,
      "rewards/margins": 0.07277677953243256,
      "rewards/rejected": -0.6496576070785522,
      "step": 160
    },
    {
      "epoch": 0.3536503683858004,
      "grad_norm": 22.95727659422549,
      "learning_rate": 4.900461716849745e-07,
      "logits/chosen": -2.9189038276672363,
      "logits/rejected": -2.8916220664978027,
      "logps/chosen": -0.5302075147628784,
      "logps/rejected": -0.5729304552078247,
      "loss": 1.5205,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": -0.5302075147628784,
      "rewards/margins": 0.042722832411527634,
      "rewards/rejected": -0.5729304552078247,
      "step": 165
    },
    {
      "epoch": 0.3643670462156731,
      "grad_norm": 17.76635841368691,
      "learning_rate": 4.812124670740974e-07,
      "logits/chosen": -2.9066505432128906,
      "logits/rejected": -2.910203456878662,
      "logps/chosen": -0.5274362564086914,
      "logps/rejected": -0.6013033986091614,
      "loss": 1.5096,
      "rewards/accuracies": 0.59375,
      "rewards/chosen": -0.5274362564086914,
      "rewards/margins": 0.0738670751452446,
      "rewards/rejected": -0.6013033986091614,
      "step": 170
    },
    {
      "epoch": 0.3750837240455459,
      "grad_norm": 22.708920984772448,
      "learning_rate": 4.7212410939333393e-07,
      "logits/chosen": -2.9743309020996094,
      "logits/rejected": -2.9548959732055664,
      "logps/chosen": -0.5641797780990601,
      "logps/rejected": -0.594096302986145,
      "loss": 1.5387,
      "rewards/accuracies": 0.606249988079071,
      "rewards/chosen": -0.5641797780990601,
      "rewards/margins": 0.029916446655988693,
      "rewards/rejected": -0.594096302986145,
      "step": 175
    },
    {
      "epoch": 0.38580040187541864,
      "grad_norm": 22.445712735196388,
      "learning_rate": 4.6279387027049207e-07,
      "logits/chosen": -3.0900559425354004,
      "logits/rejected": -3.091893434524536,
      "logps/chosen": -0.5932881236076355,
      "logps/rejected": -0.6334934234619141,
      "loss": 1.5474,
      "rewards/accuracies": 0.5375000238418579,
      "rewards/chosen": -0.5932881236076355,
      "rewards/margins": 0.040205273777246475,
      "rewards/rejected": -0.6334934234619141,
      "step": 180
    },
    {
      "epoch": 0.3965170797052914,
      "grad_norm": 30.045592149378802,
      "learning_rate": 4.5323486124294974e-07,
      "logits/chosen": -3.1286568641662598,
      "logits/rejected": -3.1514105796813965,
      "logps/chosen": -0.5893043279647827,
      "logps/rejected": -0.6178286075592041,
      "loss": 1.52,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": -0.5893043279647827,
      "rewards/margins": 0.02852421998977661,
      "rewards/rejected": -0.6178286075592041,
      "step": 185
    },
    {
      "epoch": 0.4072337575351641,
      "grad_norm": 19.66079670307951,
      "learning_rate": 4.434605153323596e-07,
      "logits/chosen": -3.0138182640075684,
      "logits/rejected": -3.027487277984619,
      "logps/chosen": -0.5490652322769165,
      "logps/rejected": -0.7248018383979797,
      "loss": 1.5211,
      "rewards/accuracies": 0.6812499761581421,
      "rewards/chosen": -0.5490652322769165,
      "rewards/margins": 0.17573660612106323,
      "rewards/rejected": -0.7248018383979797,
      "step": 190
    },
    {
      "epoch": 0.41795043536503684,
      "grad_norm": 23.76319559347926,
      "learning_rate": 4.334845681675802e-07,
      "logits/chosen": -3.263240098953247,
      "logits/rejected": -3.234492063522339,
      "logps/chosen": -0.523744523525238,
      "logps/rejected": -0.5552490949630737,
      "loss": 1.5291,
      "rewards/accuracies": 0.5062500238418579,
      "rewards/chosen": -0.523744523525238,
      "rewards/margins": 0.03150450438261032,
      "rewards/rejected": -0.5552490949630737,
      "step": 195
    },
    {
      "epoch": 0.4286671131949096,
      "grad_norm": 22.67820361436796,
      "learning_rate": 4.233210386823613e-07,
      "logits/chosen": -3.1373372077941895,
      "logits/rejected": -3.1572506427764893,
      "logps/chosen": -0.49712926149368286,
      "logps/rejected": -0.5306284427642822,
      "loss": 1.5161,
      "rewards/accuracies": 0.5062500238418579,
      "rewards/chosen": -0.49712926149368286,
      "rewards/margins": 0.033499158918857574,
      "rewards/rejected": -0.5306284427642822,
      "step": 200
    },
    {
      "epoch": 0.4393837910247823,
      "grad_norm": 26.07083589291384,
      "learning_rate": 4.129842094149083e-07,
      "logits/chosen": -3.277681827545166,
      "logits/rejected": -3.2762560844421387,
      "logps/chosen": -0.4885168969631195,
      "logps/rejected": -0.5218795537948608,
      "loss": 1.5331,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": -0.4885168969631195,
      "rewards/margins": 0.03336270898580551,
      "rewards/rejected": -0.5218795537948608,
      "step": 205
    },
    {
      "epoch": 0.45010046885465504,
      "grad_norm": 21.312251857603005,
      "learning_rate": 4.024886064370107e-07,
      "logits/chosen": -3.2972412109375,
      "logits/rejected": -3.2872118949890137,
      "logps/chosen": -0.509524941444397,
      "logps/rejected": -0.5547453761100769,
      "loss": 1.5062,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": -0.509524941444397,
      "rewards/margins": 0.04522045701742172,
      "rewards/rejected": -0.5547453761100769,
      "step": 210
    },
    {
      "epoch": 0.4608171466845278,
      "grad_norm": 22.199858140310962,
      "learning_rate": 3.9184897894093836e-07,
      "logits/chosen": -3.3184287548065186,
      "logits/rejected": -3.3149967193603516,
      "logps/chosen": -0.5457042455673218,
      "logps/rejected": -0.5951209664344788,
      "loss": 1.4947,
      "rewards/accuracies": 0.5375000238418579,
      "rewards/chosen": -0.5457042455673218,
      "rewards/margins": 0.049416683614254,
      "rewards/rejected": -0.5951209664344788,
      "step": 215
    },
    {
      "epoch": 0.4715338245144005,
      "grad_norm": 26.637536554337164,
      "learning_rate": 3.8108027851279425e-07,
      "logits/chosen": -3.3705334663391113,
      "logits/rejected": -3.348128080368042,
      "logps/chosen": -0.5251081585884094,
      "logps/rejected": -0.6058255434036255,
      "loss": 1.533,
      "rewards/accuracies": 0.606249988079071,
      "rewards/chosen": -0.5251081585884094,
      "rewards/margins": 0.08071742951869965,
      "rewards/rejected": -0.6058255434036255,
      "step": 220
    },
    {
      "epoch": 0.4822505023442733,
      "grad_norm": 20.985181328903547,
      "learning_rate": 3.701976381214462e-07,
      "logits/chosen": -3.441849946975708,
      "logits/rejected": -3.436166286468506,
      "logps/chosen": -0.5670623183250427,
      "logps/rejected": -0.6455426812171936,
      "loss": 1.5056,
      "rewards/accuracies": 0.5562499761581421,
      "rewards/chosen": -0.5670623183250427,
      "rewards/margins": 0.0784803032875061,
      "rewards/rejected": -0.6455426812171936,
      "step": 225
    },
    {
      "epoch": 0.49296718017414604,
      "grad_norm": 26.37006096745831,
      "learning_rate": 3.5921635085256784e-07,
      "logits/chosen": -3.3313636779785156,
      "logits/rejected": -3.316943407058716,
      "logps/chosen": -0.5642744898796082,
      "logps/rejected": -0.6056590676307678,
      "loss": 1.4952,
      "rewards/accuracies": 0.5562499761581421,
      "rewards/chosen": -0.5642744898796082,
      "rewards/margins": 0.041384514421224594,
      "rewards/rejected": -0.6056590676307678,
      "step": 230
    },
    {
      "epoch": 0.5036838580040187,
      "grad_norm": 21.351561473671772,
      "learning_rate": 3.4815184841767167e-07,
      "logits/chosen": -3.3021767139434814,
      "logits/rejected": -3.2890796661376953,
      "logps/chosen": -0.5324856042861938,
      "logps/rejected": -0.6350933313369751,
      "loss": 1.5016,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": -0.5324856042861938,
      "rewards/margins": 0.10260789096355438,
      "rewards/rejected": -0.6350933313369751,
      "step": 235
    },
    {
      "epoch": 0.5144005358338914,
      "grad_norm": 25.25074061630777,
      "learning_rate": 3.3701967946833387e-07,
      "logits/chosen": -3.2254951000213623,
      "logits/rejected": -3.232588529586792,
      "logps/chosen": -0.5533393621444702,
      "logps/rejected": -0.6351491212844849,
      "loss": 1.5163,
      "rewards/accuracies": 0.606249988079071,
      "rewards/chosen": -0.5533393621444702,
      "rewards/margins": 0.08180973678827286,
      "rewards/rejected": -0.6351491212844849,
      "step": 240
    },
    {
      "epoch": 0.5251172136637642,
      "grad_norm": 25.884047164441235,
      "learning_rate": 3.258354877460875e-07,
      "logits/chosen": -3.2459404468536377,
      "logits/rejected": -3.2240214347839355,
      "logps/chosen": -0.5927519202232361,
      "logps/rejected": -0.6273369193077087,
      "loss": 1.5018,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": -0.5927519202232361,
      "rewards/margins": 0.03458496928215027,
      "rewards/rejected": -0.6273369193077087,
      "step": 245
    },
    {
      "epoch": 0.535833891493637,
      "grad_norm": 24.128535335441807,
      "learning_rate": 3.1461499009868705e-07,
      "logits/chosen": -3.229731321334839,
      "logits/rejected": -3.2193870544433594,
      "logps/chosen": -0.6217538118362427,
      "logps/rejected": -0.6148039102554321,
      "loss": 1.5303,
      "rewards/accuracies": 0.5874999761581421,
      "rewards/chosen": -0.6217538118362427,
      "rewards/margins": -0.006949885282665491,
      "rewards/rejected": -0.6148039102554321,
      "step": 250
    },
    {
      "epoch": 0.5465505693235098,
      "grad_norm": 22.132762654520068,
      "learning_rate": 3.033739543936404e-07,
      "logits/chosen": -3.251239776611328,
      "logits/rejected": -3.251615047454834,
      "logps/chosen": -0.5319584608078003,
      "logps/rejected": -0.6038156747817993,
      "loss": 1.4956,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": -0.5319584608078003,
      "rewards/margins": 0.0718572586774826,
      "rewards/rejected": -0.6038156747817993,
      "step": 255
    },
    {
      "epoch": 0.5572672471533825,
      "grad_norm": 20.467314286083344,
      "learning_rate": 2.921281773600424e-07,
      "logits/chosen": -3.172785520553589,
      "logits/rejected": -3.191011905670166,
      "logps/chosen": -0.5374451875686646,
      "logps/rejected": -0.6392644047737122,
      "loss": 1.5245,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": -0.5374451875686646,
      "rewards/margins": 0.10181926190853119,
      "rewards/rejected": -0.6392644047737122,
      "step": 260
    },
    {
      "epoch": 0.5679839249832552,
      "grad_norm": 21.60647847703085,
      "learning_rate": 2.808934623898511e-07,
      "logits/chosen": -3.1863551139831543,
      "logits/rejected": -3.1833932399749756,
      "logps/chosen": -0.5692937970161438,
      "logps/rejected": -0.6735215783119202,
      "loss": 1.4938,
      "rewards/accuracies": 0.5874999761581421,
      "rewards/chosen": -0.5692937970161438,
      "rewards/margins": 0.10422778129577637,
      "rewards/rejected": -0.6735215783119202,
      "step": 265
    },
    {
      "epoch": 0.578700602813128,
      "grad_norm": 25.07152854176611,
      "learning_rate": 2.696855973298007e-07,
      "logits/chosen": -3.1901869773864746,
      "logits/rejected": -3.183385133743286,
      "logps/chosen": -0.536370575428009,
      "logps/rejected": -0.5907222628593445,
      "loss": 1.5085,
      "rewards/accuracies": 0.5687500238418579,
      "rewards/chosen": -0.536370575428009,
      "rewards/margins": 0.05435168743133545,
      "rewards/rejected": -0.5907222628593445,
      "step": 270
    },
    {
      "epoch": 0.5894172806430007,
      "grad_norm": 18.910155061928734,
      "learning_rate": 2.585203322951589e-07,
      "logits/chosen": -3.274017810821533,
      "logits/rejected": -3.2836010456085205,
      "logps/chosen": -0.4976142942905426,
      "logps/rejected": -0.5548876523971558,
      "loss": 1.5118,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": -0.4976142942905426,
      "rewards/margins": 0.05727345868945122,
      "rewards/rejected": -0.5548876523971558,
      "step": 275
    },
    {
      "epoch": 0.6001339584728734,
      "grad_norm": 21.74570057366724,
      "learning_rate": 2.47413357536509e-07,
      "logits/chosen": -3.2025809288024902,
      "logits/rejected": -3.1752185821533203,
      "logps/chosen": -0.569342851638794,
      "logps/rejected": -0.623855471611023,
      "loss": 1.5032,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": -0.569342851638794,
      "rewards/margins": 0.054512638598680496,
      "rewards/rejected": -0.623855471611023,
      "step": 280
    },
    {
      "epoch": 0.6108506363027462,
      "grad_norm": 25.233010095872693,
      "learning_rate": 2.3638028139065624e-07,
      "logits/chosen": -3.230616331100464,
      "logits/rejected": -3.2365059852600098,
      "logps/chosen": -0.5778087973594666,
      "logps/rejected": -0.5905576944351196,
      "loss": 1.521,
      "rewards/accuracies": 0.5375000238418579,
      "rewards/chosen": -0.5778087973594666,
      "rewards/margins": 0.012748857028782368,
      "rewards/rejected": -0.5905576944351196,
      "step": 285
    },
    {
      "epoch": 0.6215673141326189,
      "grad_norm": 52.99096570461333,
      "learning_rate": 2.2543660834664724e-07,
      "logits/chosen": -3.3016669750213623,
      "logits/rejected": -3.28556489944458,
      "logps/chosen": -0.5023082494735718,
      "logps/rejected": -0.5799704790115356,
      "loss": 1.496,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": -0.5023082494735718,
      "rewards/margins": 0.07766219973564148,
      "rewards/rejected": -0.5799704790115356,
      "step": 290
    },
    {
      "epoch": 0.6322839919624916,
      "grad_norm": 22.86414304018135,
      "learning_rate": 2.1459771725772267e-07,
      "logits/chosen": -3.2204766273498535,
      "logits/rejected": -3.243717908859253,
      "logps/chosen": -0.5589109063148499,
      "logps/rejected": -0.6346697807312012,
      "loss": 1.4948,
      "rewards/accuracies": 0.59375,
      "rewards/chosen": -0.5589109063148499,
      "rewards/margins": 0.07575888931751251,
      "rewards/rejected": -0.6346697807312012,
      "step": 295
    },
    {
      "epoch": 0.6430006697923644,
      "grad_norm": 24.245750925813,
      "learning_rate": 2.0387883972982259e-07,
      "logits/chosen": -3.3650691509246826,
      "logits/rejected": -3.367690324783325,
      "logps/chosen": -0.5385848879814148,
      "logps/rejected": -0.6212387681007385,
      "loss": 1.4876,
      "rewards/accuracies": 0.5874999761581421,
      "rewards/chosen": -0.5385848879814148,
      "rewards/margins": 0.08265385776758194,
      "rewards/rejected": -0.6212387681007385,
      "step": 300
    },
    {
      "epoch": 0.6537173476222371,
      "grad_norm": 23.847848751632885,
      "learning_rate": 1.9329503871701592e-07,
      "logits/chosen": -3.309741973876953,
      "logits/rejected": -3.281573534011841,
      "logps/chosen": -0.5459524989128113,
      "logps/rejected": -0.5997665524482727,
      "loss": 1.497,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -0.5459524989128113,
      "rewards/margins": 0.05381406098604202,
      "rewards/rejected": -0.5997665524482727,
      "step": 305
    },
    {
      "epoch": 0.6644340254521098,
      "grad_norm": 27.809403889861738,
      "learning_rate": 1.8286118735393015e-07,
      "logits/chosen": -3.3402085304260254,
      "logits/rejected": -3.3407912254333496,
      "logps/chosen": -0.5383692979812622,
      "logps/rejected": -0.5971530079841614,
      "loss": 1.4859,
      "rewards/accuracies": 0.6187499761581421,
      "rewards/chosen": -0.5383692979812622,
      "rewards/margins": 0.058783747255802155,
      "rewards/rejected": -0.5971530079841614,
      "step": 310
    },
    {
      "epoch": 0.6751507032819826,
      "grad_norm": 20.70533413800174,
      "learning_rate": 1.7259194805493042e-07,
      "logits/chosen": -3.2731971740722656,
      "logits/rejected": -3.2823867797851562,
      "logps/chosen": -0.5446811318397522,
      "logps/rejected": -0.606468915939331,
      "loss": 1.4954,
      "rewards/accuracies": 0.6187499761581421,
      "rewards/chosen": -0.5446811318397522,
      "rewards/margins": 0.06178779527544975,
      "rewards/rejected": -0.606468915939331,
      "step": 315
    },
    {
      "epoch": 0.6858673811118553,
      "grad_norm": 23.184720457564282,
      "learning_rate": 1.6250175190941725e-07,
      "logits/chosen": -3.3182265758514404,
      "logits/rejected": -3.2908051013946533,
      "logps/chosen": -0.5561486482620239,
      "logps/rejected": -0.5695281028747559,
      "loss": 1.5159,
      "rewards/accuracies": 0.6187499761581421,
      "rewards/chosen": -0.5561486482620239,
      "rewards/margins": 0.0133795365691185,
      "rewards/rejected": -0.5695281028747559,
      "step": 320
    },
    {
      "epoch": 0.696584058941728,
      "grad_norm": 27.421134529600376,
      "learning_rate": 1.5260477840220057e-07,
      "logits/chosen": -3.309216260910034,
      "logits/rejected": -3.318588972091675,
      "logps/chosen": -0.5043013095855713,
      "logps/rejected": -0.5974953770637512,
      "loss": 1.4727,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": -0.5043013095855713,
      "rewards/margins": 0.09319403767585754,
      "rewards/rejected": -0.5974953770637512,
      "step": 325
    },
    {
      "epoch": 0.7073007367716008,
      "grad_norm": 24.907218358327214,
      "learning_rate": 1.4291493548744542e-07,
      "logits/chosen": -3.2981224060058594,
      "logits/rejected": -3.269383668899536,
      "logps/chosen": -0.5354940295219421,
      "logps/rejected": -0.5932218432426453,
      "loss": 1.492,
      "rewards/accuracies": 0.5874999761581421,
      "rewards/chosen": -0.5354940295219421,
      "rewards/margins": 0.05772777646780014,
      "rewards/rejected": -0.5932218432426453,
      "step": 330
    },
    {
      "epoch": 0.7180174146014735,
      "grad_norm": 40.51550261895159,
      "learning_rate": 1.334458400441933e-07,
      "logits/chosen": -3.3821797370910645,
      "logits/rejected": -3.373931884765625,
      "logps/chosen": -0.5368712544441223,
      "logps/rejected": -0.6348738670349121,
      "loss": 1.5027,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": -0.5368712544441223,
      "rewards/margins": 0.09800264984369278,
      "rewards/rejected": -0.6348738670349121,
      "step": 335
    },
    {
      "epoch": 0.7287340924313462,
      "grad_norm": 25.305322827743687,
      "learning_rate": 1.2421079874092336e-07,
      "logits/chosen": -3.311006546020508,
      "logits/rejected": -3.2969226837158203,
      "logps/chosen": -0.5727181434631348,
      "logps/rejected": -0.6479278802871704,
      "loss": 1.516,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": -0.5727181434631348,
      "rewards/margins": 0.07520972192287445,
      "rewards/rejected": -0.6479278802871704,
      "step": 340
    },
    {
      "epoch": 0.739450770261219,
      "grad_norm": 22.209687488331966,
      "learning_rate": 1.1522278933604484e-07,
      "logits/chosen": -3.3381361961364746,
      "logits/rejected": -3.3484432697296143,
      "logps/chosen": -0.5813694596290588,
      "logps/rejected": -0.6797500848770142,
      "loss": 1.4932,
      "rewards/accuracies": 0.5874999761581421,
      "rewards/chosen": -0.5813694596290588,
      "rewards/margins": 0.09838052839040756,
      "rewards/rejected": -0.6797500848770142,
      "step": 345
    },
    {
      "epoch": 0.7501674480910918,
      "grad_norm": 22.960003013518822,
      "learning_rate": 1.0649444244059717e-07,
      "logits/chosen": -3.32041597366333,
      "logits/rejected": -3.3459019660949707,
      "logps/chosen": -0.5482354164123535,
      "logps/rejected": -0.6205809712409973,
      "loss": 1.4932,
      "rewards/accuracies": 0.550000011920929,
      "rewards/chosen": -0.5482354164123535,
      "rewards/margins": 0.07234560698270798,
      "rewards/rejected": -0.6205809712409973,
      "step": 350
    },
    {
      "epoch": 0.7608841259209645,
      "grad_norm": 25.349782143414405,
      "learning_rate": 9.803802376878795e-08,
      "logits/chosen": -3.3137733936309814,
      "logits/rejected": -3.3020172119140625,
      "logps/chosen": -0.5893365740776062,
      "logps/rejected": -0.6166855096817017,
      "loss": 1.4957,
      "rewards/accuracies": 0.5062500238418579,
      "rewards/chosen": -0.5893365740776062,
      "rewards/margins": 0.02734885737299919,
      "rewards/rejected": -0.6166855096817017,
      "step": 355
    },
    {
      "epoch": 0.7716008037508373,
      "grad_norm": 22.556111949917664,
      "learning_rate": 8.98654169013098e-08,
      "logits/chosen": -3.3032424449920654,
      "logits/rejected": -3.288992404937744,
      "logps/chosen": -0.521614670753479,
      "logps/rejected": -0.5875999331474304,
      "loss": 1.5004,
      "rewards/accuracies": 0.59375,
      "rewards/chosen": -0.521614670753479,
      "rewards/margins": 0.06598522514104843,
      "rewards/rejected": -0.5875999331474304,
      "step": 360
    },
    {
      "epoch": 0.78231748158071,
      "grad_norm": 23.52734286974739,
      "learning_rate": 8.198810658566058e-08,
      "logits/chosen": -3.3537094593048096,
      "logits/rejected": -3.348142147064209,
      "logps/chosen": -0.5639868974685669,
      "logps/rejected": -0.6276763677597046,
      "loss": 1.5179,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -0.5639868974685669,
      "rewards/margins": 0.06368952244520187,
      "rewards/rejected": -0.6276763677597046,
      "step": 365
    },
    {
      "epoch": 0.7930341594105828,
      "grad_norm": 35.138524177158146,
      "learning_rate": 7.441716259693182e-08,
      "logits/chosen": -3.3430557250976562,
      "logits/rejected": -3.365880250930786,
      "logps/chosen": -0.5710283517837524,
      "logps/rejected": -0.6515873670578003,
      "loss": 1.5075,
      "rewards/accuracies": 0.606249988079071,
      "rewards/chosen": -0.5710283517837524,
      "rewards/margins": 0.08055897057056427,
      "rewards/rejected": -0.6515873670578003,
      "step": 370
    },
    {
      "epoch": 0.8037508372404555,
      "grad_norm": 23.46216038728244,
      "learning_rate": 6.716322418174835e-08,
      "logits/chosen": -3.309415102005005,
      "logits/rejected": -3.2904000282287598,
      "logps/chosen": -0.6181533336639404,
      "logps/rejected": -0.7165501713752747,
      "loss": 1.4916,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": -0.6181533336639404,
      "rewards/margins": 0.09839687496423721,
      "rewards/rejected": -0.7165501713752747,
      "step": 375
    },
    {
      "epoch": 0.8144675150703282,
      "grad_norm": 26.38220306063447,
      "learning_rate": 6.023648510721696e-08,
      "logits/chosen": -3.391897678375244,
      "logits/rejected": -3.358309268951416,
      "logps/chosen": -0.5505380630493164,
      "logps/rejected": -0.6369754076004028,
      "loss": 1.5034,
      "rewards/accuracies": 0.5687500238418579,
      "rewards/chosen": -0.5505380630493164,
      "rewards/margins": 0.08643738180398941,
      "rewards/rejected": -0.6369754076004028,
      "step": 380
    },
    {
      "epoch": 0.825184192900201,
      "grad_norm": 24.084433663112158,
      "learning_rate": 5.364667933589596e-08,
      "logits/chosen": -3.2913315296173096,
      "logits/rejected": -3.3047938346862793,
      "logps/chosen": -0.5729898810386658,
      "logps/rejected": -0.6550789475440979,
      "loss": 1.4915,
      "rewards/accuracies": 0.606249988079071,
      "rewards/chosen": -0.5729898810386658,
      "rewards/margins": 0.08208902180194855,
      "rewards/rejected": -0.6550789475440979,
      "step": 385
    },
    {
      "epoch": 0.8359008707300737,
      "grad_norm": 25.406153364539314,
      "learning_rate": 4.74030673469165e-08,
      "logits/chosen": -3.3330624103546143,
      "logits/rejected": -3.293489933013916,
      "logps/chosen": -0.613795280456543,
      "logps/rejected": -0.6435045003890991,
      "loss": 1.5266,
      "rewards/accuracies": 0.5874999761581421,
      "rewards/chosen": -0.613795280456543,
      "rewards/margins": 0.029709184542298317,
      "rewards/rejected": -0.6435045003890991,
      "step": 390
    },
    {
      "epoch": 0.8466175485599464,
      "grad_norm": 24.427651638617984,
      "learning_rate": 4.1514423122476606e-08,
      "logits/chosen": -3.3390536308288574,
      "logits/rejected": -3.3218231201171875,
      "logps/chosen": -0.5534718632698059,
      "logps/rejected": -0.600857138633728,
      "loss": 1.507,
      "rewards/accuracies": 0.6187499761581421,
      "rewards/chosen": -0.5534718632698059,
      "rewards/margins": 0.04738527163863182,
      "rewards/rejected": -0.600857138633728,
      "step": 395
    },
    {
      "epoch": 0.8573342263898192,
      "grad_norm": 26.673472503173954,
      "learning_rate": 3.598902181799717e-08,
      "logits/chosen": -3.298213481903076,
      "logits/rejected": -3.2662785053253174,
      "logps/chosen": -0.5154682397842407,
      "logps/rejected": -0.6383693814277649,
      "loss": 1.4828,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": -0.5154682397842407,
      "rewards/margins": 0.12290115654468536,
      "rewards/rejected": -0.6383693814277649,
      "step": 400
    },
    {
      "epoch": 0.8573342263898192,
      "eval_logits/chosen": -3.409546136856079,
      "eval_logits/rejected": -3.403578758239746,
      "eval_logps/chosen": -0.5692862868309021,
      "eval_logps/rejected": -0.6257904171943665,
      "eval_loss": 1.5201373100280762,
      "eval_rewards/accuracies": 0.5691489577293396,
      "eval_rewards/chosen": -0.5692862868309021,
      "eval_rewards/margins": 0.05650414153933525,
      "eval_rewards/rejected": -0.6257904171943665,
      "eval_runtime": 432.4468,
      "eval_samples_per_second": 6.923,
      "eval_steps_per_second": 0.435,
      "step": 400
    },
    {
      "epoch": 0.8680509042196919,
      "grad_norm": 24.460303851950272,
      "learning_rate": 3.0834628133265293e-08,
      "logits/chosen": -3.308946132659912,
      "logits/rejected": -3.293513536453247,
      "logps/chosen": -0.5684244632720947,
      "logps/rejected": -0.6289744973182678,
      "loss": 1.5056,
      "rewards/accuracies": 0.5687500238418579,
      "rewards/chosen": -0.5684244632720947,
      "rewards/margins": 0.06054999679327011,
      "rewards/rejected": -0.6289744973182678,
      "step": 405
    },
    {
      "epoch": 0.8787675820495646,
      "grad_norm": 22.9693279472293,
      "learning_rate": 2.6058485400908248e-08,
      "logits/chosen": -3.358743190765381,
      "logits/rejected": -3.3271114826202393,
      "logps/chosen": -0.5511162877082825,
      "logps/rejected": -0.5816215872764587,
      "loss": 1.4873,
      "rewards/accuracies": 0.53125,
      "rewards/chosen": -0.5511162877082825,
      "rewards/margins": 0.030505258589982986,
      "rewards/rejected": -0.5816215872764587,
      "step": 410
    },
    {
      "epoch": 0.8894842598794374,
      "grad_norm": 25.421672297355457,
      "learning_rate": 2.1667305407530255e-08,
      "logits/chosen": -3.2762393951416016,
      "logits/rejected": -3.2448742389678955,
      "logps/chosen": -0.548682689666748,
      "logps/rejected": -0.6257365942001343,
      "loss": 1.4819,
      "rewards/accuracies": 0.65625,
      "rewards/chosen": -0.548682689666748,
      "rewards/margins": 0.07705400884151459,
      "rewards/rejected": -0.6257365942001343,
      "step": 415
    },
    {
      "epoch": 0.9002009377093101,
      "grad_norm": 27.04902626819421,
      "learning_rate": 1.7667258961816723e-08,
      "logits/chosen": -3.2720954418182373,
      "logits/rejected": -3.2802345752716064,
      "logps/chosen": -0.5331937074661255,
      "logps/rejected": -0.573731541633606,
      "loss": 1.5172,
      "rewards/accuracies": 0.48750001192092896,
      "rewards/chosen": -0.5331937074661255,
      "rewards/margins": 0.04053787142038345,
      "rewards/rejected": -0.573731541633606,
      "step": 420
    },
    {
      "epoch": 0.9109176155391828,
      "grad_norm": 27.138507466839215,
      "learning_rate": 1.4063967222860872e-08,
      "logits/chosen": -3.280641555786133,
      "logits/rejected": -3.268662214279175,
      "logps/chosen": -0.5173559188842773,
      "logps/rejected": -0.6271434426307678,
      "loss": 1.4903,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": -0.5173559188842773,
      "rewards/margins": 0.10978756844997406,
      "rewards/rejected": -0.6271434426307678,
      "step": 425
    },
    {
      "epoch": 0.9216342933690556,
      "grad_norm": 36.01846846843038,
      "learning_rate": 1.086249380089782e-08,
      "logits/chosen": -3.33141827583313,
      "logits/rejected": -3.3434956073760986,
      "logps/chosen": -0.615702748298645,
      "logps/rejected": -0.7053866386413574,
      "loss": 1.4914,
      "rewards/accuracies": 0.6312500238418579,
      "rewards/chosen": -0.615702748298645,
      "rewards/margins": 0.0896839126944542,
      "rewards/rejected": -0.7053866386413574,
      "step": 430
    },
    {
      "epoch": 0.9323509711989283,
      "grad_norm": 29.36505146378418,
      "learning_rate": 8.067337641547777e-09,
      "logits/chosen": -3.3964333534240723,
      "logits/rejected": -3.4042282104492188,
      "logps/chosen": -0.5159146189689636,
      "logps/rejected": -0.6444130539894104,
      "loss": 1.4866,
      "rewards/accuracies": 0.668749988079071,
      "rewards/chosen": -0.5159146189689636,
      "rewards/margins": 0.12849843502044678,
      "rewards/rejected": -0.6444130539894104,
      "step": 435
    },
    {
      "epoch": 0.943067649028801,
      "grad_norm": 23.999063503219116,
      "learning_rate": 5.682426703567034e-09,
      "logits/chosen": -3.2346031665802,
      "logits/rejected": -3.2309417724609375,
      "logps/chosen": -0.52639240026474,
      "logps/rejected": -0.6208442449569702,
      "loss": 1.4676,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -0.52639240026474,
      "rewards/margins": 0.09445185959339142,
      "rewards/rejected": -0.6208442449569702,
      "step": 440
    },
    {
      "epoch": 0.9537843268586738,
      "grad_norm": 28.590598764799935,
      "learning_rate": 3.7111124389918146e-09,
      "logits/chosen": -3.2654852867126465,
      "logits/rejected": -3.264702558517456,
      "logps/chosen": -0.5419159531593323,
      "logps/rejected": -0.6305166482925415,
      "loss": 1.4876,
      "rewards/accuracies": 0.59375,
      "rewards/chosen": -0.5419159531593323,
      "rewards/margins": 0.08860062062740326,
      "rewards/rejected": -0.6305166482925415,
      "step": 445
    },
    {
      "epoch": 0.9645010046885466,
      "grad_norm": 23.071429282207497,
      "learning_rate": 2.156165083431627e-09,
      "logits/chosen": -3.2962241172790527,
      "logits/rejected": -3.283967971801758,
      "logps/chosen": -0.5432751774787903,
      "logps/rejected": -0.6289895176887512,
      "loss": 1.4804,
      "rewards/accuracies": 0.5375000238418579,
      "rewards/chosen": -0.5432751774787903,
      "rewards/margins": 0.08571438491344452,
      "rewards/rejected": -0.6289895176887512,
      "step": 450
    },
    {
      "epoch": 0.9752176825184193,
      "grad_norm": 25.76239166835448,
      "learning_rate": 1.019769763130851e-09,
      "logits/chosen": -3.2721996307373047,
      "logits/rejected": -3.271106243133545,
      "logps/chosen": -0.5389679670333862,
      "logps/rejected": -0.6317923665046692,
      "loss": 1.4855,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": -0.5389679670333862,
      "rewards/margins": 0.09282433986663818,
      "rewards/rejected": -0.6317923665046692,
      "step": 455
    },
    {
      "epoch": 0.9859343603482921,
      "grad_norm": 29.67085990506561,
      "learning_rate": 3.0352342426868125e-10,
      "logits/chosen": -3.287473678588867,
      "logits/rejected": -3.29301381111145,
      "logps/chosen": -0.5480049252510071,
      "logps/rejected": -0.6534655690193176,
      "loss": 1.4972,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": -0.5480049252510071,
      "rewards/margins": 0.10546054691076279,
      "rewards/rejected": -0.6534655690193176,
      "step": 460
    },
    {
      "epoch": 0.9966510381781648,
      "grad_norm": 29.944439651933386,
      "learning_rate": 8.432588813089836e-12,
      "logits/chosen": -3.3211536407470703,
      "logits/rejected": -3.304069995880127,
      "logps/chosen": -0.583086371421814,
      "logps/rejected": -0.6369927525520325,
      "loss": 1.5004,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": -0.583086371421814,
      "rewards/margins": 0.05390629172325134,
      "rewards/rejected": -0.6369927525520325,
      "step": 465
    },
    {
      "epoch": 0.9987943737441393,
      "step": 466,
      "total_flos": 0.0,
      "train_loss": 1.5151263257976253,
      "train_runtime": 19305.9847,
      "train_samples_per_second": 3.093,
      "train_steps_per_second": 0.024
    }
  ],
  "logging_steps": 5,
  "max_steps": 466,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 1000000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}