|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.998691442030882, |
|
"eval_steps": 500, |
|
"global_step": 477, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.010468463752944255, |
|
"grad_norm": 11.782889401902718, |
|
"learning_rate": 6.25e-08, |
|
"logits/chosen": -1.444485068321228, |
|
"logits/rejected": -1.4456722736358643, |
|
"logps/chosen": -7.9825921058654785, |
|
"logps/rejected": -8.156225204467773, |
|
"loss": 8.9796, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -7.9825921058654785, |
|
"rewards/margins": 0.17363198101520538, |
|
"rewards/rejected": -8.156225204467773, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.02093692750588851, |
|
"grad_norm": 9.749361718413306, |
|
"learning_rate": 1.25e-07, |
|
"logits/chosen": -1.447454810142517, |
|
"logits/rejected": -1.4387584924697876, |
|
"logps/chosen": -8.047009468078613, |
|
"logps/rejected": -7.960066795349121, |
|
"loss": 8.9813, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -8.047009468078613, |
|
"rewards/margins": -0.08694207668304443, |
|
"rewards/rejected": -7.960066795349121, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.031405391258832765, |
|
"grad_norm": 16.533988717004068, |
|
"learning_rate": 1.875e-07, |
|
"logits/chosen": -1.4474663734436035, |
|
"logits/rejected": -1.4442191123962402, |
|
"logps/chosen": -7.851595401763916, |
|
"logps/rejected": -7.866987705230713, |
|
"loss": 8.8899, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -7.851595401763916, |
|
"rewards/margins": 0.01539215724915266, |
|
"rewards/rejected": -7.866987705230713, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.04187385501177702, |
|
"grad_norm": 13.917496227050558, |
|
"learning_rate": 2.5e-07, |
|
"logits/chosen": -1.440216064453125, |
|
"logits/rejected": -1.4452197551727295, |
|
"logps/chosen": -8.178640365600586, |
|
"logps/rejected": -8.201952934265137, |
|
"loss": 9.0475, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -8.178640365600586, |
|
"rewards/margins": 0.023312047123908997, |
|
"rewards/rejected": -8.201952934265137, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05234231876472128, |
|
"grad_norm": 13.358220692601913, |
|
"learning_rate": 3.125e-07, |
|
"logits/chosen": -1.474110722541809, |
|
"logits/rejected": -1.463666558265686, |
|
"logps/chosen": -8.079231262207031, |
|
"logps/rejected": -7.98193883895874, |
|
"loss": 9.124, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -8.079231262207031, |
|
"rewards/margins": -0.09729210287332535, |
|
"rewards/rejected": -7.98193883895874, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.06281078251766553, |
|
"grad_norm": 11.375823739582524, |
|
"learning_rate": 3.75e-07, |
|
"logits/chosen": -1.4473092555999756, |
|
"logits/rejected": -1.4344959259033203, |
|
"logps/chosen": -7.780773162841797, |
|
"logps/rejected": -7.703455448150635, |
|
"loss": 9.0197, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -7.780773162841797, |
|
"rewards/margins": -0.07731723040342331, |
|
"rewards/rejected": -7.703455448150635, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07327924627060979, |
|
"grad_norm": 10.213017154182484, |
|
"learning_rate": 4.3749999999999994e-07, |
|
"logits/chosen": -1.4583995342254639, |
|
"logits/rejected": -1.431770920753479, |
|
"logps/chosen": -8.027624130249023, |
|
"logps/rejected": -7.8937225341796875, |
|
"loss": 8.9843, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -8.027624130249023, |
|
"rewards/margins": -0.13390299677848816, |
|
"rewards/rejected": -7.8937225341796875, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.08374771002355404, |
|
"grad_norm": 10.12652288345569, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": -1.4447615146636963, |
|
"logits/rejected": -1.458698034286499, |
|
"logps/chosen": -7.983005523681641, |
|
"logps/rejected": -8.174285888671875, |
|
"loss": 9.0094, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -7.983005523681641, |
|
"rewards/margins": 0.19128072261810303, |
|
"rewards/rejected": -8.174285888671875, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0942161737764983, |
|
"grad_norm": 10.9885005835532, |
|
"learning_rate": 5.625e-07, |
|
"logits/chosen": -1.4630662202835083, |
|
"logits/rejected": -1.4628698825836182, |
|
"logps/chosen": -8.03730583190918, |
|
"logps/rejected": -7.831875801086426, |
|
"loss": 8.9878, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -8.03730583190918, |
|
"rewards/margins": -0.20542971789836884, |
|
"rewards/rejected": -7.831875801086426, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.10468463752944256, |
|
"grad_norm": 13.872196323961617, |
|
"learning_rate": 5.999678242522831e-07, |
|
"logits/chosen": -1.4442825317382812, |
|
"logits/rejected": -1.4613512754440308, |
|
"logps/chosen": -8.217935562133789, |
|
"logps/rejected": -8.252190589904785, |
|
"loss": 9.0757, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -8.217935562133789, |
|
"rewards/margins": 0.03425510972738266, |
|
"rewards/rejected": -8.252190589904785, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.11515310128238682, |
|
"grad_norm": 10.905494395813982, |
|
"learning_rate": 5.996059263493219e-07, |
|
"logits/chosen": -1.4492484331130981, |
|
"logits/rejected": -1.4467532634735107, |
|
"logps/chosen": -8.046092987060547, |
|
"logps/rejected": -8.062843322753906, |
|
"loss": 9.1036, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": -8.046092987060547, |
|
"rewards/margins": 0.01675090566277504, |
|
"rewards/rejected": -8.062843322753906, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.12562156503533106, |
|
"grad_norm": 15.995330684554988, |
|
"learning_rate": 5.988423976115163e-07, |
|
"logits/chosen": -1.443290114402771, |
|
"logits/rejected": -1.4562170505523682, |
|
"logps/chosen": -8.026491165161133, |
|
"logps/rejected": -8.317246437072754, |
|
"loss": 8.9008, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -8.026491165161133, |
|
"rewards/margins": 0.29075488448143005, |
|
"rewards/rejected": -8.317246437072754, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.1360900287882753, |
|
"grad_norm": 24.861886587620123, |
|
"learning_rate": 5.976782615723061e-07, |
|
"logits/chosen": -1.392534613609314, |
|
"logits/rejected": -1.4108682870864868, |
|
"logps/chosen": -7.828791618347168, |
|
"logps/rejected": -8.337072372436523, |
|
"loss": 8.934, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -7.828791618347168, |
|
"rewards/margins": 0.5082817673683167, |
|
"rewards/rejected": -8.337072372436523, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.14655849254121958, |
|
"grad_norm": 35.209412870115344, |
|
"learning_rate": 5.961150787913738e-07, |
|
"logits/chosen": -1.39071524143219, |
|
"logits/rejected": -1.3853540420532227, |
|
"logps/chosen": -7.945198059082031, |
|
"logps/rejected": -8.038311004638672, |
|
"loss": 8.9653, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -7.945198059082031, |
|
"rewards/margins": 0.0931134819984436, |
|
"rewards/rejected": -8.038311004638672, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.15702695629416383, |
|
"grad_norm": 12.413941901156766, |
|
"learning_rate": 5.941549447626671e-07, |
|
"logits/chosen": -1.3913167715072632, |
|
"logits/rejected": -1.3984179496765137, |
|
"logps/chosen": -7.823273658752441, |
|
"logps/rejected": -7.864768981933594, |
|
"loss": 8.9142, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -7.823273658752441, |
|
"rewards/margins": 0.04149458184838295, |
|
"rewards/rejected": -7.864768981933594, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.16749542004710807, |
|
"grad_norm": 21.221667512587725, |
|
"learning_rate": 5.918004871053251e-07, |
|
"logits/chosen": -1.3923091888427734, |
|
"logits/rejected": -1.4085341691970825, |
|
"logps/chosen": -7.852835178375244, |
|
"logps/rejected": -7.9230217933654785, |
|
"loss": 8.9088, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -7.852835178375244, |
|
"rewards/margins": 0.07018764317035675, |
|
"rewards/rejected": -7.9230217933654785, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.17796388380005235, |
|
"grad_norm": 12.603711372215182, |
|
"learning_rate": 5.890548620412763e-07, |
|
"logits/chosen": -1.4011937379837036, |
|
"logits/rejected": -1.39864182472229, |
|
"logps/chosen": -7.970945835113525, |
|
"logps/rejected": -8.160429000854492, |
|
"loss": 9.0488, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -7.970945835113525, |
|
"rewards/margins": 0.18948234617710114, |
|
"rewards/rejected": -8.160429000854492, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.1884323475529966, |
|
"grad_norm": 13.164098047063113, |
|
"learning_rate": 5.859217501642258e-07, |
|
"logits/chosen": -1.375800371170044, |
|
"logits/rejected": -1.389070749282837, |
|
"logps/chosen": -7.946028232574463, |
|
"logps/rejected": -8.130967140197754, |
|
"loss": 9.0141, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -7.946028232574463, |
|
"rewards/margins": 0.18493881821632385, |
|
"rewards/rejected": -8.130967140197754, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.19890081130594087, |
|
"grad_norm": 11.129043830781203, |
|
"learning_rate": 5.824053515057091e-07, |
|
"logits/chosen": -1.384723424911499, |
|
"logits/rejected": -1.3767420053482056, |
|
"logps/chosen": -8.055198669433594, |
|
"logps/rejected": -7.921385288238525, |
|
"loss": 9.0835, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": -8.055198669433594, |
|
"rewards/margins": -0.13381320238113403, |
|
"rewards/rejected": -7.921385288238525, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.2093692750588851, |
|
"grad_norm": 19.959846628166616, |
|
"learning_rate": 5.785103799048218e-07, |
|
"logits/chosen": -1.4132357835769653, |
|
"logits/rejected": -1.418881893157959, |
|
"logps/chosen": -8.033044815063477, |
|
"logps/rejected": -8.07997989654541, |
|
"loss": 9.0153, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -8.033044815063477, |
|
"rewards/margins": 0.04693456366658211, |
|
"rewards/rejected": -8.07997989654541, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.21983773881182936, |
|
"grad_norm": 12.843923135972, |
|
"learning_rate": 5.742420566891749e-07, |
|
"logits/chosen": -1.413010835647583, |
|
"logits/rejected": -1.4074172973632812, |
|
"logps/chosen": -7.718166351318359, |
|
"logps/rejected": -7.9243879318237305, |
|
"loss": 8.9445, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -7.718166351318359, |
|
"rewards/margins": 0.206221342086792, |
|
"rewards/rejected": -7.9243879318237305, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.23030620256477363, |
|
"grad_norm": 13.825481188162163, |
|
"learning_rate": 5.696061036755478e-07, |
|
"logits/chosen": -1.4453760385513306, |
|
"logits/rejected": -1.4452683925628662, |
|
"logps/chosen": -7.982637882232666, |
|
"logps/rejected": -8.220747947692871, |
|
"loss": 9.0144, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -7.982637882232666, |
|
"rewards/margins": 0.23810970783233643, |
|
"rewards/rejected": -8.220747947692871, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.24077466631771788, |
|
"grad_norm": 1525.1356967991103, |
|
"learning_rate": 5.64608735499618e-07, |
|
"logits/chosen": -1.3860673904418945, |
|
"logits/rejected": -1.3894257545471191, |
|
"logps/chosen": -7.8776044845581055, |
|
"logps/rejected": -8.189804077148438, |
|
"loss": 8.9598, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -7.8776044845581055, |
|
"rewards/margins": 0.31219929456710815, |
|
"rewards/rejected": -8.189804077148438, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.2512431300706621, |
|
"grad_norm": 13.913132246096096, |
|
"learning_rate": 5.592566512850545e-07, |
|
"logits/chosen": -1.3590507507324219, |
|
"logits/rejected": -1.3622348308563232, |
|
"logps/chosen": -8.100934982299805, |
|
"logps/rejected": -8.155590057373047, |
|
"loss": 8.9501, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -8.100934982299805, |
|
"rewards/margins": 0.054654598236083984, |
|
"rewards/rejected": -8.155590057373047, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.26171159382360637, |
|
"grad_norm": 14.57715484351377, |
|
"learning_rate": 5.535570256631384e-07, |
|
"logits/chosen": -1.4173157215118408, |
|
"logits/rejected": -1.411921739578247, |
|
"logps/chosen": -8.191034317016602, |
|
"logps/rejected": -8.077339172363281, |
|
"loss": 9.0651, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -8.191034317016602, |
|
"rewards/margins": -0.11369502544403076, |
|
"rewards/rejected": -8.077339172363281, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.2721800575765506, |
|
"grad_norm": 14.54742842440625, |
|
"learning_rate": 5.475174991549528e-07, |
|
"logits/chosen": -1.37632417678833, |
|
"logits/rejected": -1.3858749866485596, |
|
"logps/chosen": -8.046875953674316, |
|
"logps/rejected": -8.172870635986328, |
|
"loss": 8.9777, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -8.046875953674316, |
|
"rewards/margins": 0.12599456310272217, |
|
"rewards/rejected": -8.172870635986328, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.2826485213294949, |
|
"grad_norm": 18.256408848890032, |
|
"learning_rate": 5.411461679290317e-07, |
|
"logits/chosen": -1.3864247798919678, |
|
"logits/rejected": -1.4004995822906494, |
|
"logps/chosen": -7.979268550872803, |
|
"logps/rejected": -8.406595230102539, |
|
"loss": 8.9672, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -7.979268550872803, |
|
"rewards/margins": 0.4273262023925781, |
|
"rewards/rejected": -8.406595230102539, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.29311698508243916, |
|
"grad_norm": 14.27521931097187, |
|
"learning_rate": 5.34451572948201e-07, |
|
"logits/chosen": -1.4093233346939087, |
|
"logits/rejected": -1.4172067642211914, |
|
"logps/chosen": -7.903810977935791, |
|
"logps/rejected": -7.975949287414551, |
|
"loss": 8.9533, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -7.903810977935791, |
|
"rewards/margins": 0.07213909924030304, |
|
"rewards/rejected": -7.975949287414551, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.3035854488353834, |
|
"grad_norm": 10.523105376926537, |
|
"learning_rate": 5.274426885201582e-07, |
|
"logits/chosen": -1.4147297143936157, |
|
"logits/rejected": -1.4396823644638062, |
|
"logps/chosen": -7.8977460861206055, |
|
"logps/rejected": -8.05931568145752, |
|
"loss": 8.915, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -7.8977460861206055, |
|
"rewards/margins": 0.16156847774982452, |
|
"rewards/rejected": -8.05931568145752, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.31405391258832765, |
|
"grad_norm": 14.122907500033074, |
|
"learning_rate": 5.201289102671411e-07, |
|
"logits/chosen": -1.4332246780395508, |
|
"logits/rejected": -1.436842679977417, |
|
"logps/chosen": -7.895875453948975, |
|
"logps/rejected": -8.0299072265625, |
|
"loss": 8.9785, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -7.895875453948975, |
|
"rewards/margins": 0.13403132557868958, |
|
"rewards/rejected": -8.0299072265625, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3245223763412719, |
|
"grad_norm": 12.92310774863363, |
|
"learning_rate": 5.12520042530811e-07, |
|
"logits/chosen": -1.402719259262085, |
|
"logits/rejected": -1.3787992000579834, |
|
"logps/chosen": -7.979246616363525, |
|
"logps/rejected": -7.966032981872559, |
|
"loss": 9.0256, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -7.979246616363525, |
|
"rewards/margins": -0.013212683610618114, |
|
"rewards/rejected": -7.966032981872559, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.33499084009421615, |
|
"grad_norm": 15.237628673130487, |
|
"learning_rate": 5.046262852292346e-07, |
|
"logits/chosen": -1.3872135877609253, |
|
"logits/rejected": -1.395935297012329, |
|
"logps/chosen": -8.034635543823242, |
|
"logps/rejected": -8.069303512573242, |
|
"loss": 9.0268, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -8.034635543823242, |
|
"rewards/margins": 0.03466759994626045, |
|
"rewards/rejected": -8.069303512573242, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.34545930384716045, |
|
"grad_norm": 11.298592435998462, |
|
"learning_rate": 4.964582201835856e-07, |
|
"logits/chosen": -1.396750569343567, |
|
"logits/rejected": -1.3891570568084717, |
|
"logps/chosen": -7.99398946762085, |
|
"logps/rejected": -8.040716171264648, |
|
"loss": 9.0073, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -7.99398946762085, |
|
"rewards/margins": 0.04672648385167122, |
|
"rewards/rejected": -8.040716171264648, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.3559277676001047, |
|
"grad_norm": 12.492415372530475, |
|
"learning_rate": 4.880267969328908e-07, |
|
"logits/chosen": -1.3683674335479736, |
|
"logits/rejected": -1.3726252317428589, |
|
"logps/chosen": -8.114925384521484, |
|
"logps/rejected": -8.097586631774902, |
|
"loss": 9.0856, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -8.114925384521484, |
|
"rewards/margins": -0.01733933761715889, |
|
"rewards/rejected": -8.097586631774902, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.36639623135304894, |
|
"grad_norm": 12.610496367889976, |
|
"learning_rate": 4.793433180558423e-07, |
|
"logits/chosen": -1.3843915462493896, |
|
"logits/rejected": -1.3853034973144531, |
|
"logps/chosen": -7.956766605377197, |
|
"logps/rejected": -7.944356441497803, |
|
"loss": 9.0054, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -7.956766605377197, |
|
"rewards/margins": -0.012410154566168785, |
|
"rewards/rejected": -7.944356441497803, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.3768646951059932, |
|
"grad_norm": 15.598692092405715, |
|
"learning_rate": 4.704194240193467e-07, |
|
"logits/chosen": -1.3554438352584839, |
|
"logits/rejected": -1.372804880142212, |
|
"logps/chosen": -8.031749725341797, |
|
"logps/rejected": -8.155205726623535, |
|
"loss": 8.9878, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -8.031749725341797, |
|
"rewards/margins": 0.12345610558986664, |
|
"rewards/rejected": -8.155205726623535, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.38733315885893743, |
|
"grad_norm": 13.474501957199323, |
|
"learning_rate": 4.6126707757412686e-07, |
|
"logits/chosen": -1.3345744609832764, |
|
"logits/rejected": -1.3397581577301025, |
|
"logps/chosen": -7.977494716644287, |
|
"logps/rejected": -8.02932357788086, |
|
"loss": 8.9482, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -7.977494716644287, |
|
"rewards/margins": 0.051828037947416306, |
|
"rewards/rejected": -8.02932357788086, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.39780162261188173, |
|
"grad_norm": 11.929724403265839, |
|
"learning_rate": 4.5189854771829086e-07, |
|
"logits/chosen": -1.3528499603271484, |
|
"logits/rejected": -1.3492704629898071, |
|
"logps/chosen": -7.803788661956787, |
|
"logps/rejected": -7.93734073638916, |
|
"loss": 8.9516, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -7.803788661956787, |
|
"rewards/margins": 0.1335521936416626, |
|
"rewards/rejected": -7.93734073638916, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.408270086364826, |
|
"grad_norm": 14.327437395286285, |
|
"learning_rate": 4.4232639325036807e-07, |
|
"logits/chosen": -1.3263393640518188, |
|
"logits/rejected": -1.3331449031829834, |
|
"logps/chosen": -8.183530807495117, |
|
"logps/rejected": -8.074382781982422, |
|
"loss": 9.054, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -8.183530807495117, |
|
"rewards/margins": -0.10914800316095352, |
|
"rewards/rejected": -8.074382781982422, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.4187385501177702, |
|
"grad_norm": 12.623357323327125, |
|
"learning_rate": 4.32563445933859e-07, |
|
"logits/chosen": -1.3866218328475952, |
|
"logits/rejected": -1.376103401184082, |
|
"logps/chosen": -7.869284152984619, |
|
"logps/rejected": -7.980343818664551, |
|
"loss": 9.0216, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -7.869284152984619, |
|
"rewards/margins": 0.11105932295322418, |
|
"rewards/rejected": -7.980343818664551, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.42920701387071447, |
|
"grad_norm": 15.673764218634288, |
|
"learning_rate": 4.226227932958664e-07, |
|
"logits/chosen": -1.3467977046966553, |
|
"logits/rejected": -1.3465808629989624, |
|
"logps/chosen": -7.946604251861572, |
|
"logps/rejected": -8.12873363494873, |
|
"loss": 8.9418, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -7.946604251861572, |
|
"rewards/margins": 0.18213000893592834, |
|
"rewards/rejected": -8.12873363494873, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.4396754776236587, |
|
"grad_norm": 20.82547017360473, |
|
"learning_rate": 4.1251776108286854e-07, |
|
"logits/chosen": -1.3276244401931763, |
|
"logits/rejected": -1.3366806507110596, |
|
"logps/chosen": -7.942746639251709, |
|
"logps/rejected": -8.075704574584961, |
|
"loss": 8.992, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -7.942746639251709, |
|
"rewards/margins": 0.13295890390872955, |
|
"rewards/rejected": -8.075704574584961, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.45014394137660296, |
|
"grad_norm": 11.77567830972404, |
|
"learning_rate": 4.022618953971514e-07, |
|
"logits/chosen": -1.3542811870574951, |
|
"logits/rejected": -1.3621467351913452, |
|
"logps/chosen": -7.741019248962402, |
|
"logps/rejected": -8.169224739074707, |
|
"loss": 8.9028, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -7.741019248962402, |
|
"rewards/margins": 0.42820531129837036, |
|
"rewards/rejected": -8.169224739074707, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.46061240512954726, |
|
"grad_norm": 13.792748846310712, |
|
"learning_rate": 3.918689445378477e-07, |
|
"logits/chosen": -1.3647044897079468, |
|
"logits/rejected": -1.3888493776321411, |
|
"logps/chosen": -7.679605960845947, |
|
"logps/rejected": -7.820864677429199, |
|
"loss": 9.0059, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -7.679605960845947, |
|
"rewards/margins": 0.14125962555408478, |
|
"rewards/rejected": -7.820864677429199, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.4710808688824915, |
|
"grad_norm": 10.698537268464346, |
|
"learning_rate": 3.813528405709251e-07, |
|
"logits/chosen": -1.3668994903564453, |
|
"logits/rejected": -1.370476484298706, |
|
"logps/chosen": -7.723212242126465, |
|
"logps/rejected": -7.974145412445068, |
|
"loss": 8.9131, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -7.723212242126465, |
|
"rewards/margins": 0.25093379616737366, |
|
"rewards/rejected": -7.974145412445068, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.48154933263543576, |
|
"grad_norm": 12.476277662413903, |
|
"learning_rate": 3.707276806528282e-07, |
|
"logits/chosen": -1.37067449092865, |
|
"logits/rejected": -1.3700437545776367, |
|
"logps/chosen": -8.093690872192383, |
|
"logps/rejected": -8.251599311828613, |
|
"loss": 9.068, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -8.093690872192383, |
|
"rewards/margins": 0.157908633351326, |
|
"rewards/rejected": -8.251599311828613, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.49201779638838, |
|
"grad_norm": 12.703214615987921, |
|
"learning_rate": 3.6000770813281334e-07, |
|
"logits/chosen": -1.3918092250823975, |
|
"logits/rejected": -1.3941457271575928, |
|
"logps/chosen": -7.891854286193848, |
|
"logps/rejected": -8.121790885925293, |
|
"loss": 8.9911, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -7.891854286193848, |
|
"rewards/margins": 0.22993668913841248, |
|
"rewards/rejected": -8.121790885925293, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.5024862601413242, |
|
"grad_norm": 18.912862114031174, |
|
"learning_rate": 3.4920729345930654e-07, |
|
"logits/chosen": -1.3598334789276123, |
|
"logits/rejected": -1.3656227588653564, |
|
"logps/chosen": -7.972811698913574, |
|
"logps/rejected": -8.120051383972168, |
|
"loss": 9.0708, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -7.972811698913574, |
|
"rewards/margins": 0.14723989367485046, |
|
"rewards/rejected": -8.120051383972168, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5129547238942685, |
|
"grad_norm": 15.322600609417346, |
|
"learning_rate": 3.383409149158814e-07, |
|
"logits/chosen": -1.3441493511199951, |
|
"logits/rejected": -1.3492319583892822, |
|
"logps/chosen": -8.092975616455078, |
|
"logps/rejected": -8.160036087036133, |
|
"loss": 8.9194, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -8.092975616455078, |
|
"rewards/margins": 0.06706006824970245, |
|
"rewards/rejected": -8.160036087036133, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.5234231876472127, |
|
"grad_norm": 16.724538535729355, |
|
"learning_rate": 3.2742313921268035e-07, |
|
"logits/chosen": -1.3152296543121338, |
|
"logits/rejected": -1.3239524364471436, |
|
"logps/chosen": -7.889418601989746, |
|
"logps/rejected": -8.20849323272705, |
|
"loss": 8.8184, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -7.889418601989746, |
|
"rewards/margins": 0.31907448172569275, |
|
"rewards/rejected": -8.20849323272705, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.533891651400157, |
|
"grad_norm": 12.327867536896116, |
|
"learning_rate": 3.1646860195929825e-07, |
|
"logits/chosen": -1.3065917491912842, |
|
"logits/rejected": -1.3107439279556274, |
|
"logps/chosen": -8.116486549377441, |
|
"logps/rejected": -8.308655738830566, |
|
"loss": 8.9949, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -8.116486549377441, |
|
"rewards/margins": 0.19216908514499664, |
|
"rewards/rejected": -8.308655738830566, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.5443601151531012, |
|
"grad_norm": 14.17754725379555, |
|
"learning_rate": 3.054919880453032e-07, |
|
"logits/chosen": -1.246124029159546, |
|
"logits/rejected": -1.2508999109268188, |
|
"logps/chosen": -7.7648186683654785, |
|
"logps/rejected": -8.22431755065918, |
|
"loss": 8.941, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -7.7648186683654785, |
|
"rewards/margins": 0.4594977796077728, |
|
"rewards/rejected": -8.22431755065918, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5548285789060455, |
|
"grad_norm": 11.969966746660198, |
|
"learning_rate": 2.9450801195469686e-07, |
|
"logits/chosen": -1.3018732070922852, |
|
"logits/rejected": -1.3149497509002686, |
|
"logps/chosen": -7.904818058013916, |
|
"logps/rejected": -8.152360916137695, |
|
"loss": 8.9657, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -7.904818058013916, |
|
"rewards/margins": 0.24754443764686584, |
|
"rewards/rejected": -8.152360916137695, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.5652970426589898, |
|
"grad_norm": 14.47186665684816, |
|
"learning_rate": 2.835313980407017e-07, |
|
"logits/chosen": -1.3108150959014893, |
|
"logits/rejected": -1.288703441619873, |
|
"logps/chosen": -8.249927520751953, |
|
"logps/rejected": -8.318041801452637, |
|
"loss": 9.0073, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -8.249927520751953, |
|
"rewards/margins": 0.06811434030532837, |
|
"rewards/rejected": -8.318041801452637, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.575765506411934, |
|
"grad_norm": 26.602745593974163, |
|
"learning_rate": 2.7257686078731973e-07, |
|
"logits/chosen": -1.337909460067749, |
|
"logits/rejected": -1.348547339439392, |
|
"logps/chosen": -7.881032466888428, |
|
"logps/rejected": -8.068848609924316, |
|
"loss": 8.8981, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -7.881032466888428, |
|
"rewards/margins": 0.18781575560569763, |
|
"rewards/rejected": -8.068848609924316, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.5862339701648783, |
|
"grad_norm": 14.906273538361356, |
|
"learning_rate": 2.6165908508411857e-07, |
|
"logits/chosen": -1.3503994941711426, |
|
"logits/rejected": -1.3676143884658813, |
|
"logps/chosen": -7.861943244934082, |
|
"logps/rejected": -8.101309776306152, |
|
"loss": 8.9213, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -7.861943244934082, |
|
"rewards/margins": 0.23936741054058075, |
|
"rewards/rejected": -8.101309776306152, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.5967024339178225, |
|
"grad_norm": 14.643252229490672, |
|
"learning_rate": 2.5079270654069354e-07, |
|
"logits/chosen": -1.3024542331695557, |
|
"logits/rejected": -1.3081843852996826, |
|
"logps/chosen": -7.836719512939453, |
|
"logps/rejected": -8.08849048614502, |
|
"loss": 8.8721, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -7.836719512939453, |
|
"rewards/margins": 0.251770943403244, |
|
"rewards/rejected": -8.08849048614502, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.6071708976707668, |
|
"grad_norm": 12.350106404715637, |
|
"learning_rate": 2.399922918671867e-07, |
|
"logits/chosen": -1.337571620941162, |
|
"logits/rejected": -1.3552089929580688, |
|
"logps/chosen": -7.821458339691162, |
|
"logps/rejected": -8.146204948425293, |
|
"loss": 8.9032, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -7.821458339691162, |
|
"rewards/margins": 0.3247470557689667, |
|
"rewards/rejected": -8.146204948425293, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6176393614237111, |
|
"grad_norm": 13.544262102627407, |
|
"learning_rate": 2.2927231934717176e-07, |
|
"logits/chosen": -1.331067442893982, |
|
"logits/rejected": -1.3430246114730835, |
|
"logps/chosen": -7.9300737380981445, |
|
"logps/rejected": -8.060845375061035, |
|
"loss": 8.9735, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -7.9300737380981445, |
|
"rewards/margins": 0.1307719349861145, |
|
"rewards/rejected": -8.060845375061035, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.6281078251766553, |
|
"grad_norm": 12.721635836612304, |
|
"learning_rate": 2.1864715942907487e-07, |
|
"logits/chosen": -1.299328088760376, |
|
"logits/rejected": -1.3065472841262817, |
|
"logps/chosen": -7.961094856262207, |
|
"logps/rejected": -8.206907272338867, |
|
"loss": 8.9027, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -7.961094856262207, |
|
"rewards/margins": 0.24581179022789001, |
|
"rewards/rejected": -8.206907272338867, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6385762889295996, |
|
"grad_norm": 12.545308362098348, |
|
"learning_rate": 2.081310554621522e-07, |
|
"logits/chosen": -1.3111393451690674, |
|
"logits/rejected": -1.335069179534912, |
|
"logps/chosen": -8.182366371154785, |
|
"logps/rejected": -8.390935897827148, |
|
"loss": 9.0314, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -8.182366371154785, |
|
"rewards/margins": 0.20856896042823792, |
|
"rewards/rejected": -8.390935897827148, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.6490447526825438, |
|
"grad_norm": 15.575558913925711, |
|
"learning_rate": 1.9773810460284862e-07, |
|
"logits/chosen": -1.3477294445037842, |
|
"logits/rejected": -1.3550546169281006, |
|
"logps/chosen": -8.07054328918457, |
|
"logps/rejected": -8.061942100524902, |
|
"loss": 9.0612, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -8.07054328918457, |
|
"rewards/margins": -0.008599767461419106, |
|
"rewards/rejected": -8.061942100524902, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.6595132164354881, |
|
"grad_norm": 20.765211421302535, |
|
"learning_rate": 1.874822389171314e-07, |
|
"logits/chosen": -1.3256926536560059, |
|
"logits/rejected": -1.3409112691879272, |
|
"logps/chosen": -7.885645389556885, |
|
"logps/rejected": -8.124526977539062, |
|
"loss": 8.8864, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -7.885645389556885, |
|
"rewards/margins": 0.2388812005519867, |
|
"rewards/rejected": -8.124526977539062, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.6699816801884323, |
|
"grad_norm": 14.208218028523063, |
|
"learning_rate": 1.7737720670413356e-07, |
|
"logits/chosen": -1.344118595123291, |
|
"logits/rejected": -1.336096167564392, |
|
"logps/chosen": -8.159255981445312, |
|
"logps/rejected": -8.0567045211792, |
|
"loss": 8.9837, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -8.159255981445312, |
|
"rewards/margins": -0.10255154222249985, |
|
"rewards/rejected": -8.0567045211792, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.6804501439413766, |
|
"grad_norm": 14.61227257116642, |
|
"learning_rate": 1.6743655406614095e-07, |
|
"logits/chosen": -1.340541958808899, |
|
"logits/rejected": -1.3474371433258057, |
|
"logps/chosen": -8.056330680847168, |
|
"logps/rejected": -8.348928451538086, |
|
"loss": 8.9222, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -8.056330680847168, |
|
"rewards/margins": 0.29259705543518066, |
|
"rewards/rejected": -8.348928451538086, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.6909186076943209, |
|
"grad_norm": 13.778075151913542, |
|
"learning_rate": 1.5767360674963198e-07, |
|
"logits/chosen": -1.3218133449554443, |
|
"logits/rejected": -1.3337442874908447, |
|
"logps/chosen": -7.961134910583496, |
|
"logps/rejected": -7.996614933013916, |
|
"loss": 9.0247, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -7.961134910583496, |
|
"rewards/margins": 0.035479746758937836, |
|
"rewards/rejected": -7.996614933013916, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7013870714472651, |
|
"grad_norm": 13.684086792814428, |
|
"learning_rate": 1.4810145228170922e-07, |
|
"logits/chosen": -1.3398381471633911, |
|
"logits/rejected": -1.3437585830688477, |
|
"logps/chosen": -7.856637001037598, |
|
"logps/rejected": -8.111886978149414, |
|
"loss": 8.8913, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -7.856637001037598, |
|
"rewards/margins": 0.25525030493736267, |
|
"rewards/rejected": -8.111886978149414, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.7118555352002094, |
|
"grad_norm": 15.39649445200101, |
|
"learning_rate": 1.3873292242587306e-07, |
|
"logits/chosen": -1.3376450538635254, |
|
"logits/rejected": -1.3476964235305786, |
|
"logps/chosen": -8.228338241577148, |
|
"logps/rejected": -8.340727806091309, |
|
"loss": 9.0269, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -8.228338241577148, |
|
"rewards/margins": 0.11239071190357208, |
|
"rewards/rejected": -8.340727806091309, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.7223239989531536, |
|
"grad_norm": 15.302013253785537, |
|
"learning_rate": 1.295805759806533e-07, |
|
"logits/chosen": -1.3724461793899536, |
|
"logits/rejected": -1.3841075897216797, |
|
"logps/chosen": -8.054750442504883, |
|
"logps/rejected": -8.403682708740234, |
|
"loss": 9.0089, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -8.054750442504883, |
|
"rewards/margins": 0.3489326238632202, |
|
"rewards/rejected": -8.403682708740234, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.7327924627060979, |
|
"grad_norm": 18.608453972243662, |
|
"learning_rate": 1.2065668194415777e-07, |
|
"logits/chosen": -1.3417284488677979, |
|
"logits/rejected": -1.3348530530929565, |
|
"logps/chosen": -7.915482997894287, |
|
"logps/rejected": -8.044729232788086, |
|
"loss": 8.9016, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -7.915482997894287, |
|
"rewards/margins": 0.12924641370773315, |
|
"rewards/rejected": -8.044729232788086, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7432609264590422, |
|
"grad_norm": 14.900748845819772, |
|
"learning_rate": 1.1197320306710923e-07, |
|
"logits/chosen": -1.3621351718902588, |
|
"logits/rejected": -1.3541442155838013, |
|
"logps/chosen": -8.007196426391602, |
|
"logps/rejected": -7.965734004974365, |
|
"loss": 8.9062, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -8.007196426391602, |
|
"rewards/margins": -0.04146287590265274, |
|
"rewards/rejected": -7.965734004974365, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.7537293902119864, |
|
"grad_norm": 11.569520650790327, |
|
"learning_rate": 1.035417798164145e-07, |
|
"logits/chosen": -1.3260619640350342, |
|
"logits/rejected": -1.3356263637542725, |
|
"logps/chosen": -7.753990173339844, |
|
"logps/rejected": -8.039525985717773, |
|
"loss": 8.8536, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -7.753990173339844, |
|
"rewards/margins": 0.2855362296104431, |
|
"rewards/rejected": -8.039525985717773, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.7641978539649307, |
|
"grad_norm": 13.480030507608214, |
|
"learning_rate": 9.537371477076535e-08, |
|
"logits/chosen": -1.2944828271865845, |
|
"logits/rejected": -1.2956254482269287, |
|
"logps/chosen": -7.926826477050781, |
|
"logps/rejected": -7.9895477294921875, |
|
"loss": 8.9487, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -7.926826477050781, |
|
"rewards/margins": 0.06272158026695251, |
|
"rewards/rejected": -7.9895477294921875, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.7746663177178749, |
|
"grad_norm": 14.634365970472302, |
|
"learning_rate": 8.747995746918898e-08, |
|
"logits/chosen": -1.3467233180999756, |
|
"logits/rejected": -1.3351846933364868, |
|
"logps/chosen": -8.043527603149414, |
|
"logps/rejected": -8.186015129089355, |
|
"loss": 8.9627, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -8.043527603149414, |
|
"rewards/margins": 0.14248715341091156, |
|
"rewards/rejected": -8.186015129089355, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.7851347814708192, |
|
"grad_norm": 28.38170473677795, |
|
"learning_rate": 7.987108973285888e-08, |
|
"logits/chosen": -1.3258306980133057, |
|
"logits/rejected": -1.3155487775802612, |
|
"logps/chosen": -8.005027770996094, |
|
"logps/rejected": -8.246636390686035, |
|
"loss": 8.9413, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -8.005027770996094, |
|
"rewards/margins": 0.24160809814929962, |
|
"rewards/rejected": -8.246636390686035, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.7956032452237635, |
|
"grad_norm": 13.711915418794124, |
|
"learning_rate": 7.255731147984174e-08, |
|
"logits/chosen": -1.3438084125518799, |
|
"logits/rejected": -1.297163963317871, |
|
"logps/chosen": -8.208559036254883, |
|
"logps/rejected": -8.363499641418457, |
|
"loss": 8.942, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -8.208559036254883, |
|
"rewards/margins": 0.15493938326835632, |
|
"rewards/rejected": -8.363499641418457, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8060717089767077, |
|
"grad_norm": 13.642711731891415, |
|
"learning_rate": 6.554842705179898e-08, |
|
"logits/chosen": -1.3352845907211304, |
|
"logits/rejected": -1.3314430713653564, |
|
"logps/chosen": -8.112469673156738, |
|
"logps/rejected": -8.209820747375488, |
|
"loss": 8.9588, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -8.112469673156738, |
|
"rewards/margins": 0.09735036641359329, |
|
"rewards/rejected": -8.209820747375488, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.816540172729652, |
|
"grad_norm": 14.269345053816819, |
|
"learning_rate": 5.885383207096832e-08, |
|
"logits/chosen": -1.3467012643814087, |
|
"logits/rejected": -1.3490493297576904, |
|
"logps/chosen": -7.833376884460449, |
|
"logps/rejected": -8.030352592468262, |
|
"loss": 8.8689, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -7.833376884460449, |
|
"rewards/margins": 0.1969761848449707, |
|
"rewards/rejected": -8.030352592468262, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.8270086364825961, |
|
"grad_norm": 16.14618551872646, |
|
"learning_rate": 5.2482500845047165e-08, |
|
"logits/chosen": -1.3177175521850586, |
|
"logits/rejected": -1.3296372890472412, |
|
"logps/chosen": -7.635066032409668, |
|
"logps/rejected": -7.791895866394043, |
|
"loss": 8.9076, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -7.635066032409668, |
|
"rewards/margins": 0.15682990849018097, |
|
"rewards/rejected": -7.791895866394043, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.8374771002355405, |
|
"grad_norm": 12.278193076130206, |
|
"learning_rate": 4.644297433686162e-08, |
|
"logits/chosen": -1.3246910572052002, |
|
"logits/rejected": -1.315019965171814, |
|
"logps/chosen": -7.837827205657959, |
|
"logps/rejected": -7.908313751220703, |
|
"loss": 8.951, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -7.837827205657959, |
|
"rewards/margins": 0.0704866498708725, |
|
"rewards/rejected": -7.908313751220703, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8479455639884846, |
|
"grad_norm": 12.786235556241849, |
|
"learning_rate": 4.074334871494558e-08, |
|
"logits/chosen": -1.3545995950698853, |
|
"logits/rejected": -1.3624496459960938, |
|
"logps/chosen": -8.024687767028809, |
|
"logps/rejected": -8.172109603881836, |
|
"loss": 8.9198, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -8.024687767028809, |
|
"rewards/margins": 0.1474229097366333, |
|
"rewards/rejected": -8.172109603881836, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.8584140277414289, |
|
"grad_norm": 30.758577870183032, |
|
"learning_rate": 3.5391264500382e-08, |
|
"logits/chosen": -1.360478401184082, |
|
"logits/rejected": -1.3552910089492798, |
|
"logps/chosen": -7.844922065734863, |
|
"logps/rejected": -7.852625846862793, |
|
"loss": 8.8997, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -7.844922065734863, |
|
"rewards/margins": 0.0077047706581652164, |
|
"rewards/rejected": -7.852625846862793, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.8688824914943732, |
|
"grad_norm": 14.260374307768236, |
|
"learning_rate": 3.0393896324452226e-08, |
|
"logits/chosen": -1.372036337852478, |
|
"logits/rejected": -1.3762390613555908, |
|
"logps/chosen": -7.982748508453369, |
|
"logps/rejected": -8.225188255310059, |
|
"loss": 8.9748, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -7.982748508453369, |
|
"rewards/margins": 0.24244041740894318, |
|
"rewards/rejected": -8.225188255310059, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.8793509552473174, |
|
"grad_norm": 14.09879602927017, |
|
"learning_rate": 2.5757943310825026e-08, |
|
"logits/chosen": -1.3225996494293213, |
|
"logits/rejected": -1.3161330223083496, |
|
"logps/chosen": -7.865872859954834, |
|
"logps/rejected": -7.931491851806641, |
|
"loss": 8.9372, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -7.865872859954834, |
|
"rewards/margins": 0.06561894714832306, |
|
"rewards/rejected": -7.931491851806641, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.8898194190002617, |
|
"grad_norm": 13.351401390808332, |
|
"learning_rate": 2.148962009517823e-08, |
|
"logits/chosen": -1.342071771621704, |
|
"logits/rejected": -1.337024450302124, |
|
"logps/chosen": -8.03447151184082, |
|
"logps/rejected": -8.085325241088867, |
|
"loss": 8.9767, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -8.03447151184082, |
|
"rewards/margins": 0.050852321088314056, |
|
"rewards/rejected": -8.085325241088867, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.9002878827532059, |
|
"grad_norm": 16.02345328859732, |
|
"learning_rate": 1.759464849429082e-08, |
|
"logits/chosen": -1.3405394554138184, |
|
"logits/rejected": -1.3419816493988037, |
|
"logps/chosen": -7.878898620605469, |
|
"logps/rejected": -8.002215385437012, |
|
"loss": 8.9292, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -7.878898620605469, |
|
"rewards/margins": 0.123316690325737, |
|
"rewards/rejected": -8.002215385437012, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9107563465061502, |
|
"grad_norm": 13.971661978504134, |
|
"learning_rate": 1.4078249835774169e-08, |
|
"logits/chosen": -1.3646373748779297, |
|
"logits/rejected": -1.3699538707733154, |
|
"logps/chosen": -7.937603950500488, |
|
"logps/rejected": -8.069661140441895, |
|
"loss": 8.8372, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -7.937603950500488, |
|
"rewards/margins": 0.13205692172050476, |
|
"rewards/rejected": -8.069661140441895, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.9212248102590945, |
|
"grad_norm": 13.636923891581842, |
|
"learning_rate": 1.0945137958723705e-08, |
|
"logits/chosen": -1.3303980827331543, |
|
"logits/rejected": -1.3274564743041992, |
|
"logps/chosen": -8.00455379486084, |
|
"logps/rejected": -8.096671104431152, |
|
"loss": 8.9997, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -8.00455379486084, |
|
"rewards/margins": 0.09211695194244385, |
|
"rewards/rejected": -8.096671104431152, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.9316932740120387, |
|
"grad_norm": 14.66331138432002, |
|
"learning_rate": 8.19951289467482e-09, |
|
"logits/chosen": -1.3527616262435913, |
|
"logits/rejected": -1.352975606918335, |
|
"logps/chosen": -7.898123741149902, |
|
"logps/rejected": -8.020647048950195, |
|
"loss": 8.9114, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -7.898123741149902, |
|
"rewards/margins": 0.1225227564573288, |
|
"rewards/rejected": -8.020647048950195, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.942161737764983, |
|
"grad_norm": 34.64920022108061, |
|
"learning_rate": 5.84505523733293e-09, |
|
"logits/chosen": -1.3027703762054443, |
|
"logits/rejected": -1.2922091484069824, |
|
"logps/chosen": -8.017878532409668, |
|
"logps/rejected": -8.019991874694824, |
|
"loss": 9.0038, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -8.017878532409668, |
|
"rewards/margins": 0.0021121830213814974, |
|
"rewards/rejected": -8.019991874694824, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.9526302015179272, |
|
"grad_norm": 14.096689301269398, |
|
"learning_rate": 3.8849212086261466e-09, |
|
"logits/chosen": -1.3568954467773438, |
|
"logits/rejected": -1.345536231994629, |
|
"logps/chosen": -7.817251682281494, |
|
"logps/rejected": -8.18480110168457, |
|
"loss": 8.9022, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -7.817251682281494, |
|
"rewards/margins": 0.3675496578216553, |
|
"rewards/rejected": -8.18480110168457, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.9630986652708715, |
|
"grad_norm": 21.948748802651522, |
|
"learning_rate": 2.3217384276938756e-09, |
|
"logits/chosen": -1.3387937545776367, |
|
"logits/rejected": -1.349258542060852, |
|
"logps/chosen": -7.9868292808532715, |
|
"logps/rejected": -8.197335243225098, |
|
"loss": 8.8854, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -7.9868292808532715, |
|
"rewards/margins": 0.21050508320331573, |
|
"rewards/rejected": -8.197335243225098, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.9735671290238157, |
|
"grad_norm": 12.715751305789052, |
|
"learning_rate": 1.1576023884836472e-09, |
|
"logits/chosen": -1.3674533367156982, |
|
"logits/rejected": -1.3665874004364014, |
|
"logps/chosen": -8.10934066772461, |
|
"logps/rejected": -8.27099323272705, |
|
"loss": 8.9853, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -8.10934066772461, |
|
"rewards/margins": 0.16165266931056976, |
|
"rewards/rejected": -8.27099323272705, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.98403559277676, |
|
"grad_norm": 13.20358280327505, |
|
"learning_rate": 3.940736506780395e-10, |
|
"logits/chosen": -1.348550796508789, |
|
"logits/rejected": -1.3657060861587524, |
|
"logps/chosen": -7.707891941070557, |
|
"logps/rejected": -7.990015983581543, |
|
"loss": 8.9804, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -7.707891941070557, |
|
"rewards/margins": 0.2821243703365326, |
|
"rewards/rejected": -7.990015983581543, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.9945040565297043, |
|
"grad_norm": 17.668181816444864, |
|
"learning_rate": 3.2175747716822744e-11, |
|
"logits/chosen": -1.3433798551559448, |
|
"logits/rejected": -1.3304665088653564, |
|
"logps/chosen": -8.101046562194824, |
|
"logps/rejected": -8.15410041809082, |
|
"loss": 8.9813, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -8.101046562194824, |
|
"rewards/margins": 0.05305204540491104, |
|
"rewards/rejected": -8.15410041809082, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.998691442030882, |
|
"step": 477, |
|
"total_flos": 0.0, |
|
"train_loss": 8.967987340451286, |
|
"train_runtime": 8184.2286, |
|
"train_samples_per_second": 7.47, |
|
"train_steps_per_second": 0.058 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 477, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000000, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|