|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9993414204074695, |
|
"eval_steps": 100, |
|
"global_step": 1470, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 3.4013605442176867e-09, |
|
"logits/chosen": -2.8035497665405273, |
|
"logits/rejected": -2.7962629795074463, |
|
"logps/chosen": -211.36532592773438, |
|
"logps/rejected": -294.74530029296875, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.2265625, |
|
"rewards/chosen": 0.0010320872534066439, |
|
"rewards/margins": 0.0005493065109476447, |
|
"rewards/rejected": 0.0004827805096283555, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.4013605442176873e-08, |
|
"logits/chosen": -2.7791833877563477, |
|
"logits/rejected": -2.804030418395996, |
|
"logps/chosen": -240.9124298095703, |
|
"logps/rejected": -369.5000305175781, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.4696180522441864, |
|
"rewards/chosen": 0.00038262151065282524, |
|
"rewards/margins": 0.0016919042682275176, |
|
"rewards/rejected": -0.00130928261205554, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6.802721088435375e-08, |
|
"logits/chosen": -2.7648768424987793, |
|
"logits/rejected": -2.78273606300354, |
|
"logps/chosen": -245.15121459960938, |
|
"logps/rejected": -350.14898681640625, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.5023437738418579, |
|
"rewards/chosen": 0.0015446910401806235, |
|
"rewards/margins": 0.0005673653213307261, |
|
"rewards/rejected": 0.000977325951680541, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0204081632653061e-07, |
|
"logits/chosen": -2.8178772926330566, |
|
"logits/rejected": -2.786083221435547, |
|
"logps/chosen": -240.51516723632812, |
|
"logps/rejected": -352.41339111328125, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.508593738079071, |
|
"rewards/chosen": 0.0015831931959837675, |
|
"rewards/margins": 0.001221821061335504, |
|
"rewards/rejected": 0.00036137248389422894, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.360544217687075e-07, |
|
"logits/chosen": -2.803492307662964, |
|
"logits/rejected": -2.7716286182403564, |
|
"logps/chosen": -235.7887725830078, |
|
"logps/rejected": -359.8059997558594, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.5234375, |
|
"rewards/chosen": 0.0033938586711883545, |
|
"rewards/margins": 0.004743899218738079, |
|
"rewards/rejected": -0.001350040198303759, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.7006802721088434e-07, |
|
"logits/chosen": -2.8103866577148438, |
|
"logits/rejected": -2.803828716278076, |
|
"logps/chosen": -245.4801483154297, |
|
"logps/rejected": -306.09783935546875, |
|
"loss": 0.6899, |
|
"rewards/accuracies": 0.5640624761581421, |
|
"rewards/chosen": 0.00749587407335639, |
|
"rewards/margins": 0.007244518492370844, |
|
"rewards/rejected": 0.00025135590112768114, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.0408163265306121e-07, |
|
"logits/chosen": -2.7881524562835693, |
|
"logits/rejected": -2.808814525604248, |
|
"logps/chosen": -269.1226501464844, |
|
"logps/rejected": -339.7763977050781, |
|
"loss": 0.6874, |
|
"rewards/accuracies": 0.6015625, |
|
"rewards/chosen": 0.011595133692026138, |
|
"rewards/margins": 0.012225830927491188, |
|
"rewards/rejected": -0.000630697060842067, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.3809523809523806e-07, |
|
"logits/chosen": -2.7982544898986816, |
|
"logits/rejected": -2.765774726867676, |
|
"logps/chosen": -258.89117431640625, |
|
"logps/rejected": -372.06451416015625, |
|
"loss": 0.6864, |
|
"rewards/accuracies": 0.6148437261581421, |
|
"rewards/chosen": 0.013396549038589, |
|
"rewards/margins": 0.014446373097598553, |
|
"rewards/rejected": -0.0010498259216547012, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.72108843537415e-07, |
|
"logits/chosen": -2.8109402656555176, |
|
"logits/rejected": -2.7843804359436035, |
|
"logps/chosen": -244.15817260742188, |
|
"logps/rejected": -369.6734313964844, |
|
"loss": 0.6821, |
|
"rewards/accuracies": 0.6742187738418579, |
|
"rewards/chosen": 0.02298940345644951, |
|
"rewards/margins": 0.023194540292024612, |
|
"rewards/rejected": -0.0002051351184491068, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.0612244897959183e-07, |
|
"logits/chosen": -2.8090157508850098, |
|
"logits/rejected": -2.7707672119140625, |
|
"logps/chosen": -222.1091766357422, |
|
"logps/rejected": -365.6192321777344, |
|
"loss": 0.6766, |
|
"rewards/accuracies": 0.7242187261581421, |
|
"rewards/chosen": 0.030939970165491104, |
|
"rewards/margins": 0.03436826914548874, |
|
"rewards/rejected": -0.0034283031709492207, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.401360544217687e-07, |
|
"logits/chosen": -2.7735049724578857, |
|
"logits/rejected": -2.7935452461242676, |
|
"logps/chosen": -251.73049926757812, |
|
"logps/rejected": -388.00115966796875, |
|
"loss": 0.6728, |
|
"rewards/accuracies": 0.735156238079071, |
|
"rewards/chosen": 0.03729977086186409, |
|
"rewards/margins": 0.04232599213719368, |
|
"rewards/rejected": -0.0050262222066521645, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.741496598639456e-07, |
|
"logits/chosen": -2.797628164291382, |
|
"logits/rejected": -2.784834384918213, |
|
"logps/chosen": -255.72265625, |
|
"logps/rejected": -349.15985107421875, |
|
"loss": 0.6651, |
|
"rewards/accuracies": 0.788281261920929, |
|
"rewards/chosen": 0.05175922438502312, |
|
"rewards/margins": 0.05847715586423874, |
|
"rewards/rejected": -0.00671793520450592, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.0816326530612243e-07, |
|
"logits/chosen": -2.7973737716674805, |
|
"logits/rejected": -2.7825686931610107, |
|
"logps/chosen": -252.3303985595703, |
|
"logps/rejected": -348.4207458496094, |
|
"loss": 0.6604, |
|
"rewards/accuracies": 0.813281238079071, |
|
"rewards/chosen": 0.06004839017987251, |
|
"rewards/margins": 0.06873828917741776, |
|
"rewards/rejected": -0.008689895272254944, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.421768707482993e-07, |
|
"logits/chosen": -2.7856903076171875, |
|
"logits/rejected": -2.8103625774383545, |
|
"logps/chosen": -248.4453125, |
|
"logps/rejected": -316.520263671875, |
|
"loss": 0.6528, |
|
"rewards/accuracies": 0.8179687261581421, |
|
"rewards/chosen": 0.07609430700540543, |
|
"rewards/margins": 0.08578468859195709, |
|
"rewards/rejected": -0.00969038438051939, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.761904761904761e-07, |
|
"logits/chosen": -2.7964794635772705, |
|
"logits/rejected": -2.8038413524627686, |
|
"logps/chosen": -251.0780029296875, |
|
"logps/rejected": -380.4024353027344, |
|
"loss": 0.6409, |
|
"rewards/accuracies": 0.842968761920929, |
|
"rewards/chosen": 0.10089793056249619, |
|
"rewards/margins": 0.11140058934688568, |
|
"rewards/rejected": -0.010502668097615242, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.988662131519274e-07, |
|
"logits/chosen": -2.7733452320098877, |
|
"logits/rejected": -2.799926280975342, |
|
"logps/chosen": -259.34686279296875, |
|
"logps/rejected": -335.1527404785156, |
|
"loss": 0.6297, |
|
"rewards/accuracies": 0.8539062738418579, |
|
"rewards/chosen": 0.12008102238178253, |
|
"rewards/margins": 0.13700444996356964, |
|
"rewards/rejected": -0.016923416405916214, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.950869236583522e-07, |
|
"logits/chosen": -2.774165153503418, |
|
"logits/rejected": -2.7881526947021484, |
|
"logps/chosen": -245.5338134765625, |
|
"logps/rejected": -338.31597900390625, |
|
"loss": 0.6201, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": 0.14051470160484314, |
|
"rewards/margins": 0.1599283218383789, |
|
"rewards/rejected": -0.019413620233535767, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.91307634164777e-07, |
|
"logits/chosen": -2.811603546142578, |
|
"logits/rejected": -2.8174936771392822, |
|
"logps/chosen": -260.7558898925781, |
|
"logps/rejected": -356.88153076171875, |
|
"loss": 0.6041, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.1749168038368225, |
|
"rewards/margins": 0.19711166620254517, |
|
"rewards/rejected": -0.022194867953658104, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.875283446712018e-07, |
|
"logits/chosen": -2.7915146350860596, |
|
"logits/rejected": -2.7889480590820312, |
|
"logps/chosen": -264.36138916015625, |
|
"logps/rejected": -353.7435607910156, |
|
"loss": 0.5926, |
|
"rewards/accuracies": 0.883593738079071, |
|
"rewards/chosen": 0.19911792874336243, |
|
"rewards/margins": 0.22633683681488037, |
|
"rewards/rejected": -0.02721891924738884, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.837490551776266e-07, |
|
"logits/chosen": -2.7990036010742188, |
|
"logits/rejected": -2.7916808128356934, |
|
"logps/chosen": -257.4069519042969, |
|
"logps/rejected": -372.6297302246094, |
|
"loss": 0.5799, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.22631244361400604, |
|
"rewards/margins": 0.2581940293312073, |
|
"rewards/rejected": -0.031881578266620636, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.799697656840514e-07, |
|
"logits/chosen": -2.7753312587738037, |
|
"logits/rejected": -2.7730696201324463, |
|
"logps/chosen": -259.2568054199219, |
|
"logps/rejected": -390.26995849609375, |
|
"loss": 0.564, |
|
"rewards/accuracies": 0.889843761920929, |
|
"rewards/chosen": 0.25861743092536926, |
|
"rewards/margins": 0.30055442452430725, |
|
"rewards/rejected": -0.0419369637966156, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.761904761904761e-07, |
|
"logits/chosen": -2.7830989360809326, |
|
"logits/rejected": -2.7885472774505615, |
|
"logps/chosen": -229.49685668945312, |
|
"logps/rejected": -346.35784912109375, |
|
"loss": 0.5551, |
|
"rewards/accuracies": 0.91015625, |
|
"rewards/chosen": 0.28561651706695557, |
|
"rewards/margins": 0.32180091738700867, |
|
"rewards/rejected": -0.03618443384766579, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.7241118669690096e-07, |
|
"logits/chosen": -2.7914628982543945, |
|
"logits/rejected": -2.7812819480895996, |
|
"logps/chosen": -277.1968078613281, |
|
"logps/rejected": -334.34124755859375, |
|
"loss": 0.5473, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.30997538566589355, |
|
"rewards/margins": 0.3486320972442627, |
|
"rewards/rejected": -0.038656704127788544, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.6863189720332574e-07, |
|
"logits/chosen": -2.7915186882019043, |
|
"logits/rejected": -2.7635109424591064, |
|
"logps/chosen": -230.6345672607422, |
|
"logps/rejected": -366.45855712890625, |
|
"loss": 0.5283, |
|
"rewards/accuracies": 0.901562511920929, |
|
"rewards/chosen": 0.3480406403541565, |
|
"rewards/margins": 0.3980127274990082, |
|
"rewards/rejected": -0.04997207969427109, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.648526077097505e-07, |
|
"logits/chosen": -2.8176677227020264, |
|
"logits/rejected": -2.8094589710235596, |
|
"logps/chosen": -255.73318481445312, |
|
"logps/rejected": -356.473876953125, |
|
"loss": 0.5141, |
|
"rewards/accuracies": 0.905468761920929, |
|
"rewards/chosen": 0.38035809993743896, |
|
"rewards/margins": 0.4426742494106293, |
|
"rewards/rejected": -0.062316179275512695, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.6107331821617536e-07, |
|
"logits/chosen": -2.778831958770752, |
|
"logits/rejected": -2.7532734870910645, |
|
"logps/chosen": -260.0787658691406, |
|
"logps/rejected": -382.69403076171875, |
|
"loss": 0.5037, |
|
"rewards/accuracies": 0.9078124761581421, |
|
"rewards/chosen": 0.4094300866127014, |
|
"rewards/margins": 0.4735100269317627, |
|
"rewards/rejected": -0.06407993286848068, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.5729402872260014e-07, |
|
"logits/chosen": -2.7875959873199463, |
|
"logits/rejected": -2.789522647857666, |
|
"logps/chosen": -245.36215209960938, |
|
"logps/rejected": -398.8630676269531, |
|
"loss": 0.4946, |
|
"rewards/accuracies": 0.897656261920929, |
|
"rewards/chosen": 0.43164581060409546, |
|
"rewards/margins": 0.506696879863739, |
|
"rewards/rejected": -0.07505108416080475, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.535147392290249e-07, |
|
"logits/chosen": -2.7784600257873535, |
|
"logits/rejected": -2.743320941925049, |
|
"logps/chosen": -240.0518035888672, |
|
"logps/rejected": -373.5130920410156, |
|
"loss": 0.4891, |
|
"rewards/accuracies": 0.89453125, |
|
"rewards/chosen": 0.45601949095726013, |
|
"rewards/margins": 0.5297552347183228, |
|
"rewards/rejected": -0.07373576611280441, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.497354497354497e-07, |
|
"logits/chosen": -2.777036190032959, |
|
"logits/rejected": -2.7678191661834717, |
|
"logps/chosen": -264.9656677246094, |
|
"logps/rejected": -373.12042236328125, |
|
"loss": 0.4766, |
|
"rewards/accuracies": 0.9156249761581421, |
|
"rewards/chosen": 0.47401291131973267, |
|
"rewards/margins": 0.5673891305923462, |
|
"rewards/rejected": -0.09337621927261353, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.459561602418745e-07, |
|
"logits/chosen": -2.7813751697540283, |
|
"logits/rejected": -2.7827224731445312, |
|
"logps/chosen": -239.7397918701172, |
|
"logps/rejected": -392.6272888183594, |
|
"loss": 0.4603, |
|
"rewards/accuracies": 0.9117187261581421, |
|
"rewards/chosen": 0.5112585425376892, |
|
"rewards/margins": 0.6238077878952026, |
|
"rewards/rejected": -0.11254926025867462, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.421768707482993e-07, |
|
"logits/chosen": -2.784381628036499, |
|
"logits/rejected": -2.7823455333709717, |
|
"logps/chosen": -247.23696899414062, |
|
"logps/rejected": -340.01971435546875, |
|
"loss": 0.4569, |
|
"rewards/accuracies": 0.909375011920929, |
|
"rewards/chosen": 0.5431731939315796, |
|
"rewards/margins": 0.6343038082122803, |
|
"rewards/rejected": -0.09113059937953949, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.383975812547241e-07, |
|
"logits/chosen": -2.7919013500213623, |
|
"logits/rejected": -2.7927372455596924, |
|
"logps/chosen": -244.9982147216797, |
|
"logps/rejected": -345.5526428222656, |
|
"loss": 0.4422, |
|
"rewards/accuracies": 0.922656238079071, |
|
"rewards/chosen": 0.5760600566864014, |
|
"rewards/margins": 0.6899352669715881, |
|
"rewards/rejected": -0.11387525498867035, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.346182917611489e-07, |
|
"logits/chosen": -2.786698341369629, |
|
"logits/rejected": -2.7934978008270264, |
|
"logps/chosen": -255.37142944335938, |
|
"logps/rejected": -399.12957763671875, |
|
"loss": 0.4344, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.5843140482902527, |
|
"rewards/margins": 0.7244275808334351, |
|
"rewards/rejected": -0.1401134431362152, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.308390022675737e-07, |
|
"logits/chosen": -2.7745113372802734, |
|
"logits/rejected": -2.7805206775665283, |
|
"logps/chosen": -252.92514038085938, |
|
"logps/rejected": -392.51165771484375, |
|
"loss": 0.4332, |
|
"rewards/accuracies": 0.9046875238418579, |
|
"rewards/chosen": 0.5970828533172607, |
|
"rewards/margins": 0.7323796153068542, |
|
"rewards/rejected": -0.1352967619895935, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.270597127739985e-07, |
|
"logits/chosen": -2.783926486968994, |
|
"logits/rejected": -2.7875866889953613, |
|
"logps/chosen": -250.8353729248047, |
|
"logps/rejected": -335.03265380859375, |
|
"loss": 0.4175, |
|
"rewards/accuracies": 0.9164062738418579, |
|
"rewards/chosen": 0.6492675542831421, |
|
"rewards/margins": 0.7875067591667175, |
|
"rewards/rejected": -0.13823917508125305, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.2328042328042324e-07, |
|
"logits/chosen": -2.7828190326690674, |
|
"logits/rejected": -2.772052764892578, |
|
"logps/chosen": -236.33706665039062, |
|
"logps/rejected": -370.28399658203125, |
|
"loss": 0.4152, |
|
"rewards/accuracies": 0.9242187738418579, |
|
"rewards/chosen": 0.662378191947937, |
|
"rewards/margins": 0.7986767888069153, |
|
"rewards/rejected": -0.13629861176013947, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.19501133786848e-07, |
|
"logits/chosen": -2.780648946762085, |
|
"logits/rejected": -2.771820545196533, |
|
"logps/chosen": -228.22445678710938, |
|
"logps/rejected": -390.63751220703125, |
|
"loss": 0.4051, |
|
"rewards/accuracies": 0.9140625, |
|
"rewards/chosen": 0.6962443590164185, |
|
"rewards/margins": 0.8446155786514282, |
|
"rewards/rejected": -0.1483711302280426, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.1572184429327286e-07, |
|
"logits/chosen": -2.8088645935058594, |
|
"logits/rejected": -2.7826154232025146, |
|
"logps/chosen": -255.2318572998047, |
|
"logps/rejected": -344.69183349609375, |
|
"loss": 0.3908, |
|
"rewards/accuracies": 0.9203125238418579, |
|
"rewards/chosen": 0.7306076288223267, |
|
"rewards/margins": 0.900040328502655, |
|
"rewards/rejected": -0.16943258047103882, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.1194255479969764e-07, |
|
"logits/chosen": -2.7837393283843994, |
|
"logits/rejected": -2.754739284515381, |
|
"logps/chosen": -252.39779663085938, |
|
"logps/rejected": -347.7734069824219, |
|
"loss": 0.4019, |
|
"rewards/accuracies": 0.907031238079071, |
|
"rewards/chosen": 0.7146260738372803, |
|
"rewards/margins": 0.8642898797988892, |
|
"rewards/rejected": -0.14966385066509247, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.0816326530612243e-07, |
|
"logits/chosen": -2.793994426727295, |
|
"logits/rejected": -2.789456605911255, |
|
"logps/chosen": -250.083984375, |
|
"logps/rejected": -345.2536315917969, |
|
"loss": 0.3843, |
|
"rewards/accuracies": 0.9203125238418579, |
|
"rewards/chosen": 0.760775089263916, |
|
"rewards/margins": 0.9284068942070007, |
|
"rewards/rejected": -0.1676318198442459, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.0438397581254726e-07, |
|
"logits/chosen": -2.7863235473632812, |
|
"logits/rejected": -2.7660741806030273, |
|
"logps/chosen": -243.2860565185547, |
|
"logps/rejected": -375.15283203125, |
|
"loss": 0.3736, |
|
"rewards/accuracies": 0.9195312261581421, |
|
"rewards/chosen": 0.7728086113929749, |
|
"rewards/margins": 0.9798704385757446, |
|
"rewards/rejected": -0.20706184208393097, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.0060468631897205e-07, |
|
"logits/chosen": -2.7740797996520996, |
|
"logits/rejected": -2.787078857421875, |
|
"logps/chosen": -231.3814239501953, |
|
"logps/rejected": -373.4275817871094, |
|
"loss": 0.3779, |
|
"rewards/accuracies": 0.9140625, |
|
"rewards/chosen": 0.786165177822113, |
|
"rewards/margins": 0.9645744562149048, |
|
"rewards/rejected": -0.1784091293811798, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.968253968253968e-07, |
|
"logits/chosen": -2.7854466438293457, |
|
"logits/rejected": -2.782599449157715, |
|
"logps/chosen": -234.27853393554688, |
|
"logps/rejected": -341.40106201171875, |
|
"loss": 0.3758, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.8017100095748901, |
|
"rewards/margins": 0.9820283651351929, |
|
"rewards/rejected": -0.1803184449672699, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.930461073318216e-07, |
|
"logits/chosen": -2.7634427547454834, |
|
"logits/rejected": -2.7768495082855225, |
|
"logps/chosen": -230.73318481445312, |
|
"logps/rejected": -427.71917724609375, |
|
"loss": 0.3665, |
|
"rewards/accuracies": 0.91796875, |
|
"rewards/chosen": 0.8091424703598022, |
|
"rewards/margins": 1.025179386138916, |
|
"rewards/rejected": -0.2160368263721466, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.892668178382464e-07, |
|
"logits/chosen": -2.774629592895508, |
|
"logits/rejected": -2.7814247608184814, |
|
"logps/chosen": -253.4683074951172, |
|
"logps/rejected": -386.40216064453125, |
|
"loss": 0.3495, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.8851088285446167, |
|
"rewards/margins": 1.123652696609497, |
|
"rewards/rejected": -0.23854386806488037, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.854875283446712e-07, |
|
"logits/chosen": -2.766551971435547, |
|
"logits/rejected": -2.7709641456604004, |
|
"logps/chosen": -271.8524475097656, |
|
"logps/rejected": -379.4809265136719, |
|
"loss": 0.3575, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.8432048559188843, |
|
"rewards/margins": 1.0976295471191406, |
|
"rewards/rejected": -0.254424512386322, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.8170823885109596e-07, |
|
"logits/chosen": -2.8009865283966064, |
|
"logits/rejected": -2.7705283164978027, |
|
"logps/chosen": -241.07632446289062, |
|
"logps/rejected": -366.87127685546875, |
|
"loss": 0.3459, |
|
"rewards/accuracies": 0.9281250238418579, |
|
"rewards/chosen": 0.9000816345214844, |
|
"rewards/margins": 1.1292930841445923, |
|
"rewards/rejected": -0.22921133041381836, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.779289493575208e-07, |
|
"logits/chosen": -2.7855477333068848, |
|
"logits/rejected": -2.771469831466675, |
|
"logps/chosen": -248.2216033935547, |
|
"logps/rejected": -379.58709716796875, |
|
"loss": 0.3488, |
|
"rewards/accuracies": 0.913281261920929, |
|
"rewards/chosen": 0.8979974985122681, |
|
"rewards/margins": 1.1383633613586426, |
|
"rewards/rejected": -0.2403658926486969, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.741496598639456e-07, |
|
"logits/chosen": -2.783979892730713, |
|
"logits/rejected": -2.787400722503662, |
|
"logps/chosen": -234.78939819335938, |
|
"logps/rejected": -391.0784912109375, |
|
"loss": 0.3396, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 0.8895782232284546, |
|
"rewards/margins": 1.1713939905166626, |
|
"rewards/rejected": -0.281815767288208, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.703703703703703e-07, |
|
"logits/chosen": -2.7796401977539062, |
|
"logits/rejected": -2.78939151763916, |
|
"logps/chosen": -255.79556274414062, |
|
"logps/rejected": -376.7617492675781, |
|
"loss": 0.3496, |
|
"rewards/accuracies": 0.91796875, |
|
"rewards/chosen": 0.8880151510238647, |
|
"rewards/margins": 1.1511998176574707, |
|
"rewards/rejected": -0.26318463683128357, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.6659108087679515e-07, |
|
"logits/chosen": -2.784447193145752, |
|
"logits/rejected": -2.7811279296875, |
|
"logps/chosen": -240.26943969726562, |
|
"logps/rejected": -373.43585205078125, |
|
"loss": 0.3317, |
|
"rewards/accuracies": 0.9242187738418579, |
|
"rewards/chosen": 0.9207477569580078, |
|
"rewards/margins": 1.2141565084457397, |
|
"rewards/rejected": -0.2934088110923767, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.6281179138321993e-07, |
|
"logits/chosen": -2.7936480045318604, |
|
"logits/rejected": -2.7741034030914307, |
|
"logps/chosen": -253.25625610351562, |
|
"logps/rejected": -388.1740417480469, |
|
"loss": 0.3307, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.9261225461959839, |
|
"rewards/margins": 1.2367761135101318, |
|
"rewards/rejected": -0.310653418302536, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.590325018896447e-07, |
|
"logits/chosen": -2.764971971511841, |
|
"logits/rejected": -2.779900074005127, |
|
"logps/chosen": -277.50433349609375, |
|
"logps/rejected": -390.9405822753906, |
|
"loss": 0.3301, |
|
"rewards/accuracies": 0.9203125238418579, |
|
"rewards/chosen": 0.9403823614120483, |
|
"rewards/margins": 1.250135898590088, |
|
"rewards/rejected": -0.3097533881664276, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.5525321239606955e-07, |
|
"logits/chosen": -2.7859063148498535, |
|
"logits/rejected": -2.7852673530578613, |
|
"logps/chosen": -240.83847045898438, |
|
"logps/rejected": -329.5592346191406, |
|
"loss": 0.3185, |
|
"rewards/accuracies": 0.9296875, |
|
"rewards/chosen": 0.998257040977478, |
|
"rewards/margins": 1.3062750101089478, |
|
"rewards/rejected": -0.30801790952682495, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.5147392290249433e-07, |
|
"logits/chosen": -2.7856059074401855, |
|
"logits/rejected": -2.7904558181762695, |
|
"logps/chosen": -256.13116455078125, |
|
"logps/rejected": -359.0440673828125, |
|
"loss": 0.3201, |
|
"rewards/accuracies": 0.917187511920929, |
|
"rewards/chosen": 0.9812418222427368, |
|
"rewards/margins": 1.2980186939239502, |
|
"rewards/rejected": -0.316776841878891, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.4769463340891906e-07, |
|
"logits/chosen": -2.7746291160583496, |
|
"logits/rejected": -2.8083655834198, |
|
"logps/chosen": -243.3596649169922, |
|
"logps/rejected": -381.6620788574219, |
|
"loss": 0.321, |
|
"rewards/accuracies": 0.921093761920929, |
|
"rewards/chosen": 0.9785689115524292, |
|
"rewards/margins": 1.3102028369903564, |
|
"rewards/rejected": -0.33163395524024963, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.439153439153439e-07, |
|
"logits/chosen": -2.788200616836548, |
|
"logits/rejected": -2.806088924407959, |
|
"logps/chosen": -243.46371459960938, |
|
"logps/rejected": -353.0728454589844, |
|
"loss": 0.3037, |
|
"rewards/accuracies": 0.9281250238418579, |
|
"rewards/chosen": 1.0423057079315186, |
|
"rewards/margins": 1.40134596824646, |
|
"rewards/rejected": -0.35903996229171753, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.401360544217687e-07, |
|
"logits/chosen": -2.8205642700195312, |
|
"logits/rejected": -2.75651216506958, |
|
"logps/chosen": -225.49380493164062, |
|
"logps/rejected": -383.3102111816406, |
|
"loss": 0.2961, |
|
"rewards/accuracies": 0.934374988079071, |
|
"rewards/chosen": 1.0518951416015625, |
|
"rewards/margins": 1.416092872619629, |
|
"rewards/rejected": -0.3641977310180664, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.3635676492819346e-07, |
|
"logits/chosen": -2.778111696243286, |
|
"logits/rejected": -2.8062729835510254, |
|
"logps/chosen": -241.8183135986328, |
|
"logps/rejected": -360.12677001953125, |
|
"loss": 0.3026, |
|
"rewards/accuracies": 0.928906261920929, |
|
"rewards/chosen": 1.0336360931396484, |
|
"rewards/margins": 1.3975627422332764, |
|
"rewards/rejected": -0.3639264702796936, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.325774754346183e-07, |
|
"logits/chosen": -2.7760305404663086, |
|
"logits/rejected": -2.7639145851135254, |
|
"logps/chosen": -263.2132568359375, |
|
"logps/rejected": -326.3753356933594, |
|
"loss": 0.3079, |
|
"rewards/accuracies": 0.917187511920929, |
|
"rewards/chosen": 1.0236365795135498, |
|
"rewards/margins": 1.3882102966308594, |
|
"rewards/rejected": -0.36457380652427673, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.287981859410431e-07, |
|
"logits/chosen": -2.8040480613708496, |
|
"logits/rejected": -2.781839370727539, |
|
"logps/chosen": -232.688720703125, |
|
"logps/rejected": -341.75372314453125, |
|
"loss": 0.294, |
|
"rewards/accuracies": 0.92578125, |
|
"rewards/chosen": 1.0763448476791382, |
|
"rewards/margins": 1.459729790687561, |
|
"rewards/rejected": -0.38338491320610046, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.2501889644746787e-07, |
|
"logits/chosen": -2.797874927520752, |
|
"logits/rejected": -2.748481512069702, |
|
"logps/chosen": -232.8326873779297, |
|
"logps/rejected": -369.7907409667969, |
|
"loss": 0.2837, |
|
"rewards/accuracies": 0.938281238079071, |
|
"rewards/chosen": 1.101806640625, |
|
"rewards/margins": 1.498957633972168, |
|
"rewards/rejected": -0.39715105295181274, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.2123960695389265e-07, |
|
"logits/chosen": -2.780925989151001, |
|
"logits/rejected": -2.735792636871338, |
|
"logps/chosen": -222.20596313476562, |
|
"logps/rejected": -380.5815124511719, |
|
"loss": 0.2935, |
|
"rewards/accuracies": 0.921093761920929, |
|
"rewards/chosen": 1.0859084129333496, |
|
"rewards/margins": 1.4819860458374023, |
|
"rewards/rejected": -0.39607763290405273, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.1746031746031743e-07, |
|
"logits/chosen": -2.7768056392669678, |
|
"logits/rejected": -2.764166831970215, |
|
"logps/chosen": -236.9914093017578, |
|
"logps/rejected": -345.6325378417969, |
|
"loss": 0.2895, |
|
"rewards/accuracies": 0.932812511920929, |
|
"rewards/chosen": 1.1014459133148193, |
|
"rewards/margins": 1.5069670677185059, |
|
"rewards/rejected": -0.40552106499671936, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.136810279667422e-07, |
|
"logits/chosen": -2.7987258434295654, |
|
"logits/rejected": -2.8054118156433105, |
|
"logps/chosen": -235.97109985351562, |
|
"logps/rejected": -330.56439208984375, |
|
"loss": 0.2775, |
|
"rewards/accuracies": 0.93359375, |
|
"rewards/chosen": 1.1580729484558105, |
|
"rewards/margins": 1.5699806213378906, |
|
"rewards/rejected": -0.4119076728820801, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.0990173847316705e-07, |
|
"logits/chosen": -2.7858521938323975, |
|
"logits/rejected": -2.779346466064453, |
|
"logps/chosen": -257.5158386230469, |
|
"logps/rejected": -322.25103759765625, |
|
"loss": 0.287, |
|
"rewards/accuracies": 0.9195312261581421, |
|
"rewards/chosen": 1.1325995922088623, |
|
"rewards/margins": 1.5360453128814697, |
|
"rewards/rejected": -0.40344563126564026, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.0612244897959183e-07, |
|
"logits/chosen": -2.7976508140563965, |
|
"logits/rejected": -2.8010151386260986, |
|
"logps/chosen": -219.1446533203125, |
|
"logps/rejected": -315.2838439941406, |
|
"loss": 0.2703, |
|
"rewards/accuracies": 0.9453125, |
|
"rewards/chosen": 1.1511547565460205, |
|
"rewards/margins": 1.5933144092559814, |
|
"rewards/rejected": -0.44215965270996094, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.023431594860166e-07, |
|
"logits/chosen": -2.767582416534424, |
|
"logits/rejected": -2.8024327754974365, |
|
"logps/chosen": -237.21578979492188, |
|
"logps/rejected": -314.68377685546875, |
|
"loss": 0.2637, |
|
"rewards/accuracies": 0.9359375238418579, |
|
"rewards/chosen": 1.1508355140686035, |
|
"rewards/margins": 1.6350256204605103, |
|
"rewards/rejected": -0.48419007658958435, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.9856386999244145e-07, |
|
"logits/chosen": -2.7926082611083984, |
|
"logits/rejected": -2.780251979827881, |
|
"logps/chosen": -244.810302734375, |
|
"logps/rejected": -347.9936828613281, |
|
"loss": 0.2784, |
|
"rewards/accuracies": 0.930468738079071, |
|
"rewards/chosen": 1.1081712245941162, |
|
"rewards/margins": 1.5819367170333862, |
|
"rewards/rejected": -0.47376567125320435, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.947845804988662e-07, |
|
"logits/chosen": -2.771953821182251, |
|
"logits/rejected": -2.768907070159912, |
|
"logps/chosen": -248.50332641601562, |
|
"logps/rejected": -360.6126403808594, |
|
"loss": 0.2897, |
|
"rewards/accuracies": 0.921875, |
|
"rewards/chosen": 1.0936378240585327, |
|
"rewards/margins": 1.5781736373901367, |
|
"rewards/rejected": -0.4845358729362488, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.9100529100529097e-07, |
|
"logits/chosen": -2.7748546600341797, |
|
"logits/rejected": -2.7857470512390137, |
|
"logps/chosen": -227.1557159423828, |
|
"logps/rejected": -390.3030700683594, |
|
"loss": 0.2597, |
|
"rewards/accuracies": 0.9320312738418579, |
|
"rewards/chosen": 1.1781264543533325, |
|
"rewards/margins": 1.7109047174453735, |
|
"rewards/rejected": -0.5327781438827515, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.872260015117158e-07, |
|
"logits/chosen": -2.77628231048584, |
|
"logits/rejected": -2.7869679927825928, |
|
"logps/chosen": -245.57839965820312, |
|
"logps/rejected": -326.86212158203125, |
|
"loss": 0.2613, |
|
"rewards/accuracies": 0.934374988079071, |
|
"rewards/chosen": 1.1419804096221924, |
|
"rewards/margins": 1.6727325916290283, |
|
"rewards/rejected": -0.5307522416114807, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.834467120181406e-07, |
|
"logits/chosen": -2.7608537673950195, |
|
"logits/rejected": -2.7646660804748535, |
|
"logps/chosen": -241.5836944580078, |
|
"logps/rejected": -379.62860107421875, |
|
"loss": 0.2738, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 1.1373337507247925, |
|
"rewards/margins": 1.6593284606933594, |
|
"rewards/rejected": -0.5219947099685669, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.7966742252456537e-07, |
|
"logits/chosen": -2.777465343475342, |
|
"logits/rejected": -2.801975965499878, |
|
"logps/chosen": -227.2059326171875, |
|
"logps/rejected": -369.7891540527344, |
|
"loss": 0.2554, |
|
"rewards/accuracies": 0.934374988079071, |
|
"rewards/chosen": 1.209241271018982, |
|
"rewards/margins": 1.7282158136367798, |
|
"rewards/rejected": -0.5189744234085083, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.758881330309902e-07, |
|
"logits/chosen": -2.7639384269714355, |
|
"logits/rejected": -2.7558932304382324, |
|
"logps/chosen": -255.972412109375, |
|
"logps/rejected": -410.17431640625, |
|
"loss": 0.2766, |
|
"rewards/accuracies": 0.9156249761581421, |
|
"rewards/chosen": 1.1485779285430908, |
|
"rewards/margins": 1.672486662864685, |
|
"rewards/rejected": -0.5239086151123047, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.72108843537415e-07, |
|
"logits/chosen": -2.7429962158203125, |
|
"logits/rejected": -2.7603325843811035, |
|
"logps/chosen": -248.05697631835938, |
|
"logps/rejected": -382.65863037109375, |
|
"loss": 0.2692, |
|
"rewards/accuracies": 0.934374988079071, |
|
"rewards/chosen": 1.1628259420394897, |
|
"rewards/margins": 1.7002170085906982, |
|
"rewards/rejected": -0.5373910665512085, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.683295540438397e-07, |
|
"logits/chosen": -2.7732365131378174, |
|
"logits/rejected": -2.7899222373962402, |
|
"logps/chosen": -230.82577514648438, |
|
"logps/rejected": -356.39349365234375, |
|
"loss": 0.262, |
|
"rewards/accuracies": 0.93359375, |
|
"rewards/chosen": 1.1516262292861938, |
|
"rewards/margins": 1.7132419347763062, |
|
"rewards/rejected": -0.5616158843040466, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.645502645502645e-07, |
|
"logits/chosen": -2.764669895172119, |
|
"logits/rejected": -2.7641212940216064, |
|
"logps/chosen": -246.3456573486328, |
|
"logps/rejected": -370.99896240234375, |
|
"loss": 0.2701, |
|
"rewards/accuracies": 0.9164062738418579, |
|
"rewards/chosen": 1.191197395324707, |
|
"rewards/margins": 1.7232650518417358, |
|
"rewards/rejected": -0.5320678949356079, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6077097505668934e-07, |
|
"logits/chosen": -2.7817633152008057, |
|
"logits/rejected": -2.7922616004943848, |
|
"logps/chosen": -256.2757873535156, |
|
"logps/rejected": -356.1881408691406, |
|
"loss": 0.2571, |
|
"rewards/accuracies": 0.9359375238418579, |
|
"rewards/chosen": 1.2059863805770874, |
|
"rewards/margins": 1.7701711654663086, |
|
"rewards/rejected": -0.5641847848892212, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.569916855631141e-07, |
|
"logits/chosen": -2.7949161529541016, |
|
"logits/rejected": -2.800379514694214, |
|
"logps/chosen": -219.2698516845703, |
|
"logps/rejected": -384.794189453125, |
|
"loss": 0.2512, |
|
"rewards/accuracies": 0.9296875, |
|
"rewards/chosen": 1.2469325065612793, |
|
"rewards/margins": 1.8279892206192017, |
|
"rewards/rejected": -0.5810565948486328, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.532123960695389e-07, |
|
"logits/chosen": -2.7864224910736084, |
|
"logits/rejected": -2.8051304817199707, |
|
"logps/chosen": -243.42105102539062, |
|
"logps/rejected": -376.7647399902344, |
|
"loss": 0.2455, |
|
"rewards/accuracies": 0.9398437738418579, |
|
"rewards/chosen": 1.25649094581604, |
|
"rewards/margins": 1.8448721170425415, |
|
"rewards/rejected": -0.5883811712265015, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.494331065759637e-07, |
|
"logits/chosen": -2.7794528007507324, |
|
"logits/rejected": -2.787205457687378, |
|
"logps/chosen": -239.23776245117188, |
|
"logps/rejected": -348.8122863769531, |
|
"loss": 0.2407, |
|
"rewards/accuracies": 0.940625011920929, |
|
"rewards/chosen": 1.279539942741394, |
|
"rewards/margins": 1.9069591760635376, |
|
"rewards/rejected": -0.627419114112854, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.456538170823885e-07, |
|
"logits/chosen": -2.7908012866973877, |
|
"logits/rejected": -2.775237798690796, |
|
"logps/chosen": -237.18807983398438, |
|
"logps/rejected": -347.73028564453125, |
|
"loss": 0.2346, |
|
"rewards/accuracies": 0.938281238079071, |
|
"rewards/chosen": 1.2818529605865479, |
|
"rewards/margins": 1.891405701637268, |
|
"rewards/rejected": -0.6095527410507202, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.418745275888133e-07, |
|
"logits/chosen": -2.788677453994751, |
|
"logits/rejected": -2.759464740753174, |
|
"logps/chosen": -244.3543243408203, |
|
"logps/rejected": -384.2773742675781, |
|
"loss": 0.249, |
|
"rewards/accuracies": 0.9273437261581421, |
|
"rewards/chosen": 1.2608978748321533, |
|
"rewards/margins": 1.8487341403961182, |
|
"rewards/rejected": -0.5878363251686096, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3809523809523806e-07, |
|
"logits/chosen": -2.7865688800811768, |
|
"logits/rejected": -2.744267463684082, |
|
"logps/chosen": -225.56716918945312, |
|
"logps/rejected": -373.64788818359375, |
|
"loss": 0.2401, |
|
"rewards/accuracies": 0.940625011920929, |
|
"rewards/chosen": 1.2590898275375366, |
|
"rewards/margins": 1.872513771057129, |
|
"rewards/rejected": -0.6134239435195923, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.3431594860166287e-07, |
|
"logits/chosen": -2.763679027557373, |
|
"logits/rejected": -2.7585010528564453, |
|
"logps/chosen": -234.14706420898438, |
|
"logps/rejected": -332.43975830078125, |
|
"loss": 0.2506, |
|
"rewards/accuracies": 0.934374988079071, |
|
"rewards/chosen": 1.2429834604263306, |
|
"rewards/margins": 1.8476206064224243, |
|
"rewards/rejected": -0.6046372056007385, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.3053665910808768e-07, |
|
"logits/chosen": -2.7579002380371094, |
|
"logits/rejected": -2.7620043754577637, |
|
"logps/chosen": -236.3244171142578, |
|
"logps/rejected": -339.3128356933594, |
|
"loss": 0.2543, |
|
"rewards/accuracies": 0.936718761920929, |
|
"rewards/chosen": 1.218972086906433, |
|
"rewards/margins": 1.8291162252426147, |
|
"rewards/rejected": -0.6101440191268921, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.2675736961451246e-07, |
|
"logits/chosen": -2.7839019298553467, |
|
"logits/rejected": -2.7369167804718018, |
|
"logps/chosen": -219.27053833007812, |
|
"logps/rejected": -405.5704650878906, |
|
"loss": 0.2458, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 1.2801100015640259, |
|
"rewards/margins": 1.8934139013290405, |
|
"rewards/rejected": -0.6133038997650146, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.2297808012093725e-07, |
|
"logits/chosen": -2.782578945159912, |
|
"logits/rejected": -2.7683374881744385, |
|
"logps/chosen": -245.6527099609375, |
|
"logps/rejected": -378.6884765625, |
|
"loss": 0.2384, |
|
"rewards/accuracies": 0.934374988079071, |
|
"rewards/chosen": 1.321287751197815, |
|
"rewards/margins": 1.9386436939239502, |
|
"rewards/rejected": -0.6173557043075562, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.1919879062736206e-07, |
|
"logits/chosen": -2.7775015830993652, |
|
"logits/rejected": -2.752042293548584, |
|
"logps/chosen": -229.3787078857422, |
|
"logps/rejected": -356.0593566894531, |
|
"loss": 0.2423, |
|
"rewards/accuracies": 0.934374988079071, |
|
"rewards/chosen": 1.2723052501678467, |
|
"rewards/margins": 1.9301214218139648, |
|
"rewards/rejected": -0.6578160524368286, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.1541950113378684e-07, |
|
"logits/chosen": -2.768510580062866, |
|
"logits/rejected": -2.7404208183288574, |
|
"logps/chosen": -265.3998107910156, |
|
"logps/rejected": -373.4928283691406, |
|
"loss": 0.2467, |
|
"rewards/accuracies": 0.9320312738418579, |
|
"rewards/chosen": 1.264615774154663, |
|
"rewards/margins": 1.920330286026001, |
|
"rewards/rejected": -0.6557145714759827, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.1164021164021162e-07, |
|
"logits/chosen": -2.7891170978546143, |
|
"logits/rejected": -2.7741641998291016, |
|
"logps/chosen": -220.24307250976562, |
|
"logps/rejected": -358.5487976074219, |
|
"loss": 0.2284, |
|
"rewards/accuracies": 0.94140625, |
|
"rewards/chosen": 1.304023027420044, |
|
"rewards/margins": 1.9829524755477905, |
|
"rewards/rejected": -0.6789294481277466, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.0786092214663643e-07, |
|
"logits/chosen": -2.7575926780700684, |
|
"logits/rejected": -2.7642369270324707, |
|
"logps/chosen": -234.12026977539062, |
|
"logps/rejected": -384.3020935058594, |
|
"loss": 0.2373, |
|
"rewards/accuracies": 0.94140625, |
|
"rewards/chosen": 1.2832618951797485, |
|
"rewards/margins": 1.9688091278076172, |
|
"rewards/rejected": -0.6855469942092896, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.0408163265306121e-07, |
|
"logits/chosen": -2.766233444213867, |
|
"logits/rejected": -2.7951343059539795, |
|
"logps/chosen": -244.18026733398438, |
|
"logps/rejected": -320.21771240234375, |
|
"loss": 0.2259, |
|
"rewards/accuracies": 0.9398437738418579, |
|
"rewards/chosen": 1.312534213066101, |
|
"rewards/margins": 2.0482983589172363, |
|
"rewards/rejected": -0.7357643246650696, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.0030234315948602e-07, |
|
"logits/chosen": -2.768256664276123, |
|
"logits/rejected": -2.7545723915100098, |
|
"logps/chosen": -248.816650390625, |
|
"logps/rejected": -401.00958251953125, |
|
"loss": 0.234, |
|
"rewards/accuracies": 0.9320312738418579, |
|
"rewards/chosen": 1.3067686557769775, |
|
"rewards/margins": 2.003986358642578, |
|
"rewards/rejected": -0.6972178816795349, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.965230536659108e-07, |
|
"logits/chosen": -2.7718937397003174, |
|
"logits/rejected": -2.7864131927490234, |
|
"logps/chosen": -245.76220703125, |
|
"logps/rejected": -350.4901428222656, |
|
"loss": 0.2342, |
|
"rewards/accuracies": 0.938281238079071, |
|
"rewards/chosen": 1.3374592065811157, |
|
"rewards/margins": 2.001889228820801, |
|
"rewards/rejected": -0.6644300222396851, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.927437641723356e-07, |
|
"logits/chosen": -2.7670133113861084, |
|
"logits/rejected": -2.76993465423584, |
|
"logps/chosen": -227.41748046875, |
|
"logps/rejected": -354.6375427246094, |
|
"loss": 0.2386, |
|
"rewards/accuracies": 0.92578125, |
|
"rewards/chosen": 1.3317902088165283, |
|
"rewards/margins": 1.9966375827789307, |
|
"rewards/rejected": -0.6648473739624023, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.889644746787604e-07, |
|
"logits/chosen": -2.7860965728759766, |
|
"logits/rejected": -2.776639699935913, |
|
"logps/chosen": -257.2185363769531, |
|
"logps/rejected": -302.48846435546875, |
|
"loss": 0.2278, |
|
"rewards/accuracies": 0.9390624761581421, |
|
"rewards/chosen": 1.352912187576294, |
|
"rewards/margins": 2.036379814147949, |
|
"rewards/rejected": -0.6834677457809448, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.8518518518518516e-07, |
|
"logits/chosen": -2.7641091346740723, |
|
"logits/rejected": -2.7789313793182373, |
|
"logps/chosen": -256.19476318359375, |
|
"logps/rejected": -390.69549560546875, |
|
"loss": 0.2325, |
|
"rewards/accuracies": 0.942187488079071, |
|
"rewards/chosen": 1.3011709451675415, |
|
"rewards/margins": 2.039425849914551, |
|
"rewards/rejected": -0.738254964351654, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.8140589569160996e-07, |
|
"logits/chosen": -2.795642614364624, |
|
"logits/rejected": -2.7746355533599854, |
|
"logps/chosen": -234.4689483642578, |
|
"logps/rejected": -395.40618896484375, |
|
"loss": 0.223, |
|
"rewards/accuracies": 0.9390624761581421, |
|
"rewards/chosen": 1.3416879177093506, |
|
"rewards/margins": 2.1014368534088135, |
|
"rewards/rejected": -0.7597488164901733, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.7762660619803477e-07, |
|
"logits/chosen": -2.7756259441375732, |
|
"logits/rejected": -2.741664409637451, |
|
"logps/chosen": -242.3101348876953, |
|
"logps/rejected": -363.46160888671875, |
|
"loss": 0.2285, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 1.3446866273880005, |
|
"rewards/margins": 2.070406436920166, |
|
"rewards/rejected": -0.7257199287414551, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.7384731670445953e-07, |
|
"logits/chosen": -2.7595419883728027, |
|
"logits/rejected": -2.7858798503875732, |
|
"logps/chosen": -259.9520568847656, |
|
"logps/rejected": -358.3509216308594, |
|
"loss": 0.2273, |
|
"rewards/accuracies": 0.9359375238418579, |
|
"rewards/chosen": 1.3033568859100342, |
|
"rewards/margins": 2.0887067317962646, |
|
"rewards/rejected": -0.78534996509552, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.7006802721088434e-07, |
|
"logits/chosen": -2.768449068069458, |
|
"logits/rejected": -2.7718656063079834, |
|
"logps/chosen": -238.11740112304688, |
|
"logps/rejected": -354.0820007324219, |
|
"loss": 0.236, |
|
"rewards/accuracies": 0.9351562261581421, |
|
"rewards/chosen": 1.3048899173736572, |
|
"rewards/margins": 2.033565044403076, |
|
"rewards/rejected": -0.7286752462387085, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.6628873771730915e-07, |
|
"logits/chosen": -2.7650275230407715, |
|
"logits/rejected": -2.7476916313171387, |
|
"logps/chosen": -245.41885375976562, |
|
"logps/rejected": -343.54437255859375, |
|
"loss": 0.2357, |
|
"rewards/accuracies": 0.928906261920929, |
|
"rewards/chosen": 1.316489815711975, |
|
"rewards/margins": 2.062798023223877, |
|
"rewards/rejected": -0.7463082671165466, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.6250944822373393e-07, |
|
"logits/chosen": -2.7734358310699463, |
|
"logits/rejected": -2.7748751640319824, |
|
"logps/chosen": -237.48538208007812, |
|
"logps/rejected": -389.1809997558594, |
|
"loss": 0.2308, |
|
"rewards/accuracies": 0.930468738079071, |
|
"rewards/chosen": 1.2910696268081665, |
|
"rewards/margins": 2.079051971435547, |
|
"rewards/rejected": -0.7879821062088013, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.5873015873015872e-07, |
|
"logits/chosen": -2.7670979499816895, |
|
"logits/rejected": -2.769535779953003, |
|
"logps/chosen": -214.1968536376953, |
|
"logps/rejected": -331.4734802246094, |
|
"loss": 0.224, |
|
"rewards/accuracies": 0.9390624761581421, |
|
"rewards/chosen": 1.377071738243103, |
|
"rewards/margins": 2.1104674339294434, |
|
"rewards/rejected": -0.7333956956863403, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.5495086923658353e-07, |
|
"logits/chosen": -2.789698839187622, |
|
"logits/rejected": -2.7418367862701416, |
|
"logps/chosen": -233.3469696044922, |
|
"logps/rejected": -372.7334289550781, |
|
"loss": 0.2046, |
|
"rewards/accuracies": 0.948437511920929, |
|
"rewards/chosen": 1.4205210208892822, |
|
"rewards/margins": 2.2147347927093506, |
|
"rewards/rejected": -0.7942138910293579, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.511715797430083e-07, |
|
"logits/chosen": -2.7732410430908203, |
|
"logits/rejected": -2.7837493419647217, |
|
"logps/chosen": -240.2108917236328, |
|
"logps/rejected": -340.86712646484375, |
|
"loss": 0.2229, |
|
"rewards/accuracies": 0.932812511920929, |
|
"rewards/chosen": 1.365039348602295, |
|
"rewards/margins": 2.149728298187256, |
|
"rewards/rejected": -0.7846890091896057, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.473922902494331e-07, |
|
"logits/chosen": -2.762357711791992, |
|
"logits/rejected": -2.7503538131713867, |
|
"logps/chosen": -245.73129272460938, |
|
"logps/rejected": -367.2342529296875, |
|
"loss": 0.2247, |
|
"rewards/accuracies": 0.934374988079071, |
|
"rewards/chosen": 1.3420137166976929, |
|
"rewards/margins": 2.1435036659240723, |
|
"rewards/rejected": -0.8014899492263794, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.436130007558579e-07, |
|
"logits/chosen": -2.786447048187256, |
|
"logits/rejected": -2.7433903217315674, |
|
"logps/chosen": -259.77923583984375, |
|
"logps/rejected": -384.2717590332031, |
|
"loss": 0.2176, |
|
"rewards/accuracies": 0.938281238079071, |
|
"rewards/chosen": 1.4291341304779053, |
|
"rewards/margins": 2.1485352516174316, |
|
"rewards/rejected": -0.7194010019302368, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.3983371126228268e-07, |
|
"logits/chosen": -2.766045570373535, |
|
"logits/rejected": -2.783592700958252, |
|
"logps/chosen": -258.433349609375, |
|
"logps/rejected": -356.44293212890625, |
|
"loss": 0.2166, |
|
"rewards/accuracies": 0.9359375238418579, |
|
"rewards/chosen": 1.3983967304229736, |
|
"rewards/margins": 2.2020390033721924, |
|
"rewards/rejected": -0.8036419153213501, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.360544217687075e-07, |
|
"logits/chosen": -2.784245491027832, |
|
"logits/rejected": -2.7566187381744385, |
|
"logps/chosen": -251.7339324951172, |
|
"logps/rejected": -356.1120300292969, |
|
"loss": 0.2042, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.394803762435913, |
|
"rewards/margins": 2.219846487045288, |
|
"rewards/rejected": -0.8250430822372437, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.3227513227513225e-07, |
|
"logits/chosen": -2.768209218978882, |
|
"logits/rejected": -2.7927510738372803, |
|
"logps/chosen": -250.1661376953125, |
|
"logps/rejected": -341.25396728515625, |
|
"loss": 0.2216, |
|
"rewards/accuracies": 0.936718761920929, |
|
"rewards/chosen": 1.3784762620925903, |
|
"rewards/margins": 2.1875884532928467, |
|
"rewards/rejected": -0.8091121912002563, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.2849584278155706e-07, |
|
"logits/chosen": -2.755992889404297, |
|
"logits/rejected": -2.7883083820343018, |
|
"logps/chosen": -258.3106384277344, |
|
"logps/rejected": -338.23822021484375, |
|
"loss": 0.2233, |
|
"rewards/accuracies": 0.9359375238418579, |
|
"rewards/chosen": 1.371977686882019, |
|
"rewards/margins": 2.1515755653381348, |
|
"rewards/rejected": -0.7795979380607605, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.2471655328798184e-07, |
|
"logits/chosen": -2.765443801879883, |
|
"logits/rejected": -2.773919105529785, |
|
"logps/chosen": -228.51766967773438, |
|
"logps/rejected": -353.0353698730469, |
|
"loss": 0.2184, |
|
"rewards/accuracies": 0.9359375238418579, |
|
"rewards/chosen": 1.3785618543624878, |
|
"rewards/margins": 2.173300266265869, |
|
"rewards/rejected": -0.7947384119033813, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.2093726379440665e-07, |
|
"logits/chosen": -2.7806408405303955, |
|
"logits/rejected": -2.756528854370117, |
|
"logps/chosen": -227.71621704101562, |
|
"logps/rejected": -391.3194580078125, |
|
"loss": 0.2113, |
|
"rewards/accuracies": 0.9476562738418579, |
|
"rewards/chosen": 1.3853504657745361, |
|
"rewards/margins": 2.21071195602417, |
|
"rewards/rejected": -0.8253618478775024, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.1715797430083144e-07, |
|
"logits/chosen": -2.7610268592834473, |
|
"logits/rejected": -2.7615675926208496, |
|
"logps/chosen": -269.79010009765625, |
|
"logps/rejected": -378.21209716796875, |
|
"loss": 0.2102, |
|
"rewards/accuracies": 0.9398437738418579, |
|
"rewards/chosen": 1.3920191526412964, |
|
"rewards/margins": 2.2798304557800293, |
|
"rewards/rejected": -0.887811541557312, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.1337868480725623e-07, |
|
"logits/chosen": -2.776198625564575, |
|
"logits/rejected": -2.768550395965576, |
|
"logps/chosen": -246.81887817382812, |
|
"logps/rejected": -365.49249267578125, |
|
"loss": 0.2134, |
|
"rewards/accuracies": 0.93359375, |
|
"rewards/chosen": 1.4072265625, |
|
"rewards/margins": 2.2190985679626465, |
|
"rewards/rejected": -0.8118720054626465, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.0959939531368103e-07, |
|
"logits/chosen": -2.782680034637451, |
|
"logits/rejected": -2.7389519214630127, |
|
"logps/chosen": -239.7198944091797, |
|
"logps/rejected": -363.89215087890625, |
|
"loss": 0.2147, |
|
"rewards/accuracies": 0.942187488079071, |
|
"rewards/chosen": 1.3731368780136108, |
|
"rewards/margins": 2.227461814880371, |
|
"rewards/rejected": -0.8543251156806946, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.0582010582010581e-07, |
|
"logits/chosen": -2.7652835845947266, |
|
"logits/rejected": -2.74135160446167, |
|
"logps/chosen": -223.03579711914062, |
|
"logps/rejected": -407.8848571777344, |
|
"loss": 0.2255, |
|
"rewards/accuracies": 0.930468738079071, |
|
"rewards/chosen": 1.3337465524673462, |
|
"rewards/margins": 2.1734132766723633, |
|
"rewards/rejected": -0.8396667242050171, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.0204081632653061e-07, |
|
"logits/chosen": -2.785404920578003, |
|
"logits/rejected": -2.768556594848633, |
|
"logps/chosen": -217.6273651123047, |
|
"logps/rejected": -390.0624694824219, |
|
"loss": 0.2004, |
|
"rewards/accuracies": 0.9515625238418579, |
|
"rewards/chosen": 1.452343225479126, |
|
"rewards/margins": 2.3132705688476562, |
|
"rewards/rejected": -0.8609271049499512, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.82615268329554e-08, |
|
"logits/chosen": -2.7641220092773438, |
|
"logits/rejected": -2.7403550148010254, |
|
"logps/chosen": -254.1678466796875, |
|
"logps/rejected": -381.2986145019531, |
|
"loss": 0.2165, |
|
"rewards/accuracies": 0.938281238079071, |
|
"rewards/chosen": 1.3716920614242554, |
|
"rewards/margins": 2.208040952682495, |
|
"rewards/rejected": -0.8363490104675293, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.44822373393802e-08, |
|
"logits/chosen": -2.7834503650665283, |
|
"logits/rejected": -2.7499313354492188, |
|
"logps/chosen": -220.555908203125, |
|
"logps/rejected": -353.34454345703125, |
|
"loss": 0.2073, |
|
"rewards/accuracies": 0.936718761920929, |
|
"rewards/chosen": 1.4545724391937256, |
|
"rewards/margins": 2.2807674407958984, |
|
"rewards/rejected": -0.826195240020752, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 9.070294784580498e-08, |
|
"logits/chosen": -2.7742843627929688, |
|
"logits/rejected": -2.7704269886016846, |
|
"logps/chosen": -240.16586303710938, |
|
"logps/rejected": -341.08270263671875, |
|
"loss": 0.2097, |
|
"rewards/accuracies": 0.9398437738418579, |
|
"rewards/chosen": 1.3970229625701904, |
|
"rewards/margins": 2.205933094024658, |
|
"rewards/rejected": -0.8089098930358887, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.692365835222977e-08, |
|
"logits/chosen": -2.7731618881225586, |
|
"logits/rejected": -2.7807064056396484, |
|
"logps/chosen": -246.8760223388672, |
|
"logps/rejected": -394.09661865234375, |
|
"loss": 0.1942, |
|
"rewards/accuracies": 0.94921875, |
|
"rewards/chosen": 1.4174280166625977, |
|
"rewards/margins": 2.335336446762085, |
|
"rewards/rejected": -0.9179089665412903, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 8.314436885865457e-08, |
|
"logits/chosen": -2.7794883251190186, |
|
"logits/rejected": -2.7599997520446777, |
|
"logps/chosen": -234.8397979736328, |
|
"logps/rejected": -354.03411865234375, |
|
"loss": 0.2101, |
|
"rewards/accuracies": 0.9359375238418579, |
|
"rewards/chosen": 1.3885688781738281, |
|
"rewards/margins": 2.2632603645324707, |
|
"rewards/rejected": -0.8746916651725769, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.936507936507936e-08, |
|
"logits/chosen": -2.7606375217437744, |
|
"logits/rejected": -2.7535159587860107, |
|
"logps/chosen": -225.62606811523438, |
|
"logps/rejected": -382.0788269042969, |
|
"loss": 0.2247, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.3528351783752441, |
|
"rewards/margins": 2.18499755859375, |
|
"rewards/rejected": -0.8321624994277954, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.558578987150415e-08, |
|
"logits/chosen": -2.7874550819396973, |
|
"logits/rejected": -2.7440848350524902, |
|
"logps/chosen": -216.8153533935547, |
|
"logps/rejected": -372.12982177734375, |
|
"loss": 0.2204, |
|
"rewards/accuracies": 0.946093738079071, |
|
"rewards/chosen": 1.3856043815612793, |
|
"rewards/margins": 2.1913902759552, |
|
"rewards/rejected": -0.8057858347892761, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.180650037792895e-08, |
|
"logits/chosen": -2.7706284523010254, |
|
"logits/rejected": -2.7321717739105225, |
|
"logps/chosen": -249.1674041748047, |
|
"logps/rejected": -390.70855712890625, |
|
"loss": 0.2218, |
|
"rewards/accuracies": 0.932812511920929, |
|
"rewards/chosen": 1.3266818523406982, |
|
"rewards/margins": 2.204909086227417, |
|
"rewards/rejected": -0.8782272338867188, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.802721088435375e-08, |
|
"logits/chosen": -2.771331310272217, |
|
"logits/rejected": -2.7345921993255615, |
|
"logps/chosen": -244.32217407226562, |
|
"logps/rejected": -395.6925964355469, |
|
"loss": 0.2148, |
|
"rewards/accuracies": 0.93359375, |
|
"rewards/chosen": 1.379319190979004, |
|
"rewards/margins": 2.2045130729675293, |
|
"rewards/rejected": -0.8251941800117493, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.424792139077853e-08, |
|
"logits/chosen": -2.781578779220581, |
|
"logits/rejected": -2.7588868141174316, |
|
"logps/chosen": -234.79800415039062, |
|
"logps/rejected": -359.72332763671875, |
|
"loss": 0.2047, |
|
"rewards/accuracies": 0.9476562738418579, |
|
"rewards/chosen": 1.4176688194274902, |
|
"rewards/margins": 2.3017234802246094, |
|
"rewards/rejected": -0.8840547800064087, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 6.046863189720333e-08, |
|
"logits/chosen": -2.770113706588745, |
|
"logits/rejected": -2.73785662651062, |
|
"logps/chosen": -244.0814971923828, |
|
"logps/rejected": -370.0007019042969, |
|
"loss": 0.2086, |
|
"rewards/accuracies": 0.9351562261581421, |
|
"rewards/chosen": 1.4492876529693604, |
|
"rewards/margins": 2.2907984256744385, |
|
"rewards/rejected": -0.8415109515190125, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.6689342403628116e-08, |
|
"logits/chosen": -2.753613233566284, |
|
"logits/rejected": -2.7601518630981445, |
|
"logps/chosen": -250.3900604248047, |
|
"logps/rejected": -360.0616455078125, |
|
"loss": 0.2099, |
|
"rewards/accuracies": 0.9359375238418579, |
|
"rewards/chosen": 1.4489208459854126, |
|
"rewards/margins": 2.3246617317199707, |
|
"rewards/rejected": -0.8757408261299133, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.2910052910052905e-08, |
|
"logits/chosen": -2.7654261589050293, |
|
"logits/rejected": -2.7347397804260254, |
|
"logps/chosen": -232.9058837890625, |
|
"logps/rejected": -352.3494567871094, |
|
"loss": 0.2101, |
|
"rewards/accuracies": 0.9398437738418579, |
|
"rewards/chosen": 1.4914627075195312, |
|
"rewards/margins": 2.3455305099487305, |
|
"rewards/rejected": -0.8540679216384888, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.91307634164777e-08, |
|
"logits/chosen": -2.7729830741882324, |
|
"logits/rejected": -2.7506096363067627, |
|
"logps/chosen": -237.5419158935547, |
|
"logps/rejected": -361.7286071777344, |
|
"loss": 0.2271, |
|
"rewards/accuracies": 0.936718761920929, |
|
"rewards/chosen": 1.3387925624847412, |
|
"rewards/margins": 2.1680846214294434, |
|
"rewards/rejected": -0.8292919397354126, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.535147392290249e-08, |
|
"logits/chosen": -2.758366584777832, |
|
"logits/rejected": -2.747448444366455, |
|
"logps/chosen": -262.02313232421875, |
|
"logps/rejected": -371.6409912109375, |
|
"loss": 0.2117, |
|
"rewards/accuracies": 0.93359375, |
|
"rewards/chosen": 1.4255142211914062, |
|
"rewards/margins": 2.2661709785461426, |
|
"rewards/rejected": -0.8406568765640259, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.157218442932729e-08, |
|
"logits/chosen": -2.7460246086120605, |
|
"logits/rejected": -2.7499794960021973, |
|
"logps/chosen": -242.7806396484375, |
|
"logps/rejected": -374.79736328125, |
|
"loss": 0.2305, |
|
"rewards/accuracies": 0.9242187738418579, |
|
"rewards/chosen": 1.3290668725967407, |
|
"rewards/margins": 2.187917470932007, |
|
"rewards/rejected": -0.8588504791259766, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.779289493575208e-08, |
|
"logits/chosen": -2.7681326866149902, |
|
"logits/rejected": -2.7562155723571777, |
|
"logps/chosen": -220.0043487548828, |
|
"logps/rejected": -369.31268310546875, |
|
"loss": 0.2015, |
|
"rewards/accuracies": 0.944531261920929, |
|
"rewards/chosen": 1.4147917032241821, |
|
"rewards/margins": 2.347784996032715, |
|
"rewards/rejected": -0.9329932332038879, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.4013605442176873e-08, |
|
"logits/chosen": -2.7685980796813965, |
|
"logits/rejected": -2.761018753051758, |
|
"logps/chosen": -244.3848114013672, |
|
"logps/rejected": -352.2154235839844, |
|
"loss": 0.2147, |
|
"rewards/accuracies": 0.9398437738418579, |
|
"rewards/chosen": 1.3917274475097656, |
|
"rewards/margins": 2.2305819988250732, |
|
"rewards/rejected": -0.8388546109199524, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.023431594860166e-08, |
|
"logits/chosen": -2.7724173069000244, |
|
"logits/rejected": -2.773851156234741, |
|
"logps/chosen": -251.663330078125, |
|
"logps/rejected": -341.803466796875, |
|
"loss": 0.1992, |
|
"rewards/accuracies": 0.94921875, |
|
"rewards/chosen": 1.4376652240753174, |
|
"rewards/margins": 2.324432134628296, |
|
"rewards/rejected": -0.886766791343689, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.6455026455026453e-08, |
|
"logits/chosen": -2.758798122406006, |
|
"logits/rejected": -2.763350009918213, |
|
"logps/chosen": -238.17745971679688, |
|
"logps/rejected": -398.58135986328125, |
|
"loss": 0.2004, |
|
"rewards/accuracies": 0.9476562738418579, |
|
"rewards/chosen": 1.4174001216888428, |
|
"rewards/margins": 2.3445682525634766, |
|
"rewards/rejected": -0.9271681904792786, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.2675736961451246e-08, |
|
"logits/chosen": -2.7801098823547363, |
|
"logits/rejected": -2.7490382194519043, |
|
"logps/chosen": -242.81613159179688, |
|
"logps/rejected": -361.264892578125, |
|
"loss": 0.2077, |
|
"rewards/accuracies": 0.94140625, |
|
"rewards/chosen": 1.4166629314422607, |
|
"rewards/margins": 2.316483736038208, |
|
"rewards/rejected": -0.8998208045959473, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.889644746787604e-08, |
|
"logits/chosen": -2.75722336769104, |
|
"logits/rejected": -2.7228329181671143, |
|
"logps/chosen": -251.5331268310547, |
|
"logps/rejected": -375.8110046386719, |
|
"loss": 0.2226, |
|
"rewards/accuracies": 0.940625011920929, |
|
"rewards/chosen": 1.4032243490219116, |
|
"rewards/margins": 2.2185873985290527, |
|
"rewards/rejected": -0.8153629302978516, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.511715797430083e-08, |
|
"logits/chosen": -2.7710134983062744, |
|
"logits/rejected": -2.787081241607666, |
|
"logps/chosen": -241.9620361328125, |
|
"logps/rejected": -356.4383544921875, |
|
"loss": 0.2074, |
|
"rewards/accuracies": 0.938281238079071, |
|
"rewards/chosen": 1.4058793783187866, |
|
"rewards/margins": 2.3387274742126465, |
|
"rewards/rejected": -0.9328481554985046, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.1337868480725623e-08, |
|
"logits/chosen": -2.788255214691162, |
|
"logits/rejected": -2.790001392364502, |
|
"logps/chosen": -249.0662078857422, |
|
"logps/rejected": -375.603759765625, |
|
"loss": 0.1976, |
|
"rewards/accuracies": 0.9429687261581421, |
|
"rewards/chosen": 1.4689807891845703, |
|
"rewards/margins": 2.375899076461792, |
|
"rewards/rejected": -0.9069182276725769, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 7.558578987150416e-09, |
|
"logits/chosen": -2.762585401535034, |
|
"logits/rejected": -2.7085330486297607, |
|
"logps/chosen": -238.41751098632812, |
|
"logps/rejected": -380.84942626953125, |
|
"loss": 0.2228, |
|
"rewards/accuracies": 0.936718761920929, |
|
"rewards/chosen": 1.4105838537216187, |
|
"rewards/margins": 2.214503288269043, |
|
"rewards/rejected": -0.8039194345474243, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.779289493575208e-09, |
|
"logits/chosen": -2.7654013633728027, |
|
"logits/rejected": -2.7555670738220215, |
|
"logps/chosen": -237.80899047851562, |
|
"logps/rejected": -345.7412109375, |
|
"loss": 0.2026, |
|
"rewards/accuracies": 0.9515625238418579, |
|
"rewards/chosen": 1.418304443359375, |
|
"rewards/margins": 2.327260971069336, |
|
"rewards/rejected": -0.9089563488960266, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -2.75099515914917, |
|
"logits/rejected": -2.7724432945251465, |
|
"logps/chosen": -255.767578125, |
|
"logps/rejected": -360.5829772949219, |
|
"loss": 0.2019, |
|
"rewards/accuracies": 0.946093738079071, |
|
"rewards/chosen": 1.4355896711349487, |
|
"rewards/margins": 2.3439955711364746, |
|
"rewards/rejected": -0.9084057807922363, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_logits/chosen": -2.617767333984375, |
|
"eval_logits/rejected": -2.721874952316284, |
|
"eval_logps/chosen": -238.54788208007812, |
|
"eval_logps/rejected": -388.59033203125, |
|
"eval_loss": 0.20815864205360413, |
|
"eval_rewards/accuracies": 0.9413930773735046, |
|
"eval_rewards/chosen": 1.3856867551803589, |
|
"eval_rewards/margins": 2.292266845703125, |
|
"eval_rewards/rejected": -0.9065799117088318, |
|
"eval_runtime": 2798.4996, |
|
"eval_samples_per_second": 3.395, |
|
"eval_steps_per_second": 0.425, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1470, |
|
"total_flos": 0.0, |
|
"train_loss": 0.33413780781687524, |
|
"train_runtime": 91396.7242, |
|
"train_samples_per_second": 2.06, |
|
"train_steps_per_second": 0.016 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1470, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|