|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9990186457311089, |
|
"eval_steps": 100, |
|
"global_step": 509, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 9.803921568627451e-09, |
|
"logits/chosen": -2.7483465671539307, |
|
"logits/rejected": -2.739339828491211, |
|
"logps/chosen": -287.5325927734375, |
|
"logps/rejected": -235.635986328125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.80392156862745e-08, |
|
"logits/chosen": -2.709578037261963, |
|
"logits/rejected": -2.7113540172576904, |
|
"logps/chosen": -260.56292724609375, |
|
"logps/rejected": -256.438232421875, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.4194444417953491, |
|
"rewards/chosen": 0.00014394157915376127, |
|
"rewards/margins": 1.0432106591906631e-06, |
|
"rewards/rejected": 0.00014289839600678533, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.96078431372549e-07, |
|
"logits/chosen": -2.728665828704834, |
|
"logits/rejected": -2.7061820030212402, |
|
"logps/chosen": -280.0662536621094, |
|
"logps/rejected": -254.76626586914062, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.5724999904632568, |
|
"rewards/chosen": -4.974007424607407e-06, |
|
"rewards/margins": 0.0005589541979134083, |
|
"rewards/rejected": -0.0005639282753691077, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.941176470588235e-07, |
|
"logits/chosen": -2.7290821075439453, |
|
"logits/rejected": -2.742999315261841, |
|
"logps/chosen": -279.2391357421875, |
|
"logps/rejected": -253.37265014648438, |
|
"loss": 0.6895, |
|
"rewards/accuracies": 0.6349999904632568, |
|
"rewards/chosen": 0.0049138437025249004, |
|
"rewards/margins": 0.007674422115087509, |
|
"rewards/rejected": -0.002760578179731965, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.92156862745098e-07, |
|
"logits/chosen": -2.7134017944335938, |
|
"logits/rejected": -2.698641777038574, |
|
"logps/chosen": -274.20147705078125, |
|
"logps/rejected": -255.8253936767578, |
|
"loss": 0.6782, |
|
"rewards/accuracies": 0.6924999952316284, |
|
"rewards/chosen": 0.0260241087526083, |
|
"rewards/margins": 0.026919733732938766, |
|
"rewards/rejected": -0.0008956241654232144, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.901960784313725e-07, |
|
"logits/chosen": -2.6435346603393555, |
|
"logits/rejected": -2.6110424995422363, |
|
"logps/chosen": -302.06768798828125, |
|
"logps/rejected": -261.10919189453125, |
|
"loss": 0.6612, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.023571131750941277, |
|
"rewards/margins": 0.07649616152048111, |
|
"rewards/rejected": -0.05292503535747528, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.995237599803335e-07, |
|
"logits/chosen": -2.6205055713653564, |
|
"logits/rejected": -2.5843255519866943, |
|
"logps/chosen": -300.914306640625, |
|
"logps/rejected": -286.0216064453125, |
|
"loss": 0.6451, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.05583832785487175, |
|
"rewards/margins": 0.11994686722755432, |
|
"rewards/rejected": -0.17578519880771637, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.978798275112142e-07, |
|
"logits/chosen": -2.607668161392212, |
|
"logits/rejected": -2.568187952041626, |
|
"logps/chosen": -308.4685974121094, |
|
"logps/rejected": -305.6259460449219, |
|
"loss": 0.6212, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.1777888685464859, |
|
"rewards/margins": 0.19118839502334595, |
|
"rewards/rejected": -0.3689771890640259, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.950700530747689e-07, |
|
"logits/chosen": -2.6067116260528564, |
|
"logits/rejected": -2.5767879486083984, |
|
"logps/chosen": -300.19488525390625, |
|
"logps/rejected": -295.8065185546875, |
|
"loss": 0.6196, |
|
"rewards/accuracies": 0.6850000023841858, |
|
"rewards/chosen": -0.13195012509822845, |
|
"rewards/margins": 0.25833892822265625, |
|
"rewards/rejected": -0.3902890384197235, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.911076517558622e-07, |
|
"logits/chosen": -2.5809831619262695, |
|
"logits/rejected": -2.555103302001953, |
|
"logps/chosen": -325.28692626953125, |
|
"logps/rejected": -330.8323974609375, |
|
"loss": 0.5844, |
|
"rewards/accuracies": 0.7300000190734863, |
|
"rewards/chosen": -0.21861158311367035, |
|
"rewards/margins": 0.3220059275627136, |
|
"rewards/rejected": -0.5406175851821899, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.860112597371772e-07, |
|
"logits/chosen": -2.5413742065429688, |
|
"logits/rejected": -2.5363407135009766, |
|
"logps/chosen": -295.8542175292969, |
|
"logps/rejected": -310.6338195800781, |
|
"loss": 0.5764, |
|
"rewards/accuracies": 0.6675000190734863, |
|
"rewards/chosen": -0.26630619168281555, |
|
"rewards/margins": 0.3358945846557617, |
|
"rewards/rejected": -0.6022006869316101, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_logits/chosen": -2.4791219234466553, |
|
"eval_logits/rejected": -2.4360005855560303, |
|
"eval_logps/chosen": -313.6502990722656, |
|
"eval_logps/rejected": -340.86053466796875, |
|
"eval_loss": 0.5828901529312134, |
|
"eval_rewards/accuracies": 0.6931137442588806, |
|
"eval_rewards/chosen": -0.3592246174812317, |
|
"eval_rewards/margins": 0.40203189849853516, |
|
"eval_rewards/rejected": -0.7612565159797668, |
|
"eval_runtime": 494.2516, |
|
"eval_samples_per_second": 4.047, |
|
"eval_steps_per_second": 0.338, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.798048466485017e-07, |
|
"logits/chosen": -2.0916123390197754, |
|
"logits/rejected": -2.1291110515594482, |
|
"logps/chosen": -337.0193786621094, |
|
"logps/rejected": -372.4815368652344, |
|
"loss": 0.5665, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.6119796633720398, |
|
"rewards/margins": 0.5584384799003601, |
|
"rewards/rejected": -1.1704181432724, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.725176028314541e-07, |
|
"logits/chosen": -1.8370585441589355, |
|
"logits/rejected": -1.7712280750274658, |
|
"logps/chosen": -370.1864318847656, |
|
"logps/rejected": -398.8289794921875, |
|
"loss": 0.56, |
|
"rewards/accuracies": 0.7350000143051147, |
|
"rewards/chosen": -0.8116917610168457, |
|
"rewards/margins": 0.6380540728569031, |
|
"rewards/rejected": -1.449745774269104, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.641838020498713e-07, |
|
"logits/chosen": -1.7485500574111938, |
|
"logits/rejected": -1.5671393871307373, |
|
"logps/chosen": -380.29913330078125, |
|
"logps/rejected": -424.1035461425781, |
|
"loss": 0.5461, |
|
"rewards/accuracies": 0.7200000286102295, |
|
"rewards/chosen": -0.8717474937438965, |
|
"rewards/margins": 0.6444628834724426, |
|
"rewards/rejected": -1.5162103176116943, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5484264029156733e-07, |
|
"logits/chosen": -1.9667887687683105, |
|
"logits/rejected": -1.6983026266098022, |
|
"logps/chosen": -322.9972839355469, |
|
"logps/rejected": -379.5963134765625, |
|
"loss": 0.5416, |
|
"rewards/accuracies": 0.7149999737739563, |
|
"rewards/chosen": -0.6348860263824463, |
|
"rewards/margins": 0.6040786504745483, |
|
"rewards/rejected": -1.2389646768569946, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.445380514196192e-07, |
|
"logits/chosen": -1.2058897018432617, |
|
"logits/rejected": -0.9969528317451477, |
|
"logps/chosen": -379.3441467285156, |
|
"logps/rejected": -449.9009704589844, |
|
"loss": 0.5485, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.9173200726509094, |
|
"rewards/margins": 0.7758927941322327, |
|
"rewards/rejected": -1.6932127475738525, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.33318500540218e-07, |
|
"logits/chosen": -1.7521625757217407, |
|
"logits/rejected": -1.4877443313598633, |
|
"logps/chosen": -356.1580810546875, |
|
"logps/rejected": -389.0058288574219, |
|
"loss": 0.5183, |
|
"rewards/accuracies": 0.7850000262260437, |
|
"rewards/chosen": -0.6841800212860107, |
|
"rewards/margins": 0.7851129174232483, |
|
"rewards/rejected": -1.4692928791046143, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.2123675605892985e-07, |
|
"logits/chosen": -1.6861900091171265, |
|
"logits/rejected": -1.4684306383132935, |
|
"logps/chosen": -379.7774658203125, |
|
"logps/rejected": -437.3900451660156, |
|
"loss": 0.5146, |
|
"rewards/accuracies": 0.7300000190734863, |
|
"rewards/chosen": -0.8159699440002441, |
|
"rewards/margins": 0.7220683097839355, |
|
"rewards/rejected": -1.5380383729934692, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.0834964149744333e-07, |
|
"logits/chosen": -1.3343206644058228, |
|
"logits/rejected": -1.0179518461227417, |
|
"logps/chosen": -358.3331298828125, |
|
"logps/rejected": -399.9204406738281, |
|
"loss": 0.5536, |
|
"rewards/accuracies": 0.7074999809265137, |
|
"rewards/chosen": -0.8257815837860107, |
|
"rewards/margins": 0.7000215649604797, |
|
"rewards/rejected": -1.5258032083511353, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.947177682380738e-07, |
|
"logits/chosen": -1.2010215520858765, |
|
"logits/rejected": -0.8926857709884644, |
|
"logps/chosen": -375.1010437011719, |
|
"logps/rejected": -433.2417297363281, |
|
"loss": 0.5309, |
|
"rewards/accuracies": 0.7425000071525574, |
|
"rewards/chosen": -0.7876387238502502, |
|
"rewards/margins": 0.7681831121444702, |
|
"rewards/rejected": -1.5558221340179443, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.804052504529933e-07, |
|
"logits/chosen": -1.1186742782592773, |
|
"logits/rejected": -0.7032889723777771, |
|
"logps/chosen": -351.2778625488281, |
|
"logps/rejected": -416.71820068359375, |
|
"loss": 0.5169, |
|
"rewards/accuracies": 0.7475000023841858, |
|
"rewards/chosen": -0.7259469032287598, |
|
"rewards/margins": 0.874809741973877, |
|
"rewards/rejected": -1.6007568836212158, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_logits/chosen": -1.201006293296814, |
|
"eval_logits/rejected": -0.8443379402160645, |
|
"eval_logps/chosen": -366.2012023925781, |
|
"eval_logps/rejected": -426.77203369140625, |
|
"eval_loss": 0.531209409236908, |
|
"eval_rewards/accuracies": 0.7065868377685547, |
|
"eval_rewards/chosen": -0.8847335577011108, |
|
"eval_rewards/margins": 0.7356376647949219, |
|
"eval_rewards/rejected": -1.6203712224960327, |
|
"eval_runtime": 494.1792, |
|
"eval_samples_per_second": 4.047, |
|
"eval_steps_per_second": 0.338, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.654794035589483e-07, |
|
"logits/chosen": -0.9955520629882812, |
|
"logits/rejected": -0.5436328649520874, |
|
"logps/chosen": -402.7477722167969, |
|
"logps/rejected": -444.9473876953125, |
|
"loss": 0.5126, |
|
"rewards/accuracies": 0.7225000262260437, |
|
"rewards/chosen": -1.0243951082229614, |
|
"rewards/margins": 0.7689486742019653, |
|
"rewards/rejected": -1.7933436632156372, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.5001042761570826e-07, |
|
"logits/chosen": -0.7878814935684204, |
|
"logits/rejected": -0.33438754081726074, |
|
"logps/chosen": -379.41448974609375, |
|
"logps/rejected": -452.28009033203125, |
|
"loss": 0.5159, |
|
"rewards/accuracies": 0.7475000023841858, |
|
"rewards/chosen": -1.0701900720596313, |
|
"rewards/margins": 0.8491780161857605, |
|
"rewards/rejected": -1.919368028640747, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.34071077157304e-07, |
|
"logits/chosen": -0.6851831078529358, |
|
"logits/rejected": -0.29147180914878845, |
|
"logps/chosen": -360.47869873046875, |
|
"logps/rejected": -406.3958740234375, |
|
"loss": 0.5399, |
|
"rewards/accuracies": 0.7149999737739563, |
|
"rewards/chosen": -0.9100778698921204, |
|
"rewards/margins": 0.7056692242622375, |
|
"rewards/rejected": -1.6157469749450684, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.1773631900892204e-07, |
|
"logits/chosen": -0.6293848752975464, |
|
"logits/rejected": -0.2972988784313202, |
|
"logps/chosen": -364.2557067871094, |
|
"logps/rejected": -426.8414306640625, |
|
"loss": 0.5184, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.945137083530426, |
|
"rewards/margins": 0.7834777235984802, |
|
"rewards/rejected": -1.7286149263381958, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.0108297969883103e-07, |
|
"logits/chosen": -0.6830095052719116, |
|
"logits/rejected": -0.20727473497390747, |
|
"logps/chosen": -377.15960693359375, |
|
"logps/rejected": -440.8514709472656, |
|
"loss": 0.5199, |
|
"rewards/accuracies": 0.7475000023841858, |
|
"rewards/chosen": -0.9253360033035278, |
|
"rewards/margins": 0.7137148380279541, |
|
"rewards/rejected": -1.6390507221221924, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8418938412365013e-07, |
|
"logits/chosen": -0.595008909702301, |
|
"logits/rejected": -0.22117982804775238, |
|
"logps/chosen": -378.3102722167969, |
|
"logps/rejected": -421.2056884765625, |
|
"loss": 0.5259, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0280470848083496, |
|
"rewards/margins": 0.6548060774803162, |
|
"rewards/rejected": -1.682853102684021, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.671349871664101e-07, |
|
"logits/chosen": -0.4738517105579376, |
|
"logits/rejected": -0.06301561743021011, |
|
"logps/chosen": -391.0889892578125, |
|
"logps/rejected": -433.60174560546875, |
|
"loss": 0.4996, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.925932765007019, |
|
"rewards/margins": 0.8979344367980957, |
|
"rewards/rejected": -1.8238672018051147, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.5e-07, |
|
"logits/chosen": -0.29330724477767944, |
|
"logits/rejected": 0.11182761192321777, |
|
"logps/chosen": -400.1533203125, |
|
"logps/rejected": -453.4571228027344, |
|
"loss": 0.5108, |
|
"rewards/accuracies": 0.7174999713897705, |
|
"rewards/chosen": -1.1598564386367798, |
|
"rewards/margins": 0.7635893821716309, |
|
"rewards/rejected": -1.9234455823898315, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3286501283358982e-07, |
|
"logits/chosen": -0.049084682017564774, |
|
"logits/rejected": 0.32071781158447266, |
|
"logps/chosen": -421.474853515625, |
|
"logps/rejected": -480.5507507324219, |
|
"loss": 0.5107, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.2823936939239502, |
|
"rewards/margins": 0.920534610748291, |
|
"rewards/rejected": -2.202928304672241, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1581061587634987e-07, |
|
"logits/chosen": -0.3210409879684448, |
|
"logits/rejected": 0.13426151871681213, |
|
"logps/chosen": -392.66351318359375, |
|
"logps/rejected": -457.4385681152344, |
|
"loss": 0.5133, |
|
"rewards/accuracies": 0.7825000286102295, |
|
"rewards/chosen": -1.2225959300994873, |
|
"rewards/margins": 0.9219253659248352, |
|
"rewards/rejected": -2.1445212364196777, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_logits/chosen": -0.38526856899261475, |
|
"eval_logits/rejected": 0.0459565594792366, |
|
"eval_logps/chosen": -396.590576171875, |
|
"eval_logps/rejected": -460.7764892578125, |
|
"eval_loss": 0.5159304141998291, |
|
"eval_rewards/accuracies": 0.7245509028434753, |
|
"eval_rewards/chosen": -1.1886271238327026, |
|
"eval_rewards/margins": 0.7717891931533813, |
|
"eval_rewards/rejected": -1.9604166746139526, |
|
"eval_runtime": 494.4328, |
|
"eval_samples_per_second": 4.045, |
|
"eval_steps_per_second": 0.338, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9891702030116897e-07, |
|
"logits/chosen": -0.6406633257865906, |
|
"logits/rejected": 0.15507885813713074, |
|
"logps/chosen": -384.56219482421875, |
|
"logps/rejected": -443.3284912109375, |
|
"loss": 0.5192, |
|
"rewards/accuracies": 0.7599999904632568, |
|
"rewards/chosen": -1.066334843635559, |
|
"rewards/margins": 0.8297566175460815, |
|
"rewards/rejected": -1.8960914611816406, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8226368099107792e-07, |
|
"logits/chosen": -0.6926136016845703, |
|
"logits/rejected": -0.09604160487651825, |
|
"logps/chosen": -414.7826232910156, |
|
"logps/rejected": -454.5480041503906, |
|
"loss": 0.5065, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.0457278490066528, |
|
"rewards/margins": 0.7350744605064392, |
|
"rewards/rejected": -1.7808022499084473, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6592892284269594e-07, |
|
"logits/chosen": -0.5141594409942627, |
|
"logits/rejected": 0.11050853878259659, |
|
"logps/chosen": -402.63348388671875, |
|
"logps/rejected": -431.8319091796875, |
|
"loss": 0.5093, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.0640606880187988, |
|
"rewards/margins": 0.7925867438316345, |
|
"rewards/rejected": -1.8566473722457886, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4998957238429172e-07, |
|
"logits/chosen": -0.08297364413738251, |
|
"logits/rejected": 0.21859808266162872, |
|
"logps/chosen": -390.8412170410156, |
|
"logps/rejected": -461.3310546875, |
|
"loss": 0.505, |
|
"rewards/accuracies": 0.7275000214576721, |
|
"rewards/chosen": -1.190333604812622, |
|
"rewards/margins": 0.8922053575515747, |
|
"rewards/rejected": -2.0825393199920654, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.345205964410517e-07, |
|
"logits/chosen": -0.539190948009491, |
|
"logits/rejected": -0.053236301988363266, |
|
"logps/chosen": -392.14385986328125, |
|
"logps/rejected": -447.09844970703125, |
|
"loss": 0.5125, |
|
"rewards/accuracies": 0.7774999737739563, |
|
"rewards/chosen": -0.9940242767333984, |
|
"rewards/margins": 0.9291434288024902, |
|
"rewards/rejected": -1.9231675863265991, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1959474954700665e-07, |
|
"logits/chosen": -0.6150873303413391, |
|
"logits/rejected": -0.08470536023378372, |
|
"logps/chosen": -377.5425109863281, |
|
"logps/rejected": -434.1069030761719, |
|
"loss": 0.5266, |
|
"rewards/accuracies": 0.7599999904632568, |
|
"rewards/chosen": -1.0171641111373901, |
|
"rewards/margins": 0.7864332795143127, |
|
"rewards/rejected": -1.803597092628479, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0528223176192615e-07, |
|
"logits/chosen": -0.464309424161911, |
|
"logits/rejected": 0.11655576527118683, |
|
"logps/chosen": -397.9951477050781, |
|
"logps/rejected": -446.141845703125, |
|
"loss": 0.4885, |
|
"rewards/accuracies": 0.7350000143051147, |
|
"rewards/chosen": -1.1220192909240723, |
|
"rewards/margins": 0.7690063714981079, |
|
"rewards/rejected": -1.8910256624221802, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.16503585025567e-08, |
|
"logits/chosen": -0.3131292462348938, |
|
"logits/rejected": 0.1059599220752716, |
|
"logps/chosen": -398.6189880371094, |
|
"logps/rejected": -455.5489807128906, |
|
"loss": 0.4785, |
|
"rewards/accuracies": 0.7774999737739563, |
|
"rewards/chosen": -1.180424451828003, |
|
"rewards/margins": 0.9602058529853821, |
|
"rewards/rejected": -2.1406302452087402, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.876324394107017e-08, |
|
"logits/chosen": -0.06371825933456421, |
|
"logits/rejected": 0.4222162663936615, |
|
"logps/chosen": -408.15203857421875, |
|
"logps/rejected": -469.3525085449219, |
|
"loss": 0.4945, |
|
"rewards/accuracies": 0.7774999737739563, |
|
"rewards/chosen": -1.2744272947311401, |
|
"rewards/margins": 0.8693990111351013, |
|
"rewards/rejected": -2.1438262462615967, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.668149945978201e-08, |
|
"logits/chosen": -0.4337286353111267, |
|
"logits/rejected": 0.11450805515050888, |
|
"logps/chosen": -406.1577453613281, |
|
"logps/rejected": -468.1871337890625, |
|
"loss": 0.4968, |
|
"rewards/accuracies": 0.7574999928474426, |
|
"rewards/chosen": -1.204884648323059, |
|
"rewards/margins": 0.9240193367004395, |
|
"rewards/rejected": -2.128904104232788, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_logits/chosen": -0.2552393972873688, |
|
"eval_logits/rejected": 0.20138485729694366, |
|
"eval_logps/chosen": -402.1766357421875, |
|
"eval_logps/rejected": -475.3639221191406, |
|
"eval_loss": 0.5057728290557861, |
|
"eval_rewards/accuracies": 0.7140718698501587, |
|
"eval_rewards/chosen": -1.2444883584976196, |
|
"eval_rewards/margins": 0.8618020415306091, |
|
"eval_rewards/rejected": -2.106290578842163, |
|
"eval_runtime": 493.9837, |
|
"eval_samples_per_second": 4.049, |
|
"eval_steps_per_second": 0.338, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.546194858038072e-08, |
|
"logits/chosen": -0.3444100618362427, |
|
"logits/rejected": 0.08428356051445007, |
|
"logps/chosen": -419.0089111328125, |
|
"logps/rejected": -482.5577392578125, |
|
"loss": 0.488, |
|
"rewards/accuracies": 0.7325000166893005, |
|
"rewards/chosen": -1.1570134162902832, |
|
"rewards/margins": 0.9088660478591919, |
|
"rewards/rejected": -2.0658795833587646, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.5157359708432626e-08, |
|
"logits/chosen": -0.3363034129142761, |
|
"logits/rejected": 0.1421819031238556, |
|
"logps/chosen": -417.26116943359375, |
|
"logps/rejected": -475.9188537597656, |
|
"loss": 0.5012, |
|
"rewards/accuracies": 0.7549999952316284, |
|
"rewards/chosen": -1.1876376867294312, |
|
"rewards/margins": 0.9119570255279541, |
|
"rewards/rejected": -2.0995945930480957, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.581619795012874e-08, |
|
"logits/chosen": -0.4450594186782837, |
|
"logits/rejected": 0.03785795345902443, |
|
"logps/chosen": -404.95281982421875, |
|
"logps/rejected": -467.25531005859375, |
|
"loss": 0.4861, |
|
"rewards/accuracies": 0.7724999785423279, |
|
"rewards/chosen": -1.1584584712982178, |
|
"rewards/margins": 0.9622448086738586, |
|
"rewards/rejected": -2.1207032203674316, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.748239716854589e-08, |
|
"logits/chosen": -0.31011733412742615, |
|
"logits/rejected": 0.310569167137146, |
|
"logps/chosen": -389.67132568359375, |
|
"logps/rejected": -470.01104736328125, |
|
"loss": 0.5105, |
|
"rewards/accuracies": 0.7350000143051147, |
|
"rewards/chosen": -1.1304560899734497, |
|
"rewards/margins": 0.8861461877822876, |
|
"rewards/rejected": -2.016602039337158, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.0195153351498323e-08, |
|
"logits/chosen": -0.3003827631473541, |
|
"logits/rejected": 0.046957388520240784, |
|
"logps/chosen": -412.5171203613281, |
|
"logps/rejected": -481.26898193359375, |
|
"loss": 0.5128, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.1812173128128052, |
|
"rewards/margins": 0.8305546641349792, |
|
"rewards/rejected": -2.0117719173431396, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.3988740262822846e-08, |
|
"logits/chosen": -0.47582343220710754, |
|
"logits/rejected": -0.11152289062738419, |
|
"logps/chosen": -410.2917175292969, |
|
"logps/rejected": -457.774658203125, |
|
"loss": 0.5044, |
|
"rewards/accuracies": 0.7649999856948853, |
|
"rewards/chosen": -1.1460288763046265, |
|
"rewards/margins": 0.8546761870384216, |
|
"rewards/rejected": -2.0007050037384033, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 8.892348244137788e-09, |
|
"logits/chosen": -0.5770422220230103, |
|
"logits/rejected": -0.025662722066044807, |
|
"logps/chosen": -372.98187255859375, |
|
"logps/rejected": -467.86199951171875, |
|
"loss": 0.4973, |
|
"rewards/accuracies": 0.7200000286102295, |
|
"rewards/chosen": -1.0886142253875732, |
|
"rewards/margins": 0.8808639049530029, |
|
"rewards/rejected": -1.9694780111312866, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.929946925231076e-09, |
|
"logits/chosen": -0.5876446962356567, |
|
"logits/rejected": -0.16365936398506165, |
|
"logps/chosen": -400.3377685546875, |
|
"logps/rejected": -455.9208068847656, |
|
"loss": 0.5072, |
|
"rewards/accuracies": 0.7024999856948853, |
|
"rewards/chosen": -1.1451067924499512, |
|
"rewards/margins": 0.7030719518661499, |
|
"rewards/rejected": -1.848178744316101, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.1201724887858484e-09, |
|
"logits/chosen": -0.4430970847606659, |
|
"logits/rejected": 0.12594802677631378, |
|
"logps/chosen": -409.6846008300781, |
|
"logps/rejected": -458.5526428222656, |
|
"loss": 0.4887, |
|
"rewards/accuracies": 0.7574999928474426, |
|
"rewards/chosen": -1.0775573253631592, |
|
"rewards/margins": 0.9305427074432373, |
|
"rewards/rejected": -2.0081000328063965, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.762400196664518e-10, |
|
"logits/chosen": -0.41937455534935, |
|
"logits/rejected": -0.08660510927438736, |
|
"logps/chosen": -385.8563232421875, |
|
"logps/rejected": -454.9473571777344, |
|
"loss": 0.4833, |
|
"rewards/accuracies": 0.7699999809265137, |
|
"rewards/chosen": -1.093034267425537, |
|
"rewards/margins": 0.9196186661720276, |
|
"rewards/rejected": -2.012652635574341, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_logits/chosen": -0.4496035575866699, |
|
"eval_logits/rejected": 0.04359949380159378, |
|
"eval_logps/chosen": -395.9374084472656, |
|
"eval_logps/rejected": -470.5448303222656, |
|
"eval_loss": 0.5045374631881714, |
|
"eval_rewards/accuracies": 0.726047933101654, |
|
"eval_rewards/chosen": -1.182096004486084, |
|
"eval_rewards/margins": 0.876003086566925, |
|
"eval_rewards/rejected": -2.0580990314483643, |
|
"eval_runtime": 494.2334, |
|
"eval_samples_per_second": 4.047, |
|
"eval_steps_per_second": 0.338, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 509, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5401819272219315, |
|
"train_runtime": 34352.758, |
|
"train_samples_per_second": 1.78, |
|
"train_steps_per_second": 0.015 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 509, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|