|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 1912, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.6041666666666667e-08, |
|
"logits/chosen": -2.4267122745513916, |
|
"logits/rejected": -2.4661664962768555, |
|
"logps/chosen": -213.77220153808594, |
|
"logps/rejected": -156.72488403320312, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.604166666666667e-07, |
|
"logits/chosen": -2.258631706237793, |
|
"logits/rejected": -2.2316627502441406, |
|
"logps/chosen": -232.25244140625, |
|
"logps/rejected": -223.98828125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.3888888955116272, |
|
"rewards/chosen": -0.00020581186981871724, |
|
"rewards/margins": 0.0007613838533870876, |
|
"rewards/rejected": -0.0009671957814134657, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.208333333333334e-07, |
|
"logits/chosen": -2.3259849548339844, |
|
"logits/rejected": -2.3685474395751953, |
|
"logps/chosen": -245.956787109375, |
|
"logps/rejected": -231.7569122314453, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -0.0001782081089913845, |
|
"rewards/margins": -0.0010433623101562262, |
|
"rewards/rejected": 0.0008651542593725026, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.8125e-07, |
|
"logits/chosen": -2.28281569480896, |
|
"logits/rejected": -2.1847293376922607, |
|
"logps/chosen": -256.9249572753906, |
|
"logps/rejected": -222.64596557617188, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.0004837578162550926, |
|
"rewards/margins": 0.0007256327080540359, |
|
"rewards/rejected": -0.0002418749063508585, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0416666666666667e-06, |
|
"logits/chosen": -2.2957146167755127, |
|
"logits/rejected": -2.221849203109741, |
|
"logps/chosen": -246.8458709716797, |
|
"logps/rejected": -216.1331024169922, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.001086801290512085, |
|
"rewards/margins": 0.0008543278090655804, |
|
"rewards/rejected": 0.00023247336503118277, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.3020833333333335e-06, |
|
"logits/chosen": -2.395200729370117, |
|
"logits/rejected": -2.1403450965881348, |
|
"logps/chosen": -230.1165313720703, |
|
"logps/rejected": -217.97817993164062, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.0015203278744593263, |
|
"rewards/margins": 0.0014944769209250808, |
|
"rewards/rejected": 2.5850749807432294e-05, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.5625e-06, |
|
"logits/chosen": -2.273294687271118, |
|
"logits/rejected": -2.337575912475586, |
|
"logps/chosen": -209.8964385986328, |
|
"logps/rejected": -216.16592407226562, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.001072435756213963, |
|
"rewards/margins": 0.0009590731933712959, |
|
"rewards/rejected": 0.0001133625628426671, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.8229166666666666e-06, |
|
"logits/chosen": -2.237490177154541, |
|
"logits/rejected": -2.1758182048797607, |
|
"logps/chosen": -215.27633666992188, |
|
"logps/rejected": -191.88693237304688, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.0013392124092206359, |
|
"rewards/margins": 0.0011731551494449377, |
|
"rewards/rejected": 0.00016605725977569818, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.0833333333333334e-06, |
|
"logits/chosen": -2.281623363494873, |
|
"logits/rejected": -2.2088568210601807, |
|
"logps/chosen": -288.46343994140625, |
|
"logps/rejected": -255.1790313720703, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.00038608754402957857, |
|
"rewards/margins": 0.0004603867419064045, |
|
"rewards/rejected": -7.429932884406298e-05, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.3437500000000002e-06, |
|
"logits/chosen": -2.3208765983581543, |
|
"logits/rejected": -2.1739656925201416, |
|
"logps/chosen": -271.21282958984375, |
|
"logps/rejected": -223.1171112060547, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.00312628410756588, |
|
"rewards/margins": 0.003461697371676564, |
|
"rewards/rejected": -0.00033541349694132805, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.604166666666667e-06, |
|
"logits/chosen": -2.3325040340423584, |
|
"logits/rejected": -2.267913341522217, |
|
"logps/chosen": -277.1980895996094, |
|
"logps/rejected": -233.1110076904297, |
|
"loss": 0.6918, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.003763762768357992, |
|
"rewards/margins": 0.0024481036234647036, |
|
"rewards/rejected": 0.0013156586792320013, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_logits/chosen": -2.228503942489624, |
|
"eval_logits/rejected": -2.1363625526428223, |
|
"eval_logps/chosen": -260.84576416015625, |
|
"eval_logps/rejected": -245.6464080810547, |
|
"eval_loss": 0.6914187669754028, |
|
"eval_rewards/accuracies": 0.7109375, |
|
"eval_rewards/chosen": 0.005916436668485403, |
|
"eval_rewards/margins": 0.004088809713721275, |
|
"eval_rewards/rejected": 0.001827626721933484, |
|
"eval_runtime": 99.0156, |
|
"eval_samples_per_second": 20.199, |
|
"eval_steps_per_second": 0.323, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.8645833333333334e-06, |
|
"logits/chosen": -2.2577621936798096, |
|
"logits/rejected": -2.3346638679504395, |
|
"logps/chosen": -229.360595703125, |
|
"logps/rejected": -291.27734375, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.007094097323715687, |
|
"rewards/margins": 0.002854767022654414, |
|
"rewards/rejected": 0.004239329136908054, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.125e-06, |
|
"logits/chosen": -2.253248453140259, |
|
"logits/rejected": -2.1254897117614746, |
|
"logps/chosen": -268.021240234375, |
|
"logps/rejected": -243.69796752929688, |
|
"loss": 0.6898, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.01565476693212986, |
|
"rewards/margins": 0.013339616358280182, |
|
"rewards/rejected": 0.002315149875357747, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.385416666666667e-06, |
|
"logits/chosen": -2.395129442214966, |
|
"logits/rejected": -2.2741332054138184, |
|
"logps/chosen": -256.48455810546875, |
|
"logps/rejected": -264.9955749511719, |
|
"loss": 0.6898, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.012440720573067665, |
|
"rewards/margins": -8.803028322290629e-05, |
|
"rewards/rejected": 0.012528751976788044, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.6458333333333333e-06, |
|
"logits/chosen": -2.275693416595459, |
|
"logits/rejected": -2.145570755004883, |
|
"logps/chosen": -327.93218994140625, |
|
"logps/rejected": -250.51492309570312, |
|
"loss": 0.6846, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.03406423702836037, |
|
"rewards/margins": 0.022959694266319275, |
|
"rewards/rejected": 0.011104539968073368, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.90625e-06, |
|
"logits/chosen": -2.310237407684326, |
|
"logits/rejected": -2.2222559452056885, |
|
"logps/chosen": -269.9709167480469, |
|
"logps/rejected": -258.72406005859375, |
|
"loss": 0.6836, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.04501372575759888, |
|
"rewards/margins": 0.02766810730099678, |
|
"rewards/rejected": 0.017345618456602097, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.166666666666667e-06, |
|
"logits/chosen": -2.3985304832458496, |
|
"logits/rejected": -2.370260000228882, |
|
"logps/chosen": -269.9902038574219, |
|
"logps/rejected": -250.7908172607422, |
|
"loss": 0.6797, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.03560282289981842, |
|
"rewards/margins": 0.019300740212202072, |
|
"rewards/rejected": 0.01630208268761635, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.427083333333334e-06, |
|
"logits/chosen": -2.331279993057251, |
|
"logits/rejected": -2.2696022987365723, |
|
"logps/chosen": -250.06179809570312, |
|
"logps/rejected": -243.79971313476562, |
|
"loss": 0.6794, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.050360582768917084, |
|
"rewards/margins": 0.036857619881629944, |
|
"rewards/rejected": 0.013502964749932289, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.6875000000000004e-06, |
|
"logits/chosen": -2.127589464187622, |
|
"logits/rejected": -2.0284266471862793, |
|
"logps/chosen": -317.98809814453125, |
|
"logps/rejected": -264.03387451171875, |
|
"loss": 0.6688, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.05277144908905029, |
|
"rewards/margins": 0.060719214379787445, |
|
"rewards/rejected": -0.007947764359414577, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.947916666666667e-06, |
|
"logits/chosen": -2.0413432121276855, |
|
"logits/rejected": -2.025883913040161, |
|
"logps/chosen": -279.8322448730469, |
|
"logps/rejected": -278.4949035644531, |
|
"loss": 0.6663, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.014455978758633137, |
|
"rewards/margins": 0.019963031634688377, |
|
"rewards/rejected": -0.03441900759935379, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999733114418726e-06, |
|
"logits/chosen": -2.3885703086853027, |
|
"logits/rejected": -2.1402792930603027, |
|
"logps/chosen": -353.4009094238281, |
|
"logps/rejected": -268.4277648925781, |
|
"loss": 0.6619, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.021488813683390617, |
|
"rewards/margins": 0.1255442202091217, |
|
"rewards/rejected": -0.10405541956424713, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_logits/chosen": -2.155834436416626, |
|
"eval_logits/rejected": -2.053650140762329, |
|
"eval_logps/chosen": -264.06280517578125, |
|
"eval_logps/rejected": -259.0110168457031, |
|
"eval_loss": 0.6497414708137512, |
|
"eval_rewards/accuracies": 0.70703125, |
|
"eval_rewards/chosen": -0.02625393494963646, |
|
"eval_rewards/margins": 0.10556431859731674, |
|
"eval_rewards/rejected": -0.1318182498216629, |
|
"eval_runtime": 100.0964, |
|
"eval_samples_per_second": 19.981, |
|
"eval_steps_per_second": 0.32, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.998648989404205e-06, |
|
"logits/chosen": -2.0490939617156982, |
|
"logits/rejected": -2.0286402702331543, |
|
"logps/chosen": -252.8923797607422, |
|
"logps/rejected": -319.3832092285156, |
|
"loss": 0.6513, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.011713042855262756, |
|
"rewards/margins": 0.11224497854709625, |
|
"rewards/rejected": -0.12395800650119781, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.996731305997416e-06, |
|
"logits/chosen": -2.139150381088257, |
|
"logits/rejected": -2.073772430419922, |
|
"logps/chosen": -235.38320922851562, |
|
"logps/rejected": -231.4700164794922, |
|
"loss": 0.6389, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.06928001344203949, |
|
"rewards/margins": 0.14029313623905182, |
|
"rewards/rejected": -0.2095731794834137, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.9939807039445205e-06, |
|
"logits/chosen": -2.2183709144592285, |
|
"logits/rejected": -2.206679582595825, |
|
"logps/chosen": -294.1888732910156, |
|
"logps/rejected": -347.28173828125, |
|
"loss": 0.6245, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.11882080882787704, |
|
"rewards/margins": 0.23026032745838165, |
|
"rewards/rejected": -0.3490811288356781, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.990398100856367e-06, |
|
"logits/chosen": -2.148719310760498, |
|
"logits/rejected": -2.125946044921875, |
|
"logps/chosen": -233.49478149414062, |
|
"logps/rejected": -280.72503662109375, |
|
"loss": 0.6275, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.15465135872364044, |
|
"rewards/margins": 0.1490645855665207, |
|
"rewards/rejected": -0.30371594429016113, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.985984691902379e-06, |
|
"logits/chosen": -2.243445873260498, |
|
"logits/rejected": -2.1190781593322754, |
|
"logps/chosen": -368.4538879394531, |
|
"logps/rejected": -323.40850830078125, |
|
"loss": 0.6479, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.38147231936454773, |
|
"rewards/margins": 0.18310242891311646, |
|
"rewards/rejected": -0.5645747780799866, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.980741949411839e-06, |
|
"logits/chosen": -2.0785181522369385, |
|
"logits/rejected": -1.9865573644638062, |
|
"logps/chosen": -249.36190795898438, |
|
"logps/rejected": -240.6489715576172, |
|
"loss": 0.6203, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.35873857140541077, |
|
"rewards/margins": 0.13610118627548218, |
|
"rewards/rejected": -0.49483975768089294, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.974671622382713e-06, |
|
"logits/chosen": -2.0679633617401123, |
|
"logits/rejected": -2.0502331256866455, |
|
"logps/chosen": -273.707763671875, |
|
"logps/rejected": -319.79541015625, |
|
"loss": 0.6132, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.385969340801239, |
|
"rewards/margins": 0.18665318191051483, |
|
"rewards/rejected": -0.572622537612915, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.967775735898179e-06, |
|
"logits/chosen": -2.0888559818267822, |
|
"logits/rejected": -2.04829478263855, |
|
"logps/chosen": -297.3575439453125, |
|
"logps/rejected": -322.2998352050781, |
|
"loss": 0.5982, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.3168260455131531, |
|
"rewards/margins": 0.23598246276378632, |
|
"rewards/rejected": -0.5528085231781006, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.960056590451056e-06, |
|
"logits/chosen": -2.0440139770507812, |
|
"logits/rejected": -1.889760971069336, |
|
"logps/chosen": -281.42559814453125, |
|
"logps/rejected": -238.29434204101562, |
|
"loss": 0.6149, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.30001693964004517, |
|
"rewards/margins": 0.2756292521953583, |
|
"rewards/rejected": -0.5756462216377258, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.951516761176344e-06, |
|
"logits/chosen": -2.001333713531494, |
|
"logits/rejected": -1.9682204723358154, |
|
"logps/chosen": -310.86669921875, |
|
"logps/rejected": -276.2847595214844, |
|
"loss": 0.6077, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.3108038306236267, |
|
"rewards/margins": 0.14596325159072876, |
|
"rewards/rejected": -0.45676714181900024, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_logits/chosen": -1.9869587421417236, |
|
"eval_logits/rejected": -1.850504994392395, |
|
"eval_logps/chosen": -287.5378723144531, |
|
"eval_logps/rejected": -300.8819580078125, |
|
"eval_loss": 0.6082866191864014, |
|
"eval_rewards/accuracies": 0.71875, |
|
"eval_rewards/chosen": -0.2610045373439789, |
|
"eval_rewards/margins": 0.289523184299469, |
|
"eval_rewards/rejected": -0.5505276918411255, |
|
"eval_runtime": 100.0694, |
|
"eval_samples_per_second": 19.986, |
|
"eval_steps_per_second": 0.32, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.942159096992154e-06, |
|
"logits/chosen": -2.2042763233184814, |
|
"logits/rejected": -2.090355157852173, |
|
"logps/chosen": -310.1408386230469, |
|
"logps/rejected": -309.30841064453125, |
|
"loss": 0.6141, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.3219727873802185, |
|
"rewards/margins": 0.24631533026695251, |
|
"rewards/rejected": -0.5682880878448486, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.931986719649298e-06, |
|
"logits/chosen": -2.02527117729187, |
|
"logits/rejected": -1.9362668991088867, |
|
"logps/chosen": -248.31161499023438, |
|
"logps/rejected": -276.61370849609375, |
|
"loss": 0.5962, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.362973153591156, |
|
"rewards/margins": 0.08302036672830582, |
|
"rewards/rejected": -0.445993572473526, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.921003022689865e-06, |
|
"logits/chosen": -1.9443562030792236, |
|
"logits/rejected": -1.8264896869659424, |
|
"logps/chosen": -310.73309326171875, |
|
"logps/rejected": -327.0270690917969, |
|
"loss": 0.6018, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.39779695868492126, |
|
"rewards/margins": 0.37337055802345276, |
|
"rewards/rejected": -0.771167516708374, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.909211670315115e-06, |
|
"logits/chosen": -2.0006632804870605, |
|
"logits/rejected": -1.8731825351715088, |
|
"logps/chosen": -346.4628601074219, |
|
"logps/rejected": -290.8958435058594, |
|
"loss": 0.6264, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.45741358399391174, |
|
"rewards/margins": 0.184947669506073, |
|
"rewards/rejected": -0.6423612833023071, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.8966165961630914e-06, |
|
"logits/chosen": -1.9583308696746826, |
|
"logits/rejected": -1.836646318435669, |
|
"logps/chosen": -295.84503173828125, |
|
"logps/rejected": -316.0587463378906, |
|
"loss": 0.6125, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.32640910148620605, |
|
"rewards/margins": 0.405341237783432, |
|
"rewards/rejected": -0.7317503094673157, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.883222001996352e-06, |
|
"logits/chosen": -1.839678406715393, |
|
"logits/rejected": -1.8322643041610718, |
|
"logps/chosen": -264.20538330078125, |
|
"logps/rejected": -310.7685852050781, |
|
"loss": 0.5648, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.49244385957717896, |
|
"rewards/margins": 0.33429765701293945, |
|
"rewards/rejected": -0.8267415165901184, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.869032356300234e-06, |
|
"logits/chosen": -1.939711570739746, |
|
"logits/rejected": -1.838923692703247, |
|
"logps/chosen": -346.75946044921875, |
|
"logps/rejected": -347.9893493652344, |
|
"loss": 0.5856, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.3741551339626312, |
|
"rewards/margins": 0.45344096422195435, |
|
"rewards/rejected": -0.8275960683822632, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.854052392792162e-06, |
|
"logits/chosen": -1.995643973350525, |
|
"logits/rejected": -1.8869165182113647, |
|
"logps/chosen": -270.1524658203125, |
|
"logps/rejected": -292.2115173339844, |
|
"loss": 0.6162, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.4847889840602875, |
|
"rewards/margins": 0.3497465252876282, |
|
"rewards/rejected": -0.8345354795455933, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.838287108842454e-06, |
|
"logits/chosen": -1.9855091571807861, |
|
"logits/rejected": -1.8763986825942993, |
|
"logps/chosen": -321.6457214355469, |
|
"logps/rejected": -359.28326416015625, |
|
"loss": 0.5772, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.5840765237808228, |
|
"rewards/margins": 0.4493025839328766, |
|
"rewards/rejected": -1.033379077911377, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.821741763807186e-06, |
|
"logits/chosen": -1.8463340997695923, |
|
"logits/rejected": -1.870745062828064, |
|
"logps/chosen": -330.8719787597656, |
|
"logps/rejected": -393.6171875, |
|
"loss": 0.5813, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.4910261034965515, |
|
"rewards/margins": 0.6333194375038147, |
|
"rewards/rejected": -1.1243455410003662, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_logits/chosen": -1.9346712827682495, |
|
"eval_logits/rejected": -1.7833642959594727, |
|
"eval_logps/chosen": -311.629150390625, |
|
"eval_logps/rejected": -338.0690612792969, |
|
"eval_loss": 0.5857117772102356, |
|
"eval_rewards/accuracies": 0.734375, |
|
"eval_rewards/chosen": -0.5019174218177795, |
|
"eval_rewards/margins": 0.42048126459121704, |
|
"eval_rewards/rejected": -0.9223986864089966, |
|
"eval_runtime": 99.8761, |
|
"eval_samples_per_second": 20.025, |
|
"eval_steps_per_second": 0.32, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.804421877273649e-06, |
|
"logits/chosen": -1.9884185791015625, |
|
"logits/rejected": -1.923436164855957, |
|
"logps/chosen": -320.7406921386719, |
|
"logps/rejected": -325.28436279296875, |
|
"loss": 0.584, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.39559125900268555, |
|
"rewards/margins": 0.4437909722328186, |
|
"rewards/rejected": -0.8393821716308594, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.786333227218996e-06, |
|
"logits/chosen": -1.9525524377822876, |
|
"logits/rejected": -1.8964300155639648, |
|
"logps/chosen": -246.2771759033203, |
|
"logps/rejected": -283.88470458984375, |
|
"loss": 0.5985, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.3708118796348572, |
|
"rewards/margins": 0.24297749996185303, |
|
"rewards/rejected": -0.6137893795967102, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.7674818480826856e-06, |
|
"logits/chosen": -2.1766157150268555, |
|
"logits/rejected": -2.0494487285614014, |
|
"logps/chosen": -368.79644775390625, |
|
"logps/rejected": -343.870361328125, |
|
"loss": 0.5883, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.36435064673423767, |
|
"rewards/margins": 0.5354640483856201, |
|
"rewards/rejected": -0.8998147249221802, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.747874028753375e-06, |
|
"logits/chosen": -2.0296483039855957, |
|
"logits/rejected": -2.0344138145446777, |
|
"logps/chosen": -333.8695068359375, |
|
"logps/rejected": -346.3202209472656, |
|
"loss": 0.6011, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.47436419129371643, |
|
"rewards/margins": 0.3697816729545593, |
|
"rewards/rejected": -0.8441458940505981, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.72751631047092e-06, |
|
"logits/chosen": -1.8225123882293701, |
|
"logits/rejected": -1.6715686321258545, |
|
"logps/chosen": -314.07525634765625, |
|
"logps/rejected": -329.0157775878906, |
|
"loss": 0.5694, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.40629976987838745, |
|
"rewards/margins": 0.3972943127155304, |
|
"rewards/rejected": -0.8035939931869507, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.706415484644196e-06, |
|
"logits/chosen": -1.7603601217269897, |
|
"logits/rejected": -1.7641122341156006, |
|
"logps/chosen": -298.09197998046875, |
|
"logps/rejected": -359.8817138671875, |
|
"loss": 0.6036, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.6035802960395813, |
|
"rewards/margins": 0.2919658422470093, |
|
"rewards/rejected": -0.8955461382865906, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.684578590585457e-06, |
|
"logits/chosen": -1.7902147769927979, |
|
"logits/rejected": -1.5845298767089844, |
|
"logps/chosen": -339.6332092285156, |
|
"logps/rejected": -328.145263671875, |
|
"loss": 0.6001, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.355998158454895, |
|
"rewards/margins": 0.5849534869194031, |
|
"rewards/rejected": -0.9409516453742981, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.662012913161998e-06, |
|
"logits/chosen": -1.4573760032653809, |
|
"logits/rejected": -1.2839057445526123, |
|
"logps/chosen": -342.0000305175781, |
|
"logps/rejected": -344.8834228515625, |
|
"loss": 0.5528, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.44703975319862366, |
|
"rewards/margins": 0.5145122408866882, |
|
"rewards/rejected": -0.9615518450737, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.6387259803658945e-06, |
|
"logits/chosen": -1.5755952596664429, |
|
"logits/rejected": -1.6874421834945679, |
|
"logps/chosen": -310.2431640625, |
|
"logps/rejected": -330.83929443359375, |
|
"loss": 0.5653, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.5772610306739807, |
|
"rewards/margins": 0.27001291513442993, |
|
"rewards/rejected": -0.8472738265991211, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.6147255608026395e-06, |
|
"logits/chosen": -1.265753149986267, |
|
"logits/rejected": -1.2227959632873535, |
|
"logps/chosen": -297.21771240234375, |
|
"logps/rejected": -325.6265869140625, |
|
"loss": 0.6033, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.6450126767158508, |
|
"rewards/margins": 0.21162936091423035, |
|
"rewards/rejected": -0.8566421270370483, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_logits/chosen": -1.284441351890564, |
|
"eval_logits/rejected": -1.0646388530731201, |
|
"eval_logps/chosen": -326.2359924316406, |
|
"eval_logps/rejected": -359.0956726074219, |
|
"eval_loss": 0.5683714747428894, |
|
"eval_rewards/accuracies": 0.7578125, |
|
"eval_rewards/chosen": -0.6479859948158264, |
|
"eval_rewards/margins": 0.4846786856651306, |
|
"eval_rewards/rejected": -1.132664680480957, |
|
"eval_runtime": 99.87, |
|
"eval_samples_per_second": 20.026, |
|
"eval_steps_per_second": 0.32, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.590019661099507e-06, |
|
"logits/chosen": -1.3058087825775146, |
|
"logits/rejected": -0.954175591468811, |
|
"logps/chosen": -344.62689208984375, |
|
"logps/rejected": -351.04461669921875, |
|
"loss": 0.5816, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.686931312084198, |
|
"rewards/margins": 0.5147491693496704, |
|
"rewards/rejected": -1.2016804218292236, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.564616523234511e-06, |
|
"logits/chosen": -1.1599328517913818, |
|
"logits/rejected": -1.1522148847579956, |
|
"logps/chosen": -404.53692626953125, |
|
"logps/rejected": -452.5421447753906, |
|
"loss": 0.5824, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.0343563556671143, |
|
"rewards/margins": 0.4577902853488922, |
|
"rewards/rejected": -1.4921467304229736, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.538524621786851e-06, |
|
"logits/chosen": -1.2001521587371826, |
|
"logits/rejected": -0.9351218342781067, |
|
"logps/chosen": -339.4947814941406, |
|
"logps/rejected": -394.2229919433594, |
|
"loss": 0.5663, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8950597643852234, |
|
"rewards/margins": 0.6420807242393494, |
|
"rewards/rejected": -1.5371406078338623, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.5117526611097685e-06, |
|
"logits/chosen": -1.2330704927444458, |
|
"logits/rejected": -1.01473069190979, |
|
"logps/chosen": -337.0997314453125, |
|
"logps/rejected": -390.93597412109375, |
|
"loss": 0.5192, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.6719285249710083, |
|
"rewards/margins": 0.7142583131790161, |
|
"rewards/rejected": -1.3861868381500244, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.484309572426732e-06, |
|
"logits/chosen": -1.082319974899292, |
|
"logits/rejected": -0.7122712731361389, |
|
"logps/chosen": -314.4213562011719, |
|
"logps/rejected": -322.6642150878906, |
|
"loss": 0.5411, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.8003314733505249, |
|
"rewards/margins": 0.607052206993103, |
|
"rewards/rejected": -1.407383680343628, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.456204510851957e-06, |
|
"logits/chosen": -0.9956424832344055, |
|
"logits/rejected": -0.6910310983657837, |
|
"logps/chosen": -381.59820556640625, |
|
"logps/rejected": -378.14532470703125, |
|
"loss": 0.556, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.123124361038208, |
|
"rewards/margins": 0.35411447286605835, |
|
"rewards/rejected": -1.4772387742996216, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.427446852336223e-06, |
|
"logits/chosen": -0.6827043294906616, |
|
"logits/rejected": -0.7810491323471069, |
|
"logps/chosen": -299.59564208984375, |
|
"logps/rejected": -401.03204345703125, |
|
"loss": 0.538, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.8863980174064636, |
|
"rewards/margins": 0.5527952909469604, |
|
"rewards/rejected": -1.4391934871673584, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.398046190539025e-06, |
|
"logits/chosen": -1.026639461517334, |
|
"logits/rejected": -0.9812144041061401, |
|
"logps/chosen": -346.61846923828125, |
|
"logps/rejected": -431.44622802734375, |
|
"loss": 0.5472, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.7535640001296997, |
|
"rewards/margins": 0.735582172870636, |
|
"rewards/rejected": -1.4891459941864014, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.368012333628089e-06, |
|
"logits/chosen": -0.5979812145233154, |
|
"logits/rejected": -0.34077292680740356, |
|
"logps/chosen": -342.59039306640625, |
|
"logps/rejected": -410.510498046875, |
|
"loss": 0.5475, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9304134249687195, |
|
"rewards/margins": 0.619970440864563, |
|
"rewards/rejected": -1.5503838062286377, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.337355301007336e-06, |
|
"logits/chosen": -0.6573127508163452, |
|
"logits/rejected": -0.4596225321292877, |
|
"logps/chosen": -428.990966796875, |
|
"logps/rejected": -461.5831604003906, |
|
"loss": 0.5338, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.002087950706482, |
|
"rewards/margins": 0.5927420854568481, |
|
"rewards/rejected": -1.5948301553726196, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_logits/chosen": -0.3462526202201843, |
|
"eval_logits/rejected": -0.005769494920969009, |
|
"eval_logps/chosen": -352.115234375, |
|
"eval_logps/rejected": -406.63665771484375, |
|
"eval_loss": 0.5431244969367981, |
|
"eval_rewards/accuracies": 0.75390625, |
|
"eval_rewards/chosen": -0.9067782163619995, |
|
"eval_rewards/margins": 0.7012960910797119, |
|
"eval_rewards/rejected": -1.6080741882324219, |
|
"eval_runtime": 100.0473, |
|
"eval_samples_per_second": 19.991, |
|
"eval_steps_per_second": 0.32, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.306085319974368e-06, |
|
"logits/chosen": -0.5207012891769409, |
|
"logits/rejected": -0.08656563609838486, |
|
"logps/chosen": -370.1058654785156, |
|
"logps/rejected": -396.0721435546875, |
|
"loss": 0.5403, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.8168034553527832, |
|
"rewards/margins": 0.7966574430465698, |
|
"rewards/rejected": -1.6134607791900635, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.274212822308612e-06, |
|
"logits/chosen": -0.7111207246780396, |
|
"logits/rejected": -0.2700180411338806, |
|
"logps/chosen": -391.42694091796875, |
|
"logps/rejected": -451.6090393066406, |
|
"loss": 0.594, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9661979675292969, |
|
"rewards/margins": 0.7674239277839661, |
|
"rewards/rejected": -1.7336218357086182, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.241748440791232e-06, |
|
"logits/chosen": 0.08613274991512299, |
|
"logits/rejected": 0.35309532284736633, |
|
"logps/chosen": -364.17803955078125, |
|
"logps/rejected": -411.7427673339844, |
|
"loss": 0.5565, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.0981857776641846, |
|
"rewards/margins": 0.6119104623794556, |
|
"rewards/rejected": -1.7100961208343506, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.208703005658e-06, |
|
"logits/chosen": -0.4963720440864563, |
|
"logits/rejected": -0.3326560854911804, |
|
"logps/chosen": -360.01678466796875, |
|
"logps/rejected": -418.79473876953125, |
|
"loss": 0.5304, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.9775301218032837, |
|
"rewards/margins": 0.6676076054573059, |
|
"rewards/rejected": -1.6451377868652344, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.175087540986283e-06, |
|
"logits/chosen": -0.41058802604675293, |
|
"logits/rejected": -0.10796304047107697, |
|
"logps/chosen": -394.26788330078125, |
|
"logps/rejected": -391.35235595703125, |
|
"loss": 0.5173, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0439192056655884, |
|
"rewards/margins": 0.5056353211402893, |
|
"rewards/rejected": -1.5495543479919434, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.140913261017382e-06, |
|
"logits/chosen": -0.2698209285736084, |
|
"logits/rejected": -0.15175169706344604, |
|
"logps/chosen": -360.52435302734375, |
|
"logps/rejected": -470.09796142578125, |
|
"loss": 0.5291, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.0439623594284058, |
|
"rewards/margins": 0.9672080278396606, |
|
"rewards/rejected": -2.0111701488494873, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.106191566415412e-06, |
|
"logits/chosen": 0.04819805547595024, |
|
"logits/rejected": 0.37417465448379517, |
|
"logps/chosen": -363.9477233886719, |
|
"logps/rejected": -359.45098876953125, |
|
"loss": 0.525, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.1165151596069336, |
|
"rewards/margins": 0.47697582840919495, |
|
"rewards/rejected": -1.5934909582138062, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.070934040463999e-06, |
|
"logits/chosen": -0.24051837623119354, |
|
"logits/rejected": -0.013500308617949486, |
|
"logps/chosen": -402.32135009765625, |
|
"logps/rejected": -470.900390625, |
|
"loss": 0.5275, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8973871469497681, |
|
"rewards/margins": 0.8212674260139465, |
|
"rewards/rejected": -1.7186546325683594, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.035152445202057e-06, |
|
"logits/chosen": -0.12294012308120728, |
|
"logits/rejected": 0.18862707912921906, |
|
"logps/chosen": -334.65838623046875, |
|
"logps/rejected": -339.1643981933594, |
|
"loss": 0.5649, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.870491623878479, |
|
"rewards/margins": 0.3955751061439514, |
|
"rewards/rejected": -1.2660666704177856, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.998858717499931e-06, |
|
"logits/chosen": -0.14695128798484802, |
|
"logits/rejected": -0.06862150132656097, |
|
"logps/chosen": -320.9615783691406, |
|
"logps/rejected": -379.9163513183594, |
|
"loss": 0.5235, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0008286237716675, |
|
"rewards/margins": 0.7716361880302429, |
|
"rewards/rejected": -1.7724647521972656, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_logits/chosen": -0.13744021952152252, |
|
"eval_logits/rejected": 0.22457998991012573, |
|
"eval_logps/chosen": -364.7436218261719, |
|
"eval_logps/rejected": -428.6434326171875, |
|
"eval_loss": 0.5304214358329773, |
|
"eval_rewards/accuracies": 0.74609375, |
|
"eval_rewards/chosen": -1.0330617427825928, |
|
"eval_rewards/margins": 0.7950804233551025, |
|
"eval_rewards/rejected": -1.8281422853469849, |
|
"eval_runtime": 99.8419, |
|
"eval_samples_per_second": 20.032, |
|
"eval_steps_per_second": 0.321, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.9620649650772075e-06, |
|
"logits/chosen": -0.4862421154975891, |
|
"logits/rejected": -0.31390875577926636, |
|
"logps/chosen": -370.2496643066406, |
|
"logps/rejected": -415.6681213378906, |
|
"loss": 0.6052, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.104994535446167, |
|
"rewards/margins": 0.5711352229118347, |
|
"rewards/rejected": -1.6761295795440674, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.924783462463541e-06, |
|
"logits/chosen": -0.8888682126998901, |
|
"logits/rejected": -0.5112391710281372, |
|
"logps/chosen": -419.72259521484375, |
|
"logps/rejected": -429.8675842285156, |
|
"loss": 0.5374, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.8601617813110352, |
|
"rewards/margins": 0.8069065809249878, |
|
"rewards/rejected": -1.6670682430267334, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.887026646903824e-06, |
|
"logits/chosen": -0.5013810992240906, |
|
"logits/rejected": -0.2705232799053192, |
|
"logps/chosen": -308.744140625, |
|
"logps/rejected": -341.9669494628906, |
|
"loss": 0.5341, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.1102688312530518, |
|
"rewards/margins": 0.56825852394104, |
|
"rewards/rejected": -1.6785274744033813, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.8488071142090745e-06, |
|
"logits/chosen": -0.09252335131168365, |
|
"logits/rejected": 0.34569135308265686, |
|
"logps/chosen": -395.5841064453125, |
|
"logps/rejected": -367.30340576171875, |
|
"loss": 0.5889, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.5202785730361938, |
|
"rewards/margins": 0.374620258808136, |
|
"rewards/rejected": -1.8948990106582642, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.8101376145544326e-06, |
|
"logits/chosen": -0.28637129068374634, |
|
"logits/rejected": 0.01174071989953518, |
|
"logps/chosen": -399.59100341796875, |
|
"logps/rejected": -440.2303161621094, |
|
"loss": 0.5645, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.3130629062652588, |
|
"rewards/margins": 0.7463555335998535, |
|
"rewards/rejected": -2.0594184398651123, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.771031048225653e-06, |
|
"logits/chosen": -0.419985294342041, |
|
"logits/rejected": 0.13889722526073456, |
|
"logps/chosen": -446.94354248046875, |
|
"logps/rejected": -477.18621826171875, |
|
"loss": 0.5789, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.3231090307235718, |
|
"rewards/margins": 0.6492261290550232, |
|
"rewards/rejected": -1.9723352193832397, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7315004613155203e-06, |
|
"logits/chosen": -0.1880003809928894, |
|
"logits/rejected": 0.058419276028871536, |
|
"logps/chosen": -360.2503356933594, |
|
"logps/rejected": -452.9226989746094, |
|
"loss": 0.5476, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.9840580821037292, |
|
"rewards/margins": 0.7168115377426147, |
|
"rewards/rejected": -1.7008697986602783, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.6915590413716314e-06, |
|
"logits/chosen": -0.15248139202594757, |
|
"logits/rejected": -0.13186867535114288, |
|
"logps/chosen": -369.7059631347656, |
|
"logps/rejected": -392.6582946777344, |
|
"loss": 0.5676, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -1.1791012287139893, |
|
"rewards/margins": 0.19577212631702423, |
|
"rewards/rejected": -1.374873399734497, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.651220112996974e-06, |
|
"logits/chosen": -0.29559603333473206, |
|
"logits/rejected": 0.2896161377429962, |
|
"logps/chosen": -366.8363952636719, |
|
"logps/rejected": -398.7510070800781, |
|
"loss": 0.5805, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.1136764287948608, |
|
"rewards/margins": 0.7713703513145447, |
|
"rewards/rejected": -1.8850466012954712, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.6104971334047954e-06, |
|
"logits/chosen": -0.414069265127182, |
|
"logits/rejected": 0.013663103803992271, |
|
"logps/chosen": -333.1685791015625, |
|
"logps/rejected": -332.27154541015625, |
|
"loss": 0.5241, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.1102468967437744, |
|
"rewards/margins": 0.4005044400691986, |
|
"rewards/rejected": -1.510751485824585, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_logits/chosen": -0.043191783130168915, |
|
"eval_logits/rejected": 0.3361242711544037, |
|
"eval_logps/chosen": -359.03619384765625, |
|
"eval_logps/rejected": -416.9325256347656, |
|
"eval_loss": 0.5276142954826355, |
|
"eval_rewards/accuracies": 0.7578125, |
|
"eval_rewards/chosen": -0.9759873151779175, |
|
"eval_rewards/margins": 0.7350461483001709, |
|
"eval_rewards/rejected": -1.711033582687378, |
|
"eval_runtime": 100.0001, |
|
"eval_samples_per_second": 20.0, |
|
"eval_steps_per_second": 0.32, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.5694036879292278e-06, |
|
"logits/chosen": 0.23451080918312073, |
|
"logits/rejected": 0.3333522379398346, |
|
"logps/chosen": -328.44879150390625, |
|
"logps/rejected": -390.59796142578125, |
|
"loss": 0.5168, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.1335585117340088, |
|
"rewards/margins": 0.44246071577072144, |
|
"rewards/rejected": -1.576019525527954, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.527953485493168e-06, |
|
"logits/chosen": -0.14245302975177765, |
|
"logits/rejected": 0.7153403162956238, |
|
"logps/chosen": -433.4105529785156, |
|
"logps/rejected": -413.8798828125, |
|
"loss": 0.5277, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.0100504159927368, |
|
"rewards/margins": 0.694607138633728, |
|
"rewards/rejected": -1.7046575546264648, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.4861603540349343e-06, |
|
"logits/chosen": -0.15209975838661194, |
|
"logits/rejected": 0.1794484406709671, |
|
"logps/chosen": -386.0712890625, |
|
"logps/rejected": -396.65350341796875, |
|
"loss": 0.5623, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.0117709636688232, |
|
"rewards/margins": 0.4693912863731384, |
|
"rewards/rejected": -1.4811620712280273, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4440382358952116e-06, |
|
"logits/chosen": -0.18714885413646698, |
|
"logits/rejected": 0.25312191247940063, |
|
"logps/chosen": -389.5378723144531, |
|
"logps/rejected": -420.5859375, |
|
"loss": 0.5739, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.263262152671814, |
|
"rewards/margins": 0.639037013053894, |
|
"rewards/rejected": -1.902299165725708, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4016011831658413e-06, |
|
"logits/chosen": -0.19216255843639374, |
|
"logits/rejected": 0.2039567232131958, |
|
"logps/chosen": -401.4894104003906, |
|
"logps/rejected": -410.1803283691406, |
|
"loss": 0.5573, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.3493444919586182, |
|
"rewards/margins": 0.5228354334831238, |
|
"rewards/rejected": -1.8721797466278076, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3588633530019872e-06, |
|
"logits/chosen": 0.14342132210731506, |
|
"logits/rejected": 0.7742571234703064, |
|
"logps/chosen": -423.52508544921875, |
|
"logps/rejected": -430.2787170410156, |
|
"loss": 0.5323, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.1869566440582275, |
|
"rewards/margins": 0.7774726748466492, |
|
"rewards/rejected": -1.9644291400909424, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.3158390028992613e-06, |
|
"logits/chosen": 0.427659273147583, |
|
"logits/rejected": 0.7266635894775391, |
|
"logps/chosen": -395.55694580078125, |
|
"logps/rejected": -439.48974609375, |
|
"loss": 0.5392, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.3343242406845093, |
|
"rewards/margins": 0.7467038035392761, |
|
"rewards/rejected": -2.0810282230377197, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.272542485937369e-06, |
|
"logits/chosen": 0.6910224556922913, |
|
"logits/rejected": 0.5784742832183838, |
|
"logps/chosen": -356.70440673828125, |
|
"logps/rejected": -424.77606201171875, |
|
"loss": 0.4842, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0763694047927856, |
|
"rewards/margins": 0.7257629632949829, |
|
"rewards/rejected": -1.8021323680877686, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.228988245991869e-06, |
|
"logits/chosen": 0.2895136773586273, |
|
"logits/rejected": 0.8819330930709839, |
|
"logps/chosen": -408.9416809082031, |
|
"logps/rejected": -470.29168701171875, |
|
"loss": 0.546, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4385627508163452, |
|
"rewards/margins": 0.8292133212089539, |
|
"rewards/rejected": -2.2677762508392334, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.185190812915646e-06, |
|
"logits/chosen": 0.31424444913864136, |
|
"logits/rejected": 1.1830999851226807, |
|
"logps/chosen": -399.9066162109375, |
|
"logps/rejected": -402.701171875, |
|
"loss": 0.5332, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.3664418458938599, |
|
"rewards/margins": 0.7213462591171265, |
|
"rewards/rejected": -2.0877881050109863, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_logits/chosen": 0.46808481216430664, |
|
"eval_logits/rejected": 0.89262855052948, |
|
"eval_logps/chosen": -385.51177978515625, |
|
"eval_logps/rejected": -452.3992919921875, |
|
"eval_loss": 0.5257142782211304, |
|
"eval_rewards/accuracies": 0.75, |
|
"eval_rewards/chosen": -1.2407435178756714, |
|
"eval_rewards/margins": 0.8249573707580566, |
|
"eval_rewards/rejected": -2.0657010078430176, |
|
"eval_runtime": 99.7208, |
|
"eval_samples_per_second": 20.056, |
|
"eval_steps_per_second": 0.321, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.141164797691697e-06, |
|
"logits/chosen": 0.3064689636230469, |
|
"logits/rejected": 1.0518977642059326, |
|
"logps/chosen": -415.59722900390625, |
|
"logps/rejected": -537.9058837890625, |
|
"loss": 0.5208, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.5367953777313232, |
|
"rewards/margins": 0.9906780123710632, |
|
"rewards/rejected": -2.527473211288452, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.0969248875588547e-06, |
|
"logits/chosen": 0.3721020817756653, |
|
"logits/rejected": 0.9606655836105347, |
|
"logps/chosen": -354.6772155761719, |
|
"logps/rejected": -410.63189697265625, |
|
"loss": 0.5864, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.3338547945022583, |
|
"rewards/margins": 0.5358750224113464, |
|
"rewards/rejected": -1.8697296380996704, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.0524858411120745e-06, |
|
"logits/chosen": 0.24086757004261017, |
|
"logits/rejected": 0.5955575108528137, |
|
"logps/chosen": -439.4383239746094, |
|
"logps/rejected": -438.33892822265625, |
|
"loss": 0.4967, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.1810623407363892, |
|
"rewards/margins": 0.7811315059661865, |
|
"rewards/rejected": -1.9621938467025757, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.007862483378906e-06, |
|
"logits/chosen": -0.363764226436615, |
|
"logits/rejected": 0.19057665765285492, |
|
"logps/chosen": -405.48583984375, |
|
"logps/rejected": -428.91021728515625, |
|
"loss": 0.5547, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.2349156141281128, |
|
"rewards/margins": 0.433199018239975, |
|
"rewards/rejected": -1.6681146621704102, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.963069700873817e-06, |
|
"logits/chosen": -0.17086614668369293, |
|
"logits/rejected": 0.29652929306030273, |
|
"logps/chosen": -400.3623962402344, |
|
"logps/rejected": -419.65692138671875, |
|
"loss": 0.5162, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.1464965343475342, |
|
"rewards/margins": 0.6613754630088806, |
|
"rewards/rejected": -1.8078720569610596, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9181224366319947e-06, |
|
"logits/chosen": 0.455890953540802, |
|
"logits/rejected": 1.111419439315796, |
|
"logps/chosen": -390.26019287109375, |
|
"logps/rejected": -440.094970703125, |
|
"loss": 0.5123, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.5266087055206299, |
|
"rewards/margins": 0.7709134221076965, |
|
"rewards/rejected": -2.2975220680236816, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.873035685224297e-06, |
|
"logits/chosen": 0.41874760389328003, |
|
"logits/rejected": 0.5401151180267334, |
|
"logps/chosen": -409.41815185546875, |
|
"logps/rejected": -519.6390380859375, |
|
"loss": 0.5646, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.4578272104263306, |
|
"rewards/margins": 0.6119868159294128, |
|
"rewards/rejected": -2.0698142051696777, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.827824487755007e-06, |
|
"logits/chosen": 0.3823867440223694, |
|
"logits/rejected": 0.33489638566970825, |
|
"logps/chosen": -424.8614807128906, |
|
"logps/rejected": -423.9286193847656, |
|
"loss": 0.5896, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.6043908596038818, |
|
"rewards/margins": 0.10235877335071564, |
|
"rewards/rejected": -1.706749677658081, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7825039268440655e-06, |
|
"logits/chosen": 0.1400536596775055, |
|
"logits/rejected": 0.41318145394325256, |
|
"logps/chosen": -332.98907470703125, |
|
"logps/rejected": -411.08929443359375, |
|
"loss": 0.5605, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.362296462059021, |
|
"rewards/margins": 0.7367597818374634, |
|
"rewards/rejected": -2.0990562438964844, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7370891215954572e-06, |
|
"logits/chosen": 0.009750944562256336, |
|
"logits/rejected": 0.4523545205593109, |
|
"logps/chosen": -405.9288635253906, |
|
"logps/rejected": -448.93890380859375, |
|
"loss": 0.531, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.4209400415420532, |
|
"rewards/margins": 0.5459128618240356, |
|
"rewards/rejected": -1.9668527841567993, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": -0.0765841156244278, |
|
"eval_logits/rejected": 0.2824910283088684, |
|
"eval_logps/chosen": -374.2120361328125, |
|
"eval_logps/rejected": -431.3622741699219, |
|
"eval_loss": 0.5232118964195251, |
|
"eval_rewards/accuracies": 0.74609375, |
|
"eval_rewards/chosen": -1.1277458667755127, |
|
"eval_rewards/margins": 0.7275847792625427, |
|
"eval_rewards/rejected": -1.8553305864334106, |
|
"eval_runtime": 99.9876, |
|
"eval_samples_per_second": 20.002, |
|
"eval_steps_per_second": 0.32, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6915952225534187e-06, |
|
"logits/chosen": -0.0979020744562149, |
|
"logits/rejected": 0.344775527715683, |
|
"logps/chosen": -407.2764892578125, |
|
"logps/rejected": -420.5032653808594, |
|
"loss": 0.5258, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.3287527561187744, |
|
"rewards/margins": 0.5185291171073914, |
|
"rewards/rejected": -1.847281813621521, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6460374066481654e-06, |
|
"logits/chosen": 0.06318001449108124, |
|
"logits/rejected": 0.052066363394260406, |
|
"logps/chosen": -414.02142333984375, |
|
"logps/rejected": -435.2493591308594, |
|
"loss": 0.5507, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -1.5719435214996338, |
|
"rewards/margins": 0.11801069974899292, |
|
"rewards/rejected": -1.689954161643982, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.6004308721328096e-06, |
|
"logits/chosen": -0.09392335265874863, |
|
"logits/rejected": 0.6331489682197571, |
|
"logps/chosen": -454.8587341308594, |
|
"logps/rejected": -456.37518310546875, |
|
"loss": 0.5531, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.4329391717910767, |
|
"rewards/margins": 0.73013836145401, |
|
"rewards/rejected": -2.1630773544311523, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5547908335131704e-06, |
|
"logits/chosen": 0.029746657237410545, |
|
"logits/rejected": 0.5345848798751831, |
|
"logps/chosen": -442.5694274902344, |
|
"logps/rejected": -484.28472900390625, |
|
"loss": 0.5263, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.2879016399383545, |
|
"rewards/margins": 0.9445211291313171, |
|
"rewards/rejected": -2.2324228286743164, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.5091325164721574e-06, |
|
"logits/chosen": -0.13794167339801788, |
|
"logits/rejected": 0.2926728129386902, |
|
"logps/chosen": -406.499267578125, |
|
"logps/rejected": -446.9866638183594, |
|
"loss": 0.5208, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.321260690689087, |
|
"rewards/margins": 0.577609121799469, |
|
"rewards/rejected": -1.8988698720932007, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.463471152790427e-06, |
|
"logits/chosen": 0.5882676243782043, |
|
"logits/rejected": 0.7257478833198547, |
|
"logps/chosen": -323.394287109375, |
|
"logps/rejected": -373.9036560058594, |
|
"loss": 0.5704, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.2941257953643799, |
|
"rewards/margins": 0.4104182720184326, |
|
"rewards/rejected": -1.7045438289642334, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.417821975265009e-06, |
|
"logits/chosen": 0.3513713479042053, |
|
"logits/rejected": 0.9065437316894531, |
|
"logps/chosen": -369.2812194824219, |
|
"logps/rejected": -405.2218322753906, |
|
"loss": 0.529, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.17937433719635, |
|
"rewards/margins": 0.8810786008834839, |
|
"rewards/rejected": -2.060452699661255, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.3722002126275826e-06, |
|
"logits/chosen": 0.16156932711601257, |
|
"logits/rejected": 0.3264058828353882, |
|
"logps/chosen": -362.4791564941406, |
|
"logps/rejected": -458.8309020996094, |
|
"loss": 0.5004, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.1431844234466553, |
|
"rewards/margins": 0.9749501347541809, |
|
"rewards/rejected": -2.1181344985961914, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3266210844641242e-06, |
|
"logits/chosen": -0.016268134117126465, |
|
"logits/rejected": 0.5968390703201294, |
|
"logps/chosen": -462.67864990234375, |
|
"logps/rejected": -465.4684143066406, |
|
"loss": 0.5165, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.264756441116333, |
|
"rewards/margins": 0.684155285358429, |
|
"rewards/rejected": -1.9489116668701172, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2810997961375943e-06, |
|
"logits/chosen": 0.6272622346878052, |
|
"logits/rejected": 1.0814670324325562, |
|
"logps/chosen": -317.378662109375, |
|
"logps/rejected": -359.07080078125, |
|
"loss": 0.4864, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.1075953245162964, |
|
"rewards/margins": 0.677285373210907, |
|
"rewards/rejected": -1.7848806381225586, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_logits/chosen": 0.740875244140625, |
|
"eval_logits/rejected": 1.181448221206665, |
|
"eval_logps/chosen": -378.1357727050781, |
|
"eval_logps/rejected": -444.7675476074219, |
|
"eval_loss": 0.5172202587127686, |
|
"eval_rewards/accuracies": 0.75, |
|
"eval_rewards/chosen": -1.1669834852218628, |
|
"eval_rewards/margins": 0.8224003314971924, |
|
"eval_rewards/rejected": -1.9893838167190552, |
|
"eval_runtime": 99.7722, |
|
"eval_samples_per_second": 20.046, |
|
"eval_steps_per_second": 0.321, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2356515337153726e-06, |
|
"logits/chosen": 0.4388123154640198, |
|
"logits/rejected": 0.5722852349281311, |
|
"logps/chosen": -431.4241638183594, |
|
"logps/rejected": -424.3793029785156, |
|
"loss": 0.5682, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.5110528469085693, |
|
"rewards/margins": 0.26485976576805115, |
|
"rewards/rejected": -1.7759126424789429, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.19029145890313e-06, |
|
"logits/chosen": 0.6199942827224731, |
|
"logits/rejected": 0.3805321156978607, |
|
"logps/chosen": -441.6695861816406, |
|
"logps/rejected": -530.5279541015625, |
|
"loss": 0.5617, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.2569580078125, |
|
"rewards/margins": 0.8591600656509399, |
|
"rewards/rejected": -2.1161179542541504, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.145034703986834e-06, |
|
"logits/chosen": 0.03130330145359039, |
|
"logits/rejected": 0.9182190895080566, |
|
"logps/chosen": -422.25433349609375, |
|
"logps/rejected": -398.5935363769531, |
|
"loss": 0.5214, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.061409592628479, |
|
"rewards/margins": 0.7287064790725708, |
|
"rewards/rejected": -1.7901159524917603, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.099896366784554e-06, |
|
"logits/chosen": 0.569893479347229, |
|
"logits/rejected": 0.5035176277160645, |
|
"logps/chosen": -370.8650817871094, |
|
"logps/rejected": -492.43206787109375, |
|
"loss": 0.519, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.164158582687378, |
|
"rewards/margins": 0.7605217695236206, |
|
"rewards/rejected": -1.9246803522109985, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.054891505609782e-06, |
|
"logits/chosen": 0.6834603548049927, |
|
"logits/rejected": 0.6329541802406311, |
|
"logps/chosen": -374.13568115234375, |
|
"logps/rejected": -448.3985290527344, |
|
"loss": 0.5592, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -1.1955368518829346, |
|
"rewards/margins": 0.5105778574943542, |
|
"rewards/rejected": -1.7061145305633545, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.010035134247922e-06, |
|
"logits/chosen": 0.7083349227905273, |
|
"logits/rejected": 1.068521499633789, |
|
"logps/chosen": -379.41778564453125, |
|
"logps/rejected": -438.62921142578125, |
|
"loss": 0.5196, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.0824288129806519, |
|
"rewards/margins": 0.8338676691055298, |
|
"rewards/rejected": -1.916296362876892, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9653422169476354e-06, |
|
"logits/chosen": 0.44568267464637756, |
|
"logits/rejected": 0.703025221824646, |
|
"logps/chosen": -373.59710693359375, |
|
"logps/rejected": -427.1033630371094, |
|
"loss": 0.5485, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.987470269203186, |
|
"rewards/margins": 0.8238475918769836, |
|
"rewards/rejected": -1.811317801475525, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9208276634287143e-06, |
|
"logits/chosen": 0.3246023654937744, |
|
"logits/rejected": 1.222904920578003, |
|
"logps/chosen": -463.6178283691406, |
|
"logps/rejected": -413.4266662597656, |
|
"loss": 0.5061, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.2248636484146118, |
|
"rewards/margins": 0.7811811566352844, |
|
"rewards/rejected": -2.006044626235962, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8765063239081442e-06, |
|
"logits/chosen": 0.8462961912155151, |
|
"logits/rejected": 1.0419355630874634, |
|
"logps/chosen": -394.54376220703125, |
|
"logps/rejected": -464.9222106933594, |
|
"loss": 0.5619, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.465216875076294, |
|
"rewards/margins": 0.712496817111969, |
|
"rewards/rejected": -2.177713632583618, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.832392984146018e-06, |
|
"logits/chosen": 0.9549469947814941, |
|
"logits/rejected": 1.0832746028900146, |
|
"logps/chosen": -353.7349853515625, |
|
"logps/rejected": -435.2191467285156, |
|
"loss": 0.5467, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.5669246912002563, |
|
"rewards/margins": 0.64616858959198, |
|
"rewards/rejected": -2.2130932807922363, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_logits/chosen": 0.8593495488166809, |
|
"eval_logits/rejected": 1.3020421266555786, |
|
"eval_logps/chosen": -397.76275634765625, |
|
"eval_logps/rejected": -462.7305603027344, |
|
"eval_loss": 0.5195906758308411, |
|
"eval_rewards/accuracies": 0.73828125, |
|
"eval_rewards/chosen": -1.3632532358169556, |
|
"eval_rewards/margins": 0.8057605624198914, |
|
"eval_rewards/rejected": -2.169013738632202, |
|
"eval_runtime": 99.9113, |
|
"eval_samples_per_second": 20.018, |
|
"eval_steps_per_second": 0.32, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.7885023605129532e-06, |
|
"logits/chosen": 0.4313087463378906, |
|
"logits/rejected": 0.9333802461624146, |
|
"logps/chosen": -355.1097717285156, |
|
"logps/rejected": -417.61810302734375, |
|
"loss": 0.534, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.317725419998169, |
|
"rewards/margins": 0.7560078501701355, |
|
"rewards/rejected": -2.073733329772949, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7448490950806552e-06, |
|
"logits/chosen": 0.38834458589553833, |
|
"logits/rejected": 0.989336371421814, |
|
"logps/chosen": -413.79473876953125, |
|
"logps/rejected": -411.78973388671875, |
|
"loss": 0.5136, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.1249158382415771, |
|
"rewards/margins": 0.6000865697860718, |
|
"rewards/rejected": -1.7250025272369385, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7014477507372712e-06, |
|
"logits/chosen": 0.537342369556427, |
|
"logits/rejected": 0.595427930355072, |
|
"logps/chosen": -365.5661315917969, |
|
"logps/rejected": -432.7490234375, |
|
"loss": 0.5302, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.1988743543624878, |
|
"rewards/margins": 0.6834364533424377, |
|
"rewards/rejected": -1.8823106288909912, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6583128063291576e-06, |
|
"logits/chosen": -0.22250571846961975, |
|
"logits/rejected": 0.4327928125858307, |
|
"logps/chosen": -429.69677734375, |
|
"logps/rejected": -486.3809509277344, |
|
"loss": 0.4874, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.2788269519805908, |
|
"rewards/margins": 0.803261935710907, |
|
"rewards/rejected": -2.0820889472961426, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6154586518306798e-06, |
|
"logits/chosen": 0.827475905418396, |
|
"logits/rejected": 1.162104845046997, |
|
"logps/chosen": -395.8812255859375, |
|
"logps/rejected": -423.1787109375, |
|
"loss": 0.5493, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.3758419752120972, |
|
"rewards/margins": 0.5010850429534912, |
|
"rewards/rejected": -1.876927137374878, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5728995835436711e-06, |
|
"logits/chosen": 0.7995271682739258, |
|
"logits/rejected": 1.8586130142211914, |
|
"logps/chosen": -387.32623291015625, |
|
"logps/rejected": -397.79827880859375, |
|
"loss": 0.5026, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.1881077289581299, |
|
"rewards/margins": 1.0256298780441284, |
|
"rewards/rejected": -2.2137374877929688, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5306497993281312e-06, |
|
"logits/chosen": 1.029513955116272, |
|
"logits/rejected": 1.2201731204986572, |
|
"logps/chosen": -383.57476806640625, |
|
"logps/rejected": -387.10015869140625, |
|
"loss": 0.5487, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.4437286853790283, |
|
"rewards/margins": 0.5175876021385193, |
|
"rewards/rejected": -1.9613163471221924, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.488723393865766e-06, |
|
"logits/chosen": 0.6667646765708923, |
|
"logits/rejected": 1.68923020362854, |
|
"logps/chosen": -444.7820739746094, |
|
"logps/rejected": -444.03375244140625, |
|
"loss": 0.5775, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.6295793056488037, |
|
"rewards/margins": 0.6660442352294922, |
|
"rewards/rejected": -2.295623540878296, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4471343539579562e-06, |
|
"logits/chosen": 0.24659457802772522, |
|
"logits/rejected": 1.137135624885559, |
|
"logps/chosen": -447.05303955078125, |
|
"logps/rejected": -455.335693359375, |
|
"loss": 0.5527, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.5161960124969482, |
|
"rewards/margins": 0.6156362295150757, |
|
"rewards/rejected": -2.1318325996398926, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.4058965538597033e-06, |
|
"logits/chosen": 0.5961114168167114, |
|
"logits/rejected": 0.9649273753166199, |
|
"logps/chosen": -414.1187438964844, |
|
"logps/rejected": -474.6805725097656, |
|
"loss": 0.5125, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.1609492301940918, |
|
"rewards/margins": 0.921238124370575, |
|
"rewards/rejected": -2.0821871757507324, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_logits/chosen": 0.6638839840888977, |
|
"eval_logits/rejected": 1.1044923067092896, |
|
"eval_logps/chosen": -381.7656555175781, |
|
"eval_logps/rejected": -446.24365234375, |
|
"eval_loss": 0.5179304480552673, |
|
"eval_rewards/accuracies": 0.7421875, |
|
"eval_rewards/chosen": -1.203282117843628, |
|
"eval_rewards/margins": 0.8008626103401184, |
|
"eval_rewards/rejected": -2.0041446685791016, |
|
"eval_runtime": 99.7889, |
|
"eval_samples_per_second": 20.042, |
|
"eval_steps_per_second": 0.321, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3650237506511333e-06, |
|
"logits/chosen": 0.17492257058620453, |
|
"logits/rejected": 0.6054025888442993, |
|
"logps/chosen": -375.28460693359375, |
|
"logps/rejected": -439.0665588378906, |
|
"loss": 0.4896, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.1908279657363892, |
|
"rewards/margins": 0.9128111600875854, |
|
"rewards/rejected": -2.1036391258239746, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.324529579648079e-06, |
|
"logits/chosen": 0.2962098717689514, |
|
"logits/rejected": 0.7064357995986938, |
|
"logps/chosen": -435.1327209472656, |
|
"logps/rejected": -473.338623046875, |
|
"loss": 0.5331, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.3846468925476074, |
|
"rewards/margins": 0.5992780923843384, |
|
"rewards/rejected": -1.9839251041412354, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2844275498532955e-06, |
|
"logits/chosen": 0.7400895953178406, |
|
"logits/rejected": 0.7335311770439148, |
|
"logps/chosen": -384.15252685546875, |
|
"logps/rejected": -455.39276123046875, |
|
"loss": 0.5029, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.3056604862213135, |
|
"rewards/margins": 0.7046310901641846, |
|
"rewards/rejected": -2.010291576385498, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.244731039449802e-06, |
|
"logits/chosen": 0.6501943469047546, |
|
"logits/rejected": 1.1686570644378662, |
|
"logps/chosen": -431.3033142089844, |
|
"logps/rejected": -449.19207763671875, |
|
"loss": 0.4817, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.0331215858459473, |
|
"rewards/margins": 1.0517748594284058, |
|
"rewards/rejected": -2.0848963260650635, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.2054532913378738e-06, |
|
"logits/chosen": 0.44238996505737305, |
|
"logits/rejected": 0.9898010492324829, |
|
"logps/chosen": -413.67388916015625, |
|
"logps/rejected": -463.6742248535156, |
|
"loss": 0.5009, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.4910118579864502, |
|
"rewards/margins": 0.8411634564399719, |
|
"rewards/rejected": -2.3321752548217773, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1666074087171628e-06, |
|
"logits/chosen": 0.43889451026916504, |
|
"logits/rejected": 1.1597634553909302, |
|
"logps/chosen": -416.033203125, |
|
"logps/rejected": -421.3883361816406, |
|
"loss": 0.5046, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.1603151559829712, |
|
"rewards/margins": 0.6429589986801147, |
|
"rewards/rejected": -1.803274154663086, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.1282063507154214e-06, |
|
"logits/chosen": 0.9281649589538574, |
|
"logits/rejected": 0.7970014810562134, |
|
"logps/chosen": -383.08203125, |
|
"logps/rejected": -429.7383728027344, |
|
"loss": 0.521, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6160444021224976, |
|
"rewards/margins": 0.5065132975578308, |
|
"rewards/rejected": -2.1225578784942627, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.0902629280652933e-06, |
|
"logits/chosen": 0.7554650902748108, |
|
"logits/rejected": 1.4683620929718018, |
|
"logps/chosen": -424.9288024902344, |
|
"logps/rejected": -410.4168395996094, |
|
"loss": 0.5506, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.6847953796386719, |
|
"rewards/margins": 0.4181737005710602, |
|
"rewards/rejected": -2.102969169616699, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.052789798830601e-06, |
|
"logits/chosen": 0.642483651638031, |
|
"logits/rejected": 0.7909359931945801, |
|
"logps/chosen": -461.81671142578125, |
|
"logps/rejected": -548.4481201171875, |
|
"loss": 0.4936, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.4545402526855469, |
|
"rewards/margins": 1.028944730758667, |
|
"rewards/rejected": -2.483484983444214, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0157994641835737e-06, |
|
"logits/chosen": 0.5413963198661804, |
|
"logits/rejected": 1.2730069160461426, |
|
"logps/chosen": -385.44110107421875, |
|
"logps/rejected": -425.7725524902344, |
|
"loss": 0.4881, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.3209706544876099, |
|
"rewards/margins": 0.7255110144615173, |
|
"rewards/rejected": -2.0464818477630615, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_logits/chosen": 0.7444608807563782, |
|
"eval_logits/rejected": 1.1890881061553955, |
|
"eval_logps/chosen": -389.3553771972656, |
|
"eval_logps/rejected": -459.1728210449219, |
|
"eval_loss": 0.5158318877220154, |
|
"eval_rewards/accuracies": 0.75390625, |
|
"eval_rewards/chosen": -1.2791798114776611, |
|
"eval_rewards/margins": 0.8542567491531372, |
|
"eval_rewards/rejected": -2.133436679840088, |
|
"eval_runtime": 99.9692, |
|
"eval_samples_per_second": 20.006, |
|
"eval_steps_per_second": 0.32, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.793042642344122e-07, |
|
"logits/chosen": 0.7017895579338074, |
|
"logits/rejected": 0.7292946577072144, |
|
"logps/chosen": -396.1260986328125, |
|
"logps/rejected": -448.18133544921875, |
|
"loss": 0.5094, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.3052157163619995, |
|
"rewards/margins": 0.5403749346733093, |
|
"rewards/rejected": -1.845590591430664, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.433163739145773e-07, |
|
"logits/chosen": 0.32617324590682983, |
|
"logits/rejected": 0.8873109817504883, |
|
"logps/chosen": -385.48651123046875, |
|
"logps/rejected": -387.12322998046875, |
|
"loss": 0.5213, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.2667211294174194, |
|
"rewards/margins": 0.7441008687019348, |
|
"rewards/rejected": -2.01082181930542, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.078477989151957e-07, |
|
"logits/chosen": 0.4015222191810608, |
|
"logits/rejected": 0.17243003845214844, |
|
"logps/chosen": -413.64813232421875, |
|
"logps/rejected": -495.1240234375, |
|
"loss": 0.492, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.3316669464111328, |
|
"rewards/margins": 0.6485150456428528, |
|
"rewards/rejected": -1.9801819324493408, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.729103716819113e-07, |
|
"logits/chosen": 0.7524212598800659, |
|
"logits/rejected": 1.1642448902130127, |
|
"logps/chosen": -382.9913635253906, |
|
"logps/rejected": -450.04986572265625, |
|
"loss": 0.5076, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.3288081884384155, |
|
"rewards/margins": 0.8739243745803833, |
|
"rewards/rejected": -2.202732563018799, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.385157474675415e-07, |
|
"logits/chosen": 0.31630071997642517, |
|
"logits/rejected": 0.6081461310386658, |
|
"logps/chosen": -395.0060119628906, |
|
"logps/rejected": -481.4752502441406, |
|
"loss": 0.5291, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.4594825506210327, |
|
"rewards/margins": 0.6843896508216858, |
|
"rewards/rejected": -2.1438724994659424, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.046754004438428e-07, |
|
"logits/chosen": 0.766178548336029, |
|
"logits/rejected": 1.4351043701171875, |
|
"logps/chosen": -365.0591735839844, |
|
"logps/rejected": -402.4694519042969, |
|
"loss": 0.5532, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0528379678726196, |
|
"rewards/margins": 0.6676140427589417, |
|
"rewards/rejected": -1.720452070236206, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.714006198736837e-07, |
|
"logits/chosen": 1.2198323011398315, |
|
"logits/rejected": 1.0236773490905762, |
|
"logps/chosen": -340.1226501464844, |
|
"logps/rejected": -483.118896484375, |
|
"loss": 0.48, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.4166100025177002, |
|
"rewards/margins": 0.9187544584274292, |
|
"rewards/rejected": -2.33536434173584, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.387025063449082e-07, |
|
"logits/chosen": 0.8669508099555969, |
|
"logits/rejected": 1.123145341873169, |
|
"logps/chosen": -364.0406799316406, |
|
"logps/rejected": -466.4491271972656, |
|
"loss": 0.4846, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.308837652206421, |
|
"rewards/margins": 0.9366034269332886, |
|
"rewards/rejected": -2.245441198348999, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.065919680671357e-07, |
|
"logits/chosen": 1.0810492038726807, |
|
"logits/rejected": 1.4076943397521973, |
|
"logps/chosen": -371.6559753417969, |
|
"logps/rejected": -437.2684631347656, |
|
"loss": 0.5433, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.464137315750122, |
|
"rewards/margins": 0.6649857759475708, |
|
"rewards/rejected": -2.1291232109069824, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.750797172327442e-07, |
|
"logits/chosen": 0.6836698651313782, |
|
"logits/rejected": 1.290857195854187, |
|
"logps/chosen": -459.5965270996094, |
|
"logps/rejected": -491.36322021484375, |
|
"loss": 0.5273, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.3482565879821777, |
|
"rewards/margins": 0.5898759365081787, |
|
"rewards/rejected": -1.9381325244903564, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_logits/chosen": 0.7972918748855591, |
|
"eval_logits/rejected": 1.2533293962478638, |
|
"eval_logps/chosen": -382.25054931640625, |
|
"eval_logps/rejected": -453.2860412597656, |
|
"eval_loss": 0.5135490298271179, |
|
"eval_rewards/accuracies": 0.75390625, |
|
"eval_rewards/chosen": -1.2081315517425537, |
|
"eval_rewards/margins": 0.8664370179176331, |
|
"eval_rewards/rejected": -2.074568271636963, |
|
"eval_runtime": 99.7477, |
|
"eval_samples_per_second": 20.051, |
|
"eval_steps_per_second": 0.321, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.441762664432463e-07, |
|
"logits/chosen": 0.7760528922080994, |
|
"logits/rejected": 1.4465583562850952, |
|
"logps/chosen": -396.90496826171875, |
|
"logps/rejected": -484.97454833984375, |
|
"loss": 0.5245, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.2378466129302979, |
|
"rewards/margins": 1.0589042901992798, |
|
"rewards/rejected": -2.296750545501709, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.138919252022435e-07, |
|
"logits/chosen": 0.4587163031101227, |
|
"logits/rejected": 0.4502899646759033, |
|
"logps/chosen": -432.886962890625, |
|
"logps/rejected": -490.9667053222656, |
|
"loss": 0.5225, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.5580703020095825, |
|
"rewards/margins": 0.6701681613922119, |
|
"rewards/rejected": -2.228238582611084, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.842367964761389e-07, |
|
"logits/chosen": 0.6432207226753235, |
|
"logits/rejected": 0.7316852807998657, |
|
"logps/chosen": -400.072265625, |
|
"logps/rejected": -470.08953857421875, |
|
"loss": 0.5145, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.3107836246490479, |
|
"rewards/margins": 0.6859927177429199, |
|
"rewards/rejected": -1.9967763423919678, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.552207733237544e-07, |
|
"logits/chosen": 1.033753514289856, |
|
"logits/rejected": 1.2738745212554932, |
|
"logps/chosen": -349.98992919921875, |
|
"logps/rejected": -408.69586181640625, |
|
"loss": 0.52, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.2401503324508667, |
|
"rewards/margins": 0.8352483510971069, |
|
"rewards/rejected": -2.0753986835479736, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.268535355959642e-07, |
|
"logits/chosen": 0.4373984932899475, |
|
"logits/rejected": 1.6638437509536743, |
|
"logps/chosen": -381.151123046875, |
|
"logps/rejected": -425.26373291015625, |
|
"loss": 0.5309, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.3500919342041016, |
|
"rewards/margins": 1.0549076795578003, |
|
"rewards/rejected": -2.4049997329711914, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.99144546706469e-07, |
|
"logits/chosen": 0.5850018262863159, |
|
"logits/rejected": 0.9529399871826172, |
|
"logps/chosen": -376.8275451660156, |
|
"logps/rejected": -434.23651123046875, |
|
"loss": 0.5249, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.557494044303894, |
|
"rewards/margins": 0.6373199820518494, |
|
"rewards/rejected": -2.1948142051696777, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.721030504747573e-07, |
|
"logits/chosen": 0.6524550914764404, |
|
"logits/rejected": 1.50775945186615, |
|
"logps/chosen": -485.05218505859375, |
|
"logps/rejected": -525.0234985351562, |
|
"loss": 0.5256, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.531062126159668, |
|
"rewards/margins": 1.007774829864502, |
|
"rewards/rejected": -2.53883695602417, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.4573806804234344e-07, |
|
"logits/chosen": 1.0139309167861938, |
|
"logits/rejected": 1.3080246448516846, |
|
"logps/chosen": -372.17633056640625, |
|
"logps/rejected": -457.5221252441406, |
|
"loss": 0.5338, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.3809771537780762, |
|
"rewards/margins": 0.7371150851249695, |
|
"rewards/rejected": -2.1180920600891113, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.2005839486327524e-07, |
|
"logits/chosen": 0.920538067817688, |
|
"logits/rejected": 1.3884646892547607, |
|
"logps/chosen": -408.55792236328125, |
|
"logps/rejected": -495.26007080078125, |
|
"loss": 0.4886, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.345349907875061, |
|
"rewards/margins": 0.9609087109565735, |
|
"rewards/rejected": -2.3062586784362793, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.950725977699396e-07, |
|
"logits/chosen": 0.23017816245555878, |
|
"logits/rejected": 1.3223010301589966, |
|
"logps/chosen": -419.499755859375, |
|
"logps/rejected": -443.2064514160156, |
|
"loss": 0.5317, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.2144871950149536, |
|
"rewards/margins": 1.026813268661499, |
|
"rewards/rejected": -2.241300582885742, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_logits/chosen": 0.820225715637207, |
|
"eval_logits/rejected": 1.2751976251602173, |
|
"eval_logps/chosen": -389.5859069824219, |
|
"eval_logps/rejected": -461.7518310546875, |
|
"eval_loss": 0.5140097737312317, |
|
"eval_rewards/accuracies": 0.75, |
|
"eval_rewards/chosen": -1.281484842300415, |
|
"eval_rewards/margins": 0.8777413368225098, |
|
"eval_rewards/rejected": -2.159226179122925, |
|
"eval_runtime": 99.8683, |
|
"eval_samples_per_second": 20.026, |
|
"eval_steps_per_second": 0.32, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.707890121151403e-07, |
|
"logits/chosen": 0.6467644572257996, |
|
"logits/rejected": 0.7336404323577881, |
|
"logps/chosen": -391.0705261230469, |
|
"logps/rejected": -510.64385986328125, |
|
"loss": 0.4993, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.2752177715301514, |
|
"rewards/margins": 0.9291207194328308, |
|
"rewards/rejected": -2.204338550567627, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.4721573899138743e-07, |
|
"logits/chosen": 0.9288214445114136, |
|
"logits/rejected": 1.101698875427246, |
|
"logps/chosen": -416.335693359375, |
|
"logps/rejected": -487.194091796875, |
|
"loss": 0.5261, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.3879741430282593, |
|
"rewards/margins": 0.7634307146072388, |
|
"rewards/rejected": -2.151404857635498, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.2436064252834853e-07, |
|
"logits/chosen": 0.9459419250488281, |
|
"logits/rejected": 0.8354867696762085, |
|
"logps/chosen": -375.0196228027344, |
|
"logps/rejected": -481.24310302734375, |
|
"loss": 0.5144, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.3403657674789429, |
|
"rewards/margins": 0.9639253616333008, |
|
"rewards/rejected": -2.304291248321533, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.022313472693447e-07, |
|
"logits/chosen": 0.7536389231681824, |
|
"logits/rejected": 0.9406282305717468, |
|
"logps/chosen": -388.83245849609375, |
|
"logps/rejected": -452.368896484375, |
|
"loss": 0.4979, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.2452423572540283, |
|
"rewards/margins": 0.6904104948043823, |
|
"rewards/rejected": -1.9356529712677002, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.808352356277738e-07, |
|
"logits/chosen": 0.5242574214935303, |
|
"logits/rejected": 1.6256099939346313, |
|
"logps/chosen": -419.309326171875, |
|
"logps/rejected": -446.4677734375, |
|
"loss": 0.5174, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.3458099365234375, |
|
"rewards/margins": 0.7937489748001099, |
|
"rewards/rejected": -2.139559030532837, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.6017944542431394e-07, |
|
"logits/chosen": 0.4203415811061859, |
|
"logits/rejected": 0.6204716563224792, |
|
"logps/chosen": -407.8147277832031, |
|
"logps/rejected": -414.63201904296875, |
|
"loss": 0.5193, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.241703748703003, |
|
"rewards/margins": 0.5451580882072449, |
|
"rewards/rejected": -1.786861777305603, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.4027086750571635e-07, |
|
"logits/chosen": 0.2778792977333069, |
|
"logits/rejected": 0.7795981168746948, |
|
"logps/chosen": -446.410888671875, |
|
"logps/rejected": -511.241455078125, |
|
"loss": 0.5197, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.3088264465332031, |
|
"rewards/margins": 0.6983151435852051, |
|
"rewards/rejected": -2.007141590118408, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.2111614344599686e-07, |
|
"logits/chosen": 0.31662610173225403, |
|
"logits/rejected": 0.2457505166530609, |
|
"logps/chosen": -361.7285461425781, |
|
"logps/rejected": -504.502197265625, |
|
"loss": 0.5237, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.1863205432891846, |
|
"rewards/margins": 0.8205347061157227, |
|
"rewards/rejected": -2.006855010986328, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.027216633307813e-07, |
|
"logits/chosen": 0.7463894486427307, |
|
"logits/rejected": 0.9927177429199219, |
|
"logps/chosen": -337.95306396484375, |
|
"logps/rejected": -456.77288818359375, |
|
"loss": 0.5427, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.2024390697479248, |
|
"rewards/margins": 0.9413339495658875, |
|
"rewards/rejected": -2.143772840499878, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.8509356362554964e-07, |
|
"logits/chosen": 0.49471530318260193, |
|
"logits/rejected": 0.8219636678695679, |
|
"logps/chosen": -390.1451416015625, |
|
"logps/rejected": -428.46484375, |
|
"loss": 0.5384, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.300763726234436, |
|
"rewards/margins": 0.8397086262702942, |
|
"rewards/rejected": -2.140472173690796, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_logits/chosen": 0.8384195566177368, |
|
"eval_logits/rejected": 1.2938010692596436, |
|
"eval_logps/chosen": -386.92913818359375, |
|
"eval_logps/rejected": -458.70379638671875, |
|
"eval_loss": 0.5134327411651611, |
|
"eval_rewards/accuracies": 0.75390625, |
|
"eval_rewards/chosen": -1.2549172639846802, |
|
"eval_rewards/margins": 0.8738288283348083, |
|
"eval_rewards/rejected": -2.1287460327148438, |
|
"eval_runtime": 99.8361, |
|
"eval_samples_per_second": 20.033, |
|
"eval_steps_per_second": 0.321, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.6823772512849086e-07, |
|
"logits/chosen": 1.2365819215774536, |
|
"logits/rejected": 1.28219473361969, |
|
"logps/chosen": -361.33197021484375, |
|
"logps/rejected": -429.3739318847656, |
|
"loss": 0.5435, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -1.4629837274551392, |
|
"rewards/margins": 0.5283857583999634, |
|
"rewards/rejected": -1.9913694858551025, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.5215977100864394e-07, |
|
"logits/chosen": 0.44147777557373047, |
|
"logits/rejected": 0.7171396017074585, |
|
"logps/chosen": -452.79046630859375, |
|
"logps/rejected": -513.9752197265625, |
|
"loss": 0.516, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.2802356481552124, |
|
"rewards/margins": 0.8417502641677856, |
|
"rewards/rejected": -2.121985912322998, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.3686506492999019e-07, |
|
"logits/chosen": 0.41590723395347595, |
|
"logits/rejected": 0.6705255508422852, |
|
"logps/chosen": -404.0863037109375, |
|
"logps/rejected": -468.7840270996094, |
|
"loss": 0.5693, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.4168977737426758, |
|
"rewards/margins": 0.6643761396408081, |
|
"rewards/rejected": -2.0812740325927734, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.223587092621162e-07, |
|
"logits/chosen": 1.298346996307373, |
|
"logits/rejected": 1.2134599685668945, |
|
"logps/chosen": -342.03424072265625, |
|
"logps/rejected": -450.5323791503906, |
|
"loss": 0.4678, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.4358490705490112, |
|
"rewards/margins": 0.6444058418273926, |
|
"rewards/rejected": -2.0802547931671143, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.08645543378042e-07, |
|
"logits/chosen": 0.9801321029663086, |
|
"logits/rejected": 1.268592119216919, |
|
"logps/chosen": -363.4621887207031, |
|
"logps/rejected": -438.5384826660156, |
|
"loss": 0.4722, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.1044265031814575, |
|
"rewards/margins": 0.8860410451889038, |
|
"rewards/rejected": -1.9904674291610718, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.573014203979241e-08, |
|
"logits/chosen": 0.8289960622787476, |
|
"logits/rejected": 1.2797130346298218, |
|
"logps/chosen": -406.34625244140625, |
|
"logps/rejected": -437.1571350097656, |
|
"loss": 0.4995, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.2909425497055054, |
|
"rewards/margins": 0.9118080139160156, |
|
"rewards/rejected": -2.2027504444122314, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 8.36168138722357e-08, |
|
"logits/chosen": 1.0189353227615356, |
|
"logits/rejected": 0.649911105632782, |
|
"logps/chosen": -410.6741638183594, |
|
"logps/rejected": -475.7628479003906, |
|
"loss": 0.5279, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.4780073165893555, |
|
"rewards/margins": 0.6910057067871094, |
|
"rewards/rejected": -2.169013023376465, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.230959992571368e-08, |
|
"logits/chosen": 0.8552034497261047, |
|
"logits/rejected": 0.7838449478149414, |
|
"logps/chosen": -379.01898193359375, |
|
"logps/rejected": -456.55023193359375, |
|
"loss": 0.4994, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.3288278579711914, |
|
"rewards/margins": 0.6242281794548035, |
|
"rewards/rejected": -1.95305597782135, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.181227232793185e-08, |
|
"logits/chosen": 0.42763280868530273, |
|
"logits/rejected": 0.637231707572937, |
|
"logps/chosen": -440.8536071777344, |
|
"logps/rejected": -470.2457580566406, |
|
"loss": 0.513, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.4187681674957275, |
|
"rewards/margins": 0.4330345690250397, |
|
"rewards/rejected": -1.8518028259277344, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.212833302556258e-08, |
|
"logits/chosen": 0.6027169227600098, |
|
"logits/rejected": 1.1977932453155518, |
|
"logps/chosen": -382.1881408691406, |
|
"logps/rejected": -478.81787109375, |
|
"loss": 0.5619, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.3630785942077637, |
|
"rewards/margins": 0.7937374114990234, |
|
"rewards/rejected": -2.156816005706787, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_logits/chosen": 0.7986226677894592, |
|
"eval_logits/rejected": 1.2532446384429932, |
|
"eval_logps/chosen": -385.8194885253906, |
|
"eval_logps/rejected": -456.913330078125, |
|
"eval_loss": 0.5134571194648743, |
|
"eval_rewards/accuracies": 0.7578125, |
|
"eval_rewards/chosen": -1.2438209056854248, |
|
"eval_rewards/margins": 0.8670201301574707, |
|
"eval_rewards/rejected": -2.1108410358428955, |
|
"eval_runtime": 99.9232, |
|
"eval_samples_per_second": 20.015, |
|
"eval_steps_per_second": 0.32, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.32610126159852e-08, |
|
"logits/chosen": 0.7875297665596008, |
|
"logits/rejected": 1.140857458114624, |
|
"logps/chosen": -437.805908203125, |
|
"logps/rejected": -488.8186950683594, |
|
"loss": 0.4932, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.306668996810913, |
|
"rewards/margins": 0.7567360997200012, |
|
"rewards/rejected": -2.0634050369262695, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.521326926954533e-08, |
|
"logits/chosen": 0.038898296654224396, |
|
"logits/rejected": 0.8747750520706177, |
|
"logps/chosen": -430.21270751953125, |
|
"logps/rejected": -509.03338623046875, |
|
"loss": 0.5421, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.4582428932189941, |
|
"rewards/margins": 0.557195782661438, |
|
"rewards/rejected": -2.0154385566711426, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.798778774269817e-08, |
|
"logits/chosen": 1.1741001605987549, |
|
"logits/rejected": 1.0272743701934814, |
|
"logps/chosen": -384.75341796875, |
|
"logps/rejected": -457.12432861328125, |
|
"loss": 0.5129, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.477851152420044, |
|
"rewards/margins": 0.7019861936569214, |
|
"rewards/rejected": -2.179837465286255, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.1586978482366072e-08, |
|
"logits/chosen": 1.0403835773468018, |
|
"logits/rejected": 1.361781120300293, |
|
"logps/chosen": -325.6305847167969, |
|
"logps/rejected": -370.1999206542969, |
|
"loss": 0.4816, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.231935739517212, |
|
"rewards/margins": 0.4666922986507416, |
|
"rewards/rejected": -1.6986281871795654, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.6012976821802583e-08, |
|
"logits/chosen": 0.4566507935523987, |
|
"logits/rejected": 0.8711694478988647, |
|
"logps/chosen": -389.9162902832031, |
|
"logps/rejected": -444.21783447265625, |
|
"loss": 0.543, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.235186219215393, |
|
"rewards/margins": 0.7044228315353394, |
|
"rewards/rejected": -1.939609169960022, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.1267642268238121e-08, |
|
"logits/chosen": 0.705321192741394, |
|
"logits/rejected": 1.1006934642791748, |
|
"logps/chosen": -322.65911865234375, |
|
"logps/rejected": -389.2583312988281, |
|
"loss": 0.4922, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.2569407224655151, |
|
"rewards/margins": 0.7663857936859131, |
|
"rewards/rejected": -2.0233263969421387, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 7.352557882542022e-09, |
|
"logits/chosen": 0.11342964321374893, |
|
"logits/rejected": 1.1264240741729736, |
|
"logps/chosen": -491.3070373535156, |
|
"logps/rejected": -466.52423095703125, |
|
"loss": 0.475, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.4384621381759644, |
|
"rewards/margins": 0.7035077810287476, |
|
"rewards/rejected": -2.141970157623291, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.269029751107489e-09, |
|
"logits/chosen": 0.8203773498535156, |
|
"logits/rejected": 0.9573888778686523, |
|
"logps/chosen": -361.1534729003906, |
|
"logps/rejected": -452.228271484375, |
|
"loss": 0.5102, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.16557776927948, |
|
"rewards/margins": 0.9508386850357056, |
|
"rewards/rejected": -2.1164164543151855, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.0180865501362447e-09, |
|
"logits/chosen": 0.8782768249511719, |
|
"logits/rejected": 0.8547204732894897, |
|
"logps/chosen": -413.7078552246094, |
|
"logps/rejected": -529.6265869140625, |
|
"loss": 0.4938, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.4604012966156006, |
|
"rewards/margins": 0.9819992780685425, |
|
"rewards/rejected": -2.4424006938934326, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 6.004792024680295e-10, |
|
"logits/chosen": 0.9474574327468872, |
|
"logits/rejected": 0.7719841599464417, |
|
"logps/chosen": -407.2633972167969, |
|
"logps/rejected": -473.81585693359375, |
|
"loss": 0.5169, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.5017306804656982, |
|
"rewards/margins": 0.5851131081581116, |
|
"rewards/rejected": -2.086843967437744, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_logits/chosen": 0.7988529205322266, |
|
"eval_logits/rejected": 1.2523157596588135, |
|
"eval_logps/chosen": -385.9081726074219, |
|
"eval_logps/rejected": -457.0127868652344, |
|
"eval_loss": 0.5133188962936401, |
|
"eval_rewards/accuracies": 0.75390625, |
|
"eval_rewards/chosen": -1.244707703590393, |
|
"eval_rewards/margins": 0.8671280741691589, |
|
"eval_rewards/rejected": -2.1118357181549072, |
|
"eval_runtime": 99.8541, |
|
"eval_samples_per_second": 20.029, |
|
"eval_steps_per_second": 0.32, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.668062707155249e-11, |
|
"logits/chosen": 0.5608649253845215, |
|
"logits/rejected": 0.8075926899909973, |
|
"logps/chosen": -380.0560302734375, |
|
"logps/rejected": -421.47735595703125, |
|
"loss": 0.5332, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.450054407119751, |
|
"rewards/margins": 0.7468727827072144, |
|
"rewards/rejected": -2.196927309036255, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1912, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5577758540044768, |
|
"train_runtime": 7516.1301, |
|
"train_samples_per_second": 8.137, |
|
"train_steps_per_second": 0.254 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1912, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|