|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.998451213216314, |
|
"eval_steps": 100, |
|
"global_step": 2904, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.7182130584192438e-09, |
|
"logits/chosen": -2.41489839553833, |
|
"logits/rejected": -2.313730239868164, |
|
"logps/chosen": -426.6319580078125, |
|
"logps/rejected": -209.72433471679688, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.718213058419244e-08, |
|
"logits/chosen": -2.736762762069702, |
|
"logits/rejected": -2.701097249984741, |
|
"logps/chosen": -275.3837890625, |
|
"logps/rejected": -267.7837219238281, |
|
"loss": 0.696, |
|
"rewards/accuracies": 0.4236111044883728, |
|
"rewards/chosen": -0.0019075096352025867, |
|
"rewards/margins": -0.0022293850779533386, |
|
"rewards/rejected": 0.00032187564647756517, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.436426116838488e-08, |
|
"logits/chosen": -2.6771740913391113, |
|
"logits/rejected": -2.614168643951416, |
|
"logps/chosen": -282.4807434082031, |
|
"logps/rejected": -230.01553344726562, |
|
"loss": 0.6902, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": 0.00013565353583544493, |
|
"rewards/margins": 0.002631585579365492, |
|
"rewards/rejected": -0.0024959323927760124, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5.154639175257731e-08, |
|
"logits/chosen": -2.670668363571167, |
|
"logits/rejected": -2.628671169281006, |
|
"logps/chosen": -254.06240844726562, |
|
"logps/rejected": -235.0716094970703, |
|
"loss": 0.6828, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.0025966805405914783, |
|
"rewards/margins": 0.00959115568548441, |
|
"rewards/rejected": -0.006994475610554218, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.872852233676976e-08, |
|
"logits/chosen": -2.7003836631774902, |
|
"logits/rejected": -2.615793228149414, |
|
"logps/chosen": -238.7230682373047, |
|
"logps/rejected": -180.0827178955078, |
|
"loss": 0.6629, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.025214945897459984, |
|
"rewards/margins": 0.05416213348507881, |
|
"rewards/rejected": -0.02894718386232853, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 8.59106529209622e-08, |
|
"logits/chosen": -2.6934826374053955, |
|
"logits/rejected": -2.6144022941589355, |
|
"logps/chosen": -251.5126495361328, |
|
"logps/rejected": -197.26048278808594, |
|
"loss": 0.6399, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.046762533485889435, |
|
"rewards/margins": 0.12699802219867706, |
|
"rewards/rejected": -0.08023548126220703, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.0309278350515462e-07, |
|
"logits/chosen": -2.692603349685669, |
|
"logits/rejected": -2.6785025596618652, |
|
"logps/chosen": -257.05914306640625, |
|
"logps/rejected": -247.1437530517578, |
|
"loss": 0.6155, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.04687836393713951, |
|
"rewards/margins": 0.17602364718914032, |
|
"rewards/rejected": -0.1291452944278717, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.202749140893471e-07, |
|
"logits/chosen": -2.737905979156494, |
|
"logits/rejected": -2.623173952102661, |
|
"logps/chosen": -316.3946228027344, |
|
"logps/rejected": -238.3878936767578, |
|
"loss": 0.5916, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 0.10290606319904327, |
|
"rewards/margins": 0.3630576729774475, |
|
"rewards/rejected": -0.26015162467956543, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.3745704467353952e-07, |
|
"logits/chosen": -2.6619038581848145, |
|
"logits/rejected": -2.6211159229278564, |
|
"logps/chosen": -279.396728515625, |
|
"logps/rejected": -232.0877685546875, |
|
"loss": 0.5671, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 0.05718129128217697, |
|
"rewards/margins": 0.43765443563461304, |
|
"rewards/rejected": -0.38047313690185547, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.5463917525773197e-07, |
|
"logits/chosen": -2.6507420539855957, |
|
"logits/rejected": -2.575387954711914, |
|
"logps/chosen": -272.7942199707031, |
|
"logps/rejected": -227.7272491455078, |
|
"loss": 0.5676, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.008855322375893593, |
|
"rewards/margins": 0.3766869902610779, |
|
"rewards/rejected": -0.3855423331260681, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.718213058419244e-07, |
|
"logits/chosen": -2.6757609844207764, |
|
"logits/rejected": -2.5984952449798584, |
|
"logps/chosen": -278.7255554199219, |
|
"logps/rejected": -253.88668823242188, |
|
"loss": 0.575, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.0012895159889012575, |
|
"rewards/margins": 0.500220775604248, |
|
"rewards/rejected": -0.49893131852149963, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_logits/chosen": -2.745777130126953, |
|
"eval_logits/rejected": -2.6524808406829834, |
|
"eval_logps/chosen": -273.94866943359375, |
|
"eval_logps/rejected": -235.7017822265625, |
|
"eval_loss": 0.5309013724327087, |
|
"eval_rewards/accuracies": 0.7459999918937683, |
|
"eval_rewards/chosen": -0.01014842838048935, |
|
"eval_rewards/margins": 0.593262791633606, |
|
"eval_rewards/rejected": -0.6034111976623535, |
|
"eval_runtime": 330.0041, |
|
"eval_samples_per_second": 6.061, |
|
"eval_steps_per_second": 0.379, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.8900343642611682e-07, |
|
"logits/chosen": -2.731081485748291, |
|
"logits/rejected": -2.6355175971984863, |
|
"logps/chosen": -276.24908447265625, |
|
"logps/rejected": -247.2675018310547, |
|
"loss": 0.5464, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.021220406517386436, |
|
"rewards/margins": 0.625217854976654, |
|
"rewards/rejected": -0.6464383006095886, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.0618556701030925e-07, |
|
"logits/chosen": -2.715306520462036, |
|
"logits/rejected": -2.6525187492370605, |
|
"logps/chosen": -274.8995361328125, |
|
"logps/rejected": -230.00900268554688, |
|
"loss": 0.5043, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.022734543308615685, |
|
"rewards/margins": 0.7808176279067993, |
|
"rewards/rejected": -0.8035521507263184, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.2336769759450173e-07, |
|
"logits/chosen": -2.720980405807495, |
|
"logits/rejected": -2.6762051582336426, |
|
"logps/chosen": -258.5883483886719, |
|
"logps/rejected": -226.6556396484375, |
|
"loss": 0.5365, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.060701239854097366, |
|
"rewards/margins": 0.8721252679824829, |
|
"rewards/rejected": -0.9328263998031616, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.405498281786942e-07, |
|
"logits/chosen": -2.7193636894226074, |
|
"logits/rejected": -2.6467044353485107, |
|
"logps/chosen": -283.06817626953125, |
|
"logps/rejected": -243.6209716796875, |
|
"loss": 0.5378, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.13080577552318573, |
|
"rewards/margins": 0.6716828942298889, |
|
"rewards/rejected": -0.8024886250495911, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.5773195876288655e-07, |
|
"logits/chosen": -2.7493207454681396, |
|
"logits/rejected": -2.6163530349731445, |
|
"logps/chosen": -282.48358154296875, |
|
"logps/rejected": -222.87710571289062, |
|
"loss": 0.522, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.12233565002679825, |
|
"rewards/margins": 0.6884902119636536, |
|
"rewards/rejected": -0.8108257055282593, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.7491408934707903e-07, |
|
"logits/chosen": -2.7176194190979004, |
|
"logits/rejected": -2.6076390743255615, |
|
"logps/chosen": -283.25506591796875, |
|
"logps/rejected": -229.7573699951172, |
|
"loss": 0.5103, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.0403318852186203, |
|
"rewards/margins": 0.8281108140945435, |
|
"rewards/rejected": -0.868442714214325, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.9209621993127146e-07, |
|
"logits/chosen": -2.715559482574463, |
|
"logits/rejected": -2.630178689956665, |
|
"logps/chosen": -270.50274658203125, |
|
"logps/rejected": -247.9048614501953, |
|
"loss": 0.5238, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.018013525754213333, |
|
"rewards/margins": 0.9226928949356079, |
|
"rewards/rejected": -0.9407063722610474, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.0927835051546394e-07, |
|
"logits/chosen": -2.668119430541992, |
|
"logits/rejected": -2.601747989654541, |
|
"logps/chosen": -248.54458618164062, |
|
"logps/rejected": -225.4590606689453, |
|
"loss": 0.4972, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.2065100222826004, |
|
"rewards/margins": 0.7541019320487976, |
|
"rewards/rejected": -0.9606119394302368, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.2646048109965636e-07, |
|
"logits/chosen": -2.6944541931152344, |
|
"logits/rejected": -2.607551097869873, |
|
"logps/chosen": -268.26678466796875, |
|
"logps/rejected": -233.5041961669922, |
|
"loss": 0.498, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.1957332193851471, |
|
"rewards/margins": 0.9389954805374146, |
|
"rewards/rejected": -1.1347286701202393, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.436426116838488e-07, |
|
"logits/chosen": -2.779195785522461, |
|
"logits/rejected": -2.6563549041748047, |
|
"logps/chosen": -318.3896484375, |
|
"logps/rejected": -265.0843505859375, |
|
"loss": 0.4759, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.09323607385158539, |
|
"rewards/margins": 1.1875323057174683, |
|
"rewards/rejected": -1.2807685136795044, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_logits/chosen": -2.800610303878784, |
|
"eval_logits/rejected": -2.7065932750701904, |
|
"eval_logps/chosen": -274.48919677734375, |
|
"eval_logps/rejected": -240.49658203125, |
|
"eval_loss": 0.49432528018951416, |
|
"eval_rewards/accuracies": 0.75, |
|
"eval_rewards/chosen": -0.06420008093118668, |
|
"eval_rewards/margins": 1.0186885595321655, |
|
"eval_rewards/rejected": -1.0828887224197388, |
|
"eval_runtime": 330.0896, |
|
"eval_samples_per_second": 6.059, |
|
"eval_steps_per_second": 0.379, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.608247422680412e-07, |
|
"logits/chosen": -2.7947614192962646, |
|
"logits/rejected": -2.7344536781311035, |
|
"logps/chosen": -279.47857666015625, |
|
"logps/rejected": -244.9560546875, |
|
"loss": 0.5212, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.10790084302425385, |
|
"rewards/margins": 0.8937327265739441, |
|
"rewards/rejected": -1.0016335248947144, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.7800687285223364e-07, |
|
"logits/chosen": -2.730440855026245, |
|
"logits/rejected": -2.670431613922119, |
|
"logps/chosen": -239.8012237548828, |
|
"logps/rejected": -226.3561248779297, |
|
"loss": 0.4978, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.10349278151988983, |
|
"rewards/margins": 1.0418349504470825, |
|
"rewards/rejected": -1.1453276872634888, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.9518900343642607e-07, |
|
"logits/chosen": -2.798637866973877, |
|
"logits/rejected": -2.733868360519409, |
|
"logps/chosen": -326.19427490234375, |
|
"logps/rejected": -257.0804138183594, |
|
"loss": 0.5144, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.14514626562595367, |
|
"rewards/margins": 1.034011960029602, |
|
"rewards/rejected": -1.1791582107543945, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.123711340206185e-07, |
|
"logits/chosen": -2.7531001567840576, |
|
"logits/rejected": -2.6891307830810547, |
|
"logps/chosen": -270.3514404296875, |
|
"logps/rejected": -258.6917724609375, |
|
"loss": 0.4934, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.23920175433158875, |
|
"rewards/margins": 1.1313306093215942, |
|
"rewards/rejected": -1.3705322742462158, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.2955326460481097e-07, |
|
"logits/chosen": -2.6519603729248047, |
|
"logits/rejected": -2.569204807281494, |
|
"logps/chosen": -292.78662109375, |
|
"logps/rejected": -245.4613800048828, |
|
"loss": 0.5315, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.37750571966171265, |
|
"rewards/margins": 0.8689467310905457, |
|
"rewards/rejected": -1.2464525699615479, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.4673539518900345e-07, |
|
"logits/chosen": -2.683892011642456, |
|
"logits/rejected": -2.599489450454712, |
|
"logps/chosen": -291.04229736328125, |
|
"logps/rejected": -281.9589538574219, |
|
"loss": 0.4881, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.4090940058231354, |
|
"rewards/margins": 1.0680254697799683, |
|
"rewards/rejected": -1.4771194458007812, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.639175257731959e-07, |
|
"logits/chosen": -2.6438565254211426, |
|
"logits/rejected": -2.536273241043091, |
|
"logps/chosen": -297.5185241699219, |
|
"logps/rejected": -231.0972137451172, |
|
"loss": 0.4995, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.3678213357925415, |
|
"rewards/margins": 0.9588603973388672, |
|
"rewards/rejected": -1.3266817331314087, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.810996563573884e-07, |
|
"logits/chosen": -2.6681864261627197, |
|
"logits/rejected": -2.6065673828125, |
|
"logps/chosen": -239.76901245117188, |
|
"logps/rejected": -237.3007354736328, |
|
"loss": 0.5027, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.25205284357070923, |
|
"rewards/margins": 1.061631679534912, |
|
"rewards/rejected": -1.3136845827102661, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.982817869415807e-07, |
|
"logits/chosen": -2.6735098361968994, |
|
"logits/rejected": -2.5926923751831055, |
|
"logps/chosen": -266.5799255371094, |
|
"logps/rejected": -243.5729217529297, |
|
"loss": 0.4921, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.22308111190795898, |
|
"rewards/margins": 1.2045137882232666, |
|
"rewards/rejected": -1.4275949001312256, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.982778415614236e-07, |
|
"logits/chosen": -2.7693233489990234, |
|
"logits/rejected": -2.703303575515747, |
|
"logps/chosen": -270.84503173828125, |
|
"logps/rejected": -263.55548095703125, |
|
"loss": 0.5022, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.21006159484386444, |
|
"rewards/margins": 1.251680850982666, |
|
"rewards/rejected": -1.4617425203323364, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_logits/chosen": -2.8225364685058594, |
|
"eval_logits/rejected": -2.7362353801727295, |
|
"eval_logps/chosen": -275.37347412109375, |
|
"eval_logps/rejected": -242.1844940185547, |
|
"eval_loss": 0.4824218451976776, |
|
"eval_rewards/accuracies": 0.7620000243186951, |
|
"eval_rewards/chosen": -0.15262925624847412, |
|
"eval_rewards/margins": 1.0990517139434814, |
|
"eval_rewards/rejected": -1.2516810894012451, |
|
"eval_runtime": 329.0169, |
|
"eval_samples_per_second": 6.079, |
|
"eval_steps_per_second": 0.38, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.963643321852277e-07, |
|
"logits/chosen": -2.657869815826416, |
|
"logits/rejected": -2.604085683822632, |
|
"logps/chosen": -269.7090759277344, |
|
"logps/rejected": -248.0914306640625, |
|
"loss": 0.4821, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.2833268642425537, |
|
"rewards/margins": 1.0022751092910767, |
|
"rewards/rejected": -1.2856018543243408, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.944508228090318e-07, |
|
"logits/chosen": -2.753511905670166, |
|
"logits/rejected": -2.669100284576416, |
|
"logps/chosen": -282.27386474609375, |
|
"logps/rejected": -246.5732879638672, |
|
"loss": 0.4972, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.3067387044429779, |
|
"rewards/margins": 1.3396486043930054, |
|
"rewards/rejected": -1.6463874578475952, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.925373134328357e-07, |
|
"logits/chosen": -2.638120412826538, |
|
"logits/rejected": -2.5762674808502197, |
|
"logps/chosen": -251.3295135498047, |
|
"logps/rejected": -223.2657470703125, |
|
"loss": 0.4817, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.36169368028640747, |
|
"rewards/margins": 1.3353826999664307, |
|
"rewards/rejected": -1.697076439857483, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.906238040566398e-07, |
|
"logits/chosen": -2.7591946125030518, |
|
"logits/rejected": -2.647918939590454, |
|
"logps/chosen": -253.46444702148438, |
|
"logps/rejected": -215.0161590576172, |
|
"loss": 0.4821, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.3391885757446289, |
|
"rewards/margins": 1.2555291652679443, |
|
"rewards/rejected": -1.5947177410125732, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.887102946804438e-07, |
|
"logits/chosen": -2.755331039428711, |
|
"logits/rejected": -2.664771556854248, |
|
"logps/chosen": -278.7284851074219, |
|
"logps/rejected": -265.8031311035156, |
|
"loss": 0.5166, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.29750093817710876, |
|
"rewards/margins": 1.3157111406326294, |
|
"rewards/rejected": -1.61321222782135, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.867967853042479e-07, |
|
"logits/chosen": -2.7414841651916504, |
|
"logits/rejected": -2.649625778198242, |
|
"logps/chosen": -289.3199768066406, |
|
"logps/rejected": -261.6228942871094, |
|
"loss": 0.507, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.4183623790740967, |
|
"rewards/margins": 1.3235833644866943, |
|
"rewards/rejected": -1.7419458627700806, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.84883275928052e-07, |
|
"logits/chosen": -2.6831367015838623, |
|
"logits/rejected": -2.6130385398864746, |
|
"logps/chosen": -243.5470428466797, |
|
"logps/rejected": -231.25369262695312, |
|
"loss": 0.5619, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7277100086212158, |
|
"rewards/margins": 0.7260710597038269, |
|
"rewards/rejected": -1.4537811279296875, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.82969766551856e-07, |
|
"logits/chosen": -2.79364275932312, |
|
"logits/rejected": -2.704852342605591, |
|
"logps/chosen": -284.8489685058594, |
|
"logps/rejected": -241.8092041015625, |
|
"loss": 0.4803, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.4155152440071106, |
|
"rewards/margins": 1.2566057443618774, |
|
"rewards/rejected": -1.6721210479736328, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.810562571756601e-07, |
|
"logits/chosen": -2.7232635021209717, |
|
"logits/rejected": -2.6678431034088135, |
|
"logps/chosen": -290.93505859375, |
|
"logps/rejected": -260.04241943359375, |
|
"loss": 0.4719, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.3851390480995178, |
|
"rewards/margins": 1.2824820280075073, |
|
"rewards/rejected": -1.6676212549209595, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.791427477994642e-07, |
|
"logits/chosen": -2.7520861625671387, |
|
"logits/rejected": -2.6794071197509766, |
|
"logps/chosen": -318.5746765136719, |
|
"logps/rejected": -269.6056823730469, |
|
"loss": 0.5282, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.6232264637947083, |
|
"rewards/margins": 0.9759466052055359, |
|
"rewards/rejected": -1.5991729497909546, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_logits/chosen": -2.792391061782837, |
|
"eval_logits/rejected": -2.702341079711914, |
|
"eval_logps/chosen": -280.6413269042969, |
|
"eval_logps/rejected": -249.08758544921875, |
|
"eval_loss": 0.4877680838108063, |
|
"eval_rewards/accuracies": 0.7839999794960022, |
|
"eval_rewards/chosen": -0.6794139742851257, |
|
"eval_rewards/margins": 1.262575387954712, |
|
"eval_rewards/rejected": -1.9419893026351929, |
|
"eval_runtime": 327.8949, |
|
"eval_samples_per_second": 6.1, |
|
"eval_steps_per_second": 0.381, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.772292384232682e-07, |
|
"logits/chosen": -2.709923028945923, |
|
"logits/rejected": -2.662933588027954, |
|
"logps/chosen": -258.0174560546875, |
|
"logps/rejected": -243.55264282226562, |
|
"loss": 0.506, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.7414777278900146, |
|
"rewards/margins": 1.040825605392456, |
|
"rewards/rejected": -1.7823032140731812, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.753157290470723e-07, |
|
"logits/chosen": -2.770792245864868, |
|
"logits/rejected": -2.7004318237304688, |
|
"logps/chosen": -301.0741271972656, |
|
"logps/rejected": -260.6761779785156, |
|
"loss": 0.5656, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.5612724423408508, |
|
"rewards/margins": 0.8177105188369751, |
|
"rewards/rejected": -1.3789829015731812, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.7340221967087635e-07, |
|
"logits/chosen": -2.6355745792388916, |
|
"logits/rejected": -2.5235517024993896, |
|
"logps/chosen": -259.4550476074219, |
|
"logps/rejected": -241.081298828125, |
|
"loss": 0.5152, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.33019572496414185, |
|
"rewards/margins": 1.2212638854980469, |
|
"rewards/rejected": -1.5514596700668335, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.714887102946804e-07, |
|
"logits/chosen": -2.6858983039855957, |
|
"logits/rejected": -2.606168031692505, |
|
"logps/chosen": -286.8215026855469, |
|
"logps/rejected": -261.89642333984375, |
|
"loss": 0.4437, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.27932292222976685, |
|
"rewards/margins": 1.2575178146362305, |
|
"rewards/rejected": -1.536840796470642, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.6957520091848447e-07, |
|
"logits/chosen": -2.6906371116638184, |
|
"logits/rejected": -2.62721586227417, |
|
"logps/chosen": -282.0963439941406, |
|
"logps/rejected": -252.3018035888672, |
|
"loss": 0.4949, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.45128607749938965, |
|
"rewards/margins": 1.2710622549057007, |
|
"rewards/rejected": -1.7223484516143799, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.6766169154228853e-07, |
|
"logits/chosen": -2.6066861152648926, |
|
"logits/rejected": -2.541717767715454, |
|
"logps/chosen": -287.3714904785156, |
|
"logps/rejected": -264.9255065917969, |
|
"loss": 0.4911, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.575259804725647, |
|
"rewards/margins": 1.08199143409729, |
|
"rewards/rejected": -1.6572513580322266, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.657481821660926e-07, |
|
"logits/chosen": -2.634578227996826, |
|
"logits/rejected": -2.5506088733673096, |
|
"logps/chosen": -291.7644348144531, |
|
"logps/rejected": -266.2176513671875, |
|
"loss": 0.4912, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.5489755868911743, |
|
"rewards/margins": 1.124472975730896, |
|
"rewards/rejected": -1.6734485626220703, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.6383467278989666e-07, |
|
"logits/chosen": -2.7003040313720703, |
|
"logits/rejected": -2.5828516483306885, |
|
"logps/chosen": -315.20574951171875, |
|
"logps/rejected": -249.669677734375, |
|
"loss": 0.4455, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.2965030074119568, |
|
"rewards/margins": 1.5941083431243896, |
|
"rewards/rejected": -1.8906112909317017, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.6192116341370067e-07, |
|
"logits/chosen": -2.6843831539154053, |
|
"logits/rejected": -2.560138702392578, |
|
"logps/chosen": -286.26092529296875, |
|
"logps/rejected": -245.45443725585938, |
|
"loss": 0.5024, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -0.48061037063598633, |
|
"rewards/margins": 1.3923174142837524, |
|
"rewards/rejected": -1.8729279041290283, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.6000765403750473e-07, |
|
"logits/chosen": -2.7150895595550537, |
|
"logits/rejected": -2.6805529594421387, |
|
"logps/chosen": -259.2522888183594, |
|
"logps/rejected": -249.19253540039062, |
|
"loss": 0.5179, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.38437995314598083, |
|
"rewards/margins": 0.9828270077705383, |
|
"rewards/rejected": -1.3672068119049072, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": -2.7630860805511475, |
|
"eval_logits/rejected": -2.677306652069092, |
|
"eval_logps/chosen": -276.4918212890625, |
|
"eval_logps/rejected": -244.1531524658203, |
|
"eval_loss": 0.4804608225822449, |
|
"eval_rewards/accuracies": 0.7760000228881836, |
|
"eval_rewards/chosen": -0.26446640491485596, |
|
"eval_rewards/margins": 1.1840814352035522, |
|
"eval_rewards/rejected": -1.4485478401184082, |
|
"eval_runtime": 328.3333, |
|
"eval_samples_per_second": 6.091, |
|
"eval_steps_per_second": 0.381, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.580941446613088e-07, |
|
"logits/chosen": -2.771284580230713, |
|
"logits/rejected": -2.660457134246826, |
|
"logps/chosen": -299.34197998046875, |
|
"logps/rejected": -261.97998046875, |
|
"loss": 0.4769, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -0.30852824449539185, |
|
"rewards/margins": 1.3942599296569824, |
|
"rewards/rejected": -1.7027881145477295, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.5618063528511285e-07, |
|
"logits/chosen": -2.7847771644592285, |
|
"logits/rejected": -2.7074437141418457, |
|
"logps/chosen": -247.3426513671875, |
|
"logps/rejected": -230.1765899658203, |
|
"loss": 0.457, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.6353321075439453, |
|
"rewards/margins": 1.1674182415008545, |
|
"rewards/rejected": -1.8027503490447998, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.542671259089169e-07, |
|
"logits/chosen": -2.6685898303985596, |
|
"logits/rejected": -2.611987590789795, |
|
"logps/chosen": -291.04827880859375, |
|
"logps/rejected": -270.1742248535156, |
|
"loss": 0.4972, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.6013702154159546, |
|
"rewards/margins": 1.2920764684677124, |
|
"rewards/rejected": -1.893446922302246, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.52353616532721e-07, |
|
"logits/chosen": -2.691497802734375, |
|
"logits/rejected": -2.620311737060547, |
|
"logps/chosen": -271.4696044921875, |
|
"logps/rejected": -264.66278076171875, |
|
"loss": 0.5215, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5857911109924316, |
|
"rewards/margins": 1.2745181322097778, |
|
"rewards/rejected": -1.860309362411499, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.5044010715652504e-07, |
|
"logits/chosen": -2.656611919403076, |
|
"logits/rejected": -2.6252033710479736, |
|
"logps/chosen": -273.7254943847656, |
|
"logps/rejected": -247.2091827392578, |
|
"loss": 0.4766, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7783125042915344, |
|
"rewards/margins": 0.9800532460212708, |
|
"rewards/rejected": -1.7583658695220947, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.485265977803291e-07, |
|
"logits/chosen": -2.7592997550964355, |
|
"logits/rejected": -2.657989978790283, |
|
"logps/chosen": -281.6728820800781, |
|
"logps/rejected": -246.2255859375, |
|
"loss": 0.4732, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.4413899779319763, |
|
"rewards/margins": 1.3485950231552124, |
|
"rewards/rejected": -1.7899850606918335, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.4661308840413316e-07, |
|
"logits/chosen": -2.7648417949676514, |
|
"logits/rejected": -2.6974472999572754, |
|
"logps/chosen": -249.6786346435547, |
|
"logps/rejected": -236.48934936523438, |
|
"loss": 0.4813, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.43044313788414, |
|
"rewards/margins": 1.4435796737670898, |
|
"rewards/rejected": -1.8740227222442627, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.446995790279372e-07, |
|
"logits/chosen": -2.756700038909912, |
|
"logits/rejected": -2.6791555881500244, |
|
"logps/chosen": -291.4740295410156, |
|
"logps/rejected": -262.89801025390625, |
|
"loss": 0.4691, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.6896177530288696, |
|
"rewards/margins": 1.4548016786575317, |
|
"rewards/rejected": -2.1444194316864014, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.4278606965174123e-07, |
|
"logits/chosen": -2.6673600673675537, |
|
"logits/rejected": -2.6058640480041504, |
|
"logps/chosen": -262.8567810058594, |
|
"logps/rejected": -247.41561889648438, |
|
"loss": 0.509, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.3871522545814514, |
|
"rewards/margins": 1.3774772882461548, |
|
"rewards/rejected": -1.764629602432251, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.408725602755453e-07, |
|
"logits/chosen": -2.745607376098633, |
|
"logits/rejected": -2.6397905349731445, |
|
"logps/chosen": -289.14971923828125, |
|
"logps/rejected": -248.13156127929688, |
|
"loss": 0.4705, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.5186043977737427, |
|
"rewards/margins": 1.4924787282943726, |
|
"rewards/rejected": -2.0110831260681152, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_logits/chosen": -2.783844232559204, |
|
"eval_logits/rejected": -2.7009334564208984, |
|
"eval_logps/chosen": -276.8629150390625, |
|
"eval_logps/rejected": -245.43368530273438, |
|
"eval_loss": 0.47145330905914307, |
|
"eval_rewards/accuracies": 0.7559999823570251, |
|
"eval_rewards/chosen": -0.30157405138015747, |
|
"eval_rewards/margins": 1.2750270366668701, |
|
"eval_rewards/rejected": -1.5766010284423828, |
|
"eval_runtime": 329.209, |
|
"eval_samples_per_second": 6.075, |
|
"eval_steps_per_second": 0.38, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.3895905089934936e-07, |
|
"logits/chosen": -2.776756525039673, |
|
"logits/rejected": -2.627607822418213, |
|
"logps/chosen": -299.1159362792969, |
|
"logps/rejected": -242.3809051513672, |
|
"loss": 0.4517, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.31977617740631104, |
|
"rewards/margins": 1.6483633518218994, |
|
"rewards/rejected": -1.968139410018921, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.370455415231534e-07, |
|
"logits/chosen": -2.73073148727417, |
|
"logits/rejected": -2.6580116748809814, |
|
"logps/chosen": -263.01519775390625, |
|
"logps/rejected": -232.265625, |
|
"loss": 0.4977, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.4189836084842682, |
|
"rewards/margins": 1.2726702690124512, |
|
"rewards/rejected": -1.691653847694397, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.351320321469575e-07, |
|
"logits/chosen": -2.7166030406951904, |
|
"logits/rejected": -2.6070265769958496, |
|
"logps/chosen": -264.41058349609375, |
|
"logps/rejected": -236.72128295898438, |
|
"loss": 0.4824, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.27821844816207886, |
|
"rewards/margins": 1.1915299892425537, |
|
"rewards/rejected": -1.4697484970092773, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.3321852277076154e-07, |
|
"logits/chosen": -2.611860513687134, |
|
"logits/rejected": -2.5530788898468018, |
|
"logps/chosen": -286.221923828125, |
|
"logps/rejected": -246.6705780029297, |
|
"loss": 0.4779, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.16525471210479736, |
|
"rewards/margins": 1.415501594543457, |
|
"rewards/rejected": -1.5807561874389648, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.313050133945656e-07, |
|
"logits/chosen": -2.626971960067749, |
|
"logits/rejected": -2.5745906829833984, |
|
"logps/chosen": -255.57138061523438, |
|
"logps/rejected": -252.6260528564453, |
|
"loss": 0.5389, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.09872700273990631, |
|
"rewards/margins": 1.0981225967407227, |
|
"rewards/rejected": -1.1968495845794678, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.2939150401836967e-07, |
|
"logits/chosen": -2.596104145050049, |
|
"logits/rejected": -2.5283610820770264, |
|
"logps/chosen": -255.2698516845703, |
|
"logps/rejected": -223.36605834960938, |
|
"loss": 0.5229, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.45211920142173767, |
|
"rewards/margins": 0.9975794553756714, |
|
"rewards/rejected": -1.4496986865997314, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.2747799464217373e-07, |
|
"logits/chosen": -2.6542000770568848, |
|
"logits/rejected": -2.574998378753662, |
|
"logps/chosen": -261.0472717285156, |
|
"logps/rejected": -231.40328979492188, |
|
"loss": 0.5082, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.492902934551239, |
|
"rewards/margins": 1.1550512313842773, |
|
"rewards/rejected": -1.647953987121582, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.255644852659778e-07, |
|
"logits/chosen": -2.6648573875427246, |
|
"logits/rejected": -2.5788636207580566, |
|
"logps/chosen": -303.67791748046875, |
|
"logps/rejected": -276.10723876953125, |
|
"loss": 0.4633, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.35903650522232056, |
|
"rewards/margins": 1.5083674192428589, |
|
"rewards/rejected": -1.8674037456512451, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.236509758897818e-07, |
|
"logits/chosen": -2.6442172527313232, |
|
"logits/rejected": -2.5383548736572266, |
|
"logps/chosen": -322.7700500488281, |
|
"logps/rejected": -257.04278564453125, |
|
"loss": 0.503, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.48487988114356995, |
|
"rewards/margins": 1.269715666770935, |
|
"rewards/rejected": -1.7545955181121826, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.2173746651358586e-07, |
|
"logits/chosen": -2.6078996658325195, |
|
"logits/rejected": -2.502384901046753, |
|
"logps/chosen": -276.53558349609375, |
|
"logps/rejected": -268.16131591796875, |
|
"loss": 0.5038, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.3312338888645172, |
|
"rewards/margins": 1.3475643396377563, |
|
"rewards/rejected": -1.6787983179092407, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_logits/chosen": -2.6268980503082275, |
|
"eval_logits/rejected": -2.5408694744110107, |
|
"eval_logps/chosen": -276.9665832519531, |
|
"eval_logps/rejected": -245.39862060546875, |
|
"eval_loss": 0.47904127836227417, |
|
"eval_rewards/accuracies": 0.7680000066757202, |
|
"eval_rewards/chosen": -0.31194165349006653, |
|
"eval_rewards/margins": 1.261155366897583, |
|
"eval_rewards/rejected": -1.5730971097946167, |
|
"eval_runtime": 329.3542, |
|
"eval_samples_per_second": 6.072, |
|
"eval_steps_per_second": 0.38, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.198239571373899e-07, |
|
"logits/chosen": -2.565584182739258, |
|
"logits/rejected": -2.4798641204833984, |
|
"logps/chosen": -289.7725830078125, |
|
"logps/rejected": -241.417724609375, |
|
"loss": 0.4679, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.13149698078632355, |
|
"rewards/margins": 1.4788200855255127, |
|
"rewards/rejected": -1.6103169918060303, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.17910447761194e-07, |
|
"logits/chosen": -2.5125203132629395, |
|
"logits/rejected": -2.440331220626831, |
|
"logps/chosen": -243.36306762695312, |
|
"logps/rejected": -215.3681640625, |
|
"loss": 0.4894, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -0.39927834272384644, |
|
"rewards/margins": 1.3379909992218018, |
|
"rewards/rejected": -1.737269401550293, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.1599693838499805e-07, |
|
"logits/chosen": -2.523902177810669, |
|
"logits/rejected": -2.4464023113250732, |
|
"logps/chosen": -322.8412170410156, |
|
"logps/rejected": -260.85198974609375, |
|
"loss": 0.5377, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.4920814633369446, |
|
"rewards/margins": 1.0238711833953857, |
|
"rewards/rejected": -1.515952467918396, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.140834290088021e-07, |
|
"logits/chosen": -2.4797842502593994, |
|
"logits/rejected": -2.4306349754333496, |
|
"logps/chosen": -260.5240173339844, |
|
"logps/rejected": -235.9158172607422, |
|
"loss": 0.4844, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.26274144649505615, |
|
"rewards/margins": 1.1417067050933838, |
|
"rewards/rejected": -1.40444815158844, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.121699196326062e-07, |
|
"logits/chosen": -2.5010387897491455, |
|
"logits/rejected": -2.4390175342559814, |
|
"logps/chosen": -267.5348815917969, |
|
"logps/rejected": -265.01861572265625, |
|
"loss": 0.5134, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.3145067095756531, |
|
"rewards/margins": 1.411447286605835, |
|
"rewards/rejected": -1.7259540557861328, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.1025641025641024e-07, |
|
"logits/chosen": -2.502624988555908, |
|
"logits/rejected": -2.4160873889923096, |
|
"logps/chosen": -248.45449829101562, |
|
"logps/rejected": -230.887939453125, |
|
"loss": 0.5244, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.5923348665237427, |
|
"rewards/margins": 1.0817146301269531, |
|
"rewards/rejected": -1.6740491390228271, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.083429008802143e-07, |
|
"logits/chosen": -2.600817918777466, |
|
"logits/rejected": -2.5673515796661377, |
|
"logps/chosen": -283.1528015136719, |
|
"logps/rejected": -261.5489501953125, |
|
"loss": 0.5599, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.4175918698310852, |
|
"rewards/margins": 1.0563184022903442, |
|
"rewards/rejected": -1.4739103317260742, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.0642939150401836e-07, |
|
"logits/chosen": -2.5576181411743164, |
|
"logits/rejected": -2.5258378982543945, |
|
"logps/chosen": -274.6518859863281, |
|
"logps/rejected": -244.93185424804688, |
|
"loss": 0.4387, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.4217241406440735, |
|
"rewards/margins": 1.273429036140442, |
|
"rewards/rejected": -1.695152997970581, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.0451588212782237e-07, |
|
"logits/chosen": -2.5924856662750244, |
|
"logits/rejected": -2.5202670097351074, |
|
"logps/chosen": -269.2760009765625, |
|
"logps/rejected": -251.0695037841797, |
|
"loss": 0.4782, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.3948896527290344, |
|
"rewards/margins": 1.6062942743301392, |
|
"rewards/rejected": -2.0011839866638184, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.0260237275162643e-07, |
|
"logits/chosen": -2.500338315963745, |
|
"logits/rejected": -2.4486918449401855, |
|
"logps/chosen": -275.6775817871094, |
|
"logps/rejected": -254.4421844482422, |
|
"loss": 0.4418, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.3929779827594757, |
|
"rewards/margins": 1.565860390663147, |
|
"rewards/rejected": -1.9588382244110107, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_logits/chosen": -2.5635573863983154, |
|
"eval_logits/rejected": -2.4833929538726807, |
|
"eval_logps/chosen": -278.41131591796875, |
|
"eval_logps/rejected": -249.84423828125, |
|
"eval_loss": 0.46652939915657043, |
|
"eval_rewards/accuracies": 0.7799999713897705, |
|
"eval_rewards/chosen": -0.45641571283340454, |
|
"eval_rewards/margins": 1.5612393617630005, |
|
"eval_rewards/rejected": -2.01765513420105, |
|
"eval_runtime": 328.8068, |
|
"eval_samples_per_second": 6.083, |
|
"eval_steps_per_second": 0.38, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.006888633754305e-07, |
|
"logits/chosen": -2.521207332611084, |
|
"logits/rejected": -2.4697537422180176, |
|
"logps/chosen": -286.6387634277344, |
|
"logps/rejected": -234.17514038085938, |
|
"loss": 0.5215, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.6284313201904297, |
|
"rewards/margins": 1.2665073871612549, |
|
"rewards/rejected": -1.8949388265609741, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.9877535399923456e-07, |
|
"logits/chosen": -2.600959539413452, |
|
"logits/rejected": -2.547930955886841, |
|
"logps/chosen": -279.1971435546875, |
|
"logps/rejected": -271.48529052734375, |
|
"loss": 0.5072, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5025497674942017, |
|
"rewards/margins": 1.4657728672027588, |
|
"rewards/rejected": -1.968322515487671, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.968618446230386e-07, |
|
"logits/chosen": -2.5383434295654297, |
|
"logits/rejected": -2.4821691513061523, |
|
"logps/chosen": -276.9917907714844, |
|
"logps/rejected": -230.35025024414062, |
|
"loss": 0.4449, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.5041875243186951, |
|
"rewards/margins": 1.4996968507766724, |
|
"rewards/rejected": -2.0038845539093018, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.949483352468427e-07, |
|
"logits/chosen": -2.568638563156128, |
|
"logits/rejected": -2.5122299194335938, |
|
"logps/chosen": -234.30862426757812, |
|
"logps/rejected": -238.3922119140625, |
|
"loss": 0.4666, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.5311821103096008, |
|
"rewards/margins": 1.3636810779571533, |
|
"rewards/rejected": -1.894863486289978, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.9303482587064674e-07, |
|
"logits/chosen": -2.550352096557617, |
|
"logits/rejected": -2.4809679985046387, |
|
"logps/chosen": -277.41363525390625, |
|
"logps/rejected": -261.5442810058594, |
|
"loss": 0.4991, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.4109998345375061, |
|
"rewards/margins": 1.6612545251846313, |
|
"rewards/rejected": -2.072254180908203, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.911213164944508e-07, |
|
"logits/chosen": -2.5133216381073, |
|
"logits/rejected": -2.4779887199401855, |
|
"logps/chosen": -280.0914001464844, |
|
"logps/rejected": -264.759765625, |
|
"loss": 0.4767, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.3346596360206604, |
|
"rewards/margins": 1.4379736185073853, |
|
"rewards/rejected": -1.7726333141326904, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.8920780711825487e-07, |
|
"logits/chosen": -2.558295488357544, |
|
"logits/rejected": -2.4871633052825928, |
|
"logps/chosen": -284.81866455078125, |
|
"logps/rejected": -264.2315368652344, |
|
"loss": 0.5071, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.68181312084198, |
|
"rewards/margins": 1.149623155593872, |
|
"rewards/rejected": -1.8314363956451416, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.8729429774205893e-07, |
|
"logits/chosen": -2.5324788093566895, |
|
"logits/rejected": -2.458425998687744, |
|
"logps/chosen": -279.7908935546875, |
|
"logps/rejected": -257.31512451171875, |
|
"loss": 0.4992, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.35828351974487305, |
|
"rewards/margins": 1.2599681615829468, |
|
"rewards/rejected": -1.6182515621185303, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.8538078836586294e-07, |
|
"logits/chosen": -2.4853129386901855, |
|
"logits/rejected": -2.4347476959228516, |
|
"logps/chosen": -281.7766418457031, |
|
"logps/rejected": -250.4887237548828, |
|
"loss": 0.4881, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.3670748770236969, |
|
"rewards/margins": 1.4128227233886719, |
|
"rewards/rejected": -1.7798974514007568, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.83467278989667e-07, |
|
"logits/chosen": -2.498248338699341, |
|
"logits/rejected": -2.4675116539001465, |
|
"logps/chosen": -293.01519775390625, |
|
"logps/rejected": -260.99517822265625, |
|
"loss": 0.5155, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.6357588768005371, |
|
"rewards/margins": 1.182974100112915, |
|
"rewards/rejected": -1.8187328577041626, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_logits/chosen": -2.592735528945923, |
|
"eval_logits/rejected": -2.511793851852417, |
|
"eval_logps/chosen": -277.5622253417969, |
|
"eval_logps/rejected": -246.7467803955078, |
|
"eval_loss": 0.47698700428009033, |
|
"eval_rewards/accuracies": 0.7739999890327454, |
|
"eval_rewards/chosen": -0.3715021014213562, |
|
"eval_rewards/margins": 1.3364099264144897, |
|
"eval_rewards/rejected": -1.7079118490219116, |
|
"eval_runtime": 329.3477, |
|
"eval_samples_per_second": 6.073, |
|
"eval_steps_per_second": 0.38, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.8155376961347106e-07, |
|
"logits/chosen": -2.5742714405059814, |
|
"logits/rejected": -2.529768466949463, |
|
"logps/chosen": -299.46148681640625, |
|
"logps/rejected": -274.9568176269531, |
|
"loss": 0.5162, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.5742639303207397, |
|
"rewards/margins": 1.0656424760818481, |
|
"rewards/rejected": -1.6399062871932983, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.796402602372751e-07, |
|
"logits/chosen": -2.591820001602173, |
|
"logits/rejected": -2.5311317443847656, |
|
"logps/chosen": -250.81991577148438, |
|
"logps/rejected": -271.74761962890625, |
|
"loss": 0.4654, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.5318310260772705, |
|
"rewards/margins": 1.5595569610595703, |
|
"rewards/rejected": -2.09138822555542, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.777267508610792e-07, |
|
"logits/chosen": -2.615499496459961, |
|
"logits/rejected": -2.5490453243255615, |
|
"logps/chosen": -298.67694091796875, |
|
"logps/rejected": -250.24130249023438, |
|
"loss": 0.4747, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -0.4868291914463043, |
|
"rewards/margins": 1.7270475625991821, |
|
"rewards/rejected": -2.213876247406006, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.7581324148488325e-07, |
|
"logits/chosen": -2.6219589710235596, |
|
"logits/rejected": -2.500911235809326, |
|
"logps/chosen": -284.215576171875, |
|
"logps/rejected": -231.4106903076172, |
|
"loss": 0.4837, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.46450090408325195, |
|
"rewards/margins": 1.5634416341781616, |
|
"rewards/rejected": -2.027942657470703, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.738997321086873e-07, |
|
"logits/chosen": -2.5877745151519775, |
|
"logits/rejected": -2.5523974895477295, |
|
"logps/chosen": -296.5566101074219, |
|
"logps/rejected": -282.413818359375, |
|
"loss": 0.4916, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.7169415354728699, |
|
"rewards/margins": 1.3798969984054565, |
|
"rewards/rejected": -2.0968384742736816, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.7198622273249137e-07, |
|
"logits/chosen": -2.5522520542144775, |
|
"logits/rejected": -2.5006377696990967, |
|
"logps/chosen": -271.15631103515625, |
|
"logps/rejected": -253.3765411376953, |
|
"loss": 0.4431, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6804525852203369, |
|
"rewards/margins": 1.3519657850265503, |
|
"rewards/rejected": -2.0324184894561768, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.7007271335629544e-07, |
|
"logits/chosen": -2.5357367992401123, |
|
"logits/rejected": -2.434713363647461, |
|
"logps/chosen": -301.4870300292969, |
|
"logps/rejected": -240.8806915283203, |
|
"loss": 0.5095, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.6089678406715393, |
|
"rewards/margins": 1.4054759740829468, |
|
"rewards/rejected": -2.014443874359131, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.681592039800995e-07, |
|
"logits/chosen": -2.637885570526123, |
|
"logits/rejected": -2.5964674949645996, |
|
"logps/chosen": -285.35125732421875, |
|
"logps/rejected": -281.14105224609375, |
|
"loss": 0.3956, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.23545095324516296, |
|
"rewards/margins": 2.0617527961730957, |
|
"rewards/rejected": -2.29720401763916, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.662456946039035e-07, |
|
"logits/chosen": -2.633040428161621, |
|
"logits/rejected": -2.5896546840667725, |
|
"logps/chosen": -291.3595886230469, |
|
"logps/rejected": -263.8270263671875, |
|
"loss": 0.3593, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.06208040565252304, |
|
"rewards/margins": 2.071259021759033, |
|
"rewards/rejected": -2.1333394050598145, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.6433218522770757e-07, |
|
"logits/chosen": -2.621001958847046, |
|
"logits/rejected": -2.5748400688171387, |
|
"logps/chosen": -238.26870727539062, |
|
"logps/rejected": -245.1639404296875, |
|
"loss": 0.3463, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.22620248794555664, |
|
"rewards/margins": 1.9476579427719116, |
|
"rewards/rejected": -2.1738600730895996, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"eval_logits/chosen": -2.7082529067993164, |
|
"eval_logits/rejected": -2.6281511783599854, |
|
"eval_logps/chosen": -279.15203857421875, |
|
"eval_logps/rejected": -247.93055725097656, |
|
"eval_loss": 0.47547972202301025, |
|
"eval_rewards/accuracies": 0.7680000066757202, |
|
"eval_rewards/chosen": -0.5304849743843079, |
|
"eval_rewards/margins": 1.2958035469055176, |
|
"eval_rewards/rejected": -1.8262888193130493, |
|
"eval_runtime": 328.7321, |
|
"eval_samples_per_second": 6.084, |
|
"eval_steps_per_second": 0.38, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.6241867585151163e-07, |
|
"logits/chosen": -2.6334519386291504, |
|
"logits/rejected": -2.5576975345611572, |
|
"logps/chosen": -237.9059295654297, |
|
"logps/rejected": -202.5619659423828, |
|
"loss": 0.2682, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -0.20000645518302917, |
|
"rewards/margins": 1.985250473022461, |
|
"rewards/rejected": -2.1852567195892334, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.605051664753157e-07, |
|
"logits/chosen": -2.6050827503204346, |
|
"logits/rejected": -2.5590696334838867, |
|
"logps/chosen": -263.57232666015625, |
|
"logps/rejected": -226.03652954101562, |
|
"loss": 0.2637, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 0.14606496691703796, |
|
"rewards/margins": 2.4412872791290283, |
|
"rewards/rejected": -2.295222520828247, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.5859165709911975e-07, |
|
"logits/chosen": -2.592588424682617, |
|
"logits/rejected": -2.560858726501465, |
|
"logps/chosen": -251.0801239013672, |
|
"logps/rejected": -268.08868408203125, |
|
"loss": 0.2414, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.16298413276672363, |
|
"rewards/margins": 2.507159948348999, |
|
"rewards/rejected": -2.3441758155822754, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.566781477229238e-07, |
|
"logits/chosen": -2.623647451400757, |
|
"logits/rejected": -2.5220494270324707, |
|
"logps/chosen": -319.46319580078125, |
|
"logps/rejected": -263.6154479980469, |
|
"loss": 0.2081, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 0.14128056168556213, |
|
"rewards/margins": 2.827650785446167, |
|
"rewards/rejected": -2.6863701343536377, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.547646383467279e-07, |
|
"logits/chosen": -2.5959651470184326, |
|
"logits/rejected": -2.5497353076934814, |
|
"logps/chosen": -283.4841613769531, |
|
"logps/rejected": -247.54867553710938, |
|
"loss": 0.1888, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.1350892335176468, |
|
"rewards/margins": 2.7649712562561035, |
|
"rewards/rejected": -2.6298820972442627, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.5285112897053194e-07, |
|
"logits/chosen": -2.559783697128296, |
|
"logits/rejected": -2.5014069080352783, |
|
"logps/chosen": -270.575927734375, |
|
"logps/rejected": -260.4879455566406, |
|
"loss": 0.1804, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.1834518164396286, |
|
"rewards/margins": 3.1269893646240234, |
|
"rewards/rejected": -2.943537712097168, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.50937619594336e-07, |
|
"logits/chosen": -2.627401113510132, |
|
"logits/rejected": -2.5442299842834473, |
|
"logps/chosen": -265.7518310546875, |
|
"logps/rejected": -277.06048583984375, |
|
"loss": 0.1877, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.1024879664182663, |
|
"rewards/margins": 2.9685001373291016, |
|
"rewards/rejected": -2.8660120964050293, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.4902411021814007e-07, |
|
"logits/chosen": -2.631279706954956, |
|
"logits/rejected": -2.5586652755737305, |
|
"logps/chosen": -291.9482116699219, |
|
"logps/rejected": -279.103759765625, |
|
"loss": 0.1456, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.08294292539358139, |
|
"rewards/margins": 3.53105092048645, |
|
"rewards/rejected": -3.448107957839966, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.4711060084194413e-07, |
|
"logits/chosen": -2.5913405418395996, |
|
"logits/rejected": -2.5348830223083496, |
|
"logps/chosen": -256.9413757324219, |
|
"logps/rejected": -251.29953002929688, |
|
"loss": 0.1252, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.05543314665555954, |
|
"rewards/margins": 3.7001495361328125, |
|
"rewards/rejected": -3.644716262817383, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.4519709146574814e-07, |
|
"logits/chosen": -2.631477117538452, |
|
"logits/rejected": -2.6130545139312744, |
|
"logps/chosen": -256.7820739746094, |
|
"logps/rejected": -247.29861450195312, |
|
"loss": 0.1266, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 0.08383353799581528, |
|
"rewards/margins": 3.7477478981018066, |
|
"rewards/rejected": -3.663914203643799, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_logits/chosen": -2.6430113315582275, |
|
"eval_logits/rejected": -2.5584213733673096, |
|
"eval_logps/chosen": -283.978271484375, |
|
"eval_logps/rejected": -258.3182067871094, |
|
"eval_loss": 0.49244019389152527, |
|
"eval_rewards/accuracies": 0.7739999890327454, |
|
"eval_rewards/chosen": -1.0131123065948486, |
|
"eval_rewards/margins": 1.8519433736801147, |
|
"eval_rewards/rejected": -2.8650553226470947, |
|
"eval_runtime": 328.4358, |
|
"eval_samples_per_second": 6.089, |
|
"eval_steps_per_second": 0.381, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.432835820895522e-07, |
|
"logits/chosen": -2.6089444160461426, |
|
"logits/rejected": -2.569436550140381, |
|
"logps/chosen": -277.25665283203125, |
|
"logps/rejected": -279.2236633300781, |
|
"loss": 0.1245, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.09174539893865585, |
|
"rewards/margins": 3.8159255981445312, |
|
"rewards/rejected": -3.9076714515686035, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.4137007271335626e-07, |
|
"logits/chosen": -2.6246414184570312, |
|
"logits/rejected": -2.5093507766723633, |
|
"logps/chosen": -296.4078674316406, |
|
"logps/rejected": -253.695068359375, |
|
"loss": 0.1108, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.04911733418703079, |
|
"rewards/margins": 3.8723056316375732, |
|
"rewards/rejected": -3.823188304901123, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.394565633371603e-07, |
|
"logits/chosen": -2.5909955501556396, |
|
"logits/rejected": -2.5098001956939697, |
|
"logps/chosen": -271.1025390625, |
|
"logps/rejected": -267.0798645019531, |
|
"loss": 0.1042, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 0.018337160348892212, |
|
"rewards/margins": 4.38796329498291, |
|
"rewards/rejected": -4.369626522064209, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.375430539609644e-07, |
|
"logits/chosen": -2.603445291519165, |
|
"logits/rejected": -2.5317625999450684, |
|
"logps/chosen": -267.75177001953125, |
|
"logps/rejected": -276.5166931152344, |
|
"loss": 0.0957, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.007721236441284418, |
|
"rewards/margins": 4.616903781890869, |
|
"rewards/rejected": -4.624624252319336, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.3562954458476845e-07, |
|
"logits/chosen": -2.5505504608154297, |
|
"logits/rejected": -2.4800028800964355, |
|
"logps/chosen": -244.16177368164062, |
|
"logps/rejected": -260.59527587890625, |
|
"loss": 0.0964, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 0.06246214359998703, |
|
"rewards/margins": 4.644533634185791, |
|
"rewards/rejected": -4.582070827484131, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.337160352085725e-07, |
|
"logits/chosen": -2.5717084407806396, |
|
"logits/rejected": -2.48624587059021, |
|
"logps/chosen": -284.52362060546875, |
|
"logps/rejected": -286.84625244140625, |
|
"loss": 0.0821, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 0.03565800562500954, |
|
"rewards/margins": 4.872807502746582, |
|
"rewards/rejected": -4.837149620056152, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.3180252583237657e-07, |
|
"logits/chosen": -2.61575984954834, |
|
"logits/rejected": -2.5330493450164795, |
|
"logps/chosen": -302.53533935546875, |
|
"logps/rejected": -303.67889404296875, |
|
"loss": 0.0761, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.09367243200540543, |
|
"rewards/margins": 5.462395668029785, |
|
"rewards/rejected": -5.5560688972473145, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.2988901645618063e-07, |
|
"logits/chosen": -2.6104063987731934, |
|
"logits/rejected": -2.5553605556488037, |
|
"logps/chosen": -269.8067932128906, |
|
"logps/rejected": -275.9356994628906, |
|
"loss": 0.0647, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": 0.17088812589645386, |
|
"rewards/margins": 5.2629804611206055, |
|
"rewards/rejected": -5.092092990875244, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.279755070799847e-07, |
|
"logits/chosen": -2.594054698944092, |
|
"logits/rejected": -2.5422348976135254, |
|
"logps/chosen": -262.38433837890625, |
|
"logps/rejected": -284.35272216796875, |
|
"loss": 0.0645, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.2543705999851227, |
|
"rewards/margins": 5.296311378479004, |
|
"rewards/rejected": -5.550681114196777, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.260619977037887e-07, |
|
"logits/chosen": -2.606248140335083, |
|
"logits/rejected": -2.5383098125457764, |
|
"logps/chosen": -306.80389404296875, |
|
"logps/rejected": -289.07110595703125, |
|
"loss": 0.0751, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.4288966655731201, |
|
"rewards/margins": 5.881345272064209, |
|
"rewards/rejected": -5.45244836807251, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"eval_logits/chosen": -2.6450486183166504, |
|
"eval_logits/rejected": -2.5574116706848145, |
|
"eval_logps/chosen": -288.3549499511719, |
|
"eval_logps/rejected": -266.3139343261719, |
|
"eval_loss": 0.5208475589752197, |
|
"eval_rewards/accuracies": 0.7760000228881836, |
|
"eval_rewards/chosen": -1.4507769346237183, |
|
"eval_rewards/margins": 2.213848352432251, |
|
"eval_rewards/rejected": -3.6646251678466797, |
|
"eval_runtime": 328.5316, |
|
"eval_samples_per_second": 6.088, |
|
"eval_steps_per_second": 0.38, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.2414848832759277e-07, |
|
"logits/chosen": -2.569955348968506, |
|
"logits/rejected": -2.5366978645324707, |
|
"logps/chosen": -271.14764404296875, |
|
"logps/rejected": -311.7731628417969, |
|
"loss": 0.0516, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.0714511051774025, |
|
"rewards/margins": 6.200972557067871, |
|
"rewards/rejected": -6.129521369934082, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.2223497895139683e-07, |
|
"logits/chosen": -2.552734851837158, |
|
"logits/rejected": -2.444171905517578, |
|
"logps/chosen": -288.9889221191406, |
|
"logps/rejected": -297.237548828125, |
|
"loss": 0.0629, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.27881333231925964, |
|
"rewards/margins": 6.696805000305176, |
|
"rewards/rejected": -6.417990684509277, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3.203214695752009e-07, |
|
"logits/chosen": -2.5759170055389404, |
|
"logits/rejected": -2.5034148693084717, |
|
"logps/chosen": -285.6966857910156, |
|
"logps/rejected": -329.5125427246094, |
|
"loss": 0.0629, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.15722158551216125, |
|
"rewards/margins": 6.698854923248291, |
|
"rewards/rejected": -6.541632652282715, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.1840796019900495e-07, |
|
"logits/chosen": -2.555694818496704, |
|
"logits/rejected": -2.485347270965576, |
|
"logps/chosen": -288.07611083984375, |
|
"logps/rejected": -292.7718505859375, |
|
"loss": 0.0525, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.205997496843338, |
|
"rewards/margins": 6.743833065032959, |
|
"rewards/rejected": -6.53783655166626, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 3.16494450822809e-07, |
|
"logits/chosen": -2.576767683029175, |
|
"logits/rejected": -2.534345865249634, |
|
"logps/chosen": -238.9010467529297, |
|
"logps/rejected": -278.7056579589844, |
|
"loss": 0.0425, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": 0.13358671963214874, |
|
"rewards/margins": 6.311589241027832, |
|
"rewards/rejected": -6.178002834320068, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.145809414466131e-07, |
|
"logits/chosen": -2.576357126235962, |
|
"logits/rejected": -2.497936248779297, |
|
"logps/chosen": -266.5452880859375, |
|
"logps/rejected": -300.4448547363281, |
|
"loss": 0.0417, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.3052632808685303, |
|
"rewards/margins": 7.074311256408691, |
|
"rewards/rejected": -6.769047737121582, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.1266743207041714e-07, |
|
"logits/chosen": -2.5541863441467285, |
|
"logits/rejected": -2.496516704559326, |
|
"logps/chosen": -262.9737854003906, |
|
"logps/rejected": -312.977783203125, |
|
"loss": 0.047, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": 0.3359752893447876, |
|
"rewards/margins": 7.000231742858887, |
|
"rewards/rejected": -6.664257049560547, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.107539226942212e-07, |
|
"logits/chosen": -2.4729394912719727, |
|
"logits/rejected": -2.4231820106506348, |
|
"logps/chosen": -249.3036651611328, |
|
"logps/rejected": -301.2051696777344, |
|
"loss": 0.0501, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.4446950852870941, |
|
"rewards/margins": 6.993998050689697, |
|
"rewards/rejected": -6.549304008483887, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 3.0884041331802526e-07, |
|
"logits/chosen": -2.563807964324951, |
|
"logits/rejected": -2.496666431427002, |
|
"logps/chosen": -284.2848205566406, |
|
"logps/rejected": -292.50872802734375, |
|
"loss": 0.0458, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": 0.3111470341682434, |
|
"rewards/margins": 7.219347953796387, |
|
"rewards/rejected": -6.908200740814209, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 3.0692690394182927e-07, |
|
"logits/chosen": -2.505225658416748, |
|
"logits/rejected": -2.4466845989227295, |
|
"logps/chosen": -245.6407470703125, |
|
"logps/rejected": -272.4444274902344, |
|
"loss": 0.0306, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.265463650226593, |
|
"rewards/margins": 6.57799768447876, |
|
"rewards/rejected": -6.843461513519287, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"eval_logits/chosen": -2.5864531993865967, |
|
"eval_logits/rejected": -2.4956560134887695, |
|
"eval_logps/chosen": -295.3101806640625, |
|
"eval_logps/rejected": -277.1172180175781, |
|
"eval_loss": 0.577923059463501, |
|
"eval_rewards/accuracies": 0.7580000162124634, |
|
"eval_rewards/chosen": -2.1463019847869873, |
|
"eval_rewards/margins": 2.5986533164978027, |
|
"eval_rewards/rejected": -4.744955539703369, |
|
"eval_runtime": 327.6113, |
|
"eval_samples_per_second": 6.105, |
|
"eval_steps_per_second": 0.382, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.0501339456563334e-07, |
|
"logits/chosen": -2.530522584915161, |
|
"logits/rejected": -2.4422647953033447, |
|
"logps/chosen": -260.3827819824219, |
|
"logps/rejected": -274.53424072265625, |
|
"loss": 0.0479, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.38277196884155273, |
|
"rewards/margins": 6.145764350891113, |
|
"rewards/rejected": -6.52853536605835, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 3.030998851894374e-07, |
|
"logits/chosen": -2.530467987060547, |
|
"logits/rejected": -2.457545757293701, |
|
"logps/chosen": -280.2557678222656, |
|
"logps/rejected": -312.9700622558594, |
|
"loss": 0.0435, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.0774858146905899, |
|
"rewards/margins": 7.1998748779296875, |
|
"rewards/rejected": -7.122389316558838, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 3.0118637581324146e-07, |
|
"logits/chosen": -2.5862784385681152, |
|
"logits/rejected": -2.4979710578918457, |
|
"logps/chosen": -265.94195556640625, |
|
"logps/rejected": -322.9318542480469, |
|
"loss": 0.0529, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.03257175534963608, |
|
"rewards/margins": 7.4844207763671875, |
|
"rewards/rejected": -7.451849460601807, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.992728664370455e-07, |
|
"logits/chosen": -2.529265880584717, |
|
"logits/rejected": -2.472142457962036, |
|
"logps/chosen": -241.907958984375, |
|
"logps/rejected": -295.9644775390625, |
|
"loss": 0.0453, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.2552769184112549, |
|
"rewards/margins": 7.376172065734863, |
|
"rewards/rejected": -7.631448268890381, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.973593570608496e-07, |
|
"logits/chosen": -2.6861939430236816, |
|
"logits/rejected": -2.5757153034210205, |
|
"logps/chosen": -304.3006286621094, |
|
"logps/rejected": -301.52545166015625, |
|
"loss": 0.0396, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": 0.03746197372674942, |
|
"rewards/margins": 7.219727993011475, |
|
"rewards/rejected": -7.1822662353515625, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.9544584768465365e-07, |
|
"logits/chosen": -2.54135799407959, |
|
"logits/rejected": -2.515650749206543, |
|
"logps/chosen": -278.438232421875, |
|
"logps/rejected": -317.2010498046875, |
|
"loss": 0.0398, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -0.3339845538139343, |
|
"rewards/margins": 7.23375940322876, |
|
"rewards/rejected": -7.56774377822876, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.935323383084577e-07, |
|
"logits/chosen": -2.599730968475342, |
|
"logits/rejected": -2.531221628189087, |
|
"logps/chosen": -311.6480712890625, |
|
"logps/rejected": -324.45635986328125, |
|
"loss": 0.0447, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.26491448283195496, |
|
"rewards/margins": 7.551673889160156, |
|
"rewards/rejected": -7.816588401794434, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.9161882893226177e-07, |
|
"logits/chosen": -2.5754849910736084, |
|
"logits/rejected": -2.5394327640533447, |
|
"logps/chosen": -259.94171142578125, |
|
"logps/rejected": -315.7677001953125, |
|
"loss": 0.0517, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.3456391394138336, |
|
"rewards/margins": 6.892177581787109, |
|
"rewards/rejected": -7.23781681060791, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.8970531955606583e-07, |
|
"logits/chosen": -2.6177515983581543, |
|
"logits/rejected": -2.5629687309265137, |
|
"logps/chosen": -286.0957336425781, |
|
"logps/rejected": -307.2629699707031, |
|
"loss": 0.0425, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.2400694638490677, |
|
"rewards/margins": 6.7607903480529785, |
|
"rewards/rejected": -6.520721435546875, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.8779181017986984e-07, |
|
"logits/chosen": -2.5180513858795166, |
|
"logits/rejected": -2.4203319549560547, |
|
"logps/chosen": -262.3363952636719, |
|
"logps/rejected": -293.3941955566406, |
|
"loss": 0.031, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.30498212575912476, |
|
"rewards/margins": 6.864785671234131, |
|
"rewards/rejected": -7.1697678565979, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"eval_logits/chosen": -2.6050970554351807, |
|
"eval_logits/rejected": -2.5157244205474854, |
|
"eval_logps/chosen": -300.5773620605469, |
|
"eval_logps/rejected": -282.7791748046875, |
|
"eval_loss": 0.5992786288261414, |
|
"eval_rewards/accuracies": 0.7580000162124634, |
|
"eval_rewards/chosen": -2.673020124435425, |
|
"eval_rewards/margins": 2.638129711151123, |
|
"eval_rewards/rejected": -5.311149597167969, |
|
"eval_runtime": 329.4856, |
|
"eval_samples_per_second": 6.07, |
|
"eval_steps_per_second": 0.379, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.858783008036739e-07, |
|
"logits/chosen": -2.6051602363586426, |
|
"logits/rejected": -2.492736339569092, |
|
"logps/chosen": -296.25274658203125, |
|
"logps/rejected": -316.0076599121094, |
|
"loss": 0.0344, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.7290128469467163, |
|
"rewards/margins": 6.831245422363281, |
|
"rewards/rejected": -7.560257911682129, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.8396479142747797e-07, |
|
"logits/chosen": -2.5280001163482666, |
|
"logits/rejected": -2.476745367050171, |
|
"logps/chosen": -287.8592529296875, |
|
"logps/rejected": -325.2400207519531, |
|
"loss": 0.0638, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -0.8896492123603821, |
|
"rewards/margins": 7.580406188964844, |
|
"rewards/rejected": -8.47005558013916, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.8205128205128203e-07, |
|
"logits/chosen": -2.591068744659424, |
|
"logits/rejected": -2.5293643474578857, |
|
"logps/chosen": -284.336181640625, |
|
"logps/rejected": -330.9443359375, |
|
"loss": 0.0486, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.8629849553108215, |
|
"rewards/margins": 6.952902317047119, |
|
"rewards/rejected": -7.815886497497559, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.801377726750861e-07, |
|
"logits/chosen": -2.597510814666748, |
|
"logits/rejected": -2.549468517303467, |
|
"logps/chosen": -293.65545654296875, |
|
"logps/rejected": -315.900146484375, |
|
"loss": 0.0487, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.5767217874526978, |
|
"rewards/margins": 7.4031982421875, |
|
"rewards/rejected": -7.97991943359375, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.7822426329889015e-07, |
|
"logits/chosen": -2.6672866344451904, |
|
"logits/rejected": -2.5381336212158203, |
|
"logps/chosen": -321.498291015625, |
|
"logps/rejected": -318.640869140625, |
|
"loss": 0.0441, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.2594658434391022, |
|
"rewards/margins": 7.167167663574219, |
|
"rewards/rejected": -7.426634311676025, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.763107539226942e-07, |
|
"logits/chosen": -2.6042659282684326, |
|
"logits/rejected": -2.4887306690216064, |
|
"logps/chosen": -276.0786437988281, |
|
"logps/rejected": -292.72967529296875, |
|
"loss": 0.0589, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.5728877186775208, |
|
"rewards/margins": 6.894124507904053, |
|
"rewards/rejected": -7.467011451721191, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.743972445464983e-07, |
|
"logits/chosen": -2.5835888385772705, |
|
"logits/rejected": -2.523806571960449, |
|
"logps/chosen": -274.3123474121094, |
|
"logps/rejected": -314.17633056640625, |
|
"loss": 0.0726, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.20469942688941956, |
|
"rewards/margins": 6.882570743560791, |
|
"rewards/rejected": -7.087271213531494, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.7248373517030234e-07, |
|
"logits/chosen": -2.5750460624694824, |
|
"logits/rejected": -2.498121976852417, |
|
"logps/chosen": -270.62261962890625, |
|
"logps/rejected": -296.22247314453125, |
|
"loss": 0.0533, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.46651512384414673, |
|
"rewards/margins": 6.364560604095459, |
|
"rewards/rejected": -6.831076145172119, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.705702257941064e-07, |
|
"logits/chosen": -2.6253905296325684, |
|
"logits/rejected": -2.530066967010498, |
|
"logps/chosen": -249.2847442626953, |
|
"logps/rejected": -282.79571533203125, |
|
"loss": 0.0408, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -0.562870979309082, |
|
"rewards/margins": 6.45670223236084, |
|
"rewards/rejected": -7.019574165344238, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.686567164179104e-07, |
|
"logits/chosen": -2.5404858589172363, |
|
"logits/rejected": -2.4926059246063232, |
|
"logps/chosen": -304.41253662109375, |
|
"logps/rejected": -347.6630554199219, |
|
"loss": 0.0535, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.6575719714164734, |
|
"rewards/margins": 7.0210371017456055, |
|
"rewards/rejected": -7.678609371185303, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_logits/chosen": -2.652937650680542, |
|
"eval_logits/rejected": -2.5616180896759033, |
|
"eval_logps/chosen": -295.47467041015625, |
|
"eval_logps/rejected": -277.6109924316406, |
|
"eval_loss": 0.573060154914856, |
|
"eval_rewards/accuracies": 0.75, |
|
"eval_rewards/chosen": -2.162747621536255, |
|
"eval_rewards/margins": 2.631584882736206, |
|
"eval_rewards/rejected": -4.794332981109619, |
|
"eval_runtime": 329.0517, |
|
"eval_samples_per_second": 6.078, |
|
"eval_steps_per_second": 0.38, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.6674320704171447e-07, |
|
"logits/chosen": -2.6182427406311035, |
|
"logits/rejected": -2.5373194217681885, |
|
"logps/chosen": -272.0426940917969, |
|
"logps/rejected": -314.56256103515625, |
|
"loss": 0.0406, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -0.302041620016098, |
|
"rewards/margins": 6.977307319641113, |
|
"rewards/rejected": -7.279348850250244, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.6482969766551853e-07, |
|
"logits/chosen": -2.5645289421081543, |
|
"logits/rejected": -2.5248210430145264, |
|
"logps/chosen": -267.35479736328125, |
|
"logps/rejected": -293.21136474609375, |
|
"loss": 0.0381, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.3300275206565857, |
|
"rewards/margins": 6.987812042236328, |
|
"rewards/rejected": -7.317839622497559, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.629161882893226e-07, |
|
"logits/chosen": -2.588085889816284, |
|
"logits/rejected": -2.4837424755096436, |
|
"logps/chosen": -276.7251281738281, |
|
"logps/rejected": -307.32916259765625, |
|
"loss": 0.0313, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.410992294549942, |
|
"rewards/margins": 6.828526496887207, |
|
"rewards/rejected": -7.239518165588379, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.6100267891312666e-07, |
|
"logits/chosen": -2.5293657779693604, |
|
"logits/rejected": -2.4737491607666016, |
|
"logps/chosen": -249.6378936767578, |
|
"logps/rejected": -282.2840881347656, |
|
"loss": 0.0485, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -1.0298715829849243, |
|
"rewards/margins": 6.555671691894531, |
|
"rewards/rejected": -7.58554220199585, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.590891695369307e-07, |
|
"logits/chosen": -2.5496673583984375, |
|
"logits/rejected": -2.4890804290771484, |
|
"logps/chosen": -300.34918212890625, |
|
"logps/rejected": -333.11688232421875, |
|
"loss": 0.0441, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.0020015239715576, |
|
"rewards/margins": 6.947371006011963, |
|
"rewards/rejected": -7.949372291564941, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.571756601607348e-07, |
|
"logits/chosen": -2.464364528656006, |
|
"logits/rejected": -2.387753963470459, |
|
"logps/chosen": -264.74920654296875, |
|
"logps/rejected": -290.9281005859375, |
|
"loss": 0.0492, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.6402736902236938, |
|
"rewards/margins": 6.474742889404297, |
|
"rewards/rejected": -7.115015983581543, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.5526215078453884e-07, |
|
"logits/chosen": -2.5369412899017334, |
|
"logits/rejected": -2.425361156463623, |
|
"logps/chosen": -284.7231750488281, |
|
"logps/rejected": -296.4128112792969, |
|
"loss": 0.0491, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.9682533144950867, |
|
"rewards/margins": 6.315736770629883, |
|
"rewards/rejected": -7.28399133682251, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.533486414083429e-07, |
|
"logits/chosen": -2.563443660736084, |
|
"logits/rejected": -2.441769599914551, |
|
"logps/chosen": -313.6112365722656, |
|
"logps/rejected": -305.52239990234375, |
|
"loss": 0.0388, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.46426552534103394, |
|
"rewards/margins": 6.881335258483887, |
|
"rewards/rejected": -7.3456010818481445, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.5143513203214697e-07, |
|
"logits/chosen": -2.5470073223114014, |
|
"logits/rejected": -2.4438252449035645, |
|
"logps/chosen": -251.0168914794922, |
|
"logps/rejected": -280.91961669921875, |
|
"loss": 0.0562, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.10129622370004654, |
|
"rewards/margins": 6.691439151763916, |
|
"rewards/rejected": -6.7927350997924805, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.49521622655951e-07, |
|
"logits/chosen": -2.610071897506714, |
|
"logits/rejected": -2.5157220363616943, |
|
"logps/chosen": -281.8477783203125, |
|
"logps/rejected": -294.8487243652344, |
|
"loss": 0.063, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.2210036963224411, |
|
"rewards/margins": 6.306001663208008, |
|
"rewards/rejected": -6.527005195617676, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"eval_logits/chosen": -2.5985054969787598, |
|
"eval_logits/rejected": -2.503840923309326, |
|
"eval_logps/chosen": -293.67022705078125, |
|
"eval_logps/rejected": -275.4324645996094, |
|
"eval_loss": 0.543339192867279, |
|
"eval_rewards/accuracies": 0.7580000162124634, |
|
"eval_rewards/chosen": -1.9823036193847656, |
|
"eval_rewards/margins": 2.594174861907959, |
|
"eval_rewards/rejected": -4.576478481292725, |
|
"eval_runtime": 329.1368, |
|
"eval_samples_per_second": 6.077, |
|
"eval_steps_per_second": 0.38, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.4760811327975504e-07, |
|
"logits/chosen": -2.5374481678009033, |
|
"logits/rejected": -2.4631876945495605, |
|
"logps/chosen": -272.7527770996094, |
|
"logps/rejected": -301.7461242675781, |
|
"loss": 0.0515, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.12996195256710052, |
|
"rewards/margins": 7.1182541847229, |
|
"rewards/rejected": -7.248216152191162, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.456946039035591e-07, |
|
"logits/chosen": -2.5310726165771484, |
|
"logits/rejected": -2.470857620239258, |
|
"logps/chosen": -264.4376525878906, |
|
"logps/rejected": -309.92205810546875, |
|
"loss": 0.0466, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.09321962296962738, |
|
"rewards/margins": 6.667203426361084, |
|
"rewards/rejected": -6.760423183441162, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.4378109452736316e-07, |
|
"logits/chosen": -2.4673683643341064, |
|
"logits/rejected": -2.4015889167785645, |
|
"logps/chosen": -245.9351348876953, |
|
"logps/rejected": -268.8101806640625, |
|
"loss": 0.054, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -0.7791624665260315, |
|
"rewards/margins": 6.342289924621582, |
|
"rewards/rejected": -7.121452331542969, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.418675851511672e-07, |
|
"logits/chosen": -2.560784101486206, |
|
"logits/rejected": -2.449129343032837, |
|
"logps/chosen": -273.04229736328125, |
|
"logps/rejected": -294.8717346191406, |
|
"loss": 0.0389, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.1826784312725067, |
|
"rewards/margins": 6.658658504486084, |
|
"rewards/rejected": -6.841336727142334, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.399540757749713e-07, |
|
"logits/chosen": -2.5664353370666504, |
|
"logits/rejected": -2.452826738357544, |
|
"logps/chosen": -303.6127624511719, |
|
"logps/rejected": -331.66033935546875, |
|
"loss": 0.0434, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.40071630477905273, |
|
"rewards/margins": 6.958899021148682, |
|
"rewards/rejected": -7.359615325927734, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.3804056639877535e-07, |
|
"logits/chosen": -2.535295009613037, |
|
"logits/rejected": -2.417152166366577, |
|
"logps/chosen": -333.24072265625, |
|
"logps/rejected": -316.44830322265625, |
|
"loss": 0.0342, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.7423610091209412, |
|
"rewards/margins": 7.322349548339844, |
|
"rewards/rejected": -8.06471061706543, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.361270570225794e-07, |
|
"logits/chosen": -2.564420700073242, |
|
"logits/rejected": -2.4815382957458496, |
|
"logps/chosen": -262.5311279296875, |
|
"logps/rejected": -331.90570068359375, |
|
"loss": 0.0324, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.6712821125984192, |
|
"rewards/margins": 7.437242031097412, |
|
"rewards/rejected": -8.108525276184082, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.3421354764638345e-07, |
|
"logits/chosen": -2.515321969985962, |
|
"logits/rejected": -2.3945698738098145, |
|
"logps/chosen": -290.71563720703125, |
|
"logps/rejected": -294.52008056640625, |
|
"loss": 0.0412, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -1.4271801710128784, |
|
"rewards/margins": 6.557519435882568, |
|
"rewards/rejected": -7.984699249267578, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.323000382701875e-07, |
|
"logits/chosen": -2.523240566253662, |
|
"logits/rejected": -2.432344436645508, |
|
"logps/chosen": -275.192138671875, |
|
"logps/rejected": -287.0332336425781, |
|
"loss": 0.0567, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -1.1197443008422852, |
|
"rewards/margins": 6.0831122398376465, |
|
"rewards/rejected": -7.202856540679932, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.3038652889399157e-07, |
|
"logits/chosen": -2.4755594730377197, |
|
"logits/rejected": -2.389221668243408, |
|
"logps/chosen": -303.61181640625, |
|
"logps/rejected": -317.91302490234375, |
|
"loss": 0.0423, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.6061100959777832, |
|
"rewards/margins": 7.471484184265137, |
|
"rewards/rejected": -8.077593803405762, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"eval_logits/chosen": -2.5654473304748535, |
|
"eval_logits/rejected": -2.4635632038116455, |
|
"eval_logps/chosen": -300.39990234375, |
|
"eval_logps/rejected": -283.8501892089844, |
|
"eval_loss": 0.5821015238761902, |
|
"eval_rewards/accuracies": 0.7540000081062317, |
|
"eval_rewards/chosen": -2.655275344848633, |
|
"eval_rewards/margins": 2.762974739074707, |
|
"eval_rewards/rejected": -5.41825008392334, |
|
"eval_runtime": 329.0804, |
|
"eval_samples_per_second": 6.078, |
|
"eval_steps_per_second": 0.38, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.2847301951779563e-07, |
|
"logits/chosen": -2.4770474433898926, |
|
"logits/rejected": -2.4310386180877686, |
|
"logps/chosen": -263.2637939453125, |
|
"logps/rejected": -285.5542297363281, |
|
"loss": 0.0644, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.271353542804718, |
|
"rewards/margins": 6.415966987609863, |
|
"rewards/rejected": -6.687320709228516, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.265595101415997e-07, |
|
"logits/chosen": -2.52775239944458, |
|
"logits/rejected": -2.4429614543914795, |
|
"logps/chosen": -280.1533203125, |
|
"logps/rejected": -329.3421936035156, |
|
"loss": 0.0626, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -0.26627108454704285, |
|
"rewards/margins": 7.076508522033691, |
|
"rewards/rejected": -7.342779636383057, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.2464600076540373e-07, |
|
"logits/chosen": -2.5030529499053955, |
|
"logits/rejected": -2.4013702869415283, |
|
"logps/chosen": -240.1175537109375, |
|
"logps/rejected": -286.1576232910156, |
|
"loss": 0.0656, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.5509620904922485, |
|
"rewards/margins": 6.837267875671387, |
|
"rewards/rejected": -7.388230323791504, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.227324913892078e-07, |
|
"logits/chosen": -2.5704593658447266, |
|
"logits/rejected": -2.5203540325164795, |
|
"logps/chosen": -287.09075927734375, |
|
"logps/rejected": -313.4488830566406, |
|
"loss": 0.0569, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -0.18775661289691925, |
|
"rewards/margins": 6.396711826324463, |
|
"rewards/rejected": -6.584468841552734, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.2081898201301186e-07, |
|
"logits/chosen": -2.455457925796509, |
|
"logits/rejected": -2.402740716934204, |
|
"logps/chosen": -269.8533630371094, |
|
"logps/rejected": -295.97845458984375, |
|
"loss": 0.0409, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.4922047257423401, |
|
"rewards/margins": 6.239272117614746, |
|
"rewards/rejected": -6.7314772605896, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.1890547263681592e-07, |
|
"logits/chosen": -2.5787675380706787, |
|
"logits/rejected": -2.5012478828430176, |
|
"logps/chosen": -270.3778381347656, |
|
"logps/rejected": -303.08624267578125, |
|
"loss": 0.0471, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -0.5367101430892944, |
|
"rewards/margins": 6.749762058258057, |
|
"rewards/rejected": -7.286472320556641, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 2.1699196326061998e-07, |
|
"logits/chosen": -2.498349189758301, |
|
"logits/rejected": -2.4476866722106934, |
|
"logps/chosen": -277.01446533203125, |
|
"logps/rejected": -307.1866760253906, |
|
"loss": 0.0436, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.36937451362609863, |
|
"rewards/margins": 6.952645778656006, |
|
"rewards/rejected": -7.322020053863525, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.1507845388442402e-07, |
|
"logits/chosen": -2.5487923622131348, |
|
"logits/rejected": -2.4616153240203857, |
|
"logps/chosen": -280.83990478515625, |
|
"logps/rejected": -283.40008544921875, |
|
"loss": 0.0603, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.3574257791042328, |
|
"rewards/margins": 6.618617057800293, |
|
"rewards/rejected": -6.976043701171875, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 2.1316494450822808e-07, |
|
"logits/chosen": -2.508967876434326, |
|
"logits/rejected": -2.4427499771118164, |
|
"logps/chosen": -282.5861511230469, |
|
"logps/rejected": -310.12091064453125, |
|
"loss": 0.059, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.5728462934494019, |
|
"rewards/margins": 6.127859115600586, |
|
"rewards/rejected": -6.700705051422119, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.1125143513203214e-07, |
|
"logits/chosen": -2.4674155712127686, |
|
"logits/rejected": -2.4133477210998535, |
|
"logps/chosen": -274.87060546875, |
|
"logps/rejected": -287.4228210449219, |
|
"loss": 0.0559, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.9308744668960571, |
|
"rewards/margins": 6.335507869720459, |
|
"rewards/rejected": -7.266383171081543, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_logits/chosen": -2.5741446018218994, |
|
"eval_logits/rejected": -2.4843475818634033, |
|
"eval_logps/chosen": -299.6483154296875, |
|
"eval_logps/rejected": -282.3105773925781, |
|
"eval_loss": 0.5656670928001404, |
|
"eval_rewards/accuracies": 0.7519999742507935, |
|
"eval_rewards/chosen": -2.580115556716919, |
|
"eval_rewards/margins": 2.684171438217163, |
|
"eval_rewards/rejected": -5.264286518096924, |
|
"eval_runtime": 330.2789, |
|
"eval_samples_per_second": 6.055, |
|
"eval_steps_per_second": 0.378, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 2.093379257558362e-07, |
|
"logits/chosen": -2.5033411979675293, |
|
"logits/rejected": -2.432813882827759, |
|
"logps/chosen": -247.942626953125, |
|
"logps/rejected": -305.20867919921875, |
|
"loss": 0.0516, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.7584810853004456, |
|
"rewards/margins": 6.978280067443848, |
|
"rewards/rejected": -7.73676061630249, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 2.0742441637964026e-07, |
|
"logits/chosen": -2.499147653579712, |
|
"logits/rejected": -2.4221835136413574, |
|
"logps/chosen": -284.1277160644531, |
|
"logps/rejected": -317.18463134765625, |
|
"loss": 0.0549, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.9464308619499207, |
|
"rewards/margins": 6.537860870361328, |
|
"rewards/rejected": -7.484292507171631, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 2.055109070034443e-07, |
|
"logits/chosen": -2.519296169281006, |
|
"logits/rejected": -2.4646060466766357, |
|
"logps/chosen": -279.44268798828125, |
|
"logps/rejected": -308.8426513671875, |
|
"loss": 0.05, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.49726948142051697, |
|
"rewards/margins": 6.457852840423584, |
|
"rewards/rejected": -6.9551215171813965, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 2.0359739762724836e-07, |
|
"logits/chosen": -2.5203187465667725, |
|
"logits/rejected": -2.459474802017212, |
|
"logps/chosen": -275.2139892578125, |
|
"logps/rejected": -314.4609680175781, |
|
"loss": 0.0526, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.601524293422699, |
|
"rewards/margins": 6.1087565422058105, |
|
"rewards/rejected": -6.7102813720703125, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 2.0168388825105242e-07, |
|
"logits/chosen": -2.523761034011841, |
|
"logits/rejected": -2.4565975666046143, |
|
"logps/chosen": -288.87469482421875, |
|
"logps/rejected": -316.4368896484375, |
|
"loss": 0.0576, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.4725467562675476, |
|
"rewards/margins": 6.452563285827637, |
|
"rewards/rejected": -6.92510986328125, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.997703788748565e-07, |
|
"logits/chosen": -2.49338960647583, |
|
"logits/rejected": -2.41786527633667, |
|
"logps/chosen": -295.960205078125, |
|
"logps/rejected": -308.38922119140625, |
|
"loss": 0.0607, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.5486451387405396, |
|
"rewards/margins": 6.254899024963379, |
|
"rewards/rejected": -6.803544044494629, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.9785686949866055e-07, |
|
"logits/chosen": -2.4441514015197754, |
|
"logits/rejected": -2.4188904762268066, |
|
"logps/chosen": -280.95782470703125, |
|
"logps/rejected": -308.5982360839844, |
|
"loss": 0.0612, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -0.31675153970718384, |
|
"rewards/margins": 6.945546627044678, |
|
"rewards/rejected": -7.262298583984375, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.9594336012246458e-07, |
|
"logits/chosen": -2.518660545349121, |
|
"logits/rejected": -2.458202838897705, |
|
"logps/chosen": -288.21527099609375, |
|
"logps/rejected": -335.2455139160156, |
|
"loss": 0.0436, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.16030362248420715, |
|
"rewards/margins": 6.654341697692871, |
|
"rewards/rejected": -6.814645290374756, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.9402985074626865e-07, |
|
"logits/chosen": -2.5323879718780518, |
|
"logits/rejected": -2.4646458625793457, |
|
"logps/chosen": -253.74435424804688, |
|
"logps/rejected": -308.44464111328125, |
|
"loss": 0.0475, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -0.6830048561096191, |
|
"rewards/margins": 6.6035966873168945, |
|
"rewards/rejected": -7.286602020263672, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.921163413700727e-07, |
|
"logits/chosen": -2.5229687690734863, |
|
"logits/rejected": -2.4853615760803223, |
|
"logps/chosen": -314.48468017578125, |
|
"logps/rejected": -311.384521484375, |
|
"loss": 0.0468, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8841110467910767, |
|
"rewards/margins": 6.407034873962402, |
|
"rewards/rejected": -7.291146278381348, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"eval_logits/chosen": -2.539219379425049, |
|
"eval_logits/rejected": -2.449059009552002, |
|
"eval_logps/chosen": -298.44427490234375, |
|
"eval_logps/rejected": -282.5741882324219, |
|
"eval_loss": 0.5758858919143677, |
|
"eval_rewards/accuracies": 0.7480000257492065, |
|
"eval_rewards/chosen": -2.4597108364105225, |
|
"eval_rewards/margins": 2.8309402465820312, |
|
"eval_rewards/rejected": -5.290651321411133, |
|
"eval_runtime": 329.9748, |
|
"eval_samples_per_second": 6.061, |
|
"eval_steps_per_second": 0.379, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.9020283199387677e-07, |
|
"logits/chosen": -2.4514074325561523, |
|
"logits/rejected": -2.3297629356384277, |
|
"logps/chosen": -287.038330078125, |
|
"logps/rejected": -299.0102233886719, |
|
"loss": 0.0499, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.0671433210372925, |
|
"rewards/margins": 5.865691184997559, |
|
"rewards/rejected": -6.932834625244141, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.8828932261768083e-07, |
|
"logits/chosen": -2.4341206550598145, |
|
"logits/rejected": -2.371324300765991, |
|
"logps/chosen": -282.197998046875, |
|
"logps/rejected": -301.5996398925781, |
|
"loss": 0.0463, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.2245758771896362, |
|
"rewards/margins": 6.24572229385376, |
|
"rewards/rejected": -7.470297813415527, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.8637581324148487e-07, |
|
"logits/chosen": -2.438136577606201, |
|
"logits/rejected": -2.3593366146087646, |
|
"logps/chosen": -286.82855224609375, |
|
"logps/rejected": -316.28240966796875, |
|
"loss": 0.0489, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.0146198272705078, |
|
"rewards/margins": 6.5871477127075195, |
|
"rewards/rejected": -7.601768493652344, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.8446230386528893e-07, |
|
"logits/chosen": -2.383092164993286, |
|
"logits/rejected": -2.2963826656341553, |
|
"logps/chosen": -294.37200927734375, |
|
"logps/rejected": -295.9538879394531, |
|
"loss": 0.0617, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.6222116947174072, |
|
"rewards/margins": 6.908046722412109, |
|
"rewards/rejected": -7.530259132385254, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.82548794489093e-07, |
|
"logits/chosen": -2.4570600986480713, |
|
"logits/rejected": -2.4170963764190674, |
|
"logps/chosen": -294.67333984375, |
|
"logps/rejected": -328.25335693359375, |
|
"loss": 0.0567, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.6761181950569153, |
|
"rewards/margins": 6.681130886077881, |
|
"rewards/rejected": -7.3572492599487305, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.8063528511289706e-07, |
|
"logits/chosen": -2.4279465675354004, |
|
"logits/rejected": -2.3740410804748535, |
|
"logps/chosen": -305.5331726074219, |
|
"logps/rejected": -321.54974365234375, |
|
"loss": 0.0629, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.977795422077179, |
|
"rewards/margins": 6.151275634765625, |
|
"rewards/rejected": -7.129071235656738, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.7872177573670112e-07, |
|
"logits/chosen": -2.4284489154815674, |
|
"logits/rejected": -2.3778624534606934, |
|
"logps/chosen": -238.752685546875, |
|
"logps/rejected": -279.09161376953125, |
|
"loss": 0.0655, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -1.1264437437057495, |
|
"rewards/margins": 5.814446926116943, |
|
"rewards/rejected": -6.940890312194824, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.7680826636050515e-07, |
|
"logits/chosen": -2.4329612255096436, |
|
"logits/rejected": -2.374427080154419, |
|
"logps/chosen": -259.0732116699219, |
|
"logps/rejected": -254.0245361328125, |
|
"loss": 0.0516, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -1.238419771194458, |
|
"rewards/margins": 5.634509086608887, |
|
"rewards/rejected": -6.872928619384766, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.7489475698430921e-07, |
|
"logits/chosen": -2.435520648956299, |
|
"logits/rejected": -2.4173076152801514, |
|
"logps/chosen": -271.8101501464844, |
|
"logps/rejected": -295.2347412109375, |
|
"loss": 0.0426, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.8866006731987, |
|
"rewards/margins": 6.229605674743652, |
|
"rewards/rejected": -7.116206169128418, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.7298124760811328e-07, |
|
"logits/chosen": -2.4372620582580566, |
|
"logits/rejected": -2.3834497928619385, |
|
"logps/chosen": -262.3973388671875, |
|
"logps/rejected": -304.9111022949219, |
|
"loss": 0.0576, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.2331212759017944, |
|
"rewards/margins": 5.813534259796143, |
|
"rewards/rejected": -7.046655178070068, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"eval_logits/chosen": -2.5015718936920166, |
|
"eval_logits/rejected": -2.413215160369873, |
|
"eval_logps/chosen": -299.8445739746094, |
|
"eval_logps/rejected": -282.8996887207031, |
|
"eval_loss": 0.5613510608673096, |
|
"eval_rewards/accuracies": 0.7620000243186951, |
|
"eval_rewards/chosen": -2.5997402667999268, |
|
"eval_rewards/margins": 2.7234604358673096, |
|
"eval_rewards/rejected": -5.323200702667236, |
|
"eval_runtime": 329.9108, |
|
"eval_samples_per_second": 6.062, |
|
"eval_steps_per_second": 0.379, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.7106773823191734e-07, |
|
"logits/chosen": -2.474686622619629, |
|
"logits/rejected": -2.3782389163970947, |
|
"logps/chosen": -324.77459716796875, |
|
"logps/rejected": -299.6976623535156, |
|
"loss": 0.0394, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.9665130376815796, |
|
"rewards/margins": 6.133960723876953, |
|
"rewards/rejected": -7.100473880767822, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.691542288557214e-07, |
|
"logits/chosen": -2.4789416790008545, |
|
"logits/rejected": -2.4072928428649902, |
|
"logps/chosen": -298.6253356933594, |
|
"logps/rejected": -285.716064453125, |
|
"loss": 0.0399, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -1.0751765966415405, |
|
"rewards/margins": 5.9020466804504395, |
|
"rewards/rejected": -6.9772233963012695, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.6724071947952544e-07, |
|
"logits/chosen": -2.4440250396728516, |
|
"logits/rejected": -2.360992670059204, |
|
"logps/chosen": -285.7022705078125, |
|
"logps/rejected": -315.54937744140625, |
|
"loss": 0.0295, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -1.0514458417892456, |
|
"rewards/margins": 6.273679733276367, |
|
"rewards/rejected": -7.325125694274902, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.653272101033295e-07, |
|
"logits/chosen": -2.4839930534362793, |
|
"logits/rejected": -2.404003620147705, |
|
"logps/chosen": -268.47308349609375, |
|
"logps/rejected": -310.1471252441406, |
|
"loss": 0.0317, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -1.2032978534698486, |
|
"rewards/margins": 5.9852614402771, |
|
"rewards/rejected": -7.188559532165527, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.6341370072713356e-07, |
|
"logits/chosen": -2.52724289894104, |
|
"logits/rejected": -2.4334981441497803, |
|
"logps/chosen": -312.8106689453125, |
|
"logps/rejected": -323.5155334472656, |
|
"loss": 0.0278, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.9176046252250671, |
|
"rewards/margins": 6.5511651039123535, |
|
"rewards/rejected": -7.4687700271606445, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.6150019135093762e-07, |
|
"logits/chosen": -2.4501614570617676, |
|
"logits/rejected": -2.4063010215759277, |
|
"logps/chosen": -260.9747314453125, |
|
"logps/rejected": -286.7646179199219, |
|
"loss": 0.0239, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -1.3017902374267578, |
|
"rewards/margins": 6.399510860443115, |
|
"rewards/rejected": -7.701300144195557, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.5958668197474169e-07, |
|
"logits/chosen": -2.491021156311035, |
|
"logits/rejected": -2.484483480453491, |
|
"logps/chosen": -273.3662109375, |
|
"logps/rejected": -292.72528076171875, |
|
"loss": 0.0237, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -1.281099557876587, |
|
"rewards/margins": 6.190659523010254, |
|
"rewards/rejected": -7.471758842468262, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.5767317259854572e-07, |
|
"logits/chosen": -2.4878249168395996, |
|
"logits/rejected": -2.434704542160034, |
|
"logps/chosen": -300.8520812988281, |
|
"logps/rejected": -315.6512756347656, |
|
"loss": 0.0222, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3595197200775146, |
|
"rewards/margins": 6.689205169677734, |
|
"rewards/rejected": -8.048724174499512, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.5575966322234978e-07, |
|
"logits/chosen": -2.46006441116333, |
|
"logits/rejected": -2.346818447113037, |
|
"logps/chosen": -302.2090759277344, |
|
"logps/rejected": -295.3673095703125, |
|
"loss": 0.0235, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.1756342649459839, |
|
"rewards/margins": 6.565530300140381, |
|
"rewards/rejected": -7.7411651611328125, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.5384615384615385e-07, |
|
"logits/chosen": -2.456608295440674, |
|
"logits/rejected": -2.355076313018799, |
|
"logps/chosen": -282.78973388671875, |
|
"logps/rejected": -310.10906982421875, |
|
"loss": 0.0135, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.484881043434143, |
|
"rewards/margins": 7.286185264587402, |
|
"rewards/rejected": -8.771065711975098, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"eval_logits/chosen": -2.5039713382720947, |
|
"eval_logits/rejected": -2.4052250385284424, |
|
"eval_logps/chosen": -305.83544921875, |
|
"eval_logps/rejected": -293.5166320800781, |
|
"eval_loss": 0.6182354688644409, |
|
"eval_rewards/accuracies": 0.7639999985694885, |
|
"eval_rewards/chosen": -3.198823928833008, |
|
"eval_rewards/margins": 3.186070442199707, |
|
"eval_rewards/rejected": -6.384894371032715, |
|
"eval_runtime": 330.4934, |
|
"eval_samples_per_second": 6.052, |
|
"eval_steps_per_second": 0.378, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.519326444699579e-07, |
|
"logits/chosen": -2.447910785675049, |
|
"logits/rejected": -2.3610103130340576, |
|
"logps/chosen": -282.10418701171875, |
|
"logps/rejected": -316.7162780761719, |
|
"loss": 0.0157, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -1.4366261959075928, |
|
"rewards/margins": 7.56749963760376, |
|
"rewards/rejected": -9.004125595092773, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.5001913509376197e-07, |
|
"logits/chosen": -2.4108386039733887, |
|
"logits/rejected": -2.3353419303894043, |
|
"logps/chosen": -258.7259826660156, |
|
"logps/rejected": -306.69049072265625, |
|
"loss": 0.0193, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -1.2959132194519043, |
|
"rewards/margins": 7.697202205657959, |
|
"rewards/rejected": -8.993115425109863, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.4810562571756603e-07, |
|
"logits/chosen": -2.437985897064209, |
|
"logits/rejected": -2.352027416229248, |
|
"logps/chosen": -304.39508056640625, |
|
"logps/rejected": -358.80987548828125, |
|
"loss": 0.0139, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.8380666971206665, |
|
"rewards/margins": 8.16486930847168, |
|
"rewards/rejected": -10.002935409545898, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 1.4619211634137007e-07, |
|
"logits/chosen": -2.495406150817871, |
|
"logits/rejected": -2.417428970336914, |
|
"logps/chosen": -327.63592529296875, |
|
"logps/rejected": -334.3495788574219, |
|
"loss": 0.0151, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -1.7903884649276733, |
|
"rewards/margins": 7.90988302230835, |
|
"rewards/rejected": -9.700272560119629, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.4427860696517413e-07, |
|
"logits/chosen": -2.4637489318847656, |
|
"logits/rejected": -2.3969979286193848, |
|
"logps/chosen": -277.05206298828125, |
|
"logps/rejected": -316.2105712890625, |
|
"loss": 0.0135, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -1.583605170249939, |
|
"rewards/margins": 8.034161567687988, |
|
"rewards/rejected": -9.617767333984375, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.423650975889782e-07, |
|
"logits/chosen": -2.4575247764587402, |
|
"logits/rejected": -2.3824923038482666, |
|
"logps/chosen": -284.96319580078125, |
|
"logps/rejected": -330.1680908203125, |
|
"loss": 0.0118, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -1.8364570140838623, |
|
"rewards/margins": 8.434205055236816, |
|
"rewards/rejected": -10.270662307739258, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.4045158821278225e-07, |
|
"logits/chosen": -2.43703031539917, |
|
"logits/rejected": -2.3671228885650635, |
|
"logps/chosen": -336.13751220703125, |
|
"logps/rejected": -354.32635498046875, |
|
"loss": 0.0176, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -1.1960761547088623, |
|
"rewards/margins": 8.524799346923828, |
|
"rewards/rejected": -9.720874786376953, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.3853807883658632e-07, |
|
"logits/chosen": -2.3543310165405273, |
|
"logits/rejected": -2.3394277095794678, |
|
"logps/chosen": -272.6856384277344, |
|
"logps/rejected": -339.7895812988281, |
|
"loss": 0.0073, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.8999465703964233, |
|
"rewards/margins": 8.533658981323242, |
|
"rewards/rejected": -10.433606147766113, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.3662456946039035e-07, |
|
"logits/chosen": -2.386059522628784, |
|
"logits/rejected": -2.2526228427886963, |
|
"logps/chosen": -311.24481201171875, |
|
"logps/rejected": -337.0218811035156, |
|
"loss": 0.0138, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3375542163848877, |
|
"rewards/margins": 9.32975959777832, |
|
"rewards/rejected": -10.667314529418945, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.3471106008419441e-07, |
|
"logits/chosen": -2.4342336654663086, |
|
"logits/rejected": -2.3418126106262207, |
|
"logps/chosen": -309.2164611816406, |
|
"logps/rejected": -374.22576904296875, |
|
"loss": 0.0149, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.8751029968261719, |
|
"rewards/margins": 9.406895637512207, |
|
"rewards/rejected": -11.281997680664062, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"eval_logits/chosen": -2.4494409561157227, |
|
"eval_logits/rejected": -2.353502035140991, |
|
"eval_logps/chosen": -319.8072204589844, |
|
"eval_logps/rejected": -311.62286376953125, |
|
"eval_loss": 0.7074651122093201, |
|
"eval_rewards/accuracies": 0.7419999837875366, |
|
"eval_rewards/chosen": -4.59600305557251, |
|
"eval_rewards/margins": 3.599517345428467, |
|
"eval_rewards/rejected": -8.195520401000977, |
|
"eval_runtime": 330.3156, |
|
"eval_samples_per_second": 6.055, |
|
"eval_steps_per_second": 0.378, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.3279755070799848e-07, |
|
"logits/chosen": -2.337085247039795, |
|
"logits/rejected": -2.296482563018799, |
|
"logps/chosen": -290.27789306640625, |
|
"logps/rejected": -344.79730224609375, |
|
"loss": 0.0096, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.79486083984375, |
|
"rewards/margins": 9.907821655273438, |
|
"rewards/rejected": -11.702681541442871, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.3088404133180254e-07, |
|
"logits/chosen": -2.3764395713806152, |
|
"logits/rejected": -2.3238234519958496, |
|
"logps/chosen": -271.51763916015625, |
|
"logps/rejected": -333.38580322265625, |
|
"loss": 0.0058, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.068006992340088, |
|
"rewards/margins": 9.185879707336426, |
|
"rewards/rejected": -11.253885269165039, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.289705319556066e-07, |
|
"logits/chosen": -2.3385708332061768, |
|
"logits/rejected": -2.2395923137664795, |
|
"logps/chosen": -283.2237854003906, |
|
"logps/rejected": -340.7456970214844, |
|
"loss": 0.0137, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -1.4410890340805054, |
|
"rewards/margins": 9.802583694458008, |
|
"rewards/rejected": -11.243673324584961, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.2705702257941064e-07, |
|
"logits/chosen": -2.3051886558532715, |
|
"logits/rejected": -2.232696771621704, |
|
"logps/chosen": -284.79730224609375, |
|
"logps/rejected": -361.62896728515625, |
|
"loss": 0.0135, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -1.2478265762329102, |
|
"rewards/margins": 9.604982376098633, |
|
"rewards/rejected": -10.85280990600586, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.251435132032147e-07, |
|
"logits/chosen": -2.236691474914551, |
|
"logits/rejected": -2.1974494457244873, |
|
"logps/chosen": -269.29443359375, |
|
"logps/rejected": -338.17822265625, |
|
"loss": 0.0139, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.1111952066421509, |
|
"rewards/margins": 9.45576000213623, |
|
"rewards/rejected": -10.566953659057617, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.2323000382701873e-07, |
|
"logits/chosen": -2.350980520248413, |
|
"logits/rejected": -2.2944483757019043, |
|
"logps/chosen": -288.3419494628906, |
|
"logps/rejected": -323.5022277832031, |
|
"loss": 0.0118, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3432337045669556, |
|
"rewards/margins": 9.604742050170898, |
|
"rewards/rejected": -10.947977066040039, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.213164944508228e-07, |
|
"logits/chosen": -2.3275721073150635, |
|
"logits/rejected": -2.25993013381958, |
|
"logps/chosen": -260.2298889160156, |
|
"logps/rejected": -314.8516845703125, |
|
"loss": 0.0103, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -1.1889066696166992, |
|
"rewards/margins": 8.959939956665039, |
|
"rewards/rejected": -10.148846626281738, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.1940298507462686e-07, |
|
"logits/chosen": -2.3323237895965576, |
|
"logits/rejected": -2.2373764514923096, |
|
"logps/chosen": -270.145751953125, |
|
"logps/rejected": -313.8666076660156, |
|
"loss": 0.0115, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.1815932989120483, |
|
"rewards/margins": 9.265039443969727, |
|
"rewards/rejected": -10.446632385253906, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 1.1748947569843092e-07, |
|
"logits/chosen": -2.3373894691467285, |
|
"logits/rejected": -2.2577757835388184, |
|
"logps/chosen": -290.20556640625, |
|
"logps/rejected": -351.8134765625, |
|
"loss": 0.0088, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -1.011973261833191, |
|
"rewards/margins": 9.961956977844238, |
|
"rewards/rejected": -10.973930358886719, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.1557596632223497e-07, |
|
"logits/chosen": -2.344902515411377, |
|
"logits/rejected": -2.2504470348358154, |
|
"logps/chosen": -266.8149719238281, |
|
"logps/rejected": -353.9951477050781, |
|
"loss": 0.0095, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.1190001964569092, |
|
"rewards/margins": 10.073897361755371, |
|
"rewards/rejected": -11.192898750305176, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"eval_logits/chosen": -2.397207736968994, |
|
"eval_logits/rejected": -2.2942862510681152, |
|
"eval_logps/chosen": -315.9493408203125, |
|
"eval_logps/rejected": -307.45587158203125, |
|
"eval_loss": 0.7116624712944031, |
|
"eval_rewards/accuracies": 0.7540000081062317, |
|
"eval_rewards/chosen": -4.210216999053955, |
|
"eval_rewards/margins": 3.5686044692993164, |
|
"eval_rewards/rejected": -7.778822422027588, |
|
"eval_runtime": 329.6201, |
|
"eval_samples_per_second": 6.068, |
|
"eval_steps_per_second": 0.379, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.1366245694603903e-07, |
|
"logits/chosen": -2.30245304107666, |
|
"logits/rejected": -2.233797073364258, |
|
"logps/chosen": -265.81402587890625, |
|
"logps/rejected": -341.81341552734375, |
|
"loss": 0.0119, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.4100592136383057, |
|
"rewards/margins": 10.045531272888184, |
|
"rewards/rejected": -11.455589294433594, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.1174894756984308e-07, |
|
"logits/chosen": -2.3736674785614014, |
|
"logits/rejected": -2.2465882301330566, |
|
"logps/chosen": -302.5309143066406, |
|
"logps/rejected": -323.94378662109375, |
|
"loss": 0.0054, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3777319192886353, |
|
"rewards/margins": 9.126462936401367, |
|
"rewards/rejected": -10.504194259643555, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 1.0983543819364714e-07, |
|
"logits/chosen": -2.2658305168151855, |
|
"logits/rejected": -2.225450038909912, |
|
"logps/chosen": -304.5056457519531, |
|
"logps/rejected": -348.7767639160156, |
|
"loss": 0.0114, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -1.0617547035217285, |
|
"rewards/margins": 9.54466724395752, |
|
"rewards/rejected": -10.60642147064209, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.079219288174512e-07, |
|
"logits/chosen": -2.2534327507019043, |
|
"logits/rejected": -2.2105424404144287, |
|
"logps/chosen": -306.9606018066406, |
|
"logps/rejected": -354.7109375, |
|
"loss": 0.0128, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.238394021987915, |
|
"rewards/margins": 9.64238166809082, |
|
"rewards/rejected": -10.880776405334473, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 1.0600841944125525e-07, |
|
"logits/chosen": -2.3249001502990723, |
|
"logits/rejected": -2.2780890464782715, |
|
"logps/chosen": -268.6153564453125, |
|
"logps/rejected": -346.42047119140625, |
|
"loss": 0.0061, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.2436628341674805, |
|
"rewards/margins": 9.205244064331055, |
|
"rewards/rejected": -10.448905944824219, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.0409491006505931e-07, |
|
"logits/chosen": -2.3749475479125977, |
|
"logits/rejected": -2.277677059173584, |
|
"logps/chosen": -300.11895751953125, |
|
"logps/rejected": -348.1705017089844, |
|
"loss": 0.0061, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -1.0702159404754639, |
|
"rewards/margins": 9.171914100646973, |
|
"rewards/rejected": -10.2421293258667, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.0218140068886336e-07, |
|
"logits/chosen": -2.2845263481140137, |
|
"logits/rejected": -2.197556972503662, |
|
"logps/chosen": -277.10174560546875, |
|
"logps/rejected": -324.47216796875, |
|
"loss": 0.01, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -1.2265087366104126, |
|
"rewards/margins": 9.168752670288086, |
|
"rewards/rejected": -10.395261764526367, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 1.0026789131266743e-07, |
|
"logits/chosen": -2.3598082065582275, |
|
"logits/rejected": -2.265746593475342, |
|
"logps/chosen": -294.85296630859375, |
|
"logps/rejected": -352.6213073730469, |
|
"loss": 0.0095, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.5622330904006958, |
|
"rewards/margins": 9.054221153259277, |
|
"rewards/rejected": -10.616453170776367, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 9.835438193647149e-08, |
|
"logits/chosen": -2.3179988861083984, |
|
"logits/rejected": -2.2776644229888916, |
|
"logps/chosen": -301.02130126953125, |
|
"logps/rejected": -350.03765869140625, |
|
"loss": 0.0076, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.5659319162368774, |
|
"rewards/margins": 9.565340995788574, |
|
"rewards/rejected": -11.13127326965332, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 9.644087256027554e-08, |
|
"logits/chosen": -2.3582870960235596, |
|
"logits/rejected": -2.267897367477417, |
|
"logps/chosen": -298.76849365234375, |
|
"logps/rejected": -371.87237548828125, |
|
"loss": 0.0104, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -1.6058155298233032, |
|
"rewards/margins": 9.32790470123291, |
|
"rewards/rejected": -10.93371868133545, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"eval_logits/chosen": -2.409748077392578, |
|
"eval_logits/rejected": -2.3097164630889893, |
|
"eval_logps/chosen": -317.2179870605469, |
|
"eval_logps/rejected": -308.91986083984375, |
|
"eval_loss": 0.7131382822990417, |
|
"eval_rewards/accuracies": 0.7540000081062317, |
|
"eval_rewards/chosen": -4.337080955505371, |
|
"eval_rewards/margins": 3.5881383419036865, |
|
"eval_rewards/rejected": -7.92521858215332, |
|
"eval_runtime": 330.8382, |
|
"eval_samples_per_second": 6.045, |
|
"eval_steps_per_second": 0.378, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 9.45273631840796e-08, |
|
"logits/chosen": -2.34238862991333, |
|
"logits/rejected": -2.2983975410461426, |
|
"logps/chosen": -308.1969299316406, |
|
"logps/rejected": -335.4678955078125, |
|
"loss": 0.008, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -1.8870359659194946, |
|
"rewards/margins": 9.149595260620117, |
|
"rewards/rejected": -11.03663158416748, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 9.261385380788366e-08, |
|
"logits/chosen": -2.4131252765655518, |
|
"logits/rejected": -2.2457823753356934, |
|
"logps/chosen": -327.6688537597656, |
|
"logps/rejected": -347.8465576171875, |
|
"loss": 0.0113, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.6184027194976807, |
|
"rewards/margins": 9.20851993560791, |
|
"rewards/rejected": -10.826921463012695, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 9.070034443168771e-08, |
|
"logits/chosen": -2.3476271629333496, |
|
"logits/rejected": -2.217395067214966, |
|
"logps/chosen": -293.73785400390625, |
|
"logps/rejected": -336.8843994140625, |
|
"loss": 0.0077, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -2.0833864212036133, |
|
"rewards/margins": 9.448251724243164, |
|
"rewards/rejected": -11.531637191772461, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 8.878683505549177e-08, |
|
"logits/chosen": -2.3457190990448, |
|
"logits/rejected": -2.294241428375244, |
|
"logps/chosen": -289.23876953125, |
|
"logps/rejected": -358.06390380859375, |
|
"loss": 0.012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.4211084842681885, |
|
"rewards/margins": 9.614474296569824, |
|
"rewards/rejected": -11.035581588745117, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 8.687332567929582e-08, |
|
"logits/chosen": -2.321498394012451, |
|
"logits/rejected": -2.2159245014190674, |
|
"logps/chosen": -276.77105712890625, |
|
"logps/rejected": -323.2955627441406, |
|
"loss": 0.0122, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -1.7652084827423096, |
|
"rewards/margins": 8.866913795471191, |
|
"rewards/rejected": -10.632121086120605, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 8.495981630309988e-08, |
|
"logits/chosen": -2.3643240928649902, |
|
"logits/rejected": -2.2679874897003174, |
|
"logps/chosen": -282.6573181152344, |
|
"logps/rejected": -344.65411376953125, |
|
"loss": 0.0067, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.9658873081207275, |
|
"rewards/margins": 9.22823715209961, |
|
"rewards/rejected": -11.194124221801758, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 8.304630692690395e-08, |
|
"logits/chosen": -2.252459764480591, |
|
"logits/rejected": -2.1973116397857666, |
|
"logps/chosen": -299.83819580078125, |
|
"logps/rejected": -377.33172607421875, |
|
"loss": 0.0138, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -1.9341270923614502, |
|
"rewards/margins": 9.789133071899414, |
|
"rewards/rejected": -11.723260879516602, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 8.1132797550708e-08, |
|
"logits/chosen": -2.3481366634368896, |
|
"logits/rejected": -2.285489320755005, |
|
"logps/chosen": -295.62939453125, |
|
"logps/rejected": -349.37347412109375, |
|
"loss": 0.0124, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.7781429290771484, |
|
"rewards/margins": 9.523946762084961, |
|
"rewards/rejected": -11.302090644836426, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 7.921928817451206e-08, |
|
"logits/chosen": -2.2330024242401123, |
|
"logits/rejected": -2.1658129692077637, |
|
"logps/chosen": -266.7092590332031, |
|
"logps/rejected": -324.52203369140625, |
|
"loss": 0.0034, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.702344298362732, |
|
"rewards/margins": 9.572081565856934, |
|
"rewards/rejected": -11.274426460266113, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 7.73057787983161e-08, |
|
"logits/chosen": -2.310683488845825, |
|
"logits/rejected": -2.223453998565674, |
|
"logps/chosen": -284.54583740234375, |
|
"logps/rejected": -350.00469970703125, |
|
"loss": 0.008, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -1.8044601678848267, |
|
"rewards/margins": 9.359917640686035, |
|
"rewards/rejected": -11.164377212524414, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"eval_logits/chosen": -2.376404047012329, |
|
"eval_logits/rejected": -2.2755658626556396, |
|
"eval_logps/chosen": -318.2084045410156, |
|
"eval_logps/rejected": -311.3636474609375, |
|
"eval_loss": 0.7328027486801147, |
|
"eval_rewards/accuracies": 0.7519999742507935, |
|
"eval_rewards/chosen": -4.436122894287109, |
|
"eval_rewards/margins": 3.7334771156311035, |
|
"eval_rewards/rejected": -8.169599533081055, |
|
"eval_runtime": 330.2894, |
|
"eval_samples_per_second": 6.055, |
|
"eval_steps_per_second": 0.378, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 7.539226942212017e-08, |
|
"logits/chosen": -2.2334113121032715, |
|
"logits/rejected": -2.171754837036133, |
|
"logps/chosen": -278.3266906738281, |
|
"logps/rejected": -326.1250915527344, |
|
"loss": 0.0105, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -1.8271163702011108, |
|
"rewards/margins": 9.274404525756836, |
|
"rewards/rejected": -11.101519584655762, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 7.347876004592423e-08, |
|
"logits/chosen": -2.2726237773895264, |
|
"logits/rejected": -2.2070040702819824, |
|
"logps/chosen": -294.99591064453125, |
|
"logps/rejected": -365.6657409667969, |
|
"loss": 0.0075, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.223010778427124, |
|
"rewards/margins": 9.6091890335083, |
|
"rewards/rejected": -11.832199096679688, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 7.156525066972828e-08, |
|
"logits/chosen": -2.232114553451538, |
|
"logits/rejected": -2.1668314933776855, |
|
"logps/chosen": -302.2503662109375, |
|
"logps/rejected": -340.13568115234375, |
|
"loss": 0.0112, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -1.7910659313201904, |
|
"rewards/margins": 9.791200637817383, |
|
"rewards/rejected": -11.582267761230469, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 6.965174129353234e-08, |
|
"logits/chosen": -2.3309519290924072, |
|
"logits/rejected": -2.1995091438293457, |
|
"logps/chosen": -290.02789306640625, |
|
"logps/rejected": -338.7962341308594, |
|
"loss": 0.0089, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.841522216796875, |
|
"rewards/margins": 9.228216171264648, |
|
"rewards/rejected": -11.069738388061523, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 6.773823191733639e-08, |
|
"logits/chosen": -2.3205971717834473, |
|
"logits/rejected": -2.2115180492401123, |
|
"logps/chosen": -326.6658020019531, |
|
"logps/rejected": -337.70684814453125, |
|
"loss": 0.0066, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.852582573890686, |
|
"rewards/margins": 9.197256088256836, |
|
"rewards/rejected": -11.049838066101074, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 6.582472254114045e-08, |
|
"logits/chosen": -2.294541835784912, |
|
"logits/rejected": -2.1916394233703613, |
|
"logps/chosen": -262.17401123046875, |
|
"logps/rejected": -323.60498046875, |
|
"loss": 0.0139, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.84153151512146, |
|
"rewards/margins": 9.305184364318848, |
|
"rewards/rejected": -11.14671516418457, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 6.391121316494451e-08, |
|
"logits/chosen": -2.3827064037323, |
|
"logits/rejected": -2.2606282234191895, |
|
"logps/chosen": -295.3243408203125, |
|
"logps/rejected": -343.00860595703125, |
|
"loss": 0.0082, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -1.9150327444076538, |
|
"rewards/margins": 9.581887245178223, |
|
"rewards/rejected": -11.496919631958008, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 6.199770378874856e-08, |
|
"logits/chosen": -2.2721750736236572, |
|
"logits/rejected": -2.233607292175293, |
|
"logps/chosen": -286.4488220214844, |
|
"logps/rejected": -361.966552734375, |
|
"loss": 0.01, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.6838099956512451, |
|
"rewards/margins": 10.286967277526855, |
|
"rewards/rejected": -11.97077751159668, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 6.008419441255262e-08, |
|
"logits/chosen": -2.2622904777526855, |
|
"logits/rejected": -2.212773084640503, |
|
"logps/chosen": -274.4309997558594, |
|
"logps/rejected": -340.4100646972656, |
|
"loss": 0.0179, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.554031252861023, |
|
"rewards/margins": 9.818443298339844, |
|
"rewards/rejected": -11.372475624084473, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 5.817068503635668e-08, |
|
"logits/chosen": -2.3351638317108154, |
|
"logits/rejected": -2.226699113845825, |
|
"logps/chosen": -269.5257568359375, |
|
"logps/rejected": -310.54815673828125, |
|
"loss": 0.0051, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.2515082359313965, |
|
"rewards/margins": 8.843351364135742, |
|
"rewards/rejected": -11.094860076904297, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"eval_logits/chosen": -2.418520927429199, |
|
"eval_logits/rejected": -2.3138442039489746, |
|
"eval_logps/chosen": -316.7310791015625, |
|
"eval_logps/rejected": -309.56011962890625, |
|
"eval_loss": 0.7192761898040771, |
|
"eval_rewards/accuracies": 0.7599999904632568, |
|
"eval_rewards/chosen": -4.288391590118408, |
|
"eval_rewards/margins": 3.700854778289795, |
|
"eval_rewards/rejected": -7.989245891571045, |
|
"eval_runtime": 330.2015, |
|
"eval_samples_per_second": 6.057, |
|
"eval_steps_per_second": 0.379, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.6257175660160735e-08, |
|
"logits/chosen": -2.3566582202911377, |
|
"logits/rejected": -2.2767062187194824, |
|
"logps/chosen": -286.93414306640625, |
|
"logps/rejected": -343.0436706542969, |
|
"loss": 0.0041, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -1.3818962574005127, |
|
"rewards/margins": 9.171338081359863, |
|
"rewards/rejected": -10.553235054016113, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 5.4343666283964784e-08, |
|
"logits/chosen": -2.4049510955810547, |
|
"logits/rejected": -2.256659746170044, |
|
"logps/chosen": -307.3303527832031, |
|
"logps/rejected": -365.7108459472656, |
|
"loss": 0.0107, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -1.3092598915100098, |
|
"rewards/margins": 9.412524223327637, |
|
"rewards/rejected": -10.721784591674805, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 5.243015690776884e-08, |
|
"logits/chosen": -2.4375126361846924, |
|
"logits/rejected": -2.30391001701355, |
|
"logps/chosen": -343.348388671875, |
|
"logps/rejected": -355.0252990722656, |
|
"loss": 0.0072, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -1.578098177909851, |
|
"rewards/margins": 9.311563491821289, |
|
"rewards/rejected": -10.88966178894043, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 5.05166475315729e-08, |
|
"logits/chosen": -2.3526530265808105, |
|
"logits/rejected": -2.2642931938171387, |
|
"logps/chosen": -269.9921875, |
|
"logps/rejected": -338.1278381347656, |
|
"loss": 0.0048, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -1.4594029188156128, |
|
"rewards/margins": 9.540541648864746, |
|
"rewards/rejected": -10.999944686889648, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 4.860313815537696e-08, |
|
"logits/chosen": -2.3303122520446777, |
|
"logits/rejected": -2.2322537899017334, |
|
"logps/chosen": -304.40924072265625, |
|
"logps/rejected": -339.898681640625, |
|
"loss": 0.0102, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.252328395843506, |
|
"rewards/margins": 9.010406494140625, |
|
"rewards/rejected": -11.262735366821289, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.668962877918101e-08, |
|
"logits/chosen": -2.300126552581787, |
|
"logits/rejected": -2.1978871822357178, |
|
"logps/chosen": -281.3654479980469, |
|
"logps/rejected": -307.08197021484375, |
|
"loss": 0.0129, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -2.039625644683838, |
|
"rewards/margins": 8.738824844360352, |
|
"rewards/rejected": -10.778450012207031, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 4.477611940298507e-08, |
|
"logits/chosen": -2.3153433799743652, |
|
"logits/rejected": -2.2383222579956055, |
|
"logps/chosen": -313.5261535644531, |
|
"logps/rejected": -362.248779296875, |
|
"loss": 0.0111, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -2.031290054321289, |
|
"rewards/margins": 9.838370323181152, |
|
"rewards/rejected": -11.869660377502441, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 4.2862610026789124e-08, |
|
"logits/chosen": -2.3045172691345215, |
|
"logits/rejected": -2.2299160957336426, |
|
"logps/chosen": -275.14276123046875, |
|
"logps/rejected": -345.14617919921875, |
|
"loss": 0.0074, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.9210354089736938, |
|
"rewards/margins": 9.726446151733398, |
|
"rewards/rejected": -11.647480964660645, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 4.0949100650593186e-08, |
|
"logits/chosen": -2.360119104385376, |
|
"logits/rejected": -2.2811567783355713, |
|
"logps/chosen": -291.0294494628906, |
|
"logps/rejected": -362.3855285644531, |
|
"loss": 0.009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.178312063217163, |
|
"rewards/margins": 9.444883346557617, |
|
"rewards/rejected": -11.623196601867676, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.903559127439724e-08, |
|
"logits/chosen": -2.2991082668304443, |
|
"logits/rejected": -2.2151143550872803, |
|
"logps/chosen": -280.21710205078125, |
|
"logps/rejected": -339.8370056152344, |
|
"loss": 0.0089, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -2.628218173980713, |
|
"rewards/margins": 10.084501266479492, |
|
"rewards/rejected": -12.712719917297363, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"eval_logits/chosen": -2.395979881286621, |
|
"eval_logits/rejected": -2.2942333221435547, |
|
"eval_logps/chosen": -322.83795166015625, |
|
"eval_logps/rejected": -316.2196044921875, |
|
"eval_loss": 0.738807737827301, |
|
"eval_rewards/accuracies": 0.765999972820282, |
|
"eval_rewards/chosen": -4.899077892303467, |
|
"eval_rewards/margins": 3.7561144828796387, |
|
"eval_rewards/rejected": -8.655191421508789, |
|
"eval_runtime": 330.5088, |
|
"eval_samples_per_second": 6.051, |
|
"eval_steps_per_second": 0.378, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.71220818982013e-08, |
|
"logits/chosen": -2.360029935836792, |
|
"logits/rejected": -2.322143077850342, |
|
"logps/chosen": -303.7354736328125, |
|
"logps/rejected": -367.031005859375, |
|
"loss": 0.0054, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.0700087547302246, |
|
"rewards/margins": 9.75294017791748, |
|
"rewards/rejected": -11.822949409484863, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3.520857252200535e-08, |
|
"logits/chosen": -2.272301197052002, |
|
"logits/rejected": -2.2256054878234863, |
|
"logps/chosen": -273.448486328125, |
|
"logps/rejected": -336.57305908203125, |
|
"loss": 0.0366, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -2.192540168762207, |
|
"rewards/margins": 9.447149276733398, |
|
"rewards/rejected": -11.639688491821289, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 3.3295063145809414e-08, |
|
"logits/chosen": -2.3102471828460693, |
|
"logits/rejected": -2.2527964115142822, |
|
"logps/chosen": -293.8779602050781, |
|
"logps/rejected": -362.58563232421875, |
|
"loss": 0.0113, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -2.485677719116211, |
|
"rewards/margins": 9.276286125183105, |
|
"rewards/rejected": -11.761963844299316, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 3.138155376961347e-08, |
|
"logits/chosen": -2.342339038848877, |
|
"logits/rejected": -2.276078462600708, |
|
"logps/chosen": -300.9256286621094, |
|
"logps/rejected": -342.9737243652344, |
|
"loss": 0.0071, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.9704710245132446, |
|
"rewards/margins": 9.322904586791992, |
|
"rewards/rejected": -11.293375015258789, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.9468044393417525e-08, |
|
"logits/chosen": -2.323369264602661, |
|
"logits/rejected": -2.2281460762023926, |
|
"logps/chosen": -298.3914794921875, |
|
"logps/rejected": -334.6033020019531, |
|
"loss": 0.0077, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.0311081409454346, |
|
"rewards/margins": 9.329480171203613, |
|
"rewards/rejected": -11.360588073730469, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.755453501722158e-08, |
|
"logits/chosen": -2.251509189605713, |
|
"logits/rejected": -2.2324776649475098, |
|
"logps/chosen": -290.03106689453125, |
|
"logps/rejected": -339.38238525390625, |
|
"loss": 0.0105, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.435882091522217, |
|
"rewards/margins": 9.06352710723877, |
|
"rewards/rejected": -11.499407768249512, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.564102564102564e-08, |
|
"logits/chosen": -2.2469379901885986, |
|
"logits/rejected": -2.1791176795959473, |
|
"logps/chosen": -287.87298583984375, |
|
"logps/rejected": -333.5580139160156, |
|
"loss": 0.0113, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -2.3411865234375, |
|
"rewards/margins": 9.410177230834961, |
|
"rewards/rejected": -11.751363754272461, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.3727516264829695e-08, |
|
"logits/chosen": -2.291459560394287, |
|
"logits/rejected": -2.226825475692749, |
|
"logps/chosen": -268.06280517578125, |
|
"logps/rejected": -356.3489990234375, |
|
"loss": 0.0061, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -2.386735677719116, |
|
"rewards/margins": 9.646692276000977, |
|
"rewards/rejected": -12.033427238464355, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.1814006888633754e-08, |
|
"logits/chosen": -2.290907382965088, |
|
"logits/rejected": -2.2021796703338623, |
|
"logps/chosen": -306.6482849121094, |
|
"logps/rejected": -370.08038330078125, |
|
"loss": 0.005, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -2.622554302215576, |
|
"rewards/margins": 9.027674674987793, |
|
"rewards/rejected": -11.650228500366211, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.990049751243781e-08, |
|
"logits/chosen": -2.3071415424346924, |
|
"logits/rejected": -2.198819637298584, |
|
"logps/chosen": -302.01904296875, |
|
"logps/rejected": -338.0803527832031, |
|
"loss": 0.0082, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.463911294937134, |
|
"rewards/margins": 9.179804801940918, |
|
"rewards/rejected": -11.643716812133789, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"eval_logits/chosen": -2.364900588989258, |
|
"eval_logits/rejected": -2.2619574069976807, |
|
"eval_logps/chosen": -321.83087158203125, |
|
"eval_logps/rejected": -316.2637939453125, |
|
"eval_loss": 0.7342348694801331, |
|
"eval_rewards/accuracies": 0.7639999985694885, |
|
"eval_rewards/chosen": -4.798369407653809, |
|
"eval_rewards/margins": 3.8612406253814697, |
|
"eval_rewards/rejected": -8.659610748291016, |
|
"eval_runtime": 330.2098, |
|
"eval_samples_per_second": 6.057, |
|
"eval_steps_per_second": 0.379, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.7986988136241865e-08, |
|
"logits/chosen": -2.27158784866333, |
|
"logits/rejected": -2.264709949493408, |
|
"logps/chosen": -289.5348205566406, |
|
"logps/rejected": -370.21295166015625, |
|
"loss": 0.0065, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -2.4264960289001465, |
|
"rewards/margins": 8.935715675354004, |
|
"rewards/rejected": -11.362211227416992, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.6073478760045924e-08, |
|
"logits/chosen": -2.2701802253723145, |
|
"logits/rejected": -2.182992458343506, |
|
"logps/chosen": -302.4941101074219, |
|
"logps/rejected": -356.8041687011719, |
|
"loss": 0.0059, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -2.402383804321289, |
|
"rewards/margins": 8.9962739944458, |
|
"rewards/rejected": -11.398656845092773, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.4159969383849981e-08, |
|
"logits/chosen": -2.266104221343994, |
|
"logits/rejected": -2.209751605987549, |
|
"logps/chosen": -311.89373779296875, |
|
"logps/rejected": -364.10321044921875, |
|
"loss": 0.0079, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -1.8278541564941406, |
|
"rewards/margins": 9.237452507019043, |
|
"rewards/rejected": -11.06530475616455, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.2246460007654037e-08, |
|
"logits/chosen": -2.2711033821105957, |
|
"logits/rejected": -2.213872194290161, |
|
"logps/chosen": -296.39361572265625, |
|
"logps/rejected": -334.15985107421875, |
|
"loss": 0.0069, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.3090121746063232, |
|
"rewards/margins": 9.236477851867676, |
|
"rewards/rejected": -11.545488357543945, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.0332950631458094e-08, |
|
"logits/chosen": -2.3495352268218994, |
|
"logits/rejected": -2.28285813331604, |
|
"logps/chosen": -316.610595703125, |
|
"logps/rejected": -394.0874938964844, |
|
"loss": 0.0116, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.016587495803833, |
|
"rewards/margins": 9.51962947845459, |
|
"rewards/rejected": -11.536214828491211, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 8.419441255262151e-09, |
|
"logits/chosen": -2.369030237197876, |
|
"logits/rejected": -2.2844557762145996, |
|
"logps/chosen": -275.51312255859375, |
|
"logps/rejected": -343.92144775390625, |
|
"loss": 0.006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.705946445465088, |
|
"rewards/margins": 8.749194145202637, |
|
"rewards/rejected": -11.455141067504883, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 6.505931879066207e-09, |
|
"logits/chosen": -2.3274118900299072, |
|
"logits/rejected": -2.265143632888794, |
|
"logps/chosen": -333.2660827636719, |
|
"logps/rejected": -350.2222900390625, |
|
"loss": 0.0108, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -2.5637712478637695, |
|
"rewards/margins": 8.87217903137207, |
|
"rewards/rejected": -11.435951232910156, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 4.592422502870264e-09, |
|
"logits/chosen": -2.3058080673217773, |
|
"logits/rejected": -2.24003267288208, |
|
"logps/chosen": -289.5403747558594, |
|
"logps/rejected": -346.329345703125, |
|
"loss": 0.0151, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -2.4935154914855957, |
|
"rewards/margins": 8.715460777282715, |
|
"rewards/rejected": -11.208975791931152, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 2.6789131266743202e-09, |
|
"logits/chosen": -2.305800437927246, |
|
"logits/rejected": -2.183337926864624, |
|
"logps/chosen": -303.07989501953125, |
|
"logps/rejected": -338.078125, |
|
"loss": 0.0067, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.6788885593414307, |
|
"rewards/margins": 8.81675910949707, |
|
"rewards/rejected": -11.495648384094238, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 7.654037504783773e-10, |
|
"logits/chosen": -2.2422897815704346, |
|
"logits/rejected": -2.167163848876953, |
|
"logps/chosen": -308.4017639160156, |
|
"logps/rejected": -364.2879333496094, |
|
"loss": 0.0094, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -2.076625347137451, |
|
"rewards/margins": 9.432092666625977, |
|
"rewards/rejected": -11.50871753692627, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_logits/chosen": -2.3625237941741943, |
|
"eval_logits/rejected": -2.259509325027466, |
|
"eval_logps/chosen": -321.42047119140625, |
|
"eval_logps/rejected": -315.8360595703125, |
|
"eval_loss": 0.7374239563941956, |
|
"eval_rewards/accuracies": 0.7580000162124634, |
|
"eval_rewards/chosen": -4.75732946395874, |
|
"eval_rewards/margins": 3.859508514404297, |
|
"eval_rewards/rejected": -8.616838455200195, |
|
"eval_runtime": 329.7799, |
|
"eval_samples_per_second": 6.065, |
|
"eval_steps_per_second": 0.379, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 2904, |
|
"total_flos": 0.0, |
|
"train_loss": 0.20145473461829064, |
|
"train_runtime": 66730.9037, |
|
"train_samples_per_second": 2.786, |
|
"train_steps_per_second": 0.044 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2904, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|