|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.99968, |
|
"eval_steps": 100, |
|
"global_step": 1562, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 3.184713375796179e-08, |
|
"logits/chosen": -0.9295870065689087, |
|
"logits/rejected": -0.43873703479766846, |
|
"logps/chosen": -320.5160827636719, |
|
"logps/rejected": -293.4969482421875, |
|
"loss": 0.0436, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.1847133757961787e-07, |
|
"logits/chosen": -0.8594987988471985, |
|
"logits/rejected": -0.762122631072998, |
|
"logps/chosen": -363.58154296875, |
|
"logps/rejected": -320.7466735839844, |
|
"loss": 0.1005, |
|
"rewards/accuracies": 0.4444444477558136, |
|
"rewards/chosen": 0.00017958095122594386, |
|
"rewards/margins": 0.00031936122104525566, |
|
"rewards/rejected": -0.0001397802698193118, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6.369426751592357e-07, |
|
"logits/chosen": -0.8261665105819702, |
|
"logits/rejected": -0.8099607229232788, |
|
"logps/chosen": -329.95257568359375, |
|
"logps/rejected": -333.58843994140625, |
|
"loss": 0.0881, |
|
"rewards/accuracies": 0.3812499940395355, |
|
"rewards/chosen": 1.5792587873875163e-05, |
|
"rewards/margins": 9.325530118076131e-05, |
|
"rewards/rejected": -7.746272603981197e-05, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.554140127388537e-07, |
|
"logits/chosen": -0.8844400644302368, |
|
"logits/rejected": -0.8693345189094543, |
|
"logps/chosen": -266.14617919921875, |
|
"logps/rejected": -239.8477020263672, |
|
"loss": 0.1076, |
|
"rewards/accuracies": 0.33125001192092896, |
|
"rewards/chosen": -0.00019076233729720116, |
|
"rewards/margins": -0.00011497503146529198, |
|
"rewards/rejected": -7.578730583190918e-05, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.2738853503184715e-06, |
|
"logits/chosen": -0.9354842305183411, |
|
"logits/rejected": -0.8044538497924805, |
|
"logps/chosen": -337.16436767578125, |
|
"logps/rejected": -294.0509338378906, |
|
"loss": 0.1014, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": -0.0002183823671657592, |
|
"rewards/margins": 0.00015057336713653058, |
|
"rewards/rejected": -0.00036895571975037456, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.5923566878980892e-06, |
|
"logits/chosen": -0.9226251840591431, |
|
"logits/rejected": -0.7269760966300964, |
|
"logps/chosen": -291.8105773925781, |
|
"logps/rejected": -249.9472198486328, |
|
"loss": 0.0984, |
|
"rewards/accuracies": 0.35624998807907104, |
|
"rewards/chosen": -1.6449857866973616e-05, |
|
"rewards/margins": -4.999707016395405e-05, |
|
"rewards/rejected": 3.354722139192745e-05, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9108280254777074e-06, |
|
"logits/chosen": -0.7843996286392212, |
|
"logits/rejected": -0.8503357768058777, |
|
"logps/chosen": -314.31842041015625, |
|
"logps/rejected": -272.9629821777344, |
|
"loss": 0.0831, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.0004891738062724471, |
|
"rewards/margins": 3.405969255254604e-05, |
|
"rewards/rejected": -0.0005232334951870143, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.229299363057325e-06, |
|
"logits/chosen": -0.8444937467575073, |
|
"logits/rejected": -0.8687158823013306, |
|
"logps/chosen": -346.148193359375, |
|
"logps/rejected": -314.0673828125, |
|
"loss": 0.101, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.0008814434404484928, |
|
"rewards/margins": 0.0003942731418646872, |
|
"rewards/rejected": -0.00127571658231318, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.547770700636943e-06, |
|
"logits/chosen": -0.7672846913337708, |
|
"logits/rejected": -0.8321496844291687, |
|
"logps/chosen": -308.66973876953125, |
|
"logps/rejected": -290.4983215332031, |
|
"loss": 0.0897, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.0009476385894231498, |
|
"rewards/margins": 0.0005286627565510571, |
|
"rewards/rejected": -0.0014763014623895288, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.8662420382165605e-06, |
|
"logits/chosen": -0.9202607870101929, |
|
"logits/rejected": -0.6941283345222473, |
|
"logps/chosen": -294.6612854003906, |
|
"logps/rejected": -279.28497314453125, |
|
"loss": 0.0967, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": -0.0010531718144193292, |
|
"rewards/margins": 0.0009010445210151374, |
|
"rewards/rejected": -0.0019542162772268057, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.1847133757961785e-06, |
|
"logits/chosen": -0.8830461502075195, |
|
"logits/rejected": -0.7728676795959473, |
|
"logps/chosen": -346.04144287109375, |
|
"logps/rejected": -351.6465759277344, |
|
"loss": 0.098, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": -0.0019343973835930228, |
|
"rewards/margins": 0.0005748984985984862, |
|
"rewards/rejected": -0.00250929594039917, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_logits/chosen": -0.8170500993728638, |
|
"eval_logits/rejected": -0.7225118279457092, |
|
"eval_logps/chosen": -399.25030517578125, |
|
"eval_logps/rejected": -370.84332275390625, |
|
"eval_loss": 0.053326018154621124, |
|
"eval_rewards/accuracies": 0.49799999594688416, |
|
"eval_rewards/chosen": -0.002945071319118142, |
|
"eval_rewards/margins": 0.0006628122646361589, |
|
"eval_rewards/rejected": -0.0036078833509236574, |
|
"eval_runtime": 539.8002, |
|
"eval_samples_per_second": 3.705, |
|
"eval_steps_per_second": 0.926, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.5031847133757964e-06, |
|
"logits/chosen": -0.8748549222946167, |
|
"logits/rejected": -0.8573201298713684, |
|
"logps/chosen": -356.620361328125, |
|
"logps/rejected": -334.7717590332031, |
|
"loss": 0.0783, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.0018858186667785048, |
|
"rewards/margins": 0.0009479810250923038, |
|
"rewards/rejected": -0.0028337999247014523, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.821656050955415e-06, |
|
"logits/chosen": -0.9678497314453125, |
|
"logits/rejected": -0.7722519636154175, |
|
"logps/chosen": -313.03741455078125, |
|
"logps/rejected": -280.0140686035156, |
|
"loss": 0.1122, |
|
"rewards/accuracies": 0.35624998807907104, |
|
"rewards/chosen": -0.0022745749447494745, |
|
"rewards/margins": 0.0005548128974623978, |
|
"rewards/rejected": -0.0028293877840042114, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.140127388535032e-06, |
|
"logits/chosen": -0.8600385785102844, |
|
"logits/rejected": -0.8846877813339233, |
|
"logps/chosen": -355.20513916015625, |
|
"logps/rejected": -346.33135986328125, |
|
"loss": 0.098, |
|
"rewards/accuracies": 0.3812499940395355, |
|
"rewards/chosen": -0.005060167517513037, |
|
"rewards/margins": -0.00011312137212371454, |
|
"rewards/rejected": -0.004947046283632517, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.45859872611465e-06, |
|
"logits/chosen": -0.848353385925293, |
|
"logits/rejected": -0.8213680386543274, |
|
"logps/chosen": -355.6943054199219, |
|
"logps/rejected": -313.1587829589844, |
|
"loss": 0.0975, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.005685682408511639, |
|
"rewards/margins": 0.0019586696289479733, |
|
"rewards/rejected": -0.007644351571798325, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.777070063694268e-06, |
|
"logits/chosen": -0.9287412762641907, |
|
"logits/rejected": -0.8769465684890747, |
|
"logps/chosen": -329.2186584472656, |
|
"logps/rejected": -298.8802795410156, |
|
"loss": 0.0712, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.00595915038138628, |
|
"rewards/margins": 0.0024761247914284468, |
|
"rewards/rejected": -0.00843527540564537, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999943753177818e-06, |
|
"logits/chosen": -1.005480170249939, |
|
"logits/rejected": -0.9711716771125793, |
|
"logps/chosen": -311.71209716796875, |
|
"logps/rejected": -299.3721923828125, |
|
"loss": 0.0886, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.00825573317706585, |
|
"rewards/margins": 0.0037352764047682285, |
|
"rewards/rejected": -0.011991010047495365, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.998943880079481e-06, |
|
"logits/chosen": -0.9299044609069824, |
|
"logits/rejected": -0.9373615384101868, |
|
"logps/chosen": -343.8020324707031, |
|
"logps/rejected": -338.0310974121094, |
|
"loss": 0.0924, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.01192331500351429, |
|
"rewards/margins": 0.0040515875443816185, |
|
"rewards/rejected": -0.015974899753928185, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.99669465299937e-06, |
|
"logits/chosen": -0.9855674505233765, |
|
"logits/rejected": -0.9634091258049011, |
|
"logps/chosen": -309.28851318359375, |
|
"logps/rejected": -315.94561767578125, |
|
"loss": 0.0928, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.015265477821230888, |
|
"rewards/margins": 0.005659168586134911, |
|
"rewards/rejected": -0.0209246464073658, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.993197196444851e-06, |
|
"logits/chosen": -1.0295236110687256, |
|
"logits/rejected": -1.1208027601242065, |
|
"logps/chosen": -317.75494384765625, |
|
"logps/rejected": -328.9816589355469, |
|
"loss": 0.0805, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.020896239206194878, |
|
"rewards/margins": 0.007452984340488911, |
|
"rewards/rejected": -0.028349224478006363, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.988453258979111e-06, |
|
"logits/chosen": -1.1082799434661865, |
|
"logits/rejected": -1.0890588760375977, |
|
"logps/chosen": -350.9955139160156, |
|
"logps/rejected": -370.35955810546875, |
|
"loss": 0.094, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.02722758986055851, |
|
"rewards/margins": 0.009304001927375793, |
|
"rewards/rejected": -0.036531589925289154, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_logits/chosen": -1.1387817859649658, |
|
"eval_logits/rejected": -1.0754390954971313, |
|
"eval_logps/chosen": -435.2693176269531, |
|
"eval_logps/rejected": -419.6948547363281, |
|
"eval_loss": 0.04912900552153587, |
|
"eval_rewards/accuracies": 0.5525000095367432, |
|
"eval_rewards/chosen": -0.03896407037973404, |
|
"eval_rewards/margins": 0.013495376333594322, |
|
"eval_rewards/rejected": -0.05245944485068321, |
|
"eval_runtime": 539.703, |
|
"eval_samples_per_second": 3.706, |
|
"eval_steps_per_second": 0.926, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.982465212346954e-06, |
|
"logits/chosen": -1.1744426488876343, |
|
"logits/rejected": -1.174753189086914, |
|
"logps/chosen": -324.98370361328125, |
|
"logps/rejected": -325.42767333984375, |
|
"loss": 0.1019, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.031165916472673416, |
|
"rewards/margins": 0.018221553415060043, |
|
"rewards/rejected": -0.04938746988773346, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.975236050289041e-06, |
|
"logits/chosen": -1.1808674335479736, |
|
"logits/rejected": -1.0834966897964478, |
|
"logps/chosen": -334.90087890625, |
|
"logps/rejected": -318.37060546875, |
|
"loss": 0.0697, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.028991717845201492, |
|
"rewards/margins": 0.01802077516913414, |
|
"rewards/rejected": -0.04701249301433563, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.96676938704516e-06, |
|
"logits/chosen": -1.1029061079025269, |
|
"logits/rejected": -1.1640408039093018, |
|
"logps/chosen": -435.73687744140625, |
|
"logps/rejected": -423.72900390625, |
|
"loss": 0.0749, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.040695372968912125, |
|
"rewards/margins": 0.014833291992545128, |
|
"rewards/rejected": -0.0555286630988121, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.95706945554728e-06, |
|
"logits/chosen": -1.1478805541992188, |
|
"logits/rejected": -1.2186776399612427, |
|
"logps/chosen": -370.34503173828125, |
|
"logps/rejected": -392.4799499511719, |
|
"loss": 0.0734, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.0380186066031456, |
|
"rewards/margins": 0.017158757895231247, |
|
"rewards/rejected": -0.05517736077308655, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.9461411053032805e-06, |
|
"logits/chosen": -1.1441445350646973, |
|
"logits/rejected": -1.1573059558868408, |
|
"logps/chosen": -357.66925048828125, |
|
"logps/rejected": -347.88848876953125, |
|
"loss": 0.1096, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": -0.03414061293005943, |
|
"rewards/margins": 0.009822173975408077, |
|
"rewards/rejected": -0.043962787836790085, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.933989799972431e-06, |
|
"logits/chosen": -1.1844251155853271, |
|
"logits/rejected": -1.1023800373077393, |
|
"logps/chosen": -362.12890625, |
|
"logps/rejected": -344.55694580078125, |
|
"loss": 0.087, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.023841477930545807, |
|
"rewards/margins": 0.01648232527077198, |
|
"rewards/rejected": -0.04032380133867264, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.920621614633815e-06, |
|
"logits/chosen": -1.158454418182373, |
|
"logits/rejected": -1.1034467220306396, |
|
"logps/chosen": -350.0291748046875, |
|
"logps/rejected": -357.4220886230469, |
|
"loss": 0.094, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.02827698551118374, |
|
"rewards/margins": 0.01563875935971737, |
|
"rewards/rejected": -0.04391574487090111, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.906043232749081e-06, |
|
"logits/chosen": -1.1796420812606812, |
|
"logits/rejected": -0.9934653043746948, |
|
"logps/chosen": -337.7796325683594, |
|
"logps/rejected": -324.3747863769531, |
|
"loss": 0.0808, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.02835283800959587, |
|
"rewards/margins": 0.013374457135796547, |
|
"rewards/rejected": -0.04172729700803757, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.890261942821023e-06, |
|
"logits/chosen": -1.0947716236114502, |
|
"logits/rejected": -1.134075403213501, |
|
"logps/chosen": -331.79107666015625, |
|
"logps/rejected": -309.49609375, |
|
"loss": 0.1014, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.024125058203935623, |
|
"rewards/margins": 0.01503480039536953, |
|
"rewards/rejected": -0.0391598604619503, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.873285634749678e-06, |
|
"logits/chosen": -1.1100094318389893, |
|
"logits/rejected": -1.0816996097564697, |
|
"logps/chosen": -311.70867919921875, |
|
"logps/rejected": -315.1243591308594, |
|
"loss": 0.0898, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.017298461869359016, |
|
"rewards/margins": 0.013444353826344013, |
|
"rewards/rejected": -0.030742818489670753, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_logits/chosen": -1.0857516527175903, |
|
"eval_logits/rejected": -1.0291430950164795, |
|
"eval_logps/chosen": -414.7480163574219, |
|
"eval_logps/rejected": -407.5087585449219, |
|
"eval_loss": 0.045198723673820496, |
|
"eval_rewards/accuracies": 0.578000009059906, |
|
"eval_rewards/chosen": -0.018442772328853607, |
|
"eval_rewards/margins": 0.021830614656209946, |
|
"eval_rewards/rejected": -0.04027338698506355, |
|
"eval_runtime": 539.5827, |
|
"eval_samples_per_second": 3.707, |
|
"eval_steps_per_second": 0.927, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.855122795887746e-06, |
|
"logits/chosen": -1.1289124488830566, |
|
"logits/rejected": -1.06770658493042, |
|
"logps/chosen": -297.93218994140625, |
|
"logps/rejected": -279.66107177734375, |
|
"loss": 0.0967, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.0071363793686032295, |
|
"rewards/margins": 0.01606178656220436, |
|
"rewards/rejected": -0.023198166862130165, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.83578250679731e-06, |
|
"logits/chosen": -1.0398236513137817, |
|
"logits/rejected": -1.0716559886932373, |
|
"logps/chosen": -323.6569519042969, |
|
"logps/rejected": -321.41845703125, |
|
"loss": 0.1007, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.01684439741075039, |
|
"rewards/margins": 0.016590449959039688, |
|
"rewards/rejected": -0.03343484550714493, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.8152744367099935e-06, |
|
"logits/chosen": -1.13577139377594, |
|
"logits/rejected": -1.0597126483917236, |
|
"logps/chosen": -253.326171875, |
|
"logps/rejected": -260.37652587890625, |
|
"loss": 0.1324, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.011873602867126465, |
|
"rewards/margins": 0.021937990561127663, |
|
"rewards/rejected": -0.03381159156560898, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.793608838692792e-06, |
|
"logits/chosen": -1.05599844455719, |
|
"logits/rejected": -1.1052017211914062, |
|
"logps/chosen": -330.0927734375, |
|
"logps/rejected": -354.5337829589844, |
|
"loss": 0.0709, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": -0.014163943938910961, |
|
"rewards/margins": 0.0242028646171093, |
|
"rewards/rejected": -0.038366809487342834, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.770796544522026e-06, |
|
"logits/chosen": -1.1162309646606445, |
|
"logits/rejected": -1.0977243185043335, |
|
"logps/chosen": -298.46929931640625, |
|
"logps/rejected": -301.0343017578125, |
|
"loss": 0.0828, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.014557460322976112, |
|
"rewards/margins": 0.010886872187256813, |
|
"rewards/rejected": -0.025444332510232925, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.746848959267968e-06, |
|
"logits/chosen": -1.0554795265197754, |
|
"logits/rejected": -0.9742960929870605, |
|
"logps/chosen": -337.4122314453125, |
|
"logps/rejected": -334.58367919921875, |
|
"loss": 0.0999, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.002840764820575714, |
|
"rewards/margins": 0.01675793156027794, |
|
"rewards/rejected": -0.019598694518208504, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.721778055592841e-06, |
|
"logits/chosen": -0.995721161365509, |
|
"logits/rejected": -1.009927749633789, |
|
"logps/chosen": -307.5436706542969, |
|
"logps/rejected": -305.3434753417969, |
|
"loss": 0.0958, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": 0.0018125835340470076, |
|
"rewards/margins": 0.01936521753668785, |
|
"rewards/rejected": -0.017552632838487625, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.695596367765054e-06, |
|
"logits/chosen": -0.9923852682113647, |
|
"logits/rejected": -0.9759656190872192, |
|
"logps/chosen": -369.8957824707031, |
|
"logps/rejected": -335.42694091796875, |
|
"loss": 0.0605, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.0068010808899998665, |
|
"rewards/margins": 0.011534233577549458, |
|
"rewards/rejected": -0.018335314467549324, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.6683169853926615e-06, |
|
"logits/chosen": -1.056213617324829, |
|
"logits/rejected": -1.1043860912322998, |
|
"logps/chosen": -302.78570556640625, |
|
"logps/rejected": -315.38604736328125, |
|
"loss": 0.0863, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.00549090001732111, |
|
"rewards/margins": 0.01604645326733589, |
|
"rewards/rejected": -0.021537352353334427, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.639953546879173e-06, |
|
"logits/chosen": -1.011988639831543, |
|
"logits/rejected": -0.9677278399467468, |
|
"logps/chosen": -352.2360534667969, |
|
"logps/rejected": -355.6038513183594, |
|
"loss": 0.0731, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.0009747882140800357, |
|
"rewards/margins": 0.02269767038524151, |
|
"rewards/rejected": -0.02367245778441429, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_logits/chosen": -1.0412452220916748, |
|
"eval_logits/rejected": -0.9863678812980652, |
|
"eval_logps/chosen": -403.1915588378906, |
|
"eval_logps/rejected": -400.2978820800781, |
|
"eval_loss": 0.04299690201878548, |
|
"eval_rewards/accuracies": 0.597000002861023, |
|
"eval_rewards/chosen": -0.006886274088174105, |
|
"eval_rewards/margins": 0.026176199316978455, |
|
"eval_rewards/rejected": -0.03306247293949127, |
|
"eval_runtime": 539.7275, |
|
"eval_samples_per_second": 3.706, |
|
"eval_steps_per_second": 0.926, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.610520232605e-06, |
|
"logits/chosen": -1.0571563243865967, |
|
"logits/rejected": -0.9747453927993774, |
|
"logps/chosen": -360.03936767578125, |
|
"logps/rejected": -312.3787536621094, |
|
"loss": 0.0706, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.007904710248112679, |
|
"rewards/margins": 0.012304016388952732, |
|
"rewards/rejected": -0.020208725705742836, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.580031757837931e-06, |
|
"logits/chosen": -1.0687059164047241, |
|
"logits/rejected": -1.028510570526123, |
|
"logps/chosen": -309.14532470703125, |
|
"logps/rejected": -316.378662109375, |
|
"loss": 0.0713, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.0021046344190835953, |
|
"rewards/margins": 0.0198093019425869, |
|
"rewards/rejected": -0.021913940086960793, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.5485033653761936e-06, |
|
"logits/chosen": -1.0271378755569458, |
|
"logits/rejected": -1.0654436349868774, |
|
"logps/chosen": -289.8176574707031, |
|
"logps/rejected": -310.6886291503906, |
|
"loss": 0.0907, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.001297330716624856, |
|
"rewards/margins": 0.02493324503302574, |
|
"rewards/rejected": -0.02363591641187668, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.5159508179277775e-06, |
|
"logits/chosen": -1.0649276971817017, |
|
"logits/rejected": -1.0275962352752686, |
|
"logps/chosen": -287.9720458984375, |
|
"logps/rejected": -285.8517761230469, |
|
"loss": 0.0857, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": 0.0025724810548126698, |
|
"rewards/margins": 0.02007809281349182, |
|
"rewards/rejected": -0.01750561222434044, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.48239039022982e-06, |
|
"logits/chosen": -1.0931072235107422, |
|
"logits/rejected": -0.9774026870727539, |
|
"logps/chosen": -302.41497802734375, |
|
"logps/rejected": -302.8403625488281, |
|
"loss": 0.0786, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.0017650052905082703, |
|
"rewards/margins": 0.017620224505662918, |
|
"rewards/rejected": -0.015855219215154648, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.447838860912011e-06, |
|
"logits/chosen": -1.0981853008270264, |
|
"logits/rejected": -0.9999715685844421, |
|
"logps/chosen": -287.58599853515625, |
|
"logps/rejected": -293.4311218261719, |
|
"loss": 0.0808, |
|
"rewards/accuracies": 0.38749998807907104, |
|
"rewards/chosen": -0.007796216756105423, |
|
"rewards/margins": 0.02398526482284069, |
|
"rewards/rejected": -0.03178148344159126, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.412313504108062e-06, |
|
"logits/chosen": -1.1234943866729736, |
|
"logits/rejected": -1.0496309995651245, |
|
"logps/chosen": -343.08856201171875, |
|
"logps/rejected": -333.7749938964844, |
|
"loss": 0.0816, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.00918793398886919, |
|
"rewards/margins": 0.024025408551096916, |
|
"rewards/rejected": -0.03321333974599838, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.375832080819465e-06, |
|
"logits/chosen": -1.1054179668426514, |
|
"logits/rejected": -1.063127040863037, |
|
"logps/chosen": -352.350341796875, |
|
"logps/rejected": -334.8752746582031, |
|
"loss": 0.0853, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.00490077119320631, |
|
"rewards/margins": 0.02163395844399929, |
|
"rewards/rejected": -0.026534726843237877, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.338412830035823e-06, |
|
"logits/chosen": -1.0628612041473389, |
|
"logits/rejected": -1.0398916006088257, |
|
"logps/chosen": -347.806884765625, |
|
"logps/rejected": -326.5227355957031, |
|
"loss": 0.0819, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": 0.0002868110022973269, |
|
"rewards/margins": 0.024012237787246704, |
|
"rewards/rejected": -0.02372542954981327, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.300074459616216e-06, |
|
"logits/chosen": -1.083707332611084, |
|
"logits/rejected": -1.1182454824447632, |
|
"logps/chosen": -304.49078369140625, |
|
"logps/rejected": -335.3125, |
|
"loss": 0.0787, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.0009085664642043412, |
|
"rewards/margins": 0.03841399401426315, |
|
"rewards/rejected": -0.039322562515735626, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_logits/chosen": -1.0974537134170532, |
|
"eval_logits/rejected": -1.0587471723556519, |
|
"eval_logps/chosen": -408.4565734863281, |
|
"eval_logps/rejected": -414.48870849609375, |
|
"eval_loss": 0.0421723797917366, |
|
"eval_rewards/accuracies": 0.6069999933242798, |
|
"eval_rewards/chosen": -0.012151296250522137, |
|
"eval_rewards/margins": 0.0351020023226738, |
|
"eval_rewards/rejected": -0.04725329577922821, |
|
"eval_runtime": 539.5972, |
|
"eval_samples_per_second": 3.706, |
|
"eval_steps_per_second": 0.927, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.260836136936159e-06, |
|
"logits/chosen": -1.0836219787597656, |
|
"logits/rejected": -1.0971637964248657, |
|
"logps/chosen": -287.1617431640625, |
|
"logps/rejected": -287.58148193359375, |
|
"loss": 0.0893, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.0011848447611555457, |
|
"rewards/margins": 0.033604733645915985, |
|
"rewards/rejected": -0.03478958457708359, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.220717479304816e-06, |
|
"logits/chosen": -1.0768189430236816, |
|
"logits/rejected": -1.0736405849456787, |
|
"logps/chosen": -351.4105529785156, |
|
"logps/rejected": -374.5068359375, |
|
"loss": 0.092, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.007005206309258938, |
|
"rewards/margins": 0.034257300198078156, |
|
"rewards/rejected": -0.04126249998807907, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.179738544157272e-06, |
|
"logits/chosen": -1.0806314945220947, |
|
"logits/rejected": -1.0778720378875732, |
|
"logps/chosen": -275.35101318359375, |
|
"logps/rejected": -270.8028869628906, |
|
"loss": 0.0745, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": 0.0068043931387364864, |
|
"rewards/margins": 0.013285738416016102, |
|
"rewards/rejected": -0.006481344345957041, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.137919819026762e-06, |
|
"logits/chosen": -1.0614066123962402, |
|
"logits/rejected": -0.9721907377243042, |
|
"logps/chosen": -313.3482971191406, |
|
"logps/rejected": -321.7149963378906, |
|
"loss": 0.076, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 0.009386066347360611, |
|
"rewards/margins": 0.01596895419061184, |
|
"rewards/rejected": -0.006582888774573803, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.09528221130187e-06, |
|
"logits/chosen": -1.0457605123519897, |
|
"logits/rejected": -1.0901412963867188, |
|
"logps/chosen": -314.82086181640625, |
|
"logps/rejected": -286.0039367675781, |
|
"loss": 0.0912, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.009857202880084515, |
|
"rewards/margins": 0.024335484951734543, |
|
"rewards/rejected": -0.014478283934295177, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.0518470377738274e-06, |
|
"logits/chosen": -1.0764890909194946, |
|
"logits/rejected": -1.1156491041183472, |
|
"logps/chosen": -293.7644348144531, |
|
"logps/rejected": -315.3866882324219, |
|
"loss": 0.0867, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": 0.011628219857811928, |
|
"rewards/margins": 0.028168832883238792, |
|
"rewards/rejected": -0.016540613025426865, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.0076360139791155e-06, |
|
"logits/chosen": -1.0172516107559204, |
|
"logits/rejected": -1.0550997257232666, |
|
"logps/chosen": -340.00164794921875, |
|
"logps/rejected": -341.64068603515625, |
|
"loss": 0.0722, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.017406892031431198, |
|
"rewards/margins": 0.03364910930395126, |
|
"rewards/rejected": -0.016242217272520065, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.962671243342728e-06, |
|
"logits/chosen": -1.089564323425293, |
|
"logits/rejected": -1.038475513458252, |
|
"logps/chosen": -314.32012939453125, |
|
"logps/rejected": -327.6108703613281, |
|
"loss": 0.0795, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": 0.00651575718075037, |
|
"rewards/margins": 0.018204618245363235, |
|
"rewards/rejected": -0.011688861064612865, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.916975206127501e-06, |
|
"logits/chosen": -1.0772429704666138, |
|
"logits/rejected": -1.0009437799453735, |
|
"logps/chosen": -317.83563232421875, |
|
"logps/rejected": -302.42535400390625, |
|
"loss": 0.0735, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.006415791809558868, |
|
"rewards/margins": 0.03005843423306942, |
|
"rewards/rejected": -0.0236426442861557, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.870570748195039e-06, |
|
"logits/chosen": -1.1022416353225708, |
|
"logits/rejected": -1.1078603267669678, |
|
"logps/chosen": -338.0223083496094, |
|
"logps/rejected": -388.9841003417969, |
|
"loss": 0.0742, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.012317690066993237, |
|
"rewards/margins": 0.049174439162015915, |
|
"rewards/rejected": -0.036856748163700104, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_logits/chosen": -1.0245832204818726, |
|
"eval_logits/rejected": -0.9872007966041565, |
|
"eval_logps/chosen": -382.8363342285156, |
|
"eval_logps/rejected": -384.7105407714844, |
|
"eval_loss": 0.04055745154619217, |
|
"eval_rewards/accuracies": 0.6085000038146973, |
|
"eval_rewards/chosen": 0.013468942604959011, |
|
"eval_rewards/margins": 0.030944030731916428, |
|
"eval_rewards/rejected": -0.017475087195634842, |
|
"eval_runtime": 539.6208, |
|
"eval_samples_per_second": 3.706, |
|
"eval_steps_per_second": 0.927, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.823481069583869e-06, |
|
"logits/chosen": -1.0397017002105713, |
|
"logits/rejected": -0.9971572756767273, |
|
"logps/chosen": -305.09442138671875, |
|
"logps/rejected": -314.68621826171875, |
|
"loss": 0.08, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.01682308129966259, |
|
"rewards/margins": 0.026519659906625748, |
|
"rewards/rejected": -0.009696578606963158, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7757297129105087e-06, |
|
"logits/chosen": -1.1045721769332886, |
|
"logits/rejected": -1.0196996927261353, |
|
"logps/chosen": -272.93206787109375, |
|
"logps/rejected": -281.6795349121094, |
|
"loss": 0.0878, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": 0.009568464942276478, |
|
"rewards/margins": 0.01820019818842411, |
|
"rewards/rejected": -0.008631732314825058, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7273405515992785e-06, |
|
"logits/chosen": -1.0682175159454346, |
|
"logits/rejected": -1.0213497877120972, |
|
"logps/chosen": -284.39495849609375, |
|
"logps/rejected": -300.61920166015625, |
|
"loss": 0.0707, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": 0.01212714146822691, |
|
"rewards/margins": 0.01712757535278797, |
|
"rewards/rejected": -0.005000432953238487, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.678337777946706e-06, |
|
"logits/chosen": -1.0374300479888916, |
|
"logits/rejected": -0.9567736387252808, |
|
"logps/chosen": -334.5086669921875, |
|
"logps/rejected": -323.7910461425781, |
|
"loss": 0.0941, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.02154025062918663, |
|
"rewards/margins": 0.038109757006168365, |
|
"rewards/rejected": -0.016569510102272034, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.6287458910265293e-06, |
|
"logits/chosen": -1.0035964250564575, |
|
"logits/rejected": -0.9258484840393066, |
|
"logps/chosen": -309.08038330078125, |
|
"logps/rejected": -294.2804260253906, |
|
"loss": 0.0892, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.011333728209137917, |
|
"rewards/margins": 0.01899079605937004, |
|
"rewards/rejected": -0.0076570697128772736, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.57858968444131e-06, |
|
"logits/chosen": -1.0853677988052368, |
|
"logits/rejected": -0.9975327253341675, |
|
"logps/chosen": -330.30255126953125, |
|
"logps/rejected": -312.9807434082031, |
|
"loss": 0.0737, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.014233666472136974, |
|
"rewards/margins": 0.025601008906960487, |
|
"rewards/rejected": -0.011367343366146088, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.5278942339268034e-06, |
|
"logits/chosen": -1.0091218948364258, |
|
"logits/rejected": -1.0755088329315186, |
|
"logps/chosen": -276.8922424316406, |
|
"logps/rejected": -313.3753967285156, |
|
"loss": 0.0777, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.016201717779040337, |
|
"rewards/margins": 0.032345883548259735, |
|
"rewards/rejected": -0.016144167631864548, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.476684884815279e-06, |
|
"logits/chosen": -1.0601623058319092, |
|
"logits/rejected": -0.9890631437301636, |
|
"logps/chosen": -280.58538818359375, |
|
"logps/rejected": -280.070068359375, |
|
"loss": 0.1082, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": 0.00014938972890377045, |
|
"rewards/margins": 0.00774354999884963, |
|
"rewards/rejected": -0.007594159804284573, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.424987239364044e-06, |
|
"logits/chosen": -1.0564385652542114, |
|
"logits/rejected": -1.0079165697097778, |
|
"logps/chosen": -290.0457458496094, |
|
"logps/rejected": -317.35809326171875, |
|
"loss": 0.0844, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.01850438304245472, |
|
"rewards/margins": 0.03815234825015068, |
|
"rewards/rejected": -0.01964796707034111, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3728271439555277e-06, |
|
"logits/chosen": -1.061989426612854, |
|
"logits/rejected": -1.0436265468597412, |
|
"logps/chosen": -300.4573669433594, |
|
"logps/rejected": -291.84320068359375, |
|
"loss": 0.0635, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.012897541746497154, |
|
"rewards/margins": 0.022769348695874214, |
|
"rewards/rejected": -0.009871806018054485, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_logits/chosen": -1.022524118423462, |
|
"eval_logits/rejected": -0.9903200268745422, |
|
"eval_logps/chosen": -379.6695556640625, |
|
"eval_logps/rejected": -386.0258483886719, |
|
"eval_loss": 0.04010883718729019, |
|
"eval_rewards/accuracies": 0.609499990940094, |
|
"eval_rewards/chosen": 0.01663573645055294, |
|
"eval_rewards/margins": 0.035426173359155655, |
|
"eval_rewards/rejected": -0.018790436908602715, |
|
"eval_runtime": 539.7506, |
|
"eval_samples_per_second": 3.705, |
|
"eval_steps_per_second": 0.926, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3202306761753078e-06, |
|
"logits/chosen": -1.056015133857727, |
|
"logits/rejected": -1.044135570526123, |
|
"logps/chosen": -291.650634765625, |
|
"logps/rejected": -287.6162414550781, |
|
"loss": 0.0908, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": 0.012986931018531322, |
|
"rewards/margins": 0.0346546433866024, |
|
"rewards/rejected": -0.021667715162038803, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2672241317745513e-06, |
|
"logits/chosen": -0.9736151695251465, |
|
"logits/rejected": -0.9646120071411133, |
|
"logps/chosen": -311.49346923828125, |
|
"logps/rejected": -341.27496337890625, |
|
"loss": 0.0756, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": 0.017372317612171173, |
|
"rewards/margins": 0.028918754309415817, |
|
"rewards/rejected": -0.011546434834599495, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.213834011523378e-06, |
|
"logits/chosen": -1.1018531322479248, |
|
"logits/rejected": -1.025577187538147, |
|
"logps/chosen": -319.1675720214844, |
|
"logps/rejected": -316.7040100097656, |
|
"loss": 0.0765, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.014244127087295055, |
|
"rewards/margins": 0.03847852349281311, |
|
"rewards/rejected": -0.02423439547419548, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.160087007961724e-06, |
|
"logits/chosen": -1.048353910446167, |
|
"logits/rejected": -0.990998387336731, |
|
"logps/chosen": -310.2376403808594, |
|
"logps/rejected": -319.0958557128906, |
|
"loss": 0.0929, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": 0.01772814430296421, |
|
"rewards/margins": 0.029912665486335754, |
|
"rewards/rejected": -0.012184521183371544, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.1060099920543404e-06, |
|
"logits/chosen": -0.9734305143356323, |
|
"logits/rejected": -1.053117275238037, |
|
"logps/chosen": -253.1719207763672, |
|
"logps/rejected": -268.72723388671875, |
|
"loss": 0.1126, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.019246799871325493, |
|
"rewards/margins": 0.028338003903627396, |
|
"rewards/rejected": -0.009091204032301903, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.0516299997565675e-06, |
|
"logits/chosen": -0.9801149368286133, |
|
"logits/rejected": -1.053255558013916, |
|
"logps/chosen": -300.00152587890625, |
|
"logps/rejected": -316.54656982421875, |
|
"loss": 0.0736, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 0.013104112818837166, |
|
"rewards/margins": 0.0340370312333107, |
|
"rewards/rejected": -0.020932912826538086, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.996974218497643e-06, |
|
"logits/chosen": -0.9665297269821167, |
|
"logits/rejected": -1.033067226409912, |
|
"logps/chosen": -327.8155822753906, |
|
"logps/rejected": -291.17840576171875, |
|
"loss": 0.0969, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": 0.013064196333289146, |
|
"rewards/margins": 0.014280739240348339, |
|
"rewards/rejected": -0.0012165403459221125, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9420699735882673e-06, |
|
"logits/chosen": -1.103208065032959, |
|
"logits/rejected": -1.0667420625686646, |
|
"logps/chosen": -284.13043212890625, |
|
"logps/rejected": -288.5975646972656, |
|
"loss": 0.0726, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": 0.021614065393805504, |
|
"rewards/margins": 0.022522414103150368, |
|
"rewards/rejected": -0.000908347952645272, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8869447145592345e-06, |
|
"logits/chosen": -0.9419771432876587, |
|
"logits/rejected": -0.9063804745674133, |
|
"logps/chosen": -275.3219299316406, |
|
"logps/rejected": -281.0148620605469, |
|
"loss": 0.0885, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": 0.021688418462872505, |
|
"rewards/margins": 0.02460251934826374, |
|
"rewards/rejected": -0.0029140994884073734, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.831626001437969e-06, |
|
"logits/chosen": -0.9940840601921082, |
|
"logits/rejected": -0.9761594533920288, |
|
"logps/chosen": -284.7848205566406, |
|
"logps/rejected": -305.55279541015625, |
|
"loss": 0.0881, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.02503793314099312, |
|
"rewards/margins": 0.03414962440729141, |
|
"rewards/rejected": -0.009111693128943443, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_logits/chosen": -0.9974508881568909, |
|
"eval_logits/rejected": -0.9657922387123108, |
|
"eval_logps/chosen": -371.2671813964844, |
|
"eval_logps/rejected": -377.4323425292969, |
|
"eval_loss": 0.03950659930706024, |
|
"eval_rewards/accuracies": 0.6085000038146973, |
|
"eval_rewards/chosen": 0.025038093328475952, |
|
"eval_rewards/margins": 0.03523498401045799, |
|
"eval_rewards/rejected": -0.010196887888014317, |
|
"eval_runtime": 539.4827, |
|
"eval_samples_per_second": 3.707, |
|
"eval_steps_per_second": 0.927, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.776141490969808e-06, |
|
"logits/chosen": -0.9662303924560547, |
|
"logits/rejected": -1.007033109664917, |
|
"logps/chosen": -273.5274963378906, |
|
"logps/rejected": -297.3504943847656, |
|
"loss": 0.0811, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": 0.017237264662981033, |
|
"rewards/margins": 0.020813334733247757, |
|
"rewards/rejected": -0.0035760707687586546, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.720518922790937e-06, |
|
"logits/chosen": -1.0530205965042114, |
|
"logits/rejected": -0.9808292388916016, |
|
"logps/chosen": -260.7068176269531, |
|
"logps/rejected": -254.8417205810547, |
|
"loss": 0.1111, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": 0.015834391117095947, |
|
"rewards/margins": 0.030566086992621422, |
|
"rewards/rejected": -0.014731695875525475, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.66478610555988e-06, |
|
"logits/chosen": -0.973135769367218, |
|
"logits/rejected": -1.0151408910751343, |
|
"logps/chosen": -315.51763916015625, |
|
"logps/rejected": -324.4259338378906, |
|
"loss": 0.0997, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": 0.022444238886237144, |
|
"rewards/margins": 0.034130193293094635, |
|
"rewards/rejected": -0.011685955338180065, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.608970903054482e-06, |
|
"logits/chosen": -0.9921044111251831, |
|
"logits/rejected": -0.996437668800354, |
|
"logps/chosen": -298.2916564941406, |
|
"logps/rejected": -307.67156982421875, |
|
"loss": 0.0765, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.020961161702871323, |
|
"rewards/margins": 0.020338475704193115, |
|
"rewards/rejected": 0.0006226839614100754, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.553101220241337e-06, |
|
"logits/chosen": -0.955792248249054, |
|
"logits/rejected": -0.9853283166885376, |
|
"logps/chosen": -322.03619384765625, |
|
"logps/rejected": -341.2371520996094, |
|
"loss": 0.0739, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": 0.02565811201930046, |
|
"rewards/margins": 0.01996433734893799, |
|
"rewards/rejected": 0.005693775601685047, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.4972049893246218e-06, |
|
"logits/chosen": -0.9531941413879395, |
|
"logits/rejected": -0.9866918325424194, |
|
"logps/chosen": -299.243896484375, |
|
"logps/rejected": -302.88336181640625, |
|
"loss": 0.0889, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.02744477614760399, |
|
"rewards/margins": 0.03810085728764534, |
|
"rewards/rejected": -0.0106560830026865, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4413101557813095e-06, |
|
"logits/chosen": -0.9942695498466492, |
|
"logits/rejected": -1.0122566223144531, |
|
"logps/chosen": -291.08892822265625, |
|
"logps/rejected": -301.3489074707031, |
|
"loss": 0.076, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": 0.027957703918218613, |
|
"rewards/margins": 0.030374949797987938, |
|
"rewards/rejected": -0.0024172496050596237, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.3854446643897566e-06, |
|
"logits/chosen": -0.935819149017334, |
|
"logits/rejected": -0.8908926248550415, |
|
"logps/chosen": -274.655517578125, |
|
"logps/rejected": -301.8233642578125, |
|
"loss": 0.093, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.02359806001186371, |
|
"rewards/margins": 0.040579214692115784, |
|
"rewards/rejected": -0.016981154680252075, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3296364452586246e-06, |
|
"logits/chosen": -0.9394947290420532, |
|
"logits/rejected": -1.015801191329956, |
|
"logps/chosen": -244.6657257080078, |
|
"logps/rejected": -255.62155151367188, |
|
"loss": 0.0973, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": 0.02046729251742363, |
|
"rewards/margins": 0.011587701737880707, |
|
"rewards/rejected": 0.008879591710865498, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.273913399863151e-06, |
|
"logits/chosen": -0.9255924224853516, |
|
"logits/rejected": -0.9949308633804321, |
|
"logps/chosen": -319.0769958496094, |
|
"logps/rejected": -335.22125244140625, |
|
"loss": 0.0753, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.0340295247733593, |
|
"rewards/margins": 0.03226994723081589, |
|
"rewards/rejected": 0.0017595753306522965, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_logits/chosen": -0.9455928802490234, |
|
"eval_logits/rejected": -0.9026016592979431, |
|
"eval_logps/chosen": -365.869873046875, |
|
"eval_logps/rejected": -371.78717041015625, |
|
"eval_loss": 0.03934764117002487, |
|
"eval_rewards/accuracies": 0.5989999771118164, |
|
"eval_rewards/chosen": 0.0304353516548872, |
|
"eval_rewards/margins": 0.03498707711696625, |
|
"eval_rewards/rejected": -0.00455172173678875, |
|
"eval_runtime": 539.932, |
|
"eval_samples_per_second": 3.704, |
|
"eval_steps_per_second": 0.926, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2183033870957237e-06, |
|
"logits/chosen": -0.9681524038314819, |
|
"logits/rejected": -0.9325584173202515, |
|
"logps/chosen": -338.40325927734375, |
|
"logps/rejected": -328.984619140625, |
|
"loss": 0.0758, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.017296601086854935, |
|
"rewards/margins": 0.03395865857601166, |
|
"rewards/rejected": -0.016662055626511574, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1628342093377533e-06, |
|
"logits/chosen": -0.9232437014579773, |
|
"logits/rejected": -0.9427019357681274, |
|
"logps/chosen": -301.0130615234375, |
|
"logps/rejected": -300.59088134765625, |
|
"loss": 0.0675, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.03031068481504917, |
|
"rewards/margins": 0.030671527609229088, |
|
"rewards/rejected": -0.00036084355087950826, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.1075335985597954e-06, |
|
"logits/chosen": -0.9461824297904968, |
|
"logits/rejected": -0.9701375961303711, |
|
"logps/chosen": -309.2986755371094, |
|
"logps/rejected": -296.10052490234375, |
|
"loss": 0.0781, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": 0.02147866040468216, |
|
"rewards/margins": 0.023677725344896317, |
|
"rewards/rejected": -0.002199061680585146, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0524292024568687e-06, |
|
"logits/chosen": -0.8931276202201843, |
|
"logits/rejected": -0.9411805272102356, |
|
"logps/chosen": -305.31793212890625, |
|
"logps/rejected": -297.4659729003906, |
|
"loss": 0.076, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": 0.024056371301412582, |
|
"rewards/margins": 0.024243740364909172, |
|
"rewards/rejected": -0.00018737166828941554, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9975485706259194e-06, |
|
"logits/chosen": -0.9346133470535278, |
|
"logits/rejected": -0.9174652099609375, |
|
"logps/chosen": -243.5562744140625, |
|
"logps/rejected": -260.65185546875, |
|
"loss": 0.0812, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.036083243787288666, |
|
"rewards/margins": 0.028012529015541077, |
|
"rewards/rejected": 0.00807071290910244, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.942919140792319e-06, |
|
"logits/chosen": -0.9425550699234009, |
|
"logits/rejected": -0.8944212198257446, |
|
"logps/chosen": -263.16070556640625, |
|
"logps/rejected": -296.932861328125, |
|
"loss": 0.0774, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.03240296244621277, |
|
"rewards/margins": 0.03853650763630867, |
|
"rewards/rejected": -0.006133544258773327, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.888568225092296e-06, |
|
"logits/chosen": -0.9160875082015991, |
|
"logits/rejected": -0.9029878377914429, |
|
"logps/chosen": -259.0009460449219, |
|
"logps/rejected": -284.0046081542969, |
|
"loss": 0.071, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.031025772914290428, |
|
"rewards/margins": 0.026529574766755104, |
|
"rewards/rejected": 0.004496193490922451, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8345229964181628e-06, |
|
"logits/chosen": -1.0157150030136108, |
|
"logits/rejected": -0.9280340075492859, |
|
"logps/chosen": -280.16455078125, |
|
"logps/rejected": -297.15655517578125, |
|
"loss": 0.0771, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": 0.02662072703242302, |
|
"rewards/margins": 0.036351434886455536, |
|
"rewards/rejected": -0.009730703197419643, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.7808104748331459e-06, |
|
"logits/chosen": -0.9648904800415039, |
|
"logits/rejected": -0.9164671897888184, |
|
"logps/chosen": -303.514892578125, |
|
"logps/rejected": -292.10748291015625, |
|
"loss": 0.0935, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.025951769202947617, |
|
"rewards/margins": 0.030179208144545555, |
|
"rewards/rejected": -0.004227438475936651, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7274575140626318e-06, |
|
"logits/chosen": -1.048002004623413, |
|
"logits/rejected": -0.9596036672592163, |
|
"logps/chosen": -297.2816467285156, |
|
"logps/rejected": -316.36981201171875, |
|
"loss": 0.0922, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": 0.01601681485772133, |
|
"rewards/margins": 0.02387891337275505, |
|
"rewards/rejected": -0.007862097583711147, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_logits/chosen": -0.9184301495552063, |
|
"eval_logits/rejected": -0.8800927996635437, |
|
"eval_logps/chosen": -367.73187255859375, |
|
"eval_logps/rejected": -374.7669372558594, |
|
"eval_loss": 0.03902236372232437, |
|
"eval_rewards/accuracies": 0.5989999771118164, |
|
"eval_rewards/chosen": 0.028573375195264816, |
|
"eval_rewards/margins": 0.03610490262508392, |
|
"eval_rewards/rejected": -0.007531528826802969, |
|
"eval_runtime": 539.7163, |
|
"eval_samples_per_second": 3.706, |
|
"eval_steps_per_second": 0.926, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6744907880685735e-06, |
|
"logits/chosen": -0.9194186925888062, |
|
"logits/rejected": -0.8924249410629272, |
|
"logps/chosen": -283.1637878417969, |
|
"logps/rejected": -281.9581604003906, |
|
"loss": 0.0699, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.02727659046649933, |
|
"rewards/margins": 0.020740380510687828, |
|
"rewards/rejected": 0.006536208093166351, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6219367777137652e-06, |
|
"logits/chosen": -0.9643144607543945, |
|
"logits/rejected": -0.9084329605102539, |
|
"logps/chosen": -296.56732177734375, |
|
"logps/rejected": -299.80718994140625, |
|
"loss": 0.0756, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": 0.02761662006378174, |
|
"rewards/margins": 0.022495564073324203, |
|
"rewards/rejected": 0.00512105505913496, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.569821757522666e-06, |
|
"logits/chosen": -0.9250129461288452, |
|
"logits/rejected": -0.8530179262161255, |
|
"logps/chosen": -305.6438903808594, |
|
"logps/rejected": -310.8564758300781, |
|
"loss": 0.0796, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 0.023295477032661438, |
|
"rewards/margins": 0.03615058213472366, |
|
"rewards/rejected": -0.012855103239417076, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.5181717825453732e-06, |
|
"logits/chosen": -0.9920689463615417, |
|
"logits/rejected": -0.9287697076797485, |
|
"logps/chosen": -268.88922119140625, |
|
"logps/rejected": -297.29791259765625, |
|
"loss": 0.0907, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.025286570191383362, |
|
"rewards/margins": 0.03552253916859627, |
|
"rewards/rejected": -0.010235967114567757, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4670126753313286e-06, |
|
"logits/chosen": -0.9325569272041321, |
|
"logits/rejected": -0.9226212501525879, |
|
"logps/chosen": -273.0399169921875, |
|
"logps/rejected": -300.64337158203125, |
|
"loss": 0.0829, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.02183723822236061, |
|
"rewards/margins": 0.02524760365486145, |
|
"rewards/rejected": -0.0034103658981621265, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.4163700130192627e-06, |
|
"logits/chosen": -0.9867246747016907, |
|
"logits/rejected": -0.9270626306533813, |
|
"logps/chosen": -240.3236846923828, |
|
"logps/rejected": -277.5113525390625, |
|
"loss": 0.1007, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": 0.0198238343000412, |
|
"rewards/margins": 0.027476917952299118, |
|
"rewards/rejected": -0.007653082255274057, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.366269114549833e-06, |
|
"logits/chosen": -0.9571270942687988, |
|
"logits/rejected": -0.9630721807479858, |
|
"logps/chosen": -282.31561279296875, |
|
"logps/rejected": -273.7547607421875, |
|
"loss": 0.0832, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": 0.014912809245288372, |
|
"rewards/margins": 0.02561432123184204, |
|
"rewards/rejected": -0.01070151012390852, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3167350280073514e-06, |
|
"logits/chosen": -0.9449627995491028, |
|
"logits/rejected": -0.9669274091720581, |
|
"logps/chosen": -290.40899658203125, |
|
"logps/rejected": -305.1846923828125, |
|
"loss": 0.0881, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": 0.020313743501901627, |
|
"rewards/margins": 0.03765721619129181, |
|
"rewards/rejected": -0.017343472689390182, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.267792518096918e-06, |
|
"logits/chosen": -0.954971194267273, |
|
"logits/rejected": -1.0019423961639404, |
|
"logps/chosen": -314.25335693359375, |
|
"logps/rejected": -375.009521484375, |
|
"loss": 0.0652, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.023724760860204697, |
|
"rewards/margins": 0.04069076478481293, |
|
"rewards/rejected": -0.01696600392460823, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2194660537632423e-06, |
|
"logits/chosen": -0.9429014325141907, |
|
"logits/rejected": -0.9538652300834656, |
|
"logps/chosen": -279.4613342285156, |
|
"logps/rejected": -323.7087097167969, |
|
"loss": 0.0703, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.02060026489198208, |
|
"rewards/margins": 0.03607643395662308, |
|
"rewards/rejected": -0.015476171858608723, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_logits/chosen": -0.9601659178733826, |
|
"eval_logits/rejected": -0.9299748539924622, |
|
"eval_logps/chosen": -373.62261962890625, |
|
"eval_logps/rejected": -383.30255126953125, |
|
"eval_loss": 0.03885647654533386, |
|
"eval_rewards/accuracies": 0.6000000238418579, |
|
"eval_rewards/chosen": 0.022682595998048782, |
|
"eval_rewards/margins": 0.03874973580241203, |
|
"eval_rewards/rejected": -0.016067136079072952, |
|
"eval_runtime": 539.6341, |
|
"eval_samples_per_second": 3.706, |
|
"eval_steps_per_second": 0.927, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1717797959573262e-06, |
|
"logits/chosen": -1.003103494644165, |
|
"logits/rejected": -0.9326059222221375, |
|
"logps/chosen": -297.8578186035156, |
|
"logps/rejected": -297.1626892089844, |
|
"loss": 0.0771, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.017790749669075012, |
|
"rewards/margins": 0.035782478749752045, |
|
"rewards/rejected": -0.017991727218031883, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.1247575855571251e-06, |
|
"logits/chosen": -0.9976641535758972, |
|
"logits/rejected": -0.9637433886528015, |
|
"logps/chosen": -306.8631896972656, |
|
"logps/rejected": -350.14324951171875, |
|
"loss": 0.081, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.029013922438025475, |
|
"rewards/margins": 0.05382751673460007, |
|
"rewards/rejected": -0.02481359988451004, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.07842293144824e-06, |
|
"logits/chosen": -0.9757798314094543, |
|
"logits/rejected": -0.9444389343261719, |
|
"logps/chosen": -312.77191162109375, |
|
"logps/rejected": -317.60406494140625, |
|
"loss": 0.0816, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.015693334862589836, |
|
"rewards/margins": 0.029305079951882362, |
|
"rewards/rejected": -0.013611746951937675, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0327989987705781e-06, |
|
"logits/chosen": -1.0041489601135254, |
|
"logits/rejected": -1.006225824356079, |
|
"logps/chosen": -291.49542236328125, |
|
"logps/rejected": -287.52978515625, |
|
"loss": 0.0888, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": 0.01858539506793022, |
|
"rewards/margins": 0.029126202687621117, |
|
"rewards/rejected": -0.01054080855101347, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.879085973368805e-07, |
|
"logits/chosen": -1.0146872997283936, |
|
"logits/rejected": -0.9577957987785339, |
|
"logps/chosen": -322.76556396484375, |
|
"logps/rejected": -336.08636474609375, |
|
"loss": 0.0665, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.023015085607767105, |
|
"rewards/margins": 0.05666361376643181, |
|
"rewards/rejected": -0.0336485281586647, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.437741702288908e-07, |
|
"logits/chosen": -1.0026918649673462, |
|
"logits/rejected": -0.9933171272277832, |
|
"logps/chosen": -299.72308349609375, |
|
"logps/rejected": -311.6734924316406, |
|
"loss": 0.0794, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": 0.01335589587688446, |
|
"rewards/margins": 0.025558674708008766, |
|
"rewards/rejected": -0.012202778831124306, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.004177825768751e-07, |
|
"logits/chosen": -1.0369510650634766, |
|
"logits/rejected": -0.9392277002334595, |
|
"logps/chosen": -267.6405334472656, |
|
"logps/rejected": -298.1479797363281, |
|
"loss": 0.0757, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": 0.012007731013000011, |
|
"rewards/margins": 0.024681296199560165, |
|
"rewards/rejected": -0.012673566117882729, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.578611105280987e-07, |
|
"logits/chosen": -1.0083134174346924, |
|
"logits/rejected": -0.992781937122345, |
|
"logps/chosen": -316.5964660644531, |
|
"logps/rejected": -365.96661376953125, |
|
"loss": 0.0643, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.02335866168141365, |
|
"rewards/margins": 0.050595641136169434, |
|
"rewards/rejected": -0.027236973866820335, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.161254304097715e-07, |
|
"logits/chosen": -1.026871919631958, |
|
"logits/rejected": -0.9791032075881958, |
|
"logps/chosen": -277.67547607421875, |
|
"logps/rejected": -294.22369384765625, |
|
"loss": 0.0853, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": 0.02239377610385418, |
|
"rewards/margins": 0.026370327919721603, |
|
"rewards/rejected": -0.003976552281528711, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.752316080918934e-07, |
|
"logits/chosen": -0.942740797996521, |
|
"logits/rejected": -0.9156352877616882, |
|
"logps/chosen": -290.0126953125, |
|
"logps/rejected": -303.8479919433594, |
|
"loss": 0.0746, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.021792907267808914, |
|
"rewards/margins": 0.02630285918712616, |
|
"rewards/rejected": -0.004509954713284969, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_logits/chosen": -0.930611789226532, |
|
"eval_logits/rejected": -0.8943980932235718, |
|
"eval_logps/chosen": -373.7153015136719, |
|
"eval_logps/rejected": -385.1601257324219, |
|
"eval_loss": 0.03884938731789589, |
|
"eval_rewards/accuracies": 0.6050000190734863, |
|
"eval_rewards/chosen": 0.022589918226003647, |
|
"eval_rewards/margins": 0.04051463305950165, |
|
"eval_rewards/rejected": -0.017924712970852852, |
|
"eval_runtime": 539.5465, |
|
"eval_samples_per_second": 3.707, |
|
"eval_steps_per_second": 0.927, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.352000885553012e-07, |
|
"logits/chosen": -0.9893749356269836, |
|
"logits/rejected": -0.9994084239006042, |
|
"logps/chosen": -307.1074523925781, |
|
"logps/rejected": -315.70770263671875, |
|
"loss": 0.0767, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.016450155526399612, |
|
"rewards/margins": 0.02466857247054577, |
|
"rewards/rejected": -0.008218420669436455, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.960508856701464e-07, |
|
"logits/chosen": -1.0321322679519653, |
|
"logits/rejected": -0.9207181930541992, |
|
"logps/chosen": -291.80029296875, |
|
"logps/rejected": -298.3025817871094, |
|
"loss": 0.0945, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": 0.015555900521576405, |
|
"rewards/margins": 0.02910393476486206, |
|
"rewards/rejected": -0.013548034243285656, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.578035721899029e-07, |
|
"logits/chosen": -0.9535512924194336, |
|
"logits/rejected": -1.008029580116272, |
|
"logps/chosen": -324.25152587890625, |
|
"logps/rejected": -346.2152099609375, |
|
"loss": 0.0693, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": 0.012142010033130646, |
|
"rewards/margins": 0.03695748746395111, |
|
"rewards/rejected": -0.024815475568175316, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.204772699659126e-07, |
|
"logits/chosen": -1.0189663171768188, |
|
"logits/rejected": -1.051673173904419, |
|
"logps/chosen": -268.8255920410156, |
|
"logps/rejected": -264.9363708496094, |
|
"loss": 0.0811, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.022226419299840927, |
|
"rewards/margins": 0.02963913045823574, |
|
"rewards/rejected": -0.007412709295749664, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.840906403873648e-07, |
|
"logits/chosen": -0.9550544023513794, |
|
"logits/rejected": -0.9686701893806458, |
|
"logps/chosen": -311.6850280761719, |
|
"logps/rejected": -331.0824279785156, |
|
"loss": 0.0602, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 0.01663174293935299, |
|
"rewards/margins": 0.03210796043276787, |
|
"rewards/rejected": -0.015476214699447155, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.486618750514813e-07, |
|
"logits/chosen": -0.9893242716789246, |
|
"logits/rejected": -0.9823307991027832, |
|
"logps/chosen": -319.13421630859375, |
|
"logps/rejected": -335.4979553222656, |
|
"loss": 0.0904, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": 0.019062984734773636, |
|
"rewards/margins": 0.03206023946404457, |
|
"rewards/rejected": -0.012997254729270935, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.142086866685783e-07, |
|
"logits/chosen": -0.9724270701408386, |
|
"logits/rejected": -0.9491437673568726, |
|
"logps/chosen": -279.63824462890625, |
|
"logps/rejected": -299.930908203125, |
|
"loss": 0.0748, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": 0.01930154860019684, |
|
"rewards/margins": 0.022973302751779556, |
|
"rewards/rejected": -0.0036717529874294996, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.807483002065439e-07, |
|
"logits/chosen": -0.9794095754623413, |
|
"logits/rejected": -0.9791723489761353, |
|
"logps/chosen": -263.2690124511719, |
|
"logps/rejected": -264.7826232910156, |
|
"loss": 0.0906, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": 0.020124919712543488, |
|
"rewards/margins": 0.028638970106840134, |
|
"rewards/rejected": -0.008514048531651497, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.4829744427917153e-07, |
|
"logits/chosen": -1.0250556468963623, |
|
"logits/rejected": -0.9449254870414734, |
|
"logps/chosen": -262.35614013671875, |
|
"logps/rejected": -292.6833190917969, |
|
"loss": 0.0725, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.029375359416007996, |
|
"rewards/margins": 0.04200378805398941, |
|
"rewards/rejected": -0.012628423981368542, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.168723427826382e-07, |
|
"logits/chosen": -0.964804470539093, |
|
"logits/rejected": -0.9295471906661987, |
|
"logps/chosen": -272.17938232421875, |
|
"logps/rejected": -284.2236022949219, |
|
"loss": 0.0925, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": 0.01671653613448143, |
|
"rewards/margins": 0.0240671094506979, |
|
"rewards/rejected": -0.007350574247539043, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_logits/chosen": -0.949418306350708, |
|
"eval_logits/rejected": -0.9171380400657654, |
|
"eval_logps/chosen": -370.0339660644531, |
|
"eval_logps/rejected": -380.30718994140625, |
|
"eval_loss": 0.03874335065484047, |
|
"eval_rewards/accuracies": 0.6029999852180481, |
|
"eval_rewards/chosen": 0.02627129666507244, |
|
"eval_rewards/margins": 0.039343029260635376, |
|
"eval_rewards/rejected": -0.013071730732917786, |
|
"eval_runtime": 539.7909, |
|
"eval_samples_per_second": 3.705, |
|
"eval_steps_per_second": 0.926, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.864887067843251e-07, |
|
"logits/chosen": -0.9890697598457336, |
|
"logits/rejected": -0.8923286199569702, |
|
"logps/chosen": -310.97125244140625, |
|
"logps/rejected": -309.845703125, |
|
"loss": 0.0789, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.019753634929656982, |
|
"rewards/margins": 0.031969424337148666, |
|
"rewards/rejected": -0.01221578847616911, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.5716172666802637e-07, |
|
"logits/chosen": -1.0090614557266235, |
|
"logits/rejected": -0.9583064317703247, |
|
"logps/chosen": -266.8111267089844, |
|
"logps/rejected": -302.0957946777344, |
|
"loss": 0.0764, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.023147406056523323, |
|
"rewards/margins": 0.04169207811355591, |
|
"rewards/rejected": -0.018544670194387436, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.289060645394704e-07, |
|
"logits/chosen": -0.9466385841369629, |
|
"logits/rejected": -0.9388877749443054, |
|
"logps/chosen": -291.6334533691406, |
|
"logps/rejected": -310.3324279785156, |
|
"loss": 0.094, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.021957775577902794, |
|
"rewards/margins": 0.026331758126616478, |
|
"rewards/rejected": -0.0043739816173911095, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.0173584689596246e-07, |
|
"logits/chosen": -0.9820948839187622, |
|
"logits/rejected": -0.9631234407424927, |
|
"logps/chosen": -290.67987060546875, |
|
"logps/rejected": -281.71405029296875, |
|
"loss": 0.0816, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": 0.026712050661444664, |
|
"rewards/margins": 0.026149820536375046, |
|
"rewards/rejected": 0.0005622319877147675, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.756646575638025e-07, |
|
"logits/chosen": -0.9123773574829102, |
|
"logits/rejected": -0.9757976531982422, |
|
"logps/chosen": -270.1868896484375, |
|
"logps/rejected": -303.7170715332031, |
|
"loss": 0.0753, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": 0.025742124766111374, |
|
"rewards/margins": 0.034964997321367264, |
|
"rewards/rejected": -0.009222874417901039, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.507055309070111e-07, |
|
"logits/chosen": -0.9779159426689148, |
|
"logits/rejected": -0.9737545847892761, |
|
"logps/chosen": -285.8861083984375, |
|
"logps/rejected": -280.3493347167969, |
|
"loss": 0.0912, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": 0.021084221079945564, |
|
"rewards/margins": 0.029098382219672203, |
|
"rewards/rejected": -0.008014162071049213, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.2687094531076565e-07, |
|
"logits/chosen": -1.0015113353729248, |
|
"logits/rejected": -0.9118536114692688, |
|
"logps/chosen": -292.0755920410156, |
|
"logps/rejected": -336.7613830566406, |
|
"loss": 0.0668, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.023335173726081848, |
|
"rewards/margins": 0.042082760483026505, |
|
"rewards/rejected": -0.018747588619589806, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.0417281694279424e-07, |
|
"logits/chosen": -0.9744800329208374, |
|
"logits/rejected": -0.8867910504341125, |
|
"logps/chosen": -322.03643798828125, |
|
"logps/rejected": -347.53192138671875, |
|
"loss": 0.0651, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.016095371916890144, |
|
"rewards/margins": 0.03282975032925606, |
|
"rewards/rejected": -0.016734374687075615, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.8262249379585484e-07, |
|
"logits/chosen": -0.8901314735412598, |
|
"logits/rejected": -0.9079896807670593, |
|
"logps/chosen": -318.6395568847656, |
|
"logps/rejected": -306.5535583496094, |
|
"loss": 0.0696, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.02375376783311367, |
|
"rewards/margins": 0.02338986098766327, |
|
"rewards/rejected": 0.00036390620516613126, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.6223075001427667e-07, |
|
"logits/chosen": -0.9552518725395203, |
|
"logits/rejected": -0.9494439959526062, |
|
"logps/chosen": -281.0766906738281, |
|
"logps/rejected": -289.63055419921875, |
|
"loss": 0.0863, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": 0.010858943685889244, |
|
"rewards/margins": 0.015300577506422997, |
|
"rewards/rejected": -0.004441632889211178, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_logits/chosen": -0.9446871280670166, |
|
"eval_logits/rejected": -0.9120718240737915, |
|
"eval_logps/chosen": -369.4450378417969, |
|
"eval_logps/rejected": -379.5608215332031, |
|
"eval_loss": 0.038691576570272446, |
|
"eval_rewards/accuracies": 0.6054999828338623, |
|
"eval_rewards/chosen": 0.02686023712158203, |
|
"eval_rewards/margins": 0.03918563574552536, |
|
"eval_rewards/rejected": -0.01232539676129818, |
|
"eval_runtime": 539.9174, |
|
"eval_samples_per_second": 3.704, |
|
"eval_steps_per_second": 0.926, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.4300778050739317e-07, |
|
"logits/chosen": -0.9774508476257324, |
|
"logits/rejected": -0.9408830404281616, |
|
"logps/chosen": -298.72247314453125, |
|
"logps/rejected": -294.75408935546875, |
|
"loss": 0.0665, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": 0.01832416281104088, |
|
"rewards/margins": 0.014284146018326283, |
|
"rewards/rejected": 0.004040017258375883, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.2496319585257183e-07, |
|
"logits/chosen": -0.9892775416374207, |
|
"logits/rejected": -0.9716461300849915, |
|
"logps/chosen": -323.1170349121094, |
|
"logps/rejected": -303.7762451171875, |
|
"loss": 0.0854, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 0.02715294435620308, |
|
"rewards/margins": 0.03064989112317562, |
|
"rewards/rejected": -0.0034969463013112545, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.0810601749037669e-07, |
|
"logits/chosen": -0.9020091891288757, |
|
"logits/rejected": -0.9708755612373352, |
|
"logps/chosen": -289.08258056640625, |
|
"logps/rejected": -339.64703369140625, |
|
"loss": 0.0713, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": 0.023048024624586105, |
|
"rewards/margins": 0.04538671672344208, |
|
"rewards/rejected": -0.022338688373565674, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.244467321427585e-08, |
|
"logits/chosen": -1.0033928155899048, |
|
"logits/rejected": -1.0245609283447266, |
|
"logps/chosen": -276.730712890625, |
|
"logps/rejected": -303.7331237792969, |
|
"loss": 0.0785, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.033762019127607346, |
|
"rewards/margins": 0.05797597020864487, |
|
"rewards/rejected": -0.024213949218392372, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.798699295714002e-08, |
|
"logits/chosen": -0.9668358564376831, |
|
"logits/rejected": -0.9086298942565918, |
|
"logps/chosen": -320.6669006347656, |
|
"logps/rejected": -296.5704345703125, |
|
"loss": 0.0777, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.023420458659529686, |
|
"rewards/margins": 0.03309481590986252, |
|
"rewards/rejected": -0.009674356319010258, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 6.474020487664934e-08, |
|
"logits/chosen": -0.9561643600463867, |
|
"logits/rejected": -0.9422094225883484, |
|
"logps/chosen": -276.42156982421875, |
|
"logps/rejected": -291.9980163574219, |
|
"loss": 0.076, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.023049993440508842, |
|
"rewards/margins": 0.038113731890916824, |
|
"rewards/rejected": -0.01506374217569828, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.271093174155223e-08, |
|
"logits/chosen": -1.0463026762008667, |
|
"logits/rejected": -0.9269806742668152, |
|
"logps/chosen": -271.7001037597656, |
|
"logps/rejected": -273.11419677734375, |
|
"loss": 0.0773, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": 0.02771763503551483, |
|
"rewards/margins": 0.02654975652694702, |
|
"rewards/rejected": 0.0011678790906444192, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.190518762059587e-08, |
|
"logits/chosen": -0.9642139673233032, |
|
"logits/rejected": -0.9358107447624207, |
|
"logps/chosen": -301.32647705078125, |
|
"logps/rejected": -310.1565856933594, |
|
"loss": 0.0955, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": 0.019302543252706528, |
|
"rewards/margins": 0.034524787217378616, |
|
"rewards/rejected": -0.015222239308059216, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.232837487577384e-08, |
|
"logits/chosen": -0.9852960705757141, |
|
"logits/rejected": -0.9471799731254578, |
|
"logps/chosen": -306.68548583984375, |
|
"logps/rejected": -294.8326416015625, |
|
"loss": 0.0895, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": 0.018351960927248, |
|
"rewards/margins": 0.04016681760549545, |
|
"rewards/rejected": -0.0218148622661829, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.39852814614025e-08, |
|
"logits/chosen": -0.9789448976516724, |
|
"logits/rejected": -0.9438329935073853, |
|
"logps/chosen": -281.9150085449219, |
|
"logps/rejected": -267.4187316894531, |
|
"loss": 0.0904, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": 0.019616561010479927, |
|
"rewards/margins": 0.023770466446876526, |
|
"rewards/rejected": -0.004153906367719173, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_logits/chosen": -0.9535864591598511, |
|
"eval_logits/rejected": -0.9203009009361267, |
|
"eval_logps/chosen": -369.494384765625, |
|
"eval_logps/rejected": -379.5999755859375, |
|
"eval_loss": 0.03863796219229698, |
|
"eval_rewards/accuracies": 0.6044999957084656, |
|
"eval_rewards/chosen": 0.026810916140675545, |
|
"eval_rewards/margins": 0.03917544335126877, |
|
"eval_rewards/rejected": -0.012364527210593224, |
|
"eval_runtime": 539.9867, |
|
"eval_samples_per_second": 3.704, |
|
"eval_steps_per_second": 0.926, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.6880078530367716e-08, |
|
"logits/chosen": -0.9464886784553528, |
|
"logits/rejected": -0.9203370213508606, |
|
"logps/chosen": -291.42730712890625, |
|
"logps/rejected": -302.47357177734375, |
|
"loss": 0.0968, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.018558986485004425, |
|
"rewards/margins": 0.03005158342421055, |
|
"rewards/rejected": -0.011492597870528698, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.1016318348746058e-08, |
|
"logits/chosen": -0.9404022097587585, |
|
"logits/rejected": -0.9021556973457336, |
|
"logps/chosen": -266.0870666503906, |
|
"logps/rejected": -277.4389343261719, |
|
"loss": 0.1015, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.021859928965568542, |
|
"rewards/margins": 0.028949573636054993, |
|
"rewards/rejected": -0.007089647464454174, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.396932519840693e-09, |
|
"logits/chosen": -0.9894587397575378, |
|
"logits/rejected": -1.043691873550415, |
|
"logps/chosen": -284.725830078125, |
|
"logps/rejected": -302.0658874511719, |
|
"loss": 0.0885, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.01785745844244957, |
|
"rewards/margins": 0.0238940566778183, |
|
"rewards/rejected": -0.0060366010293364525, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.024230518515192e-09, |
|
"logits/chosen": -1.0197975635528564, |
|
"logits/rejected": -0.9348329305648804, |
|
"logps/chosen": -242.8181915283203, |
|
"logps/rejected": -304.18841552734375, |
|
"loss": 0.115, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.027777481824159622, |
|
"rewards/margins": 0.04096692427992821, |
|
"rewards/rejected": -0.013189440593123436, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 8.998985365679669e-10, |
|
"logits/chosen": -0.918280303478241, |
|
"logits/rejected": -0.9589518308639526, |
|
"logps/chosen": -293.98236083984375, |
|
"logps/rejected": -301.8656921386719, |
|
"loss": 0.0665, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": 0.025340059772133827, |
|
"rewards/margins": 0.033479273319244385, |
|
"rewards/rejected": -0.008139212615787983, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.499863971494598e-11, |
|
"logits/chosen": -0.9552096128463745, |
|
"logits/rejected": -0.8985775113105774, |
|
"logps/chosen": -317.04779052734375, |
|
"logps/rejected": -310.51031494140625, |
|
"loss": 0.0696, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.025858771055936813, |
|
"rewards/margins": 0.03246783837676048, |
|
"rewards/rejected": -0.006609070114791393, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1562, |
|
"total_flos": 0.0, |
|
"train_loss": 0.08419492204701015, |
|
"train_runtime": 22113.8812, |
|
"train_samples_per_second": 1.131, |
|
"train_steps_per_second": 0.071 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1562, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|