|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9994767137624281, |
|
"eval_steps": 100, |
|
"global_step": 955, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5.208333333333333e-09, |
|
"logits/chosen": -1.404180884361267, |
|
"logits/rejected": -1.4915521144866943, |
|
"logps/chosen": -253.50843811035156, |
|
"logps/rejected": -228.21987915039062, |
|
"loss": 0.6931, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1, |
|
"use_label": 6.0 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.208333333333333e-08, |
|
"logits/chosen": -1.3619701862335205, |
|
"logits/rejected": -1.3933377265930176, |
|
"logps/chosen": -394.7999267578125, |
|
"logps/rejected": -299.5929870605469, |
|
"loss": 0.6938, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.5694444179534912, |
|
"rewards/chosen": 0.004884083289653063, |
|
"rewards/margins": 0.009783001616597176, |
|
"rewards/rejected": -0.0048989187926054, |
|
"step": 10, |
|
"use_label": 46.0 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0416666666666667e-07, |
|
"logits/chosen": -1.3801854848861694, |
|
"logits/rejected": -1.3241381645202637, |
|
"logps/chosen": -280.59625244140625, |
|
"logps/rejected": -281.3047790527344, |
|
"loss": 0.6889, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.022532949224114418, |
|
"rewards/margins": 0.00945592112839222, |
|
"rewards/rejected": 0.013077028095722198, |
|
"step": 20, |
|
"use_label": 122.0 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.5624999999999999e-07, |
|
"logits/chosen": -1.4004117250442505, |
|
"logits/rejected": -1.3532276153564453, |
|
"logps/chosen": -354.6773376464844, |
|
"logps/rejected": -297.12548828125, |
|
"loss": 0.6742, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.09127137809991837, |
|
"rewards/margins": 0.05737986043095589, |
|
"rewards/rejected": 0.03389151394367218, |
|
"step": 30, |
|
"use_label": 202.0 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"logits/chosen": -1.375112771987915, |
|
"logits/rejected": -1.3534674644470215, |
|
"logps/chosen": -342.58099365234375, |
|
"logps/rejected": -301.4981384277344, |
|
"loss": 0.6653, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.1729733645915985, |
|
"rewards/margins": 0.04751574248075485, |
|
"rewards/rejected": 0.12545761466026306, |
|
"step": 40, |
|
"use_label": 282.0 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.604166666666667e-07, |
|
"logits/chosen": -1.3950073719024658, |
|
"logits/rejected": -1.3905388116836548, |
|
"logps/chosen": -305.8765563964844, |
|
"logps/rejected": -294.03155517578125, |
|
"loss": 0.6438, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.2843405604362488, |
|
"rewards/margins": 0.14093510806560516, |
|
"rewards/rejected": 0.14340545237064362, |
|
"step": 50, |
|
"use_label": 362.0 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -1.3388121128082275, |
|
"logits/rejected": -1.417626142501831, |
|
"logps/chosen": -326.7298583984375, |
|
"logps/rejected": -311.16656494140625, |
|
"loss": 0.6341, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.3555651009082794, |
|
"rewards/margins": 0.16317971050739288, |
|
"rewards/rejected": 0.19238540530204773, |
|
"step": 60, |
|
"use_label": 442.0 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.645833333333333e-07, |
|
"logits/chosen": -1.442932367324829, |
|
"logits/rejected": -1.4548231363296509, |
|
"logps/chosen": -356.62017822265625, |
|
"logps/rejected": -318.04931640625, |
|
"loss": 0.6155, |
|
"pred_label": 1.7999999523162842, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.5572141408920288, |
|
"rewards/margins": 0.2931746542453766, |
|
"rewards/rejected": 0.26403939723968506, |
|
"step": 70, |
|
"use_label": 520.2000122070312 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -1.2479106187820435, |
|
"logits/rejected": -1.2282390594482422, |
|
"logps/chosen": -359.615478515625, |
|
"logps/rejected": -299.5457458496094, |
|
"loss": 0.579, |
|
"pred_label": 6.349999904632568, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.5866491198539734, |
|
"rewards/margins": 0.5094068050384521, |
|
"rewards/rejected": 0.07724229991436005, |
|
"step": 80, |
|
"use_label": 595.6500244140625 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.6874999999999996e-07, |
|
"logits/chosen": -1.2900737524032593, |
|
"logits/rejected": -1.325388789176941, |
|
"logps/chosen": -360.9581298828125, |
|
"logps/rejected": -319.90625, |
|
"loss": 0.5944, |
|
"pred_label": 13.550000190734863, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.7376635074615479, |
|
"rewards/margins": 0.3891093134880066, |
|
"rewards/rejected": 0.34855419397354126, |
|
"step": 90, |
|
"use_label": 668.4500122070312 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.976717112922002e-07, |
|
"logits/chosen": -1.3841612339019775, |
|
"logits/rejected": -1.4763787984848022, |
|
"logps/chosen": -366.4154052734375, |
|
"logps/rejected": -358.1784362792969, |
|
"loss": 0.5918, |
|
"pred_label": 19.350000381469727, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.8709436655044556, |
|
"rewards/margins": 0.47374382615089417, |
|
"rewards/rejected": 0.39719995856285095, |
|
"step": 100, |
|
"use_label": 742.6500244140625 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.918509895227007e-07, |
|
"logits/chosen": -1.373608946800232, |
|
"logits/rejected": -1.4505846500396729, |
|
"logps/chosen": -306.3442077636719, |
|
"logps/rejected": -289.1839904785156, |
|
"loss": 0.5705, |
|
"pred_label": 29.049999237060547, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.7434765100479126, |
|
"rewards/margins": 0.5097277760505676, |
|
"rewards/rejected": 0.23374874889850616, |
|
"step": 110, |
|
"use_label": 812.9500122070312 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.860302677532014e-07, |
|
"logits/chosen": -1.3047031164169312, |
|
"logits/rejected": -1.317251205444336, |
|
"logps/chosen": -324.14703369140625, |
|
"logps/rejected": -270.2970275878906, |
|
"loss": 0.5547, |
|
"pred_label": 40.0, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.7135593891143799, |
|
"rewards/margins": 0.6617862582206726, |
|
"rewards/rejected": 0.05177304893732071, |
|
"step": 120, |
|
"use_label": 882.0 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.802095459837019e-07, |
|
"logits/chosen": -1.421308159828186, |
|
"logits/rejected": -1.3982212543487549, |
|
"logps/chosen": -381.67926025390625, |
|
"logps/rejected": -307.33258056640625, |
|
"loss": 0.5737, |
|
"pred_label": 47.150001525878906, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 1.0430835485458374, |
|
"rewards/margins": 0.5517104268074036, |
|
"rewards/rejected": 0.49137312173843384, |
|
"step": 130, |
|
"use_label": 954.8499755859375 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.743888242142026e-07, |
|
"logits/chosen": -1.477430820465088, |
|
"logits/rejected": -1.4979420900344849, |
|
"logps/chosen": -278.7945861816406, |
|
"logps/rejected": -253.20358276367188, |
|
"loss": 0.5539, |
|
"pred_label": 52.849998474121094, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.726542055606842, |
|
"rewards/margins": 0.5279777646064758, |
|
"rewards/rejected": 0.19856423139572144, |
|
"step": 140, |
|
"use_label": 1029.1500244140625 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.685681024447031e-07, |
|
"logits/chosen": -1.4019322395324707, |
|
"logits/rejected": -1.4297659397125244, |
|
"logps/chosen": -336.5803527832031, |
|
"logps/rejected": -333.4914245605469, |
|
"loss": 0.5609, |
|
"pred_label": 60.45000076293945, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.8561170697212219, |
|
"rewards/margins": 0.3941256105899811, |
|
"rewards/rejected": 0.46199145913124084, |
|
"step": 150, |
|
"use_label": 1101.550048828125 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.627473806752037e-07, |
|
"logits/chosen": -1.3426318168640137, |
|
"logits/rejected": -1.3782684803009033, |
|
"logps/chosen": -347.442626953125, |
|
"logps/rejected": -337.1346130371094, |
|
"loss": 0.5358, |
|
"pred_label": 71.05000305175781, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 1.0326350927352905, |
|
"rewards/margins": 0.5863274335861206, |
|
"rewards/rejected": 0.4463076591491699, |
|
"step": 160, |
|
"use_label": 1170.949951171875 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.5692665890570433e-07, |
|
"logits/chosen": -1.3970049619674683, |
|
"logits/rejected": -1.4572138786315918, |
|
"logps/chosen": -335.081787109375, |
|
"logps/rejected": -267.0716552734375, |
|
"loss": 0.5303, |
|
"pred_label": 80.0, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.8701731562614441, |
|
"rewards/margins": 0.6228026151657104, |
|
"rewards/rejected": 0.24737051129341125, |
|
"step": 170, |
|
"use_label": 1242.0 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.5110593713620486e-07, |
|
"logits/chosen": -1.3490818738937378, |
|
"logits/rejected": -1.3612277507781982, |
|
"logps/chosen": -265.89105224609375, |
|
"logps/rejected": -296.7646484375, |
|
"loss": 0.5288, |
|
"pred_label": 92.75, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.7616567611694336, |
|
"rewards/margins": 0.6177178025245667, |
|
"rewards/rejected": 0.14393898844718933, |
|
"step": 180, |
|
"use_label": 1309.25 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.4528521536670544e-07, |
|
"logits/chosen": -1.4309965372085571, |
|
"logits/rejected": -1.4547052383422852, |
|
"logps/chosen": -323.339111328125, |
|
"logps/rejected": -273.64691162109375, |
|
"loss": 0.5208, |
|
"pred_label": 106.19999694824219, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 1.0953993797302246, |
|
"rewards/margins": 0.8105791211128235, |
|
"rewards/rejected": 0.2848203480243683, |
|
"step": 190, |
|
"use_label": 1375.800048828125 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.3946449359720607e-07, |
|
"logits/chosen": -1.4408454895019531, |
|
"logits/rejected": -1.4733045101165771, |
|
"logps/chosen": -323.10467529296875, |
|
"logps/rejected": -352.9010009765625, |
|
"loss": 0.4934, |
|
"pred_label": 116.3499984741211, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 1.2384220361709595, |
|
"rewards/margins": 0.7947490811347961, |
|
"rewards/rejected": 0.4436728358268738, |
|
"step": 200, |
|
"use_label": 1445.6500244140625 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.336437718277066e-07, |
|
"logits/chosen": -1.373170256614685, |
|
"logits/rejected": -1.3570789098739624, |
|
"logps/chosen": -382.3429870605469, |
|
"logps/rejected": -266.7330627441406, |
|
"loss": 0.5348, |
|
"pred_label": 131.64999389648438, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 1.3385562896728516, |
|
"rewards/margins": 1.1700799465179443, |
|
"rewards/rejected": 0.16847634315490723, |
|
"step": 210, |
|
"use_label": 1510.3499755859375 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.278230500582072e-07, |
|
"logits/chosen": -1.3580390214920044, |
|
"logits/rejected": -1.386907935142517, |
|
"logps/chosen": -335.6164245605469, |
|
"logps/rejected": -291.1451416015625, |
|
"loss": 0.5062, |
|
"pred_label": 152.14999389648438, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 1.2729132175445557, |
|
"rewards/margins": 1.0367872714996338, |
|
"rewards/rejected": 0.23612599074840546, |
|
"step": 220, |
|
"use_label": 1569.8499755859375 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.220023282887078e-07, |
|
"logits/chosen": -1.415291428565979, |
|
"logits/rejected": -1.4271855354309082, |
|
"logps/chosen": -337.0808410644531, |
|
"logps/rejected": -293.56695556640625, |
|
"loss": 0.4849, |
|
"pred_label": 175.89999389648438, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 1.054723858833313, |
|
"rewards/margins": 0.9728155136108398, |
|
"rewards/rejected": 0.08190834522247314, |
|
"step": 230, |
|
"use_label": 1626.0999755859375 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.1618160651920834e-07, |
|
"logits/chosen": -1.456987738609314, |
|
"logits/rejected": -1.487733244895935, |
|
"logps/chosen": -301.118896484375, |
|
"logps/rejected": -239.19088745117188, |
|
"loss": 0.5087, |
|
"pred_label": 191.25, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 1.0468814373016357, |
|
"rewards/margins": 0.7403112649917603, |
|
"rewards/rejected": 0.30657026171684265, |
|
"step": 240, |
|
"use_label": 1690.75 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.103608847497089e-07, |
|
"logits/chosen": -1.3532272577285767, |
|
"logits/rejected": -1.3804259300231934, |
|
"logps/chosen": -317.42230224609375, |
|
"logps/rejected": -329.9377136230469, |
|
"loss": 0.5253, |
|
"pred_label": 205.25, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 1.2708420753479004, |
|
"rewards/margins": 0.9137954711914062, |
|
"rewards/rejected": 0.3570464253425598, |
|
"step": 250, |
|
"use_label": 1756.75 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.0454016298020956e-07, |
|
"logits/chosen": -1.3812406063079834, |
|
"logits/rejected": -1.4057163000106812, |
|
"logps/chosen": -331.4401550292969, |
|
"logps/rejected": -308.66558837890625, |
|
"loss": 0.5155, |
|
"pred_label": 220.64999389648438, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.9700489044189453, |
|
"rewards/margins": 0.6729909181594849, |
|
"rewards/rejected": 0.29705801606178284, |
|
"step": 260, |
|
"use_label": 1821.3499755859375 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.987194412107101e-07, |
|
"logits/chosen": -1.4664005041122437, |
|
"logits/rejected": -1.4549050331115723, |
|
"logps/chosen": -349.93994140625, |
|
"logps/rejected": -278.48590087890625, |
|
"loss": 0.4778, |
|
"pred_label": 240.25, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 1.315645456314087, |
|
"rewards/margins": 1.0870709419250488, |
|
"rewards/rejected": 0.22857451438903809, |
|
"step": 270, |
|
"use_label": 1881.75 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.9289871944121066e-07, |
|
"logits/chosen": -1.4699549674987793, |
|
"logits/rejected": -1.417506456375122, |
|
"logps/chosen": -373.6863708496094, |
|
"logps/rejected": -331.5750427246094, |
|
"loss": 0.4764, |
|
"pred_label": 259.95001220703125, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 1.3312596082687378, |
|
"rewards/margins": 0.79558265209198, |
|
"rewards/rejected": 0.5356770753860474, |
|
"step": 280, |
|
"use_label": 1942.050048828125 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.870779976717113e-07, |
|
"logits/chosen": -1.4723706245422363, |
|
"logits/rejected": -1.4665457010269165, |
|
"logps/chosen": -311.8465881347656, |
|
"logps/rejected": -274.10888671875, |
|
"loss": 0.4979, |
|
"pred_label": 278.3500061035156, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 1.0801382064819336, |
|
"rewards/margins": 0.6504096388816833, |
|
"rewards/rejected": 0.42972850799560547, |
|
"step": 290, |
|
"use_label": 2003.6500244140625 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.812572759022118e-07, |
|
"logits/chosen": -1.4164968729019165, |
|
"logits/rejected": -1.4505560398101807, |
|
"logps/chosen": -274.9853210449219, |
|
"logps/rejected": -272.59814453125, |
|
"loss": 0.4576, |
|
"pred_label": 297.5, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 1.4018886089324951, |
|
"rewards/margins": 1.1822335720062256, |
|
"rewards/rejected": 0.2196551263332367, |
|
"step": 300, |
|
"use_label": 2064.5 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.754365541327124e-07, |
|
"logits/chosen": -1.3352575302124023, |
|
"logits/rejected": -1.4178216457366943, |
|
"logps/chosen": -303.4643249511719, |
|
"logps/rejected": -325.78961181640625, |
|
"loss": 0.4554, |
|
"pred_label": 323.8999938964844, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 1.0443353652954102, |
|
"rewards/margins": 1.2372277975082397, |
|
"rewards/rejected": -0.19289246201515198, |
|
"step": 310, |
|
"use_label": 2118.10009765625 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.6961583236321304e-07, |
|
"logits/chosen": -1.4550950527191162, |
|
"logits/rejected": -1.436576247215271, |
|
"logps/chosen": -340.7930908203125, |
|
"logps/rejected": -323.78985595703125, |
|
"loss": 0.4729, |
|
"pred_label": 351.29998779296875, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 1.1371341943740845, |
|
"rewards/margins": 0.8774749636650085, |
|
"rewards/rejected": 0.259659081697464, |
|
"step": 320, |
|
"use_label": 2170.699951171875 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.637951105937136e-07, |
|
"logits/chosen": -1.428165078163147, |
|
"logits/rejected": -1.4091517925262451, |
|
"logps/chosen": -317.4041748046875, |
|
"logps/rejected": -238.2615509033203, |
|
"loss": 0.4642, |
|
"pred_label": 379.04998779296875, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 1.1726207733154297, |
|
"rewards/margins": 1.0814019441604614, |
|
"rewards/rejected": 0.0912187248468399, |
|
"step": 330, |
|
"use_label": 2222.949951171875 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.579743888242142e-07, |
|
"logits/chosen": -1.4679944515228271, |
|
"logits/rejected": -1.5165075063705444, |
|
"logps/chosen": -273.54071044921875, |
|
"logps/rejected": -243.1127471923828, |
|
"loss": 0.4859, |
|
"pred_label": 401.95001220703125, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 1.1446250677108765, |
|
"rewards/margins": 0.7705689668655396, |
|
"rewards/rejected": 0.3740561306476593, |
|
"step": 340, |
|
"use_label": 2280.050048828125 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.521536670547148e-07, |
|
"logits/chosen": -1.2454713582992554, |
|
"logits/rejected": -1.321508765220642, |
|
"logps/chosen": -286.65777587890625, |
|
"logps/rejected": -306.366455078125, |
|
"loss": 0.4577, |
|
"pred_label": 428.45001220703125, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 1.181875467300415, |
|
"rewards/margins": 1.0571717023849487, |
|
"rewards/rejected": 0.12470376491546631, |
|
"step": 350, |
|
"use_label": 2333.550048828125 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.4633294528521536e-07, |
|
"logits/chosen": -1.4310450553894043, |
|
"logits/rejected": -1.4215561151504517, |
|
"logps/chosen": -347.1667785644531, |
|
"logps/rejected": -319.175537109375, |
|
"loss": 0.462, |
|
"pred_label": 454.75, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 1.2432093620300293, |
|
"rewards/margins": 0.7565422058105469, |
|
"rewards/rejected": 0.4866672456264496, |
|
"step": 360, |
|
"use_label": 2387.25 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.4051222351571594e-07, |
|
"logits/chosen": -1.3713960647583008, |
|
"logits/rejected": -1.4076581001281738, |
|
"logps/chosen": -289.0054626464844, |
|
"logps/rejected": -323.2015686035156, |
|
"loss": 0.4783, |
|
"pred_label": 474.5, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.9605358839035034, |
|
"rewards/margins": 0.8715503811836243, |
|
"rewards/rejected": 0.08898548781871796, |
|
"step": 370, |
|
"use_label": 2447.5 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.346915017462165e-07, |
|
"logits/chosen": -1.3850533962249756, |
|
"logits/rejected": -1.4161368608474731, |
|
"logps/chosen": -363.1261291503906, |
|
"logps/rejected": -332.7879943847656, |
|
"loss": 0.4392, |
|
"pred_label": 492.8999938964844, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 1.097650408744812, |
|
"rewards/margins": 0.8471637964248657, |
|
"rewards/rejected": 0.2504865527153015, |
|
"step": 380, |
|
"use_label": 2509.10009765625 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.288707799767171e-07, |
|
"logits/chosen": -1.389650821685791, |
|
"logits/rejected": -1.339377760887146, |
|
"logps/chosen": -286.0871276855469, |
|
"logps/rejected": -249.79629516601562, |
|
"loss": 0.4179, |
|
"pred_label": 517.4000244140625, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 1.0542852878570557, |
|
"rewards/margins": 1.5020934343338013, |
|
"rewards/rejected": -0.44780832529067993, |
|
"step": 390, |
|
"use_label": 2564.60009765625 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.230500582072177e-07, |
|
"logits/chosen": -1.4820091724395752, |
|
"logits/rejected": -1.4391555786132812, |
|
"logps/chosen": -380.75665283203125, |
|
"logps/rejected": -305.62017822265625, |
|
"loss": 0.4127, |
|
"pred_label": 540.4500122070312, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 1.1569273471832275, |
|
"rewards/margins": 1.0463058948516846, |
|
"rewards/rejected": 0.11062141507863998, |
|
"step": 400, |
|
"use_label": 2621.550048828125 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.1722933643771827e-07, |
|
"logits/chosen": -1.3879801034927368, |
|
"logits/rejected": -1.4610755443572998, |
|
"logps/chosen": -391.5262756347656, |
|
"logps/rejected": -361.547607421875, |
|
"loss": 0.4672, |
|
"pred_label": 569.0499877929688, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 1.5551117658615112, |
|
"rewards/margins": 1.2884727716445923, |
|
"rewards/rejected": 0.2666390836238861, |
|
"step": 410, |
|
"use_label": 2672.949951171875 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.1140861466821885e-07, |
|
"logits/chosen": -1.3801909685134888, |
|
"logits/rejected": -1.4261410236358643, |
|
"logps/chosen": -263.9217224121094, |
|
"logps/rejected": -260.2132568359375, |
|
"loss": 0.4391, |
|
"pred_label": 595.8499755859375, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.9185078740119934, |
|
"rewards/margins": 1.1499378681182861, |
|
"rewards/rejected": -0.2314300537109375, |
|
"step": 420, |
|
"use_label": 2726.14990234375 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.0558789289871943e-07, |
|
"logits/chosen": -1.4430896043777466, |
|
"logits/rejected": -1.4777699708938599, |
|
"logps/chosen": -270.7845458984375, |
|
"logps/rejected": -265.51568603515625, |
|
"loss": 0.4777, |
|
"pred_label": 626.7999877929688, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 1.1179291009902954, |
|
"rewards/margins": 1.0040278434753418, |
|
"rewards/rejected": 0.1139012798666954, |
|
"step": 430, |
|
"use_label": 2775.199951171875 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.9976717112922e-07, |
|
"logits/chosen": -1.4349843263626099, |
|
"logits/rejected": -1.4729284048080444, |
|
"logps/chosen": -311.2239990234375, |
|
"logps/rejected": -299.1993103027344, |
|
"loss": 0.4809, |
|
"pred_label": 650.2999877929688, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 1.2415423393249512, |
|
"rewards/margins": 0.8923840522766113, |
|
"rewards/rejected": 0.34915822744369507, |
|
"step": 440, |
|
"use_label": 2831.699951171875 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.939464493597206e-07, |
|
"logits/chosen": -1.2597442865371704, |
|
"logits/rejected": -1.30291748046875, |
|
"logps/chosen": -256.5679626464844, |
|
"logps/rejected": -269.8310852050781, |
|
"loss": 0.4691, |
|
"pred_label": 675.0, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 1.0971431732177734, |
|
"rewards/margins": 0.8100937008857727, |
|
"rewards/rejected": 0.2870495319366455, |
|
"step": 450, |
|
"use_label": 2887.0 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.8812572759022117e-07, |
|
"logits/chosen": -1.4251500368118286, |
|
"logits/rejected": -1.4686391353607178, |
|
"logps/chosen": -337.9930725097656, |
|
"logps/rejected": -332.3681945800781, |
|
"loss": 0.4543, |
|
"pred_label": 701.6500244140625, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 1.3275657892227173, |
|
"rewards/margins": 1.003480076789856, |
|
"rewards/rejected": 0.324085533618927, |
|
"step": 460, |
|
"use_label": 2940.35009765625 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.8230500582072175e-07, |
|
"logits/chosen": -1.353682041168213, |
|
"logits/rejected": -1.3279917240142822, |
|
"logps/chosen": -335.19329833984375, |
|
"logps/rejected": -268.65606689453125, |
|
"loss": 0.4462, |
|
"pred_label": 728.75, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 1.3157131671905518, |
|
"rewards/margins": 1.3176007270812988, |
|
"rewards/rejected": -0.0018875360256060958, |
|
"step": 470, |
|
"use_label": 2993.25 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.7648428405122233e-07, |
|
"logits/chosen": -1.3808891773223877, |
|
"logits/rejected": -1.3530418872833252, |
|
"logps/chosen": -266.064453125, |
|
"logps/rejected": -263.7291564941406, |
|
"loss": 0.4507, |
|
"pred_label": 754.5, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.7782207727432251, |
|
"rewards/margins": 0.7220892906188965, |
|
"rewards/rejected": 0.05613153427839279, |
|
"step": 480, |
|
"use_label": 3047.5 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.706635622817229e-07, |
|
"logits/chosen": -1.3707979917526245, |
|
"logits/rejected": -1.399265170097351, |
|
"logps/chosen": -318.2115783691406, |
|
"logps/rejected": -310.44512939453125, |
|
"loss": 0.4379, |
|
"pred_label": 780.5999755859375, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 1.2304394245147705, |
|
"rewards/margins": 1.0040814876556396, |
|
"rewards/rejected": 0.22635769844055176, |
|
"step": 490, |
|
"use_label": 3101.39990234375 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.648428405122235e-07, |
|
"logits/chosen": -1.3638988733291626, |
|
"logits/rejected": -1.3315374851226807, |
|
"logps/chosen": -320.8160400390625, |
|
"logps/rejected": -311.55328369140625, |
|
"loss": 0.4435, |
|
"pred_label": 812.4000244140625, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 1.2452471256256104, |
|
"rewards/margins": 1.6832224130630493, |
|
"rewards/rejected": -0.4379752576351166, |
|
"step": 500, |
|
"use_label": 3149.60009765625 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.590221187427241e-07, |
|
"logits/chosen": -1.470384120941162, |
|
"logits/rejected": -1.5311263799667358, |
|
"logps/chosen": -307.1033935546875, |
|
"logps/rejected": -274.86114501953125, |
|
"loss": 0.4278, |
|
"pred_label": 838.0499877929688, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.8434027433395386, |
|
"rewards/margins": 1.0462173223495483, |
|
"rewards/rejected": -0.20281438529491425, |
|
"step": 510, |
|
"use_label": 3203.949951171875 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5320139697322466e-07, |
|
"logits/chosen": -1.3783634901046753, |
|
"logits/rejected": -1.3840962648391724, |
|
"logps/chosen": -316.1509704589844, |
|
"logps/rejected": -297.71527099609375, |
|
"loss": 0.4795, |
|
"pred_label": 861.8499755859375, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 1.3357948064804077, |
|
"rewards/margins": 1.1968021392822266, |
|
"rewards/rejected": 0.13899262249469757, |
|
"step": 520, |
|
"use_label": 3260.14990234375 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.4738067520372524e-07, |
|
"logits/chosen": -1.3981616497039795, |
|
"logits/rejected": -1.3489913940429688, |
|
"logps/chosen": -341.50408935546875, |
|
"logps/rejected": -313.69134521484375, |
|
"loss": 0.4405, |
|
"pred_label": 883.5499877929688, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 1.3740103244781494, |
|
"rewards/margins": 1.0623902082443237, |
|
"rewards/rejected": 0.31162017583847046, |
|
"step": 530, |
|
"use_label": 3318.449951171875 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.415599534342258e-07, |
|
"logits/chosen": -1.3855282068252563, |
|
"logits/rejected": -1.3808072805404663, |
|
"logps/chosen": -341.39154052734375, |
|
"logps/rejected": -329.89276123046875, |
|
"loss": 0.4216, |
|
"pred_label": 915.0, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 1.546844482421875, |
|
"rewards/margins": 1.5073612928390503, |
|
"rewards/rejected": 0.039483144879341125, |
|
"step": 540, |
|
"use_label": 3367.0 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.3573923166472642e-07, |
|
"logits/chosen": -1.411578893661499, |
|
"logits/rejected": -1.4331611394882202, |
|
"logps/chosen": -319.5863952636719, |
|
"logps/rejected": -259.39813232421875, |
|
"loss": 0.4388, |
|
"pred_label": 943.7999877929688, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 1.2579312324523926, |
|
"rewards/margins": 0.9186943769454956, |
|
"rewards/rejected": 0.3392367959022522, |
|
"step": 550, |
|
"use_label": 3418.199951171875 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.2991850989522698e-07, |
|
"logits/chosen": -1.4499213695526123, |
|
"logits/rejected": -1.4626659154891968, |
|
"logps/chosen": -320.32757568359375, |
|
"logps/rejected": -273.2851867675781, |
|
"loss": 0.4447, |
|
"pred_label": 967.6500244140625, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 1.5723648071289062, |
|
"rewards/margins": 1.1994202136993408, |
|
"rewards/rejected": 0.37294459342956543, |
|
"step": 560, |
|
"use_label": 3474.35009765625 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.2409778812572759e-07, |
|
"logits/chosen": -1.4085966348648071, |
|
"logits/rejected": -1.4106355905532837, |
|
"logps/chosen": -275.1546325683594, |
|
"logps/rejected": -290.6806640625, |
|
"loss": 0.452, |
|
"pred_label": 994.75, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 1.4340472221374512, |
|
"rewards/margins": 1.0139697790145874, |
|
"rewards/rejected": 0.42007726430892944, |
|
"step": 570, |
|
"use_label": 3527.25 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.1827706635622817e-07, |
|
"logits/chosen": -1.4144864082336426, |
|
"logits/rejected": -1.3897688388824463, |
|
"logps/chosen": -291.3064880371094, |
|
"logps/rejected": -258.45001220703125, |
|
"loss": 0.4618, |
|
"pred_label": 1023.5499877929688, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 1.6620935201644897, |
|
"rewards/margins": 1.2861305475234985, |
|
"rewards/rejected": 0.375963032245636, |
|
"step": 580, |
|
"use_label": 3578.449951171875 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.1245634458672875e-07, |
|
"logits/chosen": -1.3765242099761963, |
|
"logits/rejected": -1.36930251121521, |
|
"logps/chosen": -277.45513916015625, |
|
"logps/rejected": -277.4739685058594, |
|
"loss": 0.4351, |
|
"pred_label": 1045.0, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.9612810015678406, |
|
"rewards/margins": 0.7380627393722534, |
|
"rewards/rejected": 0.22321827709674835, |
|
"step": 590, |
|
"use_label": 3637.0 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.0663562281722933e-07, |
|
"logits/chosen": -1.3574728965759277, |
|
"logits/rejected": -1.3883976936340332, |
|
"logps/chosen": -383.3297424316406, |
|
"logps/rejected": -419.20562744140625, |
|
"loss": 0.4241, |
|
"pred_label": 1069.0999755859375, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 1.8888603448867798, |
|
"rewards/margins": 1.7262849807739258, |
|
"rewards/rejected": 0.16257524490356445, |
|
"step": 600, |
|
"use_label": 3692.89990234375 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.008149010477299e-07, |
|
"logits/chosen": -1.4299789667129517, |
|
"logits/rejected": -1.4499460458755493, |
|
"logps/chosen": -298.21856689453125, |
|
"logps/rejected": -250.89303588867188, |
|
"loss": 0.3996, |
|
"pred_label": 1102.800048828125, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 1.500954508781433, |
|
"rewards/margins": 1.2322769165039062, |
|
"rewards/rejected": 0.2686777114868164, |
|
"step": 610, |
|
"use_label": 3739.199951171875 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.949941792782305e-07, |
|
"logits/chosen": -1.409200668334961, |
|
"logits/rejected": -1.4022369384765625, |
|
"logps/chosen": -324.9916076660156, |
|
"logps/rejected": -291.18145751953125, |
|
"loss": 0.3997, |
|
"pred_label": 1130.0999755859375, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 1.2097212076187134, |
|
"rewards/margins": 1.1757129430770874, |
|
"rewards/rejected": 0.03400828689336777, |
|
"step": 620, |
|
"use_label": 3791.89990234375 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.8917345750873107e-07, |
|
"logits/chosen": -1.4302200078964233, |
|
"logits/rejected": -1.3909845352172852, |
|
"logps/chosen": -304.67205810546875, |
|
"logps/rejected": -302.0281066894531, |
|
"loss": 0.435, |
|
"pred_label": 1159.6500244140625, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 1.0896503925323486, |
|
"rewards/margins": 1.4318714141845703, |
|
"rewards/rejected": -0.34222090244293213, |
|
"step": 630, |
|
"use_label": 3842.35009765625 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.8335273573923165e-07, |
|
"logits/chosen": -1.3809505701065063, |
|
"logits/rejected": -1.40725576877594, |
|
"logps/chosen": -292.699951171875, |
|
"logps/rejected": -259.057861328125, |
|
"loss": 0.412, |
|
"pred_label": 1190.0999755859375, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 1.1348669528961182, |
|
"rewards/margins": 1.3778162002563477, |
|
"rewards/rejected": -0.2429492473602295, |
|
"step": 640, |
|
"use_label": 3891.89990234375 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.7753201396973226e-07, |
|
"logits/chosen": -1.4333328008651733, |
|
"logits/rejected": -1.4184446334838867, |
|
"logps/chosen": -347.40069580078125, |
|
"logps/rejected": -302.5298156738281, |
|
"loss": 0.424, |
|
"pred_label": 1222.949951171875, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 1.7120205163955688, |
|
"rewards/margins": 1.4448004961013794, |
|
"rewards/rejected": 0.26721999049186707, |
|
"step": 650, |
|
"use_label": 3939.050048828125 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.7171129220023281e-07, |
|
"logits/chosen": -1.441620945930481, |
|
"logits/rejected": -1.4552855491638184, |
|
"logps/chosen": -370.9482727050781, |
|
"logps/rejected": -276.6924133300781, |
|
"loss": 0.4167, |
|
"pred_label": 1254.8499755859375, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 1.308371901512146, |
|
"rewards/margins": 1.105975866317749, |
|
"rewards/rejected": 0.20239587128162384, |
|
"step": 660, |
|
"use_label": 3987.14990234375 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.658905704307334e-07, |
|
"logits/chosen": -1.3389856815338135, |
|
"logits/rejected": -1.4132729768753052, |
|
"logps/chosen": -337.841796875, |
|
"logps/rejected": -343.3721923828125, |
|
"loss": 0.4335, |
|
"pred_label": 1284.449951171875, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 1.2429578304290771, |
|
"rewards/margins": 1.373488187789917, |
|
"rewards/rejected": -0.13053035736083984, |
|
"step": 670, |
|
"use_label": 4037.550048828125 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.60069848661234e-07, |
|
"logits/chosen": -1.4116053581237793, |
|
"logits/rejected": -1.3834308385849, |
|
"logps/chosen": -286.65435791015625, |
|
"logps/rejected": -264.6783752441406, |
|
"loss": 0.4426, |
|
"pred_label": 1311.800048828125, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 1.0291013717651367, |
|
"rewards/margins": 0.6306339502334595, |
|
"rewards/rejected": 0.398467481136322, |
|
"step": 680, |
|
"use_label": 4090.199951171875 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.5424912689173456e-07, |
|
"logits/chosen": -1.3959333896636963, |
|
"logits/rejected": -1.3411847352981567, |
|
"logps/chosen": -257.6075744628906, |
|
"logps/rejected": -232.0232391357422, |
|
"loss": 0.4287, |
|
"pred_label": 1343.199951171875, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.9776744842529297, |
|
"rewards/margins": 1.2699415683746338, |
|
"rewards/rejected": -0.2922670245170593, |
|
"step": 690, |
|
"use_label": 4138.7998046875 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.4842840512223514e-07, |
|
"logits/chosen": -1.339285135269165, |
|
"logits/rejected": -1.3700721263885498, |
|
"logps/chosen": -287.5785217285156, |
|
"logps/rejected": -265.7539978027344, |
|
"loss": 0.4009, |
|
"pred_label": 1377.449951171875, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.8038662672042847, |
|
"rewards/margins": 1.3565984964370728, |
|
"rewards/rejected": -0.552731990814209, |
|
"step": 700, |
|
"use_label": 4184.5498046875 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.4260768335273574e-07, |
|
"logits/chosen": -1.3346189260482788, |
|
"logits/rejected": -1.3581318855285645, |
|
"logps/chosen": -307.48382568359375, |
|
"logps/rejected": -266.8608703613281, |
|
"loss": 0.4291, |
|
"pred_label": 1405.949951171875, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 1.1296558380126953, |
|
"rewards/margins": 1.2326912879943848, |
|
"rewards/rejected": -0.1030355915427208, |
|
"step": 710, |
|
"use_label": 4236.0498046875 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.3678696158323632e-07, |
|
"logits/chosen": -1.4102370738983154, |
|
"logits/rejected": -1.434556484222412, |
|
"logps/chosen": -361.06298828125, |
|
"logps/rejected": -317.8431091308594, |
|
"loss": 0.4146, |
|
"pred_label": 1440.3499755859375, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 1.1569219827651978, |
|
"rewards/margins": 1.3604518175125122, |
|
"rewards/rejected": -0.2035297155380249, |
|
"step": 720, |
|
"use_label": 4281.64990234375 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.3096623981373688e-07, |
|
"logits/chosen": -1.4674919843673706, |
|
"logits/rejected": -1.4597179889678955, |
|
"logps/chosen": -350.6000671386719, |
|
"logps/rejected": -272.87091064453125, |
|
"loss": 0.4332, |
|
"pred_label": 1467.0999755859375, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 1.227412462234497, |
|
"rewards/margins": 1.0035221576690674, |
|
"rewards/rejected": 0.2238902598619461, |
|
"step": 730, |
|
"use_label": 4334.89990234375 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.2514551804423749e-07, |
|
"logits/chosen": -1.3954675197601318, |
|
"logits/rejected": -1.3886655569076538, |
|
"logps/chosen": -387.49664306640625, |
|
"logps/rejected": -334.8097839355469, |
|
"loss": 0.4182, |
|
"pred_label": 1499.800048828125, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 1.6025422811508179, |
|
"rewards/margins": 1.8493826389312744, |
|
"rewards/rejected": -0.24684014916419983, |
|
"step": 740, |
|
"use_label": 4382.2001953125 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.1932479627473807e-07, |
|
"logits/chosen": -1.3316097259521484, |
|
"logits/rejected": -1.354921579360962, |
|
"logps/chosen": -326.4532165527344, |
|
"logps/rejected": -342.7228698730469, |
|
"loss": 0.4293, |
|
"pred_label": 1531.550048828125, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 1.2817234992980957, |
|
"rewards/margins": 1.2376317977905273, |
|
"rewards/rejected": 0.04409169405698776, |
|
"step": 750, |
|
"use_label": 4430.4501953125 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.1350407450523865e-07, |
|
"logits/chosen": -1.4075146913528442, |
|
"logits/rejected": -1.379639983177185, |
|
"logps/chosen": -321.2019348144531, |
|
"logps/rejected": -295.81097412109375, |
|
"loss": 0.3994, |
|
"pred_label": 1558.199951171875, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 1.4901134967803955, |
|
"rewards/margins": 1.1105048656463623, |
|
"rewards/rejected": 0.3796086311340332, |
|
"step": 760, |
|
"use_label": 4483.7998046875 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.0768335273573923e-07, |
|
"logits/chosen": -1.42855703830719, |
|
"logits/rejected": -1.4395478963851929, |
|
"logps/chosen": -401.9483337402344, |
|
"logps/rejected": -382.68719482421875, |
|
"loss": 0.423, |
|
"pred_label": 1587.8499755859375, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 1.3799264430999756, |
|
"rewards/margins": 1.2153711318969727, |
|
"rewards/rejected": 0.16455543041229248, |
|
"step": 770, |
|
"use_label": 4534.14990234375 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.0186263096623981e-07, |
|
"logits/chosen": -1.470218539237976, |
|
"logits/rejected": -1.4779566526412964, |
|
"logps/chosen": -231.970458984375, |
|
"logps/rejected": -231.0379638671875, |
|
"loss": 0.4331, |
|
"pred_label": 1611.0999755859375, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 1.0060170888900757, |
|
"rewards/margins": 0.8901361227035522, |
|
"rewards/rejected": 0.11588089168071747, |
|
"step": 780, |
|
"use_label": 4590.89990234375 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.604190919674039e-08, |
|
"logits/chosen": -1.3265211582183838, |
|
"logits/rejected": -1.3321948051452637, |
|
"logps/chosen": -322.144287109375, |
|
"logps/rejected": -319.94805908203125, |
|
"loss": 0.427, |
|
"pred_label": 1633.0999755859375, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.9607486724853516, |
|
"rewards/margins": 0.85713130235672, |
|
"rewards/rejected": 0.10361733287572861, |
|
"step": 790, |
|
"use_label": 4648.89990234375 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 9.022118742724097e-08, |
|
"logits/chosen": -1.2604429721832275, |
|
"logits/rejected": -1.2056382894515991, |
|
"logps/chosen": -399.75701904296875, |
|
"logps/rejected": -295.15570068359375, |
|
"loss": 0.4324, |
|
"pred_label": 1658.9000244140625, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 1.0637061595916748, |
|
"rewards/margins": 1.153640866279602, |
|
"rewards/rejected": -0.08993469178676605, |
|
"step": 800, |
|
"use_label": 4703.10009765625 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 8.440046565774157e-08, |
|
"logits/chosen": -1.4043221473693848, |
|
"logits/rejected": -1.393165111541748, |
|
"logps/chosen": -328.1530456542969, |
|
"logps/rejected": -278.7724914550781, |
|
"loss": 0.4131, |
|
"pred_label": 1681.800048828125, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.7224707007408142, |
|
"rewards/margins": 0.7582930326461792, |
|
"rewards/rejected": -0.035822317004203796, |
|
"step": 810, |
|
"use_label": 4760.2001953125 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.857974388824213e-08, |
|
"logits/chosen": -1.4293477535247803, |
|
"logits/rejected": -1.3876526355743408, |
|
"logps/chosen": -344.793212890625, |
|
"logps/rejected": -273.80633544921875, |
|
"loss": 0.4219, |
|
"pred_label": 1706.699951171875, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 1.0625591278076172, |
|
"rewards/margins": 1.4430253505706787, |
|
"rewards/rejected": -0.38046637177467346, |
|
"step": 820, |
|
"use_label": 4815.2998046875 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.275902211874273e-08, |
|
"logits/chosen": -1.3367773294448853, |
|
"logits/rejected": -1.3325202465057373, |
|
"logps/chosen": -288.30853271484375, |
|
"logps/rejected": -340.27947998046875, |
|
"loss": 0.4016, |
|
"pred_label": 1736.0, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.9091051816940308, |
|
"rewards/margins": 1.0785413980484009, |
|
"rewards/rejected": -0.16943617165088654, |
|
"step": 830, |
|
"use_label": 4866.0 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.693830034924331e-08, |
|
"logits/chosen": -1.3973602056503296, |
|
"logits/rejected": -1.42702054977417, |
|
"logps/chosen": -320.7757263183594, |
|
"logps/rejected": -301.08648681640625, |
|
"loss": 0.4395, |
|
"pred_label": 1768.5999755859375, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.8193642497062683, |
|
"rewards/margins": 0.6294553875923157, |
|
"rewards/rejected": 0.18990902602672577, |
|
"step": 840, |
|
"use_label": 4913.39990234375 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 6.111757857974389e-08, |
|
"logits/chosen": -1.3880376815795898, |
|
"logits/rejected": -1.3541958332061768, |
|
"logps/chosen": -338.11419677734375, |
|
"logps/rejected": -272.3611755371094, |
|
"loss": 0.4452, |
|
"pred_label": 1792.699951171875, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 1.19716477394104, |
|
"rewards/margins": 0.992131233215332, |
|
"rewards/rejected": 0.20503361523151398, |
|
"step": 850, |
|
"use_label": 4969.2998046875 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.529685681024446e-08, |
|
"logits/chosen": -1.4271256923675537, |
|
"logits/rejected": -1.3966145515441895, |
|
"logps/chosen": -348.1614685058594, |
|
"logps/rejected": -303.44366455078125, |
|
"loss": 0.4074, |
|
"pred_label": 1821.3499755859375, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 1.1399493217468262, |
|
"rewards/margins": 1.3317511081695557, |
|
"rewards/rejected": -0.19180157780647278, |
|
"step": 860, |
|
"use_label": 5020.64990234375 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.947613504074505e-08, |
|
"logits/chosen": -1.3959957361221313, |
|
"logits/rejected": -1.3792731761932373, |
|
"logps/chosen": -294.4374694824219, |
|
"logps/rejected": -295.8282165527344, |
|
"loss": 0.4377, |
|
"pred_label": 1854.699951171875, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 1.4856358766555786, |
|
"rewards/margins": 1.3335050344467163, |
|
"rewards/rejected": 0.15213069319725037, |
|
"step": 870, |
|
"use_label": 5067.2998046875 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.365541327124563e-08, |
|
"logits/chosen": -1.3475821018218994, |
|
"logits/rejected": -1.3881584405899048, |
|
"logps/chosen": -361.41815185546875, |
|
"logps/rejected": -341.0059509277344, |
|
"loss": 0.3875, |
|
"pred_label": 1890.75, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 1.0190831422805786, |
|
"rewards/margins": 1.0306825637817383, |
|
"rewards/rejected": -0.011599564924836159, |
|
"step": 880, |
|
"use_label": 5111.25 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.783469150174622e-08, |
|
"logits/chosen": -1.356093168258667, |
|
"logits/rejected": -1.3647395372390747, |
|
"logps/chosen": -305.94012451171875, |
|
"logps/rejected": -321.16632080078125, |
|
"loss": 0.4422, |
|
"pred_label": 1924.9000244140625, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 1.3352932929992676, |
|
"rewards/margins": 1.4743802547454834, |
|
"rewards/rejected": -0.13908688724040985, |
|
"step": 890, |
|
"use_label": 5157.10009765625 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.20139697322468e-08, |
|
"logits/chosen": -1.2386057376861572, |
|
"logits/rejected": -1.2891590595245361, |
|
"logps/chosen": -356.11444091796875, |
|
"logps/rejected": -326.7837829589844, |
|
"loss": 0.4248, |
|
"pred_label": 1953.9000244140625, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.934838593006134, |
|
"rewards/margins": 1.1246929168701172, |
|
"rewards/rejected": -0.1898542195558548, |
|
"step": 900, |
|
"use_label": 5208.10009765625 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.619324796274738e-08, |
|
"logits/chosen": -1.4316788911819458, |
|
"logits/rejected": -1.4342458248138428, |
|
"logps/chosen": -307.85601806640625, |
|
"logps/rejected": -290.3519592285156, |
|
"loss": 0.4028, |
|
"pred_label": 1981.25, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 1.2007009983062744, |
|
"rewards/margins": 1.2482540607452393, |
|
"rewards/rejected": -0.04755309969186783, |
|
"step": 910, |
|
"use_label": 5260.75 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.037252619324796e-08, |
|
"logits/chosen": -1.4033372402191162, |
|
"logits/rejected": -1.414004921913147, |
|
"logps/chosen": -313.77239990234375, |
|
"logps/rejected": -298.83465576171875, |
|
"loss": 0.4144, |
|
"pred_label": 2008.5, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 1.128159523010254, |
|
"rewards/margins": 1.3455536365509033, |
|
"rewards/rejected": -0.21739396452903748, |
|
"step": 920, |
|
"use_label": 5313.5 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.4551804423748545e-08, |
|
"logits/chosen": -1.4188601970672607, |
|
"logits/rejected": -1.4161401987075806, |
|
"logps/chosen": -271.213134765625, |
|
"logps/rejected": -271.7073669433594, |
|
"loss": 0.4048, |
|
"pred_label": 2038.9000244140625, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 1.2960989475250244, |
|
"rewards/margins": 1.6117242574691772, |
|
"rewards/rejected": -0.3156254291534424, |
|
"step": 930, |
|
"use_label": 5363.10009765625 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 8.731082654249125e-09, |
|
"logits/chosen": -1.3573418855667114, |
|
"logits/rejected": -1.3962044715881348, |
|
"logps/chosen": -368.0345153808594, |
|
"logps/rejected": -342.75604248046875, |
|
"loss": 0.3982, |
|
"pred_label": 2071.39990234375, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 1.0156645774841309, |
|
"rewards/margins": 1.1141072511672974, |
|
"rewards/rejected": -0.09844263643026352, |
|
"step": 940, |
|
"use_label": 5410.60009765625 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.910360884749709e-09, |
|
"logits/chosen": -1.3523130416870117, |
|
"logits/rejected": -1.3391929864883423, |
|
"logps/chosen": -287.52691650390625, |
|
"logps/rejected": -279.154541015625, |
|
"loss": 0.4503, |
|
"pred_label": 2100.5, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.7535191774368286, |
|
"rewards/margins": 1.287442684173584, |
|
"rewards/rejected": -0.5339235067367554, |
|
"step": 950, |
|
"use_label": 5461.5 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_logits/chosen": -1.3773202896118164, |
|
"eval_logits/rejected": -1.3735064268112183, |
|
"eval_logps/chosen": -342.40301513671875, |
|
"eval_logps/rejected": -313.79144287109375, |
|
"eval_loss": 0.43090611696243286, |
|
"eval_pred_label": 2177.77783203125, |
|
"eval_rewards/accuracies": 0.7460317611694336, |
|
"eval_rewards/chosen": 1.2747503519058228, |
|
"eval_rewards/margins": 1.4222474098205566, |
|
"eval_rewards/rejected": -0.14749698340892792, |
|
"eval_runtime": 275.5038, |
|
"eval_samples_per_second": 7.259, |
|
"eval_steps_per_second": 0.229, |
|
"eval_use_label": 5590.22216796875, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 955, |
|
"total_flos": 0.0, |
|
"train_loss": 0.47434414693822413, |
|
"train_runtime": 13309.0876, |
|
"train_samples_per_second": 4.593, |
|
"train_steps_per_second": 0.072 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 955, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|