{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 1250,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4e-08, |
|
"logits/chosen": -0.09526942670345306, |
|
"logits/rejected": -0.23948004841804504, |
|
"logps/chosen": -3969.244140625, |
|
"logps/rejected": -2912.11376953125, |
|
"loss": 0.1112, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.0000000000000003e-07, |
|
"logits/chosen": -0.11336694657802582, |
|
"logits/rejected": -0.21862205862998962, |
|
"logps/chosen": -3883.32763671875, |
|
"logps/rejected": -3105.751708984375, |
|
"loss": 0.1476, |
|
"rewards/accuracies": 0.3541666567325592, |
|
"rewards/chosen": -6.194857178343227e-06, |
|
"rewards/margins": -7.76553206378594e-05, |
|
"rewards/rejected": 7.146046118577942e-05, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.000000000000001e-07, |
|
"logits/chosen": -0.11791403591632843, |
|
"logits/rejected": -0.19368262588977814, |
|
"logps/chosen": -3725.202392578125, |
|
"logps/rejected": -3018.818603515625, |
|
"loss": 0.1581, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.00026606960454955697, |
|
"rewards/margins": -6.655660399701446e-05, |
|
"rewards/rejected": 0.00033262622309848666, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.2000000000000002e-06, |
|
"logits/chosen": -0.10180139541625977, |
|
"logits/rejected": -0.1368643343448639, |
|
"logps/chosen": -3734.001953125, |
|
"logps/rejected": -3405.164794921875, |
|
"loss": 0.1348, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.001434823265299201, |
|
"rewards/margins": 0.0006993044517003, |
|
"rewards/rejected": 0.000735518871806562, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.6000000000000001e-06, |
|
"logits/chosen": -0.11290457099676132, |
|
"logits/rejected": -0.15302149951457977, |
|
"logps/chosen": -3876.04052734375, |
|
"logps/rejected": -3476.99365234375, |
|
"loss": 0.1472, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.00464777322486043, |
|
"rewards/margins": 0.0033736887853592634, |
|
"rewards/rejected": 0.0012740844395011663, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"logits/chosen": -0.10660350322723389, |
|
"logits/rejected": -0.16240839660167694, |
|
"logps/chosen": -3837.369873046875, |
|
"logps/rejected": -3373.931640625, |
|
"loss": 0.1426, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.013103676959872246, |
|
"rewards/margins": 0.008479808457195759, |
|
"rewards/rejected": 0.004623868502676487, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.4000000000000003e-06, |
|
"logits/chosen": -0.08258971571922302, |
|
"logits/rejected": -0.15586063265800476, |
|
"logps/chosen": -3829.67041015625, |
|
"logps/rejected": -3369.47119140625, |
|
"loss": 0.1363, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": 0.029313404113054276, |
|
"rewards/margins": 0.022997483611106873, |
|
"rewards/rejected": 0.006315918173640966, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.8000000000000003e-06, |
|
"logits/chosen": -0.10445211082696915, |
|
"logits/rejected": -0.180179625749588, |
|
"logps/chosen": -3644.919921875, |
|
"logps/rejected": -3038.00732421875, |
|
"loss": 0.1343, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.05821167677640915, |
|
"rewards/margins": 0.030616426840424538, |
|
"rewards/rejected": 0.02759525738656521, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.2000000000000003e-06, |
|
"logits/chosen": 0.0064078932628035545, |
|
"logits/rejected": -0.10336550325155258, |
|
"logps/chosen": -3655.2109375, |
|
"logps/rejected": -3017.391357421875, |
|
"loss": 0.1205, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.0925077348947525, |
|
"rewards/margins": 0.05041419342160225, |
|
"rewards/rejected": 0.04209354892373085, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.6000000000000003e-06, |
|
"logits/chosen": 0.034665923565626144, |
|
"logits/rejected": -0.03560350090265274, |
|
"logps/chosen": -3621.365234375, |
|
"logps/rejected": -3200.21240234375, |
|
"loss": 0.1068, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.1416129767894745, |
|
"rewards/margins": 0.08034636825323105, |
|
"rewards/rejected": 0.06126661226153374, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.000000000000001e-06, |
|
"logits/chosen": 0.04580535367131233, |
|
"logits/rejected": -0.005379015114158392, |
|
"logps/chosen": -3695.321533203125, |
|
"logps/rejected": -3426.08447265625, |
|
"loss": 0.1051, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": 0.17054443061351776, |
|
"rewards/margins": 0.08689786493778229, |
|
"rewards/rejected": 0.08364654332399368, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.4e-06, |
|
"logits/chosen": 0.05780696123838425, |
|
"logits/rejected": -0.00175203918479383, |
|
"logps/chosen": -3636.29638671875, |
|
"logps/rejected": -3395.66748046875, |
|
"loss": 0.1036, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.2138124257326126, |
|
"rewards/margins": 0.0803312435746193, |
|
"rewards/rejected": 0.1334811896085739, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.800000000000001e-06, |
|
"logits/chosen": 0.06642362475395203, |
|
"logits/rejected": -0.037016235291957855, |
|
"logps/chosen": -3482.66015625, |
|
"logps/rejected": -2920.569580078125, |
|
"loss": 0.1081, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.2094995528459549, |
|
"rewards/margins": 0.10118832439184189, |
|
"rewards/rejected": 0.1083112508058548, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999756310023261e-06, |
|
"logits/chosen": 0.13063621520996094, |
|
"logits/rejected": 0.06078845262527466, |
|
"logps/chosen": -3470.27197265625, |
|
"logps/rejected": -3143.759033203125, |
|
"loss": 0.0841, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.2582111954689026, |
|
"rewards/margins": 0.10591275990009308, |
|
"rewards/rejected": 0.1522984504699707, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.997807075247147e-06, |
|
"logits/chosen": 0.16255763173103333, |
|
"logits/rejected": 0.07761454582214355, |
|
"logps/chosen": -3592.858642578125, |
|
"logps/rejected": -3239.969482421875, |
|
"loss": 0.1071, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.2606235444545746, |
|
"rewards/margins": 0.11447002738714218, |
|
"rewards/rejected": 0.1461535096168518, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.993910125649561e-06, |
|
"logits/chosen": 0.16677160561084747, |
|
"logits/rejected": 0.08901594579219818, |
|
"logps/chosen": -3506.703857421875, |
|
"logps/rejected": -3104.87255859375, |
|
"loss": 0.0945, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": 0.2724655568599701, |
|
"rewards/margins": 0.12591706216335297, |
|
"rewards/rejected": 0.14654847979545593, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.988068499954578e-06, |
|
"logits/chosen": 0.22131207585334778, |
|
"logits/rejected": 0.13661739230155945, |
|
"logps/chosen": -3316.39208984375, |
|
"logps/rejected": -2960.86083984375, |
|
"loss": 0.0905, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": 0.2880935072898865, |
|
"rewards/margins": 0.1261982023715973, |
|
"rewards/rejected": 0.16189530491828918, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.980286753286196e-06, |
|
"logits/chosen": 0.26340895891189575, |
|
"logits/rejected": 0.17107249796390533, |
|
"logps/chosen": -3592.39306640625, |
|
"logps/rejected": -3124.1767578125, |
|
"loss": 0.0955, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": 0.30015623569488525, |
|
"rewards/margins": 0.14308349788188934, |
|
"rewards/rejected": 0.15707270801067352, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.970570953616383e-06, |
|
"logits/chosen": 0.2763732671737671, |
|
"logits/rejected": 0.1752476692199707, |
|
"logps/chosen": -3527.703125, |
|
"logps/rejected": -3171.340576171875, |
|
"loss": 0.0918, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": 0.3266971707344055, |
|
"rewards/margins": 0.15975165367126465, |
|
"rewards/rejected": 0.16694548726081848, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.958928677033465e-06, |
|
"logits/chosen": 0.2631959021091461, |
|
"logits/rejected": 0.21199622750282288, |
|
"logps/chosen": -3533.75341796875, |
|
"logps/rejected": -3337.073486328125, |
|
"loss": 0.0921, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": 0.3471175730228424, |
|
"rewards/margins": 0.11177588999271393, |
|
"rewards/rejected": 0.23534169793128967, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.9453690018345144e-06, |
|
"logits/chosen": 0.2420426905155182, |
|
"logits/rejected": 0.1836567521095276, |
|
"logps/chosen": -3306.966064453125, |
|
"logps/rejected": -2964.663818359375, |
|
"loss": 0.0895, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 0.32517337799072266, |
|
"rewards/margins": 0.12965548038482666, |
|
"rewards/rejected": 0.19551792740821838, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.9299025014463665e-06, |
|
"logits/chosen": 0.23070940375328064, |
|
"logits/rejected": 0.17138567566871643, |
|
"logps/chosen": -3292.091796875, |
|
"logps/rejected": -2975.43115234375, |
|
"loss": 0.1009, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": 0.328936368227005, |
|
"rewards/margins": 0.11976213753223419, |
|
"rewards/rejected": 0.20917420089244843, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.912541236180779e-06, |
|
"logits/chosen": 0.15947876870632172, |
|
"logits/rejected": 0.08568959683179855, |
|
"logps/chosen": -3484.258544921875, |
|
"logps/rejected": -3174.15087890625, |
|
"loss": 0.0956, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": 0.34722018241882324, |
|
"rewards/margins": 0.155741885304451, |
|
"rewards/rejected": 0.19147828221321106, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.893298743830168e-06, |
|
"logits/chosen": 0.22774775326251984, |
|
"logits/rejected": 0.13324826955795288, |
|
"logps/chosen": -3539.89453125, |
|
"logps/rejected": -3201.11279296875, |
|
"loss": 0.0968, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": 0.38138940930366516, |
|
"rewards/margins": 0.15053245425224304, |
|
"rewards/rejected": 0.23085694015026093, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.8721900291112415e-06, |
|
"logits/chosen": 0.24971647560596466, |
|
"logits/rejected": 0.18798983097076416, |
|
"logps/chosen": -3449.693359375, |
|
"logps/rejected": -3182.7216796875, |
|
"loss": 0.1007, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": 0.4053890109062195, |
|
"rewards/margins": 0.12963128089904785, |
|
"rewards/rejected": 0.27575770020484924, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.849231551964771e-06, |
|
"logits/chosen": 0.290091335773468, |
|
"logits/rejected": 0.2263306826353073, |
|
"logps/chosen": -3255.100830078125, |
|
"logps/rejected": -2931.810546875, |
|
"loss": 0.0973, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": 0.38671380281448364, |
|
"rewards/margins": 0.1347990781068802, |
|
"rewards/rejected": 0.2519146800041199, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.824441214720629e-06, |
|
"logits/chosen": 0.35076624155044556, |
|
"logits/rejected": 0.26289868354797363, |
|
"logps/chosen": -3281.548828125, |
|
"logps/rejected": -2823.29052734375, |
|
"loss": 0.0937, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": 0.3694974184036255, |
|
"rewards/margins": 0.1352803260087967, |
|
"rewards/rejected": 0.2342170774936676, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.7978383481380865e-06, |
|
"logits/chosen": 0.3338952660560608, |
|
"logits/rejected": 0.30002114176750183, |
|
"logps/chosen": -3367.43115234375, |
|
"logps/rejected": -3190.338623046875, |
|
"loss": 0.0961, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.38234108686447144, |
|
"rewards/margins": 0.14520631730556488, |
|
"rewards/rejected": 0.23713478446006775, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.769443696332272e-06, |
|
"logits/chosen": 0.4176582396030426, |
|
"logits/rejected": 0.34488004446029663, |
|
"logps/chosen": -3451.33447265625, |
|
"logps/rejected": -3161.616943359375, |
|
"loss": 0.091, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": 0.3714825510978699, |
|
"rewards/margins": 0.14414021372795105, |
|
"rewards/rejected": 0.22734233736991882, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.7392794005985324e-06, |
|
"logits/chosen": 0.39374208450317383, |
|
"logits/rejected": 0.36565738916397095, |
|
"logps/chosen": -3225.15380859375, |
|
"logps/rejected": -3087.46728515625, |
|
"loss": 0.1046, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.3453277051448822, |
|
"rewards/margins": 0.16379894316196442, |
|
"rewards/rejected": 0.18152877688407898, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.707368982147318e-06, |
|
"logits/chosen": 0.39281272888183594, |
|
"logits/rejected": 0.36582762002944946, |
|
"logps/chosen": -3398.608154296875, |
|
"logps/rejected": -3263.87939453125, |
|
"loss": 0.0904, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 0.3241587281227112, |
|
"rewards/margins": 0.14963512122631073, |
|
"rewards/rejected": 0.17452362179756165, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.673737323763048e-06, |
|
"logits/chosen": 0.30504000186920166, |
|
"logits/rejected": 0.22714261710643768, |
|
"logps/chosen": -3418.66748046875, |
|
"logps/rejected": -3106.970947265625, |
|
"loss": 0.0872, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.3434060215950012, |
|
"rewards/margins": 0.173831969499588, |
|
"rewards/rejected": 0.16957402229309082, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.638410650401267e-06, |
|
"logits/chosen": 0.34125423431396484, |
|
"logits/rejected": 0.2718544602394104, |
|
"logps/chosen": -3394.98193359375, |
|
"logps/rejected": -3080.618408203125, |
|
"loss": 0.0786, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": 0.32423415780067444, |
|
"rewards/margins": 0.1511869728565216, |
|
"rewards/rejected": 0.17304711043834686, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.601416508739211e-06, |
|
"logits/chosen": 0.36246171593666077, |
|
"logits/rejected": 0.3016008734703064, |
|
"logps/chosen": -3435.645263671875, |
|
"logps/rejected": -3126.3828125, |
|
"loss": 0.0868, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": 0.31689783930778503, |
|
"rewards/margins": 0.16403648257255554, |
|
"rewards/rejected": 0.1528613567352295, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.562783745695738e-06, |
|
"logits/chosen": 0.3510586619377136, |
|
"logits/rejected": 0.254965603351593, |
|
"logps/chosen": -3339.10986328125, |
|
"logps/rejected": -2880.090576171875, |
|
"loss": 0.0873, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 0.3381843566894531, |
|
"rewards/margins": 0.14746293425559998, |
|
"rewards/rejected": 0.19072142243385315, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.522542485937369e-06, |
|
"logits/chosen": 0.3423737585544586, |
|
"logits/rejected": 0.2638542056083679, |
|
"logps/chosen": -3313.405517578125, |
|
"logps/rejected": -2987.49609375, |
|
"loss": 0.0887, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": 0.35876303911209106, |
|
"rewards/margins": 0.15431997179985046, |
|
"rewards/rejected": 0.204443097114563, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.4807241083879774e-06, |
|
"logits/chosen": 0.35648784041404724, |
|
"logits/rejected": 0.276287704706192, |
|
"logps/chosen": -3448.21142578125, |
|
"logps/rejected": -3130.1142578125, |
|
"loss": 0.0875, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.3623107969760895, |
|
"rewards/margins": 0.15894022583961487, |
|
"rewards/rejected": 0.2033705711364746, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.437361221760449e-06, |
|
"logits/chosen": 0.42664772272109985, |
|
"logits/rejected": 0.379373162984848, |
|
"logps/chosen": -3359.43603515625, |
|
"logps/rejected": -3039.10693359375, |
|
"loss": 0.1004, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": 0.34946465492248535, |
|
"rewards/margins": 0.14916878938674927, |
|
"rewards/rejected": 0.20029588043689728, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.3924876391293915e-06, |
|
"logits/chosen": 0.467120498418808, |
|
"logits/rejected": 0.40080124139785767, |
|
"logps/chosen": -3476.703125, |
|
"logps/rejected": -3288.689453125, |
|
"loss": 0.0805, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": 0.3640100955963135, |
|
"rewards/margins": 0.17574051022529602, |
|
"rewards/rejected": 0.18826961517333984, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.346138351564711e-06, |
|
"logits/chosen": 0.42980876564979553, |
|
"logits/rejected": 0.3273950517177582, |
|
"logps/chosen": -3256.771484375, |
|
"logps/rejected": -2806.90625, |
|
"loss": 0.0925, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 0.3438069224357605, |
|
"rewards/margins": 0.14692214131355286, |
|
"rewards/rejected": 0.19688478112220764, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.2983495008466285e-06, |
|
"logits/chosen": 0.4755614697933197, |
|
"logits/rejected": 0.4100918769836426, |
|
"logps/chosen": -3387.567626953125, |
|
"logps/rejected": -3176.111572265625, |
|
"loss": 0.1011, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": 0.3908953368663788, |
|
"rewards/margins": 0.14914441108703613, |
|
"rewards/rejected": 0.24175091087818146, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.249158351283414e-06, |
|
"logits/chosen": 0.43400949239730835, |
|
"logits/rejected": 0.35584893822669983, |
|
"logps/chosen": -3405.94384765625, |
|
"logps/rejected": -3099.514404296875, |
|
"loss": 0.0885, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": 0.3543582856655121, |
|
"rewards/margins": 0.15985527634620667, |
|
"rewards/rejected": 0.1945030391216278, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.198603260653792e-06, |
|
"logits/chosen": 0.4672483801841736, |
|
"logits/rejected": 0.38711977005004883, |
|
"logps/chosen": -3460.95947265625, |
|
"logps/rejected": -3150.413330078125, |
|
"loss": 0.0982, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": 0.38315701484680176, |
|
"rewards/margins": 0.16369104385375977, |
|
"rewards/rejected": 0.219465970993042, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.146723650296701e-06, |
|
"logits/chosen": 0.483567476272583, |
|
"logits/rejected": 0.4229060113430023, |
|
"logps/chosen": -3454.10107421875, |
|
"logps/rejected": -3221.44091796875, |
|
"loss": 0.0865, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.38951292634010315, |
|
"rewards/margins": 0.1551971733570099, |
|
"rewards/rejected": 0.23431572318077087, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.093559974371725e-06, |
|
"logits/chosen": 0.5139747858047485, |
|
"logits/rejected": 0.42474398016929626, |
|
"logps/chosen": -3357.93896484375, |
|
"logps/rejected": -3006.518310546875, |
|
"loss": 0.0832, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": 0.3771596848964691, |
|
"rewards/margins": 0.16284802556037903, |
|
"rewards/rejected": 0.21431168913841248, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.039153688314146e-06, |
|
"logits/chosen": 0.5247809886932373, |
|
"logits/rejected": 0.4323784410953522, |
|
"logps/chosen": -3264.754638671875, |
|
"logps/rejected": -2934.151123046875, |
|
"loss": 0.0968, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.3777909576892853, |
|
"rewards/margins": 0.1437477171421051, |
|
"rewards/rejected": 0.2340432107448578, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.983547216509254e-06, |
|
"logits/chosen": 0.5001921653747559, |
|
"logits/rejected": 0.40190553665161133, |
|
"logps/chosen": -3468.16015625, |
|
"logps/rejected": -3026.26025390625, |
|
"loss": 0.09, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.38785520195961, |
|
"rewards/margins": 0.17032787203788757, |
|
"rewards/rejected": 0.2175273448228836, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.92678391921108e-06, |
|
"logits/chosen": 0.5006144642829895, |
|
"logits/rejected": 0.415066659450531, |
|
"logps/chosen": -3334.10009765625, |
|
"logps/rejected": -2893.51123046875, |
|
"loss": 0.0972, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": 0.3449471592903137, |
|
"rewards/margins": 0.1539374142885208, |
|
"rewards/rejected": 0.1910097301006317, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.868908058731376e-06, |
|
"logits/chosen": 0.5540028810501099, |
|
"logits/rejected": 0.48031479120254517, |
|
"logps/chosen": -3356.93212890625, |
|
"logps/rejected": -2993.399169921875, |
|
"loss": 0.0985, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.3838370144367218, |
|
"rewards/margins": 0.14018234610557556, |
|
"rewards/rejected": 0.24365463852882385, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.8099647649251984e-06, |
|
"logits/chosen": 0.5899518728256226, |
|
"logits/rejected": 0.4895103871822357, |
|
"logps/chosen": -3501.290283203125, |
|
"logps/rejected": -3142.33544921875, |
|
"loss": 0.0869, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": 0.386918306350708, |
|
"rewards/margins": 0.17316767573356628, |
|
"rewards/rejected": 0.21375060081481934, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"logits/chosen": 0.533571720123291, |
|
"logits/rejected": 0.45087581872940063, |
|
"logps/chosen": -3382.43115234375, |
|
"logps/rejected": -3039.810791015625, |
|
"loss": 0.0938, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": 0.35097357630729675, |
|
"rewards/margins": 0.16260935366153717, |
|
"rewards/rejected": 0.18836425244808197, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.689060522675689e-06, |
|
"logits/chosen": 0.5135028958320618, |
|
"logits/rejected": 0.38714414834976196, |
|
"logps/chosen": -3217.434814453125, |
|
"logps/rejected": -2690.976806640625, |
|
"loss": 0.0962, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": 0.3489993214607239, |
|
"rewards/margins": 0.14681319892406464, |
|
"rewards/rejected": 0.20218610763549805, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.627193851723577e-06, |
|
"logits/chosen": 0.49498695135116577, |
|
"logits/rejected": 0.4188925325870514, |
|
"logps/chosen": -3364.91552734375, |
|
"logps/rejected": -3090.07275390625, |
|
"loss": 0.0802, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.37084800004959106, |
|
"rewards/margins": 0.15501223504543304, |
|
"rewards/rejected": 0.2158357799053192, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.564448228912682e-06, |
|
"logits/chosen": 0.4635513722896576, |
|
"logits/rejected": 0.36989638209342957, |
|
"logps/chosen": -3387.86474609375, |
|
"logps/rejected": -3016.7216796875, |
|
"loss": 0.0943, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.3274918794631958, |
|
"rewards/margins": 0.15053078532218933, |
|
"rewards/rejected": 0.17696109414100647, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.5008725813922383e-06, |
|
"logits/chosen": 0.49751853942871094, |
|
"logits/rejected": 0.3913223147392273, |
|
"logps/chosen": -3440.885986328125, |
|
"logps/rejected": -2968.994873046875, |
|
"loss": 0.0981, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 0.34751924872398376, |
|
"rewards/margins": 0.14958377182483673, |
|
"rewards/rejected": 0.19793547689914703, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.436516483539781e-06, |
|
"logits/chosen": 0.5272361040115356, |
|
"logits/rejected": 0.40506014227867126, |
|
"logps/chosen": -3340.62060546875, |
|
"logps/rejected": -2864.970947265625, |
|
"loss": 0.0903, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.3856109082698822, |
|
"rewards/margins": 0.15098164975643158, |
|
"rewards/rejected": 0.23462922871112823, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3714301183045382e-06, |
|
"logits/chosen": 0.4976809024810791, |
|
"logits/rejected": 0.4073059558868408, |
|
"logps/chosen": -3362.13134765625, |
|
"logps/rejected": -2928.28515625, |
|
"loss": 0.1007, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": 0.4102960526943207, |
|
"rewards/margins": 0.14597077667713165, |
|
"rewards/rejected": 0.2643252909183502, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.3056642380762783e-06, |
|
"logits/chosen": 0.5032496452331543, |
|
"logits/rejected": 0.40577760338783264, |
|
"logps/chosen": -3327.85888671875, |
|
"logps/rejected": -2854.94970703125, |
|
"loss": 0.1083, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": 0.3718569278717041, |
|
"rewards/margins": 0.14686135947704315, |
|
"rewards/rejected": 0.22499553859233856, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2392701251101172e-06, |
|
"logits/chosen": 0.5247712731361389, |
|
"logits/rejected": 0.4414879381656647, |
|
"logps/chosen": -3346.58740234375, |
|
"logps/rejected": -3087.098876953125, |
|
"loss": 0.0987, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": 0.38225480914115906, |
|
"rewards/margins": 0.13378790020942688, |
|
"rewards/rejected": 0.24846693873405457, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.1722995515381644e-06, |
|
"logits/chosen": 0.514184296131134, |
|
"logits/rejected": 0.40285858511924744, |
|
"logps/chosen": -3443.8828125, |
|
"logps/rejected": -2874.86181640625, |
|
"loss": 0.0832, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": 0.3588571548461914, |
|
"rewards/margins": 0.15165671706199646, |
|
"rewards/rejected": 0.20720043778419495, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.1048047389991693e-06, |
|
"logits/chosen": 0.5682590007781982, |
|
"logits/rejected": 0.5120213627815247, |
|
"logps/chosen": -3459.860595703125, |
|
"logps/rejected": -3225.1884765625, |
|
"loss": 0.0838, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": 0.35979193449020386, |
|
"rewards/margins": 0.15927888453006744, |
|
"rewards/rejected": 0.2005130797624588, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.0368383179176584e-06, |
|
"logits/chosen": 0.5174692869186401, |
|
"logits/rejected": 0.41469916701316833, |
|
"logps/chosen": -3251.609619140625, |
|
"logps/rejected": -2831.67431640625, |
|
"loss": 0.111, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 0.3455356955528259, |
|
"rewards/margins": 0.15155228972434998, |
|
"rewards/rejected": 0.19398342072963715, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9684532864643123e-06, |
|
"logits/chosen": 0.5736369490623474, |
|
"logits/rejected": 0.42854124307632446, |
|
"logps/chosen": -3492.6796875, |
|
"logps/rejected": -2873.034912109375, |
|
"loss": 0.0857, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.3865596354007721, |
|
"rewards/margins": 0.1634531319141388, |
|
"rewards/rejected": 0.2231064736843109, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.8997029692295875e-06, |
|
"logits/chosen": 0.5368185043334961, |
|
"logits/rejected": 0.4608641266822815, |
|
"logps/chosen": -3310.38623046875, |
|
"logps/rejected": -3082.637939453125, |
|
"loss": 0.0929, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.3720545768737793, |
|
"rewards/margins": 0.14468377828598022, |
|
"rewards/rejected": 0.22737076878547668, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8306409756428067e-06, |
|
"logits/chosen": 0.5856886506080627, |
|
"logits/rejected": 0.4743286669254303, |
|
"logps/chosen": -3404.43310546875, |
|
"logps/rejected": -3081.451904296875, |
|
"loss": 0.0699, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": 0.37632375955581665, |
|
"rewards/margins": 0.16852709650993347, |
|
"rewards/rejected": 0.20779672265052795, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.761321158169134e-06, |
|
"logits/chosen": 0.6226561665534973, |
|
"logits/rejected": 0.5113543272018433, |
|
"logps/chosen": -3556.053955078125, |
|
"logps/rejected": -3128.98486328125, |
|
"loss": 0.0791, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": 0.3752782642841339, |
|
"rewards/margins": 0.17961682379245758, |
|
"rewards/rejected": 0.19566142559051514, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6917975703170466e-06, |
|
"logits/chosen": 0.6197515726089478, |
|
"logits/rejected": 0.5554805994033813, |
|
"logps/chosen": -3466.33251953125, |
|
"logps/rejected": -3276.38671875, |
|
"loss": 0.0789, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.41308966279029846, |
|
"rewards/margins": 0.15082643926143646, |
|
"rewards/rejected": 0.2622632086277008, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.6221244244890336e-06, |
|
"logits/chosen": 0.6192020773887634, |
|
"logits/rejected": 0.5320231914520264, |
|
"logps/chosen": -3314.12548828125, |
|
"logps/rejected": -3007.730224609375, |
|
"loss": 0.1062, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.4171117842197418, |
|
"rewards/margins": 0.1608533412218094, |
|
"rewards/rejected": 0.25625842809677124, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5523560497083927e-06, |
|
"logits/chosen": 0.5730468034744263, |
|
"logits/rejected": 0.5238832235336304, |
|
"logps/chosen": -3282.682373046875, |
|
"logps/rejected": -3070.96875, |
|
"loss": 0.1052, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.347822368144989, |
|
"rewards/margins": 0.12122434377670288, |
|
"rewards/rejected": 0.22659802436828613, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.482546849255096e-06, |
|
"logits/chosen": 0.5806129574775696, |
|
"logits/rejected": 0.4810718595981598, |
|
"logps/chosen": -3249.9609375, |
|
"logps/rejected": -2905.1328125, |
|
"loss": 0.0932, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.36439889669418335, |
|
"rewards/margins": 0.15161724388599396, |
|
"rewards/rejected": 0.21278166770935059, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4127512582437486e-06, |
|
"logits/chosen": 0.5876488089561462, |
|
"logits/rejected": 0.47464412450790405, |
|
"logps/chosen": -3425.721923828125, |
|
"logps/rejected": -3029.7919921875, |
|
"loss": 0.08, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.43426513671875, |
|
"rewards/margins": 0.17609842121601105, |
|
"rewards/rejected": 0.2581667900085449, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3430237011767166e-06, |
|
"logits/chosen": 0.5383955836296082, |
|
"logits/rejected": 0.4219956398010254, |
|
"logps/chosen": -3329.063720703125, |
|
"logps/rejected": -2868.416015625, |
|
"loss": 0.0784, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.4002605974674225, |
|
"rewards/margins": 0.15942516922950745, |
|
"rewards/rejected": 0.24083542823791504, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2734185495055503e-06, |
|
"logits/chosen": 0.5360678434371948, |
|
"logits/rejected": 0.4522096514701843, |
|
"logps/chosen": -3392.73388671875, |
|
"logps/rejected": -3113.76708984375, |
|
"loss": 0.1041, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": 0.42230457067489624, |
|
"rewards/margins": 0.14559249579906464, |
|
"rewards/rejected": 0.2767120599746704, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2039900792337477e-06, |
|
"logits/chosen": 0.5705752968788147, |
|
"logits/rejected": 0.5024815201759338, |
|
"logps/chosen": -3402.189453125, |
|
"logps/rejected": -3169.858154296875, |
|
"loss": 0.0749, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": 0.4054113030433655, |
|
"rewards/margins": 0.16554218530654907, |
|
"rewards/rejected": 0.2398691177368164, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.134792428593971e-06, |
|
"logits/chosen": 0.5779368281364441, |
|
"logits/rejected": 0.4750920832157135, |
|
"logps/chosen": -3396.721923828125, |
|
"logps/rejected": -3118.03076171875, |
|
"loss": 0.0873, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": 0.3966867923736572, |
|
"rewards/margins": 0.15320774912834167, |
|
"rewards/rejected": 0.24347904324531555, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0658795558326745e-06, |
|
"logits/chosen": 0.5595996379852295, |
|
"logits/rejected": 0.49162426590919495, |
|
"logps/chosen": -3377.83984375, |
|
"logps/rejected": -3203.514404296875, |
|
"loss": 0.0759, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.41549405455589294, |
|
"rewards/margins": 0.15610775351524353, |
|
"rewards/rejected": 0.2593863010406494, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.997305197135089e-06, |
|
"logits/chosen": 0.5741230249404907, |
|
"logits/rejected": 0.499004602432251, |
|
"logps/chosen": -3418.917236328125, |
|
"logps/rejected": -3171.630859375, |
|
"loss": 0.0854, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": 0.38339635729789734, |
|
"rewards/margins": 0.15282298624515533, |
|
"rewards/rejected": 0.2305733859539032, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9291228247233607e-06, |
|
"logits/chosen": 0.5983024835586548, |
|
"logits/rejected": 0.5140877962112427, |
|
"logps/chosen": -3385.004638671875, |
|
"logps/rejected": -3130.511474609375, |
|
"loss": 0.076, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": 0.41556110978126526, |
|
"rewards/margins": 0.18630118668079376, |
|
"rewards/rejected": 0.2292599231004715, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8613856051605242e-06, |
|
"logits/chosen": 0.5828371644020081, |
|
"logits/rejected": 0.509624719619751, |
|
"logps/chosen": -3329.71923828125, |
|
"logps/rejected": -3106.307861328125, |
|
"loss": 0.0886, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.3785203993320465, |
|
"rewards/margins": 0.17451588809490204, |
|
"rewards/rejected": 0.20400448143482208, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.7941463578928088e-06, |
|
"logits/chosen": 0.5620870590209961, |
|
"logits/rejected": 0.4992052912712097, |
|
"logps/chosen": -3354.768310546875, |
|
"logps/rejected": -3018.64599609375, |
|
"loss": 0.0991, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.3805944621562958, |
|
"rewards/margins": 0.16154679656028748, |
|
"rewards/rejected": 0.2190476357936859, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7274575140626318e-06, |
|
"logits/chosen": 0.6222743391990662, |
|
"logits/rejected": 0.5590968132019043, |
|
"logps/chosen": -3424.283935546875, |
|
"logps/rejected": -3156.167724609375, |
|
"loss": 0.0838, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": 0.39403584599494934, |
|
"rewards/margins": 0.16475993394851685, |
|
"rewards/rejected": 0.2292759120464325, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.661371075624363e-06, |
|
"logits/chosen": 0.6057112216949463, |
|
"logits/rejected": 0.5170978903770447, |
|
"logps/chosen": -3288.67333984375, |
|
"logps/rejected": -2878.7490234375, |
|
"loss": 0.0902, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.3696768581867218, |
|
"rewards/margins": 0.1375175267457962, |
|
"rewards/rejected": 0.2321593314409256, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5959385747947697e-06, |
|
"logits/chosen": 0.6024297475814819, |
|
"logits/rejected": 0.46327948570251465, |
|
"logps/chosen": -3431.43798828125, |
|
"logps/rejected": -2972.927734375, |
|
"loss": 0.0918, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": 0.4163932204246521, |
|
"rewards/margins": 0.17428722977638245, |
|
"rewards/rejected": 0.24210599064826965, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5312110338697427e-06, |
|
"logits/chosen": 0.6199443340301514, |
|
"logits/rejected": 0.5287885069847107, |
|
"logps/chosen": -3343.03955078125, |
|
"logps/rejected": -3022.42236328125, |
|
"loss": 0.0814, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 0.399059921503067, |
|
"rewards/margins": 0.15364623069763184, |
|
"rewards/rejected": 0.2454136610031128, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.467238925438646e-06, |
|
"logits/chosen": 0.6143923401832581, |
|
"logits/rejected": 0.5960370898246765, |
|
"logps/chosen": -3441.96875, |
|
"logps/rejected": -3306.02587890625, |
|
"loss": 0.0976, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": 0.40943044424057007, |
|
"rewards/margins": 0.15342941880226135, |
|
"rewards/rejected": 0.2560010552406311, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.4040721330273063e-06, |
|
"logits/chosen": 0.5770421624183655, |
|
"logits/rejected": 0.4791272282600403, |
|
"logps/chosen": -3493.46533203125, |
|
"logps/rejected": -3112.23486328125, |
|
"loss": 0.078, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": 0.4063073992729187, |
|
"rewards/margins": 0.16731533408164978, |
|
"rewards/rejected": 0.23899206519126892, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3417599122003464e-06, |
|
"logits/chosen": 0.6255184412002563, |
|
"logits/rejected": 0.5643167495727539, |
|
"logps/chosen": -3481.497314453125, |
|
"logps/rejected": -3120.768310546875, |
|
"loss": 0.0768, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": 0.38735657930374146, |
|
"rewards/margins": 0.1667819321155548, |
|
"rewards/rejected": 0.22057469189167023, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.280350852153168e-06, |
|
"logits/chosen": 0.5782762765884399, |
|
"logits/rejected": 0.49263420701026917, |
|
"logps/chosen": -3499.00146484375, |
|
"logps/rejected": -3000.46728515625, |
|
"loss": 0.0836, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": 0.3818155527114868, |
|
"rewards/margins": 0.158293217420578, |
|
"rewards/rejected": 0.2235223352909088, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2198928378235717e-06, |
|
"logits/chosen": 0.5858504176139832, |
|
"logits/rejected": 0.4866867661476135, |
|
"logps/chosen": -3311.93115234375, |
|
"logps/rejected": -2876.67041015625, |
|
"loss": 0.0807, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.3901521563529968, |
|
"rewards/margins": 0.17163410782814026, |
|
"rewards/rejected": 0.21851806342601776, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.160433012552508e-06, |
|
"logits/chosen": 0.6068114638328552, |
|
"logits/rejected": 0.5134158730506897, |
|
"logps/chosen": -3114.994140625, |
|
"logps/rejected": -2820.002685546875, |
|
"loss": 0.0843, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": 0.37761688232421875, |
|
"rewards/margins": 0.1500733643770218, |
|
"rewards/rejected": 0.22754351794719696, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.1020177413231334e-06, |
|
"logits/chosen": 0.6057130098342896, |
|
"logits/rejected": 0.4943726062774658, |
|
"logps/chosen": -3233.512939453125, |
|
"logps/rejected": -2793.17724609375, |
|
"loss": 0.1014, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": 0.37504011392593384, |
|
"rewards/margins": 0.15143273770809174, |
|
"rewards/rejected": 0.2236073762178421, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0446925746067768e-06, |
|
"logits/chosen": 0.6174426078796387, |
|
"logits/rejected": 0.5331483483314514, |
|
"logps/chosen": -3416.95556640625, |
|
"logps/rejected": -3206.383056640625, |
|
"loss": 0.0898, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": 0.3918195962905884, |
|
"rewards/margins": 0.1552480012178421, |
|
"rewards/rejected": 0.23657159507274628, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.88502212844063e-07, |
|
"logits/chosen": 0.6079164743423462, |
|
"logits/rejected": 0.5762253999710083, |
|
"logps/chosen": -3149.41357421875, |
|
"logps/rejected": -2938.776123046875, |
|
"loss": 0.0841, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": 0.3338172435760498, |
|
"rewards/margins": 0.11891861259937286, |
|
"rewards/rejected": 0.21489866077899933, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.334904715888496e-07, |
|
"logits/chosen": 0.6562485694885254, |
|
"logits/rejected": 0.5610599517822266, |
|
"logps/chosen": -3430.844482421875, |
|
"logps/rejected": -3026.52587890625, |
|
"loss": 0.0942, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": 0.3751566410064697, |
|
"rewards/margins": 0.17377465963363647, |
|
"rewards/rejected": 0.20138195157051086, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.797002473421729e-07, |
|
"logits/chosen": 0.6029684543609619, |
|
"logits/rejected": 0.5253230333328247, |
|
"logps/chosen": -3235.121826171875, |
|
"logps/rejected": -2938.1640625, |
|
"loss": 0.0747, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.35139986872673035, |
|
"rewards/margins": 0.16471409797668457, |
|
"rewards/rejected": 0.18668580055236816, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.271734841028553e-07, |
|
"logits/chosen": 0.6538732051849365, |
|
"logits/rejected": 0.5982731580734253, |
|
"logps/chosen": -3381.48388671875, |
|
"logps/rejected": -3193.509033203125, |
|
"loss": 0.0925, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.3694911003112793, |
|
"rewards/margins": 0.15476444363594055, |
|
"rewards/rejected": 0.21472665667533875, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.759511406608255e-07, |
|
"logits/chosen": 0.6155306100845337, |
|
"logits/rejected": 0.5355127453804016, |
|
"logps/chosen": -3603.495361328125, |
|
"logps/rejected": -3289.190673828125, |
|
"loss": 0.0996, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": 0.3712473511695862, |
|
"rewards/margins": 0.14055751264095306, |
|
"rewards/rejected": 0.23068983852863312, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.260731586586983e-07, |
|
"logits/chosen": 0.6570934057235718, |
|
"logits/rejected": 0.5331934094429016, |
|
"logps/chosen": -3460.41552734375, |
|
"logps/rejected": -3020.72216796875, |
|
"loss": 0.094, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.36751216650009155, |
|
"rewards/margins": 0.16225677728652954, |
|
"rewards/rejected": 0.2052554190158844, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.775784314464717e-07, |
|
"logits/chosen": 0.6543987393379211, |
|
"logits/rejected": 0.5447486639022827, |
|
"logps/chosen": -3222.0947265625, |
|
"logps/rejected": -2851.6064453125, |
|
"loss": 0.0877, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.3648824095726013, |
|
"rewards/margins": 0.15842023491859436, |
|
"rewards/rejected": 0.20646218955516815, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.305047737536707e-07, |
|
"logits/chosen": 0.6519988179206848, |
|
"logits/rejected": 0.5568141341209412, |
|
"logps/chosen": -3336.877685546875, |
|
"logps/rejected": -3047.09326171875, |
|
"loss": 0.0764, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.40213853120803833, |
|
"rewards/margins": 0.1877773106098175, |
|
"rewards/rejected": 0.21436119079589844, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.848888922025553e-07, |
|
"logits/chosen": 0.6370071172714233, |
|
"logits/rejected": 0.5316422581672668, |
|
"logps/chosen": -3368.276611328125, |
|
"logps/rejected": -2950.913330078125, |
|
"loss": 0.0863, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.41091403365135193, |
|
"rewards/margins": 0.1616295725107193, |
|
"rewards/rejected": 0.24928446114063263, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.407663566854008e-07, |
|
"logits/chosen": 0.6158221960067749, |
|
"logits/rejected": 0.5360954403877258, |
|
"logps/chosen": -3394.571533203125, |
|
"logps/rejected": -3010.104248046875, |
|
"loss": 0.0845, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": 0.4172899127006531, |
|
"rewards/margins": 0.18432098627090454, |
|
"rewards/rejected": 0.23296895623207092, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.981715726281666e-07, |
|
"logits/chosen": 0.6194905042648315, |
|
"logits/rejected": 0.5224823355674744, |
|
"logps/chosen": -3173.560546875, |
|
"logps/rejected": -2845.927734375, |
|
"loss": 0.0886, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": 0.3923606276512146, |
|
"rewards/margins": 0.1794053614139557, |
|
"rewards/rejected": 0.2129552811384201, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.5713775416217884e-07, |
|
"logits/chosen": 0.630397617816925, |
|
"logits/rejected": 0.5148654580116272, |
|
"logps/chosen": -3482.11181640625, |
|
"logps/rejected": -3051.90673828125, |
|
"loss": 0.0877, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.41265344619750977, |
|
"rewards/margins": 0.1835349202156067, |
|
"rewards/rejected": 0.22911854088306427, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.1769689822475147e-07, |
|
"logits/chosen": 0.6434907913208008, |
|
"logits/rejected": 0.548559308052063, |
|
"logps/chosen": -3349.734375, |
|
"logps/rejected": -3031.77978515625, |
|
"loss": 0.0941, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": 0.3825233578681946, |
|
"rewards/margins": 0.14600971341133118, |
|
"rewards/rejected": 0.2365136444568634, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.798797596089351e-07, |
|
"logits/chosen": 0.6427907943725586, |
|
"logits/rejected": 0.5501333475112915, |
|
"logps/chosen": -3478.50537109375, |
|
"logps/rejected": -3245.615234375, |
|
"loss": 0.0937, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.41868337988853455, |
|
"rewards/margins": 0.1864897906780243, |
|
"rewards/rejected": 0.23219358921051025, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.4371582698185636e-07, |
|
"logits/chosen": 0.6372936964035034, |
|
"logits/rejected": 0.5432217717170715, |
|
"logps/chosen": -3423.430908203125, |
|
"logps/rejected": -3087.765380859375, |
|
"loss": 0.092, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": 0.4114980101585388, |
|
"rewards/margins": 0.17217496037483215, |
|
"rewards/rejected": 0.23932309448719025, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.092332998903416e-07, |
|
"logits/chosen": 0.5996044874191284, |
|
"logits/rejected": 0.4986226558685303, |
|
"logps/chosen": -3295.41455078125, |
|
"logps/rejected": -2947.134765625, |
|
"loss": 0.0865, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.39540520310401917, |
|
"rewards/margins": 0.1856795847415924, |
|
"rewards/rejected": 0.20972561836242676, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.764590667717562e-07, |
|
"logits/chosen": 0.6375841498374939, |
|
"logits/rejected": 0.544662356376648, |
|
"logps/chosen": -3654.934326171875, |
|
"logps/rejected": -3230.848876953125, |
|
"loss": 0.067, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.3808661103248596, |
|
"rewards/margins": 0.15394194424152374, |
|
"rewards/rejected": 0.22692415118217468, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.454186839872158e-07, |
|
"logits/chosen": 0.6398609280586243, |
|
"logits/rejected": 0.5464919805526733, |
|
"logps/chosen": -3495.825439453125, |
|
"logps/rejected": -3124.951416015625, |
|
"loss": 0.081, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": 0.39376845955848694, |
|
"rewards/margins": 0.16370807588100433, |
|
"rewards/rejected": 0.23006033897399902, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.1613635589349756e-07, |
|
"logits/chosen": 0.6471344232559204, |
|
"logits/rejected": 0.5833622813224792, |
|
"logps/chosen": -3451.12890625, |
|
"logps/rejected": -3257.12353515625, |
|
"loss": 0.0869, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.3840171694755554, |
|
"rewards/margins": 0.14787636697292328, |
|
"rewards/rejected": 0.23614077270030975, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.8863491596921745e-07, |
|
"logits/chosen": 0.6442585587501526, |
|
"logits/rejected": 0.5597777962684631, |
|
"logps/chosen": -3400.688720703125, |
|
"logps/rejected": -3029.727294921875, |
|
"loss": 0.0815, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": 0.3747108280658722, |
|
"rewards/margins": 0.14767669141292572, |
|
"rewards/rejected": 0.22703413665294647, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.629358090099639e-07, |
|
"logits/chosen": 0.6222900152206421, |
|
"logits/rejected": 0.5375300645828247, |
|
"logps/chosen": -3375.23583984375, |
|
"logps/rejected": -3024.98876953125, |
|
"loss": 0.088, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.37642619013786316, |
|
"rewards/margins": 0.1609790325164795, |
|
"rewards/rejected": 0.21544715762138367, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.3905907440629752e-07, |
|
"logits/chosen": 0.6418689489364624, |
|
"logits/rejected": 0.5823384523391724, |
|
"logps/chosen": -3317.762451171875, |
|
"logps/rejected": -3089.084716796875, |
|
"loss": 0.0788, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.3975849449634552, |
|
"rewards/margins": 0.17512866854667664, |
|
"rewards/rejected": 0.22245629131793976, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.1702333051763271e-07, |
|
"logits/chosen": 0.5996700525283813, |
|
"logits/rejected": 0.5386776924133301, |
|
"logps/chosen": -3202.969970703125, |
|
"logps/rejected": -2970.28173828125, |
|
"loss": 0.1025, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": 0.343997061252594, |
|
"rewards/margins": 0.16654863953590393, |
|
"rewards/rejected": 0.17744839191436768, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.684576015420277e-08, |
|
"logits/chosen": 0.6135612726211548, |
|
"logits/rejected": 0.5466006398200989, |
|
"logps/chosen": -3316.67578125, |
|
"logps/rejected": -3048.313720703125, |
|
"loss": 0.0874, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.3800659775733948, |
|
"rewards/margins": 0.17580585181713104, |
|
"rewards/rejected": 0.20426008105278015, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.854209717842231e-08, |
|
"logits/chosen": 0.6101081967353821, |
|
"logits/rejected": 0.5615028738975525, |
|
"logps/chosen": -3356.20849609375, |
|
"logps/rejected": -3210.05224609375, |
|
"loss": 0.0957, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 0.39068323373794556, |
|
"rewards/margins": 0.14721594750881195, |
|
"rewards/rejected": 0.2434672862291336, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.212661423609184e-08, |
|
"logits/chosen": 0.6116551160812378, |
|
"logits/rejected": 0.47189703583717346, |
|
"logps/chosen": -3391.901611328125, |
|
"logps/rejected": -2864.9482421875, |
|
"loss": 0.0721, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.3698135316371918, |
|
"rewards/margins": 0.1599336564540863, |
|
"rewards/rejected": 0.20987987518310547, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.761211162702117e-08, |
|
"logits/chosen": 0.6695979833602905, |
|
"logits/rejected": 0.5461623668670654, |
|
"logps/chosen": -3409.35302734375, |
|
"logps/rejected": -3067.67431640625, |
|
"loss": 0.0853, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.3886914849281311, |
|
"rewards/margins": 0.18535657227039337, |
|
"rewards/rejected": 0.20333492755889893, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.5009907323737826e-08, |
|
"logits/chosen": 0.6564788818359375, |
|
"logits/rejected": 0.5006071925163269, |
|
"logps/chosen": -3463.62255859375, |
|
"logps/rejected": -2909.28857421875, |
|
"loss": 0.0859, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": 0.37883251905441284, |
|
"rewards/margins": 0.15553273260593414, |
|
"rewards/rejected": 0.2232998162508011, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.4329828146074096e-08, |
|
"logits/chosen": 0.6350787878036499, |
|
"logits/rejected": 0.5539794564247131, |
|
"logps/chosen": -3336.46044921875, |
|
"logps/rejected": -3126.2548828125, |
|
"loss": 0.0896, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": 0.4052211344242096, |
|
"rewards/margins": 0.1808997541666031, |
|
"rewards/rejected": 0.2243214100599289, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.5580202098509078e-08, |
|
"logits/chosen": 0.6311666965484619, |
|
"logits/rejected": 0.599091649055481, |
|
"logps/chosen": -3485.178955078125, |
|
"logps/rejected": -3325.38134765625, |
|
"loss": 0.0873, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.39726322889328003, |
|
"rewards/margins": 0.1585725098848343, |
|
"rewards/rejected": 0.23869077861309052, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 8.767851876239075e-09, |
|
"logits/chosen": 0.6433550119400024, |
|
"logits/rejected": 0.5538659691810608, |
|
"logps/chosen": -3310.471923828125, |
|
"logps/rejected": -2977.670166015625, |
|
"loss": 0.0878, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.384525865316391, |
|
"rewards/margins": 0.16290965676307678, |
|
"rewards/rejected": 0.2216162383556366, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.8980895450474455e-09, |
|
"logits/chosen": 0.6242701411247253, |
|
"logits/rejected": 0.5688928365707397, |
|
"logps/chosen": -3227.052490234375, |
|
"logps/rejected": -2975.194091796875, |
|
"loss": 0.0877, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": 0.3587878346443176, |
|
"rewards/margins": 0.16212721168994904, |
|
"rewards/rejected": 0.19666056334972382, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 9.747123991141193e-10, |
|
"logits/chosen": 0.655865490436554, |
|
"logits/rejected": 0.518544614315033, |
|
"logps/chosen": -3414.157470703125, |
|
"logps/rejected": -2845.206298828125, |
|
"loss": 0.0955, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": 0.37420162558555603, |
|
"rewards/margins": 0.18059025704860687, |
|
"rewards/rejected": 0.19361138343811035, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0, |
|
"logits/chosen": 0.6276296973228455, |
|
"logits/rejected": 0.4904142916202545, |
|
"logps/chosen": -3474.27197265625, |
|
"logps/rejected": -2985.078857421875, |
|
"loss": 0.0931, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.36780911684036255, |
|
"rewards/margins": 0.17106689512729645, |
|
"rewards/rejected": 0.1967422217130661, |
|
"step": 1250 |
|
}, |
|
{
"epoch": 1.0,
"step": 1250,
"total_flos": 0.0,
"train_loss": 0.09343003117442131,
"train_runtime": 11089.5357,
"train_samples_per_second": 1.804,
"train_steps_per_second": 0.113
}
],
"logging_steps": 10,
"max_steps": 1250,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}