|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 300, |
|
"global_step": 3821, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.3054830287206268e-08, |
|
"logits/chosen": -2.7604618072509766, |
|
"logits/rejected": -2.686812162399292, |
|
"logps/chosen": -516.73779296875, |
|
"logps/rejected": -458.60467529296875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.3054830287206266e-07, |
|
"logits/chosen": -2.4880800247192383, |
|
"logits/rejected": -2.4930832386016846, |
|
"logps/chosen": -338.7858581542969, |
|
"logps/rejected": -404.5611572265625, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.4027777910232544, |
|
"rewards/chosen": 0.00024087271594908088, |
|
"rewards/margins": 0.0006852700607851148, |
|
"rewards/rejected": -0.00044439738849177957, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.610966057441253e-07, |
|
"logits/chosen": -2.475435733795166, |
|
"logits/rejected": -2.4197583198547363, |
|
"logps/chosen": -327.35919189453125, |
|
"logps/rejected": -443.83868408203125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -3.7859845178900287e-05, |
|
"rewards/margins": 1.1227629329368938e-05, |
|
"rewards/rejected": -4.9087520892499015e-05, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.9164490861618804e-07, |
|
"logits/chosen": -2.5618598461151123, |
|
"logits/rejected": -2.5939595699310303, |
|
"logps/chosen": -348.56982421875, |
|
"logps/rejected": -416.8001403808594, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.00023523520212620497, |
|
"rewards/margins": 0.0015929860528558493, |
|
"rewards/rejected": -0.0013577509671449661, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.221932114882506e-07, |
|
"logits/chosen": -2.5246434211730957, |
|
"logits/rejected": -2.4987733364105225, |
|
"logps/chosen": -376.7454528808594, |
|
"logps/rejected": -427.66729736328125, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.0012155056465417147, |
|
"rewards/margins": 0.0037051704712212086, |
|
"rewards/rejected": -0.00492067588493228, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6.527415143603135e-07, |
|
"logits/chosen": -2.4959325790405273, |
|
"logits/rejected": -2.4452037811279297, |
|
"logps/chosen": -290.552001953125, |
|
"logps/rejected": -383.3431701660156, |
|
"loss": 0.69, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.0027376641519367695, |
|
"rewards/margins": 0.005442826543003321, |
|
"rewards/rejected": -0.008180489763617516, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.832898172323761e-07, |
|
"logits/chosen": -2.4295783042907715, |
|
"logits/rejected": -2.3901355266571045, |
|
"logps/chosen": -377.3544006347656, |
|
"logps/rejected": -410.72991943359375, |
|
"loss": 0.6909, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.006509931292384863, |
|
"rewards/margins": 0.004867006093263626, |
|
"rewards/rejected": -0.011376937851309776, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.138381201044387e-07, |
|
"logits/chosen": -2.310166835784912, |
|
"logits/rejected": -2.279524803161621, |
|
"logps/chosen": -279.5904846191406, |
|
"logps/rejected": -370.0677795410156, |
|
"loss": 0.6889, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.007495372090488672, |
|
"rewards/margins": 0.007953675463795662, |
|
"rewards/rejected": -0.015449047088623047, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0443864229765013e-06, |
|
"logits/chosen": -2.36948561668396, |
|
"logits/rejected": -2.3835222721099854, |
|
"logps/chosen": -342.13653564453125, |
|
"logps/rejected": -447.1036682128906, |
|
"loss": 0.6878, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.010767060332000256, |
|
"rewards/margins": 0.013605493120849133, |
|
"rewards/rejected": -0.024372553452849388, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.1749347258485642e-06, |
|
"logits/chosen": -2.472949266433716, |
|
"logits/rejected": -2.3902525901794434, |
|
"logps/chosen": -325.2154541015625, |
|
"logps/rejected": -401.51751708984375, |
|
"loss": 0.6844, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.012460511177778244, |
|
"rewards/margins": 0.01636466197669506, |
|
"rewards/rejected": -0.028825175017118454, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.305483028720627e-06, |
|
"logits/chosen": -2.5286309719085693, |
|
"logits/rejected": -2.5337207317352295, |
|
"logps/chosen": -365.7882080078125, |
|
"logps/rejected": -409.24261474609375, |
|
"loss": 0.6836, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.017355522140860558, |
|
"rewards/margins": 0.019160564988851547, |
|
"rewards/rejected": -0.036516088992357254, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.4360313315926894e-06, |
|
"logits/chosen": -2.485241651535034, |
|
"logits/rejected": -2.473548412322998, |
|
"logps/chosen": -337.1002197265625, |
|
"logps/rejected": -444.09832763671875, |
|
"loss": 0.6793, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.023945586755871773, |
|
"rewards/margins": 0.02508346177637577, |
|
"rewards/rejected": -0.049029044806957245, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.5665796344647521e-06, |
|
"logits/chosen": -2.437514066696167, |
|
"logits/rejected": -2.439671516418457, |
|
"logps/chosen": -343.27777099609375, |
|
"logps/rejected": -444.11639404296875, |
|
"loss": 0.6747, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.028381770476698875, |
|
"rewards/margins": 0.039804860949516296, |
|
"rewards/rejected": -0.06818662583827972, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.6971279373368146e-06, |
|
"logits/chosen": -2.4196202754974365, |
|
"logits/rejected": -2.329251527786255, |
|
"logps/chosen": -380.3607482910156, |
|
"logps/rejected": -435.93896484375, |
|
"loss": 0.6724, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.03702790290117264, |
|
"rewards/margins": 0.049176327884197235, |
|
"rewards/rejected": -0.08620421588420868, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.8276762402088774e-06, |
|
"logits/chosen": -2.436796188354492, |
|
"logits/rejected": -2.4008259773254395, |
|
"logps/chosen": -364.23687744140625, |
|
"logps/rejected": -456.6722106933594, |
|
"loss": 0.6653, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.04911624267697334, |
|
"rewards/margins": 0.06083670258522034, |
|
"rewards/rejected": -0.10995294898748398, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9582245430809403e-06, |
|
"logits/chosen": -2.465663433074951, |
|
"logits/rejected": -2.4756152629852295, |
|
"logps/chosen": -344.59808349609375, |
|
"logps/rejected": -450.5936584472656, |
|
"loss": 0.6615, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.07244648039340973, |
|
"rewards/margins": 0.07592395693063736, |
|
"rewards/rejected": -0.1483704298734665, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.0887728459530026e-06, |
|
"logits/chosen": -2.365227222442627, |
|
"logits/rejected": -2.3483099937438965, |
|
"logps/chosen": -365.65509033203125, |
|
"logps/rejected": -459.9405822753906, |
|
"loss": 0.6494, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.088950976729393, |
|
"rewards/margins": 0.0978541225194931, |
|
"rewards/rejected": -0.1868050992488861, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.2193211488250653e-06, |
|
"logits/chosen": -2.5415358543395996, |
|
"logits/rejected": -2.4844305515289307, |
|
"logps/chosen": -417.45013427734375, |
|
"logps/rejected": -502.04888916015625, |
|
"loss": 0.6513, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.12002843618392944, |
|
"rewards/margins": 0.09641442447900772, |
|
"rewards/rejected": -0.21644285321235657, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.3498694516971284e-06, |
|
"logits/chosen": -2.373419761657715, |
|
"logits/rejected": -2.328564167022705, |
|
"logps/chosen": -308.98297119140625, |
|
"logps/rejected": -423.2518005371094, |
|
"loss": 0.6334, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.10061755031347275, |
|
"rewards/margins": 0.14423246681690216, |
|
"rewards/rejected": -0.2448500096797943, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.4804177545691907e-06, |
|
"logits/chosen": -2.4381210803985596, |
|
"logits/rejected": -2.4739632606506348, |
|
"logps/chosen": -366.0082092285156, |
|
"logps/rejected": -444.33233642578125, |
|
"loss": 0.6237, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.15544722974300385, |
|
"rewards/margins": 0.15099851787090302, |
|
"rewards/rejected": -0.30644577741622925, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.610966057441254e-06, |
|
"logits/chosen": -2.3546931743621826, |
|
"logits/rejected": -2.3614845275878906, |
|
"logps/chosen": -413.4657287597656, |
|
"logps/rejected": -487.9469299316406, |
|
"loss": 0.6049, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.21883895993232727, |
|
"rewards/margins": 0.22747401893138885, |
|
"rewards/rejected": -0.44631296396255493, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.741514360313316e-06, |
|
"logits/chosen": -2.3287081718444824, |
|
"logits/rejected": -2.2180962562561035, |
|
"logps/chosen": -397.4319763183594, |
|
"logps/rejected": -475.10614013671875, |
|
"loss": 0.6212, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.2562285363674164, |
|
"rewards/margins": 0.17265436053276062, |
|
"rewards/rejected": -0.4288829267024994, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.872062663185379e-06, |
|
"logits/chosen": -2.3937861919403076, |
|
"logits/rejected": -2.323655366897583, |
|
"logps/chosen": -393.7010498046875, |
|
"logps/rejected": -540.32958984375, |
|
"loss": 0.5708, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.31672877073287964, |
|
"rewards/margins": 0.32215583324432373, |
|
"rewards/rejected": -0.6388846039772034, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.0026109660574416e-06, |
|
"logits/chosen": -2.2199885845184326, |
|
"logits/rejected": -2.1488921642303467, |
|
"logps/chosen": -362.84710693359375, |
|
"logps/rejected": -462.92755126953125, |
|
"loss": 0.57, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.4027708172798157, |
|
"rewards/margins": 0.28824615478515625, |
|
"rewards/rejected": -0.6910169124603271, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.1331592689295043e-06, |
|
"logits/chosen": -2.1226742267608643, |
|
"logits/rejected": -2.128727674484253, |
|
"logps/chosen": -443.38226318359375, |
|
"logps/rejected": -544.7005615234375, |
|
"loss": 0.5704, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.5918055176734924, |
|
"rewards/margins": 0.33963826298713684, |
|
"rewards/rejected": -0.9314438104629517, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.263707571801567e-06, |
|
"logits/chosen": -2.2666714191436768, |
|
"logits/rejected": -2.192073106765747, |
|
"logps/chosen": -407.093505859375, |
|
"logps/rejected": -539.2747802734375, |
|
"loss": 0.53, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.6421019434928894, |
|
"rewards/margins": 0.4279584288597107, |
|
"rewards/rejected": -1.0700603723526, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.3942558746736293e-06, |
|
"logits/chosen": -2.1499412059783936, |
|
"logits/rejected": -2.0672426223754883, |
|
"logps/chosen": -427.7845764160156, |
|
"logps/rejected": -526.4454956054688, |
|
"loss": 0.5201, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.7411731481552124, |
|
"rewards/margins": 0.33993035554885864, |
|
"rewards/rejected": -1.0811034440994263, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.524804177545692e-06, |
|
"logits/chosen": -2.0998997688293457, |
|
"logits/rejected": -2.022254467010498, |
|
"logps/chosen": -459.3646545410156, |
|
"logps/rejected": -625.1860961914062, |
|
"loss": 0.5132, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.9105283617973328, |
|
"rewards/margins": 0.5623170137405396, |
|
"rewards/rejected": -1.472845435142517, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.6553524804177547e-06, |
|
"logits/chosen": -2.058182716369629, |
|
"logits/rejected": -1.990330457687378, |
|
"logps/chosen": -422.2322692871094, |
|
"logps/rejected": -509.48504638671875, |
|
"loss": 0.5447, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.8607247471809387, |
|
"rewards/margins": 0.5177969932556152, |
|
"rewards/rejected": -1.3785216808319092, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.7859007832898174e-06, |
|
"logits/chosen": -1.908735990524292, |
|
"logits/rejected": -1.7987762689590454, |
|
"logps/chosen": -532.5482788085938, |
|
"logps/rejected": -675.1096801757812, |
|
"loss": 0.4906, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1365267038345337, |
|
"rewards/margins": 0.7340434789657593, |
|
"rewards/rejected": -1.870570421218872, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.9164490861618806e-06, |
|
"logits/chosen": -1.999489188194275, |
|
"logits/rejected": -1.8826462030410767, |
|
"logps/chosen": -510.0675354003906, |
|
"logps/rejected": -654.4682006835938, |
|
"loss": 0.4906, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1803276538848877, |
|
"rewards/margins": 0.692645788192749, |
|
"rewards/rejected": -1.8729734420776367, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_logits/chosen": -1.7535996437072754, |
|
"eval_logits/rejected": -1.6233811378479004, |
|
"eval_logps/chosen": -473.8013610839844, |
|
"eval_logps/rejected": -603.2532958984375, |
|
"eval_loss": 0.5339562892913818, |
|
"eval_rewards/accuracies": 0.7310000061988831, |
|
"eval_rewards/chosen": -1.181405782699585, |
|
"eval_rewards/margins": 0.661102831363678, |
|
"eval_rewards/rejected": -1.8425085544586182, |
|
"eval_runtime": 1389.4464, |
|
"eval_samples_per_second": 1.439, |
|
"eval_steps_per_second": 0.36, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.046997389033943e-06, |
|
"logits/chosen": -1.8269140720367432, |
|
"logits/rejected": -1.690843939781189, |
|
"logps/chosen": -498.0406799316406, |
|
"logps/rejected": -633.5381469726562, |
|
"loss": 0.5566, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.2140557765960693, |
|
"rewards/margins": 0.597601592540741, |
|
"rewards/rejected": -1.811657190322876, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.177545691906005e-06, |
|
"logits/chosen": -1.767525315284729, |
|
"logits/rejected": -1.5722942352294922, |
|
"logps/chosen": -499.41839599609375, |
|
"logps/rejected": -658.9512329101562, |
|
"loss": 0.5073, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.2438334226608276, |
|
"rewards/margins": 0.7738653421401978, |
|
"rewards/rejected": -2.0176987648010254, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.308093994778068e-06, |
|
"logits/chosen": -1.8579235076904297, |
|
"logits/rejected": -1.7731454372406006, |
|
"logps/chosen": -443.87689208984375, |
|
"logps/rejected": -572.9539184570312, |
|
"loss": 0.563, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.1261751651763916, |
|
"rewards/margins": 0.6167136430740356, |
|
"rewards/rejected": -1.7428886890411377, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.4386422976501306e-06, |
|
"logits/chosen": -1.7547829151153564, |
|
"logits/rejected": -1.6789696216583252, |
|
"logps/chosen": -377.6453552246094, |
|
"logps/rejected": -584.998779296875, |
|
"loss": 0.4792, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.9642307162284851, |
|
"rewards/margins": 0.8439178466796875, |
|
"rewards/rejected": -1.8081486225128174, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.569190600522193e-06, |
|
"logits/chosen": -1.6814041137695312, |
|
"logits/rejected": -1.4768749475479126, |
|
"logps/chosen": -506.594482421875, |
|
"logps/rejected": -591.21142578125, |
|
"loss": 0.5415, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.2887766361236572, |
|
"rewards/margins": 0.5483629107475281, |
|
"rewards/rejected": -1.8371394872665405, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.699738903394257e-06, |
|
"logits/chosen": -1.6767174005508423, |
|
"logits/rejected": -1.5448577404022217, |
|
"logps/chosen": -424.47357177734375, |
|
"logps/rejected": -592.1474609375, |
|
"loss": 0.4852, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.9117814302444458, |
|
"rewards/margins": 0.8897954225540161, |
|
"rewards/rejected": -1.801576852798462, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.8302872062663196e-06, |
|
"logits/chosen": -1.7710018157958984, |
|
"logits/rejected": -1.6090304851531982, |
|
"logps/chosen": -496.84796142578125, |
|
"logps/rejected": -633.0993041992188, |
|
"loss": 0.5396, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.243511438369751, |
|
"rewards/margins": 0.702240526676178, |
|
"rewards/rejected": -1.9457519054412842, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.9608355091383814e-06, |
|
"logits/chosen": -1.4778488874435425, |
|
"logits/rejected": -1.3310272693634033, |
|
"logps/chosen": -402.7732849121094, |
|
"logps/rejected": -492.5380859375, |
|
"loss": 0.5017, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.9786807298660278, |
|
"rewards/margins": 0.623943567276001, |
|
"rewards/rejected": -1.6026241779327393, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.9999488562447675e-06, |
|
"logits/chosen": -1.40001380443573, |
|
"logits/rejected": -1.2227522134780884, |
|
"logps/chosen": -532.0943603515625, |
|
"logps/rejected": -664.1284790039062, |
|
"loss": 0.6002, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.533832311630249, |
|
"rewards/margins": 0.5622987747192383, |
|
"rewards/rejected": -2.0961310863494873, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999698361256577e-06, |
|
"logits/chosen": -1.461018443107605, |
|
"logits/rejected": -1.1940746307373047, |
|
"logps/chosen": -509.679443359375, |
|
"logps/rejected": -636.8277587890625, |
|
"loss": 0.5079, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.1675481796264648, |
|
"rewards/margins": 0.7109408974647522, |
|
"rewards/rejected": -1.8784888982772827, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.999239142174581e-06, |
|
"logits/chosen": -1.3779613971710205, |
|
"logits/rejected": -1.1279867887496948, |
|
"logps/chosen": -455.8907165527344, |
|
"logps/rejected": -604.815185546875, |
|
"loss": 0.4635, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.2613023519515991, |
|
"rewards/margins": 0.855317234992981, |
|
"rewards/rejected": -2.11661958694458, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.99857123734344e-06, |
|
"logits/chosen": -1.3608977794647217, |
|
"logits/rejected": -0.901921272277832, |
|
"logps/chosen": -503.4825134277344, |
|
"logps/rejected": -629.4042358398438, |
|
"loss": 0.4903, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.280937671661377, |
|
"rewards/margins": 0.8498051762580872, |
|
"rewards/rejected": -2.1307430267333984, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.997694702533016e-06, |
|
"logits/chosen": -1.29677414894104, |
|
"logits/rejected": -1.0692778825759888, |
|
"logps/chosen": -548.71826171875, |
|
"logps/rejected": -724.1248779296875, |
|
"loss": 0.466, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.5205175876617432, |
|
"rewards/margins": 0.9235566854476929, |
|
"rewards/rejected": -2.4440743923187256, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.996609610933713e-06, |
|
"logits/chosen": -1.3901933431625366, |
|
"logits/rejected": -1.1403733491897583, |
|
"logps/chosen": -505.74139404296875, |
|
"logps/rejected": -676.5526123046875, |
|
"loss": 0.5209, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.319435715675354, |
|
"rewards/margins": 0.8349205255508423, |
|
"rewards/rejected": -2.1543564796447754, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.995316053150366e-06, |
|
"logits/chosen": -1.3645076751708984, |
|
"logits/rejected": -1.0972566604614258, |
|
"logps/chosen": -509.98016357421875, |
|
"logps/rejected": -682.1041259765625, |
|
"loss": 0.5307, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.315499186515808, |
|
"rewards/margins": 0.8478175401687622, |
|
"rewards/rejected": -2.163316488265991, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.9938141371946815e-06, |
|
"logits/chosen": -1.3621938228607178, |
|
"logits/rejected": -0.9139550924301147, |
|
"logps/chosen": -506.2652893066406, |
|
"logps/rejected": -685.5975341796875, |
|
"loss": 0.4647, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.2151587009429932, |
|
"rewards/margins": 0.93292635679245, |
|
"rewards/rejected": -2.148085117340088, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.992103988476206e-06, |
|
"logits/chosen": -1.175817847251892, |
|
"logits/rejected": -0.6028262376785278, |
|
"logps/chosen": -476.334716796875, |
|
"logps/rejected": -651.3397216796875, |
|
"loss": 0.4581, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.2715951204299927, |
|
"rewards/margins": 0.8561094999313354, |
|
"rewards/rejected": -2.127704381942749, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.990185749791866e-06, |
|
"logits/chosen": -0.8731945753097534, |
|
"logits/rejected": -0.5909063220024109, |
|
"logps/chosen": -551.8997802734375, |
|
"logps/rejected": -691.8196411132812, |
|
"loss": 0.4837, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.4918056726455688, |
|
"rewards/margins": 1.0931782722473145, |
|
"rewards/rejected": -2.5849835872650146, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.9880595813140395e-06, |
|
"logits/chosen": -0.8462217450141907, |
|
"logits/rejected": -0.4468405246734619, |
|
"logps/chosen": -490.5753479003906, |
|
"logps/rejected": -612.1764526367188, |
|
"loss": 0.4908, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.316733956336975, |
|
"rewards/margins": 0.7920147180557251, |
|
"rewards/rejected": -2.108748435974121, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.985725660577184e-06, |
|
"logits/chosen": -0.6493757963180542, |
|
"logits/rejected": -0.6190133690834045, |
|
"logps/chosen": -496.7198181152344, |
|
"logps/rejected": -738.5999145507812, |
|
"loss": 0.4759, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.4519720077514648, |
|
"rewards/margins": 1.00557541847229, |
|
"rewards/rejected": -2.457547664642334, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.983184182463009e-06, |
|
"logits/chosen": -0.6732873916625977, |
|
"logits/rejected": -0.4988276958465576, |
|
"logps/chosen": -510.90106201171875, |
|
"logps/rejected": -625.2664794921875, |
|
"loss": 0.4795, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.3072543144226074, |
|
"rewards/margins": 0.8223368525505066, |
|
"rewards/rejected": -2.1295909881591797, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.980435359184203e-06, |
|
"logits/chosen": -1.0567286014556885, |
|
"logits/rejected": -0.561193585395813, |
|
"logps/chosen": -564.832275390625, |
|
"logps/rejected": -718.8677368164062, |
|
"loss": 0.5194, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.5574848651885986, |
|
"rewards/margins": 0.938727855682373, |
|
"rewards/rejected": -2.496212959289551, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.9774794202667236e-06, |
|
"logits/chosen": -0.9730321168899536, |
|
"logits/rejected": -0.3585650324821472, |
|
"logps/chosen": -440.9288024902344, |
|
"logps/rejected": -633.592041015625, |
|
"loss": 0.3516, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -1.1980558633804321, |
|
"rewards/margins": 1.1983206272125244, |
|
"rewards/rejected": -2.396376132965088, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.974316612530615e-06, |
|
"logits/chosen": -0.5258805751800537, |
|
"logits/rejected": -0.13965483009815216, |
|
"logps/chosen": -500.12091064453125, |
|
"logps/rejected": -707.7438354492188, |
|
"loss": 0.4002, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.656359314918518, |
|
"rewards/margins": 1.194873571395874, |
|
"rewards/rejected": -2.8512330055236816, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.970947200069416e-06, |
|
"logits/chosen": -0.8342685699462891, |
|
"logits/rejected": 0.23774346709251404, |
|
"logps/chosen": -462.1539611816406, |
|
"logps/rejected": -631.550537109375, |
|
"loss": 0.5032, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.5009446144104004, |
|
"rewards/margins": 1.0611143112182617, |
|
"rewards/rejected": -2.562058925628662, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.967371464228096e-06, |
|
"logits/chosen": -0.44802480936050415, |
|
"logits/rejected": -0.0694938451051712, |
|
"logps/chosen": -492.4813537597656, |
|
"logps/rejected": -663.474365234375, |
|
"loss": 0.4973, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.5952883958816528, |
|
"rewards/margins": 1.0112850666046143, |
|
"rewards/rejected": -2.6065733432769775, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.963589703579569e-06, |
|
"logits/chosen": -0.5677907466888428, |
|
"logits/rejected": 0.08190581947565079, |
|
"logps/chosen": -503.6414489746094, |
|
"logps/rejected": -645.163330078125, |
|
"loss": 0.4839, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.4353834390640259, |
|
"rewards/margins": 0.922812819480896, |
|
"rewards/rejected": -2.358196496963501, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.9596022338997615e-06, |
|
"logits/chosen": -0.5412989854812622, |
|
"logits/rejected": 0.47167086601257324, |
|
"logps/chosen": -497.56103515625, |
|
"logps/rejected": -681.288818359375, |
|
"loss": 0.4859, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6671890020370483, |
|
"rewards/margins": 1.1972028017044067, |
|
"rewards/rejected": -2.864391803741455, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.955409388141243e-06, |
|
"logits/chosen": -0.8228802680969238, |
|
"logits/rejected": 0.08221787214279175, |
|
"logps/chosen": -544.5670776367188, |
|
"logps/rejected": -733.0662231445312, |
|
"loss": 0.5465, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.6897475719451904, |
|
"rewards/margins": 1.3086159229278564, |
|
"rewards/rejected": -2.998363971710205, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.951011516405429e-06, |
|
"logits/chosen": -0.3682901859283447, |
|
"logits/rejected": 0.21380114555358887, |
|
"logps/chosen": -472.1390686035156, |
|
"logps/rejected": -650.9225463867188, |
|
"loss": 0.4794, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.3374059200286865, |
|
"rewards/margins": 0.9791024923324585, |
|
"rewards/rejected": -2.3165085315704346, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_logits/chosen": 0.4449966549873352, |
|
"eval_logits/rejected": 1.2460291385650635, |
|
"eval_logps/chosen": -494.4773254394531, |
|
"eval_logps/rejected": -666.9945068359375, |
|
"eval_loss": 0.4700670540332794, |
|
"eval_rewards/accuracies": 0.7699999809265137, |
|
"eval_rewards/chosen": -1.38816499710083, |
|
"eval_rewards/margins": 1.0917549133300781, |
|
"eval_rewards/rejected": -2.479919910430908, |
|
"eval_runtime": 1372.1838, |
|
"eval_samples_per_second": 1.458, |
|
"eval_steps_per_second": 0.364, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.946408985913344e-06, |
|
"logits/chosen": -1.1787288188934326, |
|
"logits/rejected": 0.032404374331235886, |
|
"logps/chosen": -540.3707275390625, |
|
"logps/rejected": -690.0636596679688, |
|
"loss": 0.4104, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.2028136253356934, |
|
"rewards/margins": 1.3079878091812134, |
|
"rewards/rejected": -2.510801315307617, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.941602180974958e-06, |
|
"logits/chosen": -0.5976434946060181, |
|
"logits/rejected": 0.23231466114521027, |
|
"logps/chosen": -513.5987548828125, |
|
"logps/rejected": -683.162353515625, |
|
"loss": 0.4517, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.503683090209961, |
|
"rewards/margins": 1.2033047676086426, |
|
"rewards/rejected": -2.7069880962371826, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.936591502957101e-06, |
|
"logits/chosen": -0.6624077558517456, |
|
"logits/rejected": 0.3608100712299347, |
|
"logps/chosen": -447.55230712890625, |
|
"logps/rejected": -660.681640625, |
|
"loss": 0.3989, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.3714964389801025, |
|
"rewards/margins": 1.2194106578826904, |
|
"rewards/rejected": -2.590907335281372, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.931377370249946e-06, |
|
"logits/chosen": -0.27748388051986694, |
|
"logits/rejected": 0.2729637026786804, |
|
"logps/chosen": -528.355224609375, |
|
"logps/rejected": -733.2789916992188, |
|
"loss": 0.4943, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.8842767477035522, |
|
"rewards/margins": 1.311034917831421, |
|
"rewards/rejected": -3.1953113079071045, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.925960218232073e-06, |
|
"logits/chosen": -0.4966405928134918, |
|
"logits/rejected": 0.048351895064115524, |
|
"logps/chosen": -491.5657653808594, |
|
"logps/rejected": -743.1395874023438, |
|
"loss": 0.4957, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.6836191415786743, |
|
"rewards/margins": 1.473381519317627, |
|
"rewards/rejected": -3.157000780105591, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.920340499234116e-06, |
|
"logits/chosen": -0.24024248123168945, |
|
"logits/rejected": -0.06672336161136627, |
|
"logps/chosen": -472.2447814941406, |
|
"logps/rejected": -663.1290893554688, |
|
"loss": 0.4382, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.4003952741622925, |
|
"rewards/margins": 1.0303130149841309, |
|
"rewards/rejected": -2.430708408355713, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.914518682500995e-06, |
|
"logits/chosen": -0.8933698534965515, |
|
"logits/rejected": -0.014977499842643738, |
|
"logps/chosen": -509.6695251464844, |
|
"logps/rejected": -674.0376586914062, |
|
"loss": 0.4931, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4237163066864014, |
|
"rewards/margins": 1.024255394935608, |
|
"rewards/rejected": -2.4479715824127197, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.9084952541527315e-06, |
|
"logits/chosen": -0.5897430181503296, |
|
"logits/rejected": 0.24839851260185242, |
|
"logps/chosen": -533.2153930664062, |
|
"logps/rejected": -633.3540649414062, |
|
"loss": 0.4995, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.5195014476776123, |
|
"rewards/margins": 0.7836617827415466, |
|
"rewards/rejected": -2.3031630516052246, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.902270717143858e-06, |
|
"logits/chosen": -0.20460684597492218, |
|
"logits/rejected": 0.12876734137535095, |
|
"logps/chosen": -454.1747131347656, |
|
"logps/rejected": -706.5838623046875, |
|
"loss": 0.3716, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.4049168825149536, |
|
"rewards/margins": 1.3669251203536987, |
|
"rewards/rejected": -2.7718420028686523, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.895845591221427e-06, |
|
"logits/chosen": -0.5709416270256042, |
|
"logits/rejected": 0.21547503769397736, |
|
"logps/chosen": -543.8369750976562, |
|
"logps/rejected": -720.9224853515625, |
|
"loss": 0.4566, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.6587600708007812, |
|
"rewards/margins": 1.1786607503890991, |
|
"rewards/rejected": -2.83742094039917, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.8892204128816e-06, |
|
"logits/chosen": -0.27632415294647217, |
|
"logits/rejected": -0.12886568903923035, |
|
"logps/chosen": -468.2186584472656, |
|
"logps/rejected": -706.5438232421875, |
|
"loss": 0.4874, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.4443271160125732, |
|
"rewards/margins": 1.2774336338043213, |
|
"rewards/rejected": -2.7217605113983154, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.882395735324864e-06, |
|
"logits/chosen": -0.5171593427658081, |
|
"logits/rejected": 0.06492243707180023, |
|
"logps/chosen": -399.1227111816406, |
|
"logps/rejected": -646.8035888671875, |
|
"loss": 0.4334, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -1.1794382333755493, |
|
"rewards/margins": 1.383467435836792, |
|
"rewards/rejected": -2.5629055500030518, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.87537212840983e-06, |
|
"logits/chosen": -0.38013777136802673, |
|
"logits/rejected": 0.17461785674095154, |
|
"logps/chosen": -431.56939697265625, |
|
"logps/rejected": -727.7451171875, |
|
"loss": 0.403, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.2866822481155396, |
|
"rewards/margins": 1.4361357688903809, |
|
"rewards/rejected": -2.722817897796631, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.8681501786056545e-06, |
|
"logits/chosen": -0.4910075068473816, |
|
"logits/rejected": 0.1193656325340271, |
|
"logps/chosen": -524.5296630859375, |
|
"logps/rejected": -716.439697265625, |
|
"loss": 0.5003, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4974342584609985, |
|
"rewards/margins": 1.3101706504821777, |
|
"rewards/rejected": -2.8076047897338867, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.860730488943068e-06, |
|
"logits/chosen": -0.4912436902523041, |
|
"logits/rejected": 0.18202224373817444, |
|
"logps/chosen": -527.1237182617188, |
|
"logps/rejected": -659.1048583984375, |
|
"loss": 0.4865, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.5281684398651123, |
|
"rewards/margins": 0.9601262211799622, |
|
"rewards/rejected": -2.4882943630218506, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.853113678964022e-06, |
|
"logits/chosen": -0.8603304624557495, |
|
"logits/rejected": -0.08993472903966904, |
|
"logps/chosen": -432.42279052734375, |
|
"logps/rejected": -642.409912109375, |
|
"loss": 0.3753, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.2163296937942505, |
|
"rewards/margins": 1.3121674060821533, |
|
"rewards/rejected": -2.5284969806671143, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.845300384669958e-06, |
|
"logits/chosen": 0.24725647270679474, |
|
"logits/rejected": 0.0013871907722204924, |
|
"logps/chosen": -527.3872680664062, |
|
"logps/rejected": -742.9652099609375, |
|
"loss": 0.4498, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.8072938919067383, |
|
"rewards/margins": 1.2479110956192017, |
|
"rewards/rejected": -3.0552048683166504, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.837291258468701e-06, |
|
"logits/chosen": -0.6004719734191895, |
|
"logits/rejected": 0.5962368249893188, |
|
"logps/chosen": -507.5689392089844, |
|
"logps/rejected": -673.4093627929688, |
|
"loss": 0.4537, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.6435458660125732, |
|
"rewards/margins": 1.2729610204696655, |
|
"rewards/rejected": -2.9165070056915283, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.829086969119984e-06, |
|
"logits/chosen": -0.4771009385585785, |
|
"logits/rejected": 0.03888826444745064, |
|
"logps/chosen": -551.6219482421875, |
|
"logps/rejected": -797.9405517578125, |
|
"loss": 0.439, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.8441495895385742, |
|
"rewards/margins": 1.5053937435150146, |
|
"rewards/rejected": -3.349543333053589, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.820688201679605e-06, |
|
"logits/chosen": -0.36556169390678406, |
|
"logits/rejected": 0.3322374224662781, |
|
"logps/chosen": -510.71661376953125, |
|
"logps/rejected": -704.6385498046875, |
|
"loss": 0.4447, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.6285368204116821, |
|
"rewards/margins": 1.4855858087539673, |
|
"rewards/rejected": -3.1141226291656494, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.8120956574422315e-06, |
|
"logits/chosen": -0.3884666860103607, |
|
"logits/rejected": -0.13175992667675018, |
|
"logps/chosen": -518.641357421875, |
|
"logps/rejected": -642.6422119140625, |
|
"loss": 0.5053, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.5760186910629272, |
|
"rewards/margins": 0.9459335207939148, |
|
"rewards/rejected": -2.5219521522521973, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.803310053882831e-06, |
|
"logits/chosen": -1.0650156736373901, |
|
"logits/rejected": 0.38095536828041077, |
|
"logps/chosen": -542.5806274414062, |
|
"logps/rejected": -690.2403564453125, |
|
"loss": 0.4374, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.6415122747421265, |
|
"rewards/margins": 1.045480728149414, |
|
"rewards/rejected": -2.686992883682251, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.794332124596775e-06, |
|
"logits/chosen": -0.6507046222686768, |
|
"logits/rejected": 0.048465847969055176, |
|
"logps/chosen": -479.3224182128906, |
|
"logps/rejected": -674.3448486328125, |
|
"loss": 0.401, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.3757288455963135, |
|
"rewards/margins": 1.2602720260620117, |
|
"rewards/rejected": -2.636000633239746, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.785162619238575e-06, |
|
"logits/chosen": -0.6413692235946655, |
|
"logits/rejected": -0.042513225227594376, |
|
"logps/chosen": -557.0155029296875, |
|
"logps/rejected": -792.69970703125, |
|
"loss": 0.4201, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.6387875080108643, |
|
"rewards/margins": 1.4760812520980835, |
|
"rewards/rejected": -3.1148688793182373, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.775802303459288e-06, |
|
"logits/chosen": -0.74284428358078, |
|
"logits/rejected": -0.1755208522081375, |
|
"logps/chosen": -588.0230712890625, |
|
"logps/rejected": -802.0186767578125, |
|
"loss": 0.4069, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -1.7381775379180908, |
|
"rewards/margins": 1.4629442691802979, |
|
"rewards/rejected": -3.2011218070983887, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.766251958842589e-06, |
|
"logits/chosen": -0.42949801683425903, |
|
"logits/rejected": -0.4735488295555115, |
|
"logps/chosen": -518.86572265625, |
|
"logps/rejected": -755.33154296875, |
|
"loss": 0.4045, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -1.6570861339569092, |
|
"rewards/margins": 1.4493482112884521, |
|
"rewards/rejected": -3.1064348220825195, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.7565123828395066e-06, |
|
"logits/chosen": -0.7870966196060181, |
|
"logits/rejected": 0.0325060598552227, |
|
"logps/chosen": -515.7349243164062, |
|
"logps/rejected": -726.00927734375, |
|
"loss": 0.5102, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -1.6367769241333008, |
|
"rewards/margins": 1.335331678390503, |
|
"rewards/rejected": -2.9721086025238037, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.746584388701831e-06, |
|
"logits/chosen": -0.7208787202835083, |
|
"logits/rejected": -0.034571003168821335, |
|
"logps/chosen": -488.56341552734375, |
|
"logps/rejected": -712.2532958984375, |
|
"loss": 0.5118, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.7884547710418701, |
|
"rewards/margins": 1.3315757513046265, |
|
"rewards/rejected": -3.120030164718628, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.736468805414218e-06, |
|
"logits/chosen": -0.7053539752960205, |
|
"logits/rejected": -0.015096127986907959, |
|
"logps/chosen": -474.1915588378906, |
|
"logps/rejected": -634.8294677734375, |
|
"loss": 0.4931, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.4765193462371826, |
|
"rewards/margins": 1.0172039270401, |
|
"rewards/rejected": -2.4937233924865723, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.7261664776249595e-06, |
|
"logits/chosen": -1.0484793186187744, |
|
"logits/rejected": -0.35419899225234985, |
|
"logps/chosen": -551.4525756835938, |
|
"logps/rejected": -721.5409545898438, |
|
"loss": 0.4519, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.49507737159729, |
|
"rewards/margins": 1.2039979696273804, |
|
"rewards/rejected": -2.699075222015381, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_logits/chosen": 0.19788537919521332, |
|
"eval_logits/rejected": 1.0802806615829468, |
|
"eval_logps/chosen": -498.0536804199219, |
|
"eval_logps/rejected": -686.2431030273438, |
|
"eval_loss": 0.456636905670166, |
|
"eval_rewards/accuracies": 0.7730000019073486, |
|
"eval_rewards/chosen": -1.42392897605896, |
|
"eval_rewards/margins": 1.248477816581726, |
|
"eval_rewards/rejected": -2.6724064350128174, |
|
"eval_runtime": 1384.9339, |
|
"eval_samples_per_second": 1.444, |
|
"eval_steps_per_second": 0.361, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.715678265575463e-06, |
|
"logits/chosen": -0.890539824962616, |
|
"logits/rejected": 0.22412686049938202, |
|
"logps/chosen": -490.0135803222656, |
|
"logps/rejected": -712.4631958007812, |
|
"loss": 0.3771, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -1.2612826824188232, |
|
"rewards/margins": 1.602736234664917, |
|
"rewards/rejected": -2.8640189170837402, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.705005045028415e-06, |
|
"logits/chosen": -0.8860180974006653, |
|
"logits/rejected": 0.03514351695775986, |
|
"logps/chosen": -594.3389282226562, |
|
"logps/rejected": -782.8358154296875, |
|
"loss": 0.4195, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.7765058279037476, |
|
"rewards/margins": 1.2403103113174438, |
|
"rewards/rejected": -3.0168161392211914, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.694147707194659e-06, |
|
"logits/chosen": -0.5554194450378418, |
|
"logits/rejected": 0.016318077221512794, |
|
"logps/chosen": -585.2276000976562, |
|
"logps/rejected": -805.9215087890625, |
|
"loss": 0.3769, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.8951377868652344, |
|
"rewards/margins": 1.719668984413147, |
|
"rewards/rejected": -3.61480712890625, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.683107158658782e-06, |
|
"logits/chosen": -0.7583510875701904, |
|
"logits/rejected": 0.25492575764656067, |
|
"logps/chosen": -562.8099975585938, |
|
"logps/rejected": -745.3627319335938, |
|
"loss": 0.5012, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.83584725856781, |
|
"rewards/margins": 1.2255539894104004, |
|
"rewards/rejected": -3.061401128768921, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.671884321303407e-06, |
|
"logits/chosen": -1.1498582363128662, |
|
"logits/rejected": 0.7587814331054688, |
|
"logps/chosen": -591.7152099609375, |
|
"logps/rejected": -751.0265502929688, |
|
"loss": 0.4275, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.7667793035507202, |
|
"rewards/margins": 1.4644314050674438, |
|
"rewards/rejected": -3.231210231781006, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.660480132232224e-06, |
|
"logits/chosen": -0.7957364320755005, |
|
"logits/rejected": -0.17932990193367004, |
|
"logps/chosen": -428.00958251953125, |
|
"logps/rejected": -722.1585083007812, |
|
"loss": 0.3945, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.4679275751113892, |
|
"rewards/margins": 1.536849021911621, |
|
"rewards/rejected": -3.0047767162323, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.6488955436917414e-06, |
|
"logits/chosen": -0.7032185196876526, |
|
"logits/rejected": 0.244097039103508, |
|
"logps/chosen": -511.335693359375, |
|
"logps/rejected": -830.0217895507812, |
|
"loss": 0.4831, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.6884835958480835, |
|
"rewards/margins": 1.69620680809021, |
|
"rewards/rejected": -3.384690761566162, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.6371315229917644e-06, |
|
"logits/chosen": -0.9565087556838989, |
|
"logits/rejected": -0.6389614343643188, |
|
"logps/chosen": -467.09814453125, |
|
"logps/rejected": -740.5447998046875, |
|
"loss": 0.4217, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -1.331072449684143, |
|
"rewards/margins": 1.5709624290466309, |
|
"rewards/rejected": -2.9020345211029053, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.625189052424638e-06, |
|
"logits/chosen": -0.9426406621932983, |
|
"logits/rejected": -0.6099969148635864, |
|
"logps/chosen": -464.8932189941406, |
|
"logps/rejected": -654.5234985351562, |
|
"loss": 0.457, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -1.2798701524734497, |
|
"rewards/margins": 1.0147043466567993, |
|
"rewards/rejected": -2.29457426071167, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.613069129183218e-06, |
|
"logits/chosen": -0.9859519004821777, |
|
"logits/rejected": -0.3881329894065857, |
|
"logps/chosen": -416.37750244140625, |
|
"logps/rejected": -661.3399658203125, |
|
"loss": 0.4259, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.1251696348190308, |
|
"rewards/margins": 1.4806818962097168, |
|
"rewards/rejected": -2.605851650238037, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.600772765277607e-06, |
|
"logits/chosen": -0.8805161714553833, |
|
"logits/rejected": -0.5182097554206848, |
|
"logps/chosen": -533.8609008789062, |
|
"logps/rejected": -738.94921875, |
|
"loss": 0.4221, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.4609010219573975, |
|
"rewards/margins": 1.319215178489685, |
|
"rewards/rejected": -2.780116319656372, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.588300987450652e-06, |
|
"logits/chosen": -0.7461687922477722, |
|
"logits/rejected": -0.11924894899129868, |
|
"logps/chosen": -504.3556213378906, |
|
"logps/rejected": -709.5269165039062, |
|
"loss": 0.4198, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.595538854598999, |
|
"rewards/margins": 1.395408272743225, |
|
"rewards/rejected": -2.9909470081329346, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5756548370922136e-06, |
|
"logits/chosen": -0.9196340441703796, |
|
"logits/rejected": 0.15583333373069763, |
|
"logps/chosen": -467.1847229003906, |
|
"logps/rejected": -698.4019165039062, |
|
"loss": 0.471, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.4349987506866455, |
|
"rewards/margins": 1.6199867725372314, |
|
"rewards/rejected": -3.054985523223877, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.562835370152206e-06, |
|
"logits/chosen": -0.9880784153938293, |
|
"logits/rejected": -0.4833584427833557, |
|
"logps/chosen": -474.13946533203125, |
|
"logps/rejected": -677.9033813476562, |
|
"loss": 0.4454, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.532492756843567, |
|
"rewards/margins": 1.2476747035980225, |
|
"rewards/rejected": -2.7801673412323, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.54984365705243e-06, |
|
"logits/chosen": -0.9044283628463745, |
|
"logits/rejected": 0.2612631916999817, |
|
"logps/chosen": -487.19573974609375, |
|
"logps/rejected": -666.8839721679688, |
|
"loss": 0.4112, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.5281569957733154, |
|
"rewards/margins": 1.2005985975265503, |
|
"rewards/rejected": -2.728755474090576, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.536680782597191e-06, |
|
"logits/chosen": -1.1122404336929321, |
|
"logits/rejected": 0.34001001715660095, |
|
"logps/chosen": -424.924560546875, |
|
"logps/rejected": -637.4059448242188, |
|
"loss": 0.4054, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.2176024913787842, |
|
"rewards/margins": 1.382258653640747, |
|
"rewards/rejected": -2.5998611450195312, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.523347845882718e-06, |
|
"logits/chosen": -1.2397373914718628, |
|
"logits/rejected": -0.0026629925705492496, |
|
"logps/chosen": -473.2481994628906, |
|
"logps/rejected": -670.2132568359375, |
|
"loss": 0.472, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.3779747486114502, |
|
"rewards/margins": 1.2943679094314575, |
|
"rewards/rejected": -2.672342300415039, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.50984596020539e-06, |
|
"logits/chosen": -1.0471652746200562, |
|
"logits/rejected": -0.250629723072052, |
|
"logps/chosen": -507.221435546875, |
|
"logps/rejected": -730.1007080078125, |
|
"loss": 0.4251, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.5079420804977417, |
|
"rewards/margins": 1.3903275728225708, |
|
"rewards/rejected": -2.8982696533203125, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.4961762529687745e-06, |
|
"logits/chosen": -0.47195902466773987, |
|
"logits/rejected": -0.18957489728927612, |
|
"logps/chosen": -446.6513671875, |
|
"logps/rejected": -749.2055053710938, |
|
"loss": 0.3931, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -1.421588659286499, |
|
"rewards/margins": 1.843101143836975, |
|
"rewards/rejected": -3.2646899223327637, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.482339865589492e-06, |
|
"logits/chosen": -0.6734046936035156, |
|
"logits/rejected": 0.2455734759569168, |
|
"logps/chosen": -536.9996337890625, |
|
"logps/rejected": -779.7747802734375, |
|
"loss": 0.3976, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.8160717487335205, |
|
"rewards/margins": 1.6105674505233765, |
|
"rewards/rejected": -3.4266390800476074, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.468337953401909e-06, |
|
"logits/chosen": -0.7846983075141907, |
|
"logits/rejected": 0.3714667558670044, |
|
"logps/chosen": -643.3924560546875, |
|
"logps/rejected": -889.9669189453125, |
|
"loss": 0.4624, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.112175703048706, |
|
"rewards/margins": 1.6929798126220703, |
|
"rewards/rejected": -3.8051555156707764, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.45417168556166e-06, |
|
"logits/chosen": -0.887243390083313, |
|
"logits/rejected": 0.19038431346416473, |
|
"logps/chosen": -501.67987060546875, |
|
"logps/rejected": -794.1953735351562, |
|
"loss": 0.4672, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.6879377365112305, |
|
"rewards/margins": 1.825568437576294, |
|
"rewards/rejected": -3.5135064125061035, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.439842244948036e-06, |
|
"logits/chosen": -1.2177503108978271, |
|
"logits/rejected": -0.813552975654602, |
|
"logps/chosen": -524.879150390625, |
|
"logps/rejected": -667.7706298828125, |
|
"loss": 0.4243, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.5180702209472656, |
|
"rewards/margins": 1.0134727954864502, |
|
"rewards/rejected": -2.531543016433716, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.425350828065204e-06, |
|
"logits/chosen": -1.0455009937286377, |
|
"logits/rejected": 0.2684328556060791, |
|
"logps/chosen": -534.6132202148438, |
|
"logps/rejected": -721.2943115234375, |
|
"loss": 0.3871, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.7661949396133423, |
|
"rewards/margins": 1.332626223564148, |
|
"rewards/rejected": -3.0988211631774902, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.410698644942303e-06, |
|
"logits/chosen": -0.8786664009094238, |
|
"logits/rejected": 0.14112402498722076, |
|
"logps/chosen": -458.5977478027344, |
|
"logps/rejected": -710.25927734375, |
|
"loss": 0.3959, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.411984920501709, |
|
"rewards/margins": 1.6877896785736084, |
|
"rewards/rejected": -3.0997745990753174, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.395886919032406e-06, |
|
"logits/chosen": -0.8496414422988892, |
|
"logits/rejected": -0.006237986497581005, |
|
"logps/chosen": -508.9517517089844, |
|
"logps/rejected": -735.8714599609375, |
|
"loss": 0.4109, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.5958458185195923, |
|
"rewards/margins": 1.4991271495819092, |
|
"rewards/rejected": -3.094973087310791, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.380916887110366e-06, |
|
"logits/chosen": -0.7842418551445007, |
|
"logits/rejected": -0.08200596272945404, |
|
"logps/chosen": -496.2059020996094, |
|
"logps/rejected": -729.1738891601562, |
|
"loss": 0.3737, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.5399061441421509, |
|
"rewards/margins": 1.5425506830215454, |
|
"rewards/rejected": -3.0824568271636963, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.365789799169539e-06, |
|
"logits/chosen": -1.2651920318603516, |
|
"logits/rejected": 0.12391755729913712, |
|
"logps/chosen": -512.1771850585938, |
|
"logps/rejected": -717.8344116210938, |
|
"loss": 0.4228, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.5166397094726562, |
|
"rewards/margins": 1.4622437953948975, |
|
"rewards/rejected": -2.9788835048675537, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.350506918317416e-06, |
|
"logits/chosen": -0.9895895719528198, |
|
"logits/rejected": -0.03255582973361015, |
|
"logps/chosen": -519.2945556640625, |
|
"logps/rejected": -687.9844970703125, |
|
"loss": 0.4106, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.6044280529022217, |
|
"rewards/margins": 1.4460744857788086, |
|
"rewards/rejected": -3.050502300262451, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.335069520670149e-06, |
|
"logits/chosen": -0.8492997884750366, |
|
"logits/rejected": -0.5858234167098999, |
|
"logps/chosen": -516.3770141601562, |
|
"logps/rejected": -707.2117309570312, |
|
"loss": 0.4034, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.7651269435882568, |
|
"rewards/margins": 1.3661835193634033, |
|
"rewards/rejected": -3.1313109397888184, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_logits/chosen": 0.7243556976318359, |
|
"eval_logits/rejected": 1.7155570983886719, |
|
"eval_logps/chosen": -545.945068359375, |
|
"eval_logps/rejected": -770.7060546875, |
|
"eval_loss": 0.44869744777679443, |
|
"eval_rewards/accuracies": 0.7870000004768372, |
|
"eval_rewards/chosen": -1.9028427600860596, |
|
"eval_rewards/margins": 1.614193320274353, |
|
"eval_rewards/rejected": -3.517036199569702, |
|
"eval_runtime": 1383.7156, |
|
"eval_samples_per_second": 1.445, |
|
"eval_steps_per_second": 0.361, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.319478895246e-06, |
|
"logits/chosen": -0.6312126517295837, |
|
"logits/rejected": 0.010389542207121849, |
|
"logps/chosen": -485.5673828125, |
|
"logps/rejected": -724.3965454101562, |
|
"loss": 0.4596, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.7340030670166016, |
|
"rewards/margins": 1.5444698333740234, |
|
"rewards/rejected": -3.278472900390625, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.303736343857704e-06, |
|
"logits/chosen": -0.5306238532066345, |
|
"logits/rejected": 0.09534727036952972, |
|
"logps/chosen": -501.3701171875, |
|
"logps/rejected": -773.635498046875, |
|
"loss": 0.4065, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.6401008367538452, |
|
"rewards/margins": 1.739175796508789, |
|
"rewards/rejected": -3.379276752471924, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.287843181003772e-06, |
|
"logits/chosen": -0.7599018812179565, |
|
"logits/rejected": 0.544152557849884, |
|
"logps/chosen": -492.7386779785156, |
|
"logps/rejected": -739.1934814453125, |
|
"loss": 0.3467, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.591827392578125, |
|
"rewards/margins": 1.6447445154190063, |
|
"rewards/rejected": -3.236571788787842, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.27180073375873e-06, |
|
"logits/chosen": -0.7321812510490417, |
|
"logits/rejected": -0.04632633179426193, |
|
"logps/chosen": -503.90655517578125, |
|
"logps/rejected": -751.9849243164062, |
|
"loss": 0.4352, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.7118949890136719, |
|
"rewards/margins": 1.5225197076797485, |
|
"rewards/rejected": -3.234414577484131, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.255610341662304e-06, |
|
"logits/chosen": -0.8730325698852539, |
|
"logits/rejected": -0.1791534423828125, |
|
"logps/chosen": -524.244140625, |
|
"logps/rejected": -752.1405029296875, |
|
"loss": 0.3827, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.7566620111465454, |
|
"rewards/margins": 1.7185337543487549, |
|
"rewards/rejected": -3.4751956462860107, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.2392733566075764e-06, |
|
"logits/chosen": -0.8349526524543762, |
|
"logits/rejected": 0.03989090770483017, |
|
"logps/chosen": -555.2600708007812, |
|
"logps/rejected": -799.1808471679688, |
|
"loss": 0.4153, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.8972892761230469, |
|
"rewards/margins": 1.5951424837112427, |
|
"rewards/rejected": -3.492431640625, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.2227911427280975e-06, |
|
"logits/chosen": -1.2052414417266846, |
|
"logits/rejected": 0.40989890694618225, |
|
"logps/chosen": -535.0908203125, |
|
"logps/rejected": -766.23974609375, |
|
"loss": 0.4851, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.7357299327850342, |
|
"rewards/margins": 1.7371448278427124, |
|
"rewards/rejected": -3.472874879837036, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.206165076283983e-06, |
|
"logits/chosen": -1.3161356449127197, |
|
"logits/rejected": -0.31805121898651123, |
|
"logps/chosen": -562.9700927734375, |
|
"logps/rejected": -699.4861450195312, |
|
"loss": 0.4345, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.6805912256240845, |
|
"rewards/margins": 1.0278584957122803, |
|
"rewards/rejected": -2.708449602127075, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.189396545546995e-06, |
|
"logits/chosen": -1.2732408046722412, |
|
"logits/rejected": -0.36048611998558044, |
|
"logps/chosen": -463.95849609375, |
|
"logps/rejected": -677.6444091796875, |
|
"loss": 0.4661, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.385292649269104, |
|
"rewards/margins": 1.3424113988876343, |
|
"rewards/rejected": -2.7277040481567383, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.172486950684627e-06, |
|
"logits/chosen": -1.3132286071777344, |
|
"logits/rejected": 0.031231578439474106, |
|
"logps/chosen": -535.4584350585938, |
|
"logps/rejected": -707.3728637695312, |
|
"loss": 0.439, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.6812185049057007, |
|
"rewards/margins": 1.3230525255203247, |
|
"rewards/rejected": -3.0042712688446045, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.155437703643182e-06, |
|
"logits/chosen": -1.2260167598724365, |
|
"logits/rejected": -0.5245649814605713, |
|
"logps/chosen": -462.94403076171875, |
|
"logps/rejected": -694.3932495117188, |
|
"loss": 0.4018, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.4110779762268066, |
|
"rewards/margins": 1.4465053081512451, |
|
"rewards/rejected": -2.8575832843780518, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.138250228029882e-06, |
|
"logits/chosen": -1.0029791593551636, |
|
"logits/rejected": -0.45876234769821167, |
|
"logps/chosen": -471.4588928222656, |
|
"logps/rejected": -711.7996215820312, |
|
"loss": 0.3972, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.6702163219451904, |
|
"rewards/margins": 1.6204955577850342, |
|
"rewards/rejected": -3.2907118797302246, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.120925958993994e-06, |
|
"logits/chosen": -0.5831348299980164, |
|
"logits/rejected": -0.2067866027355194, |
|
"logps/chosen": -476.1014099121094, |
|
"logps/rejected": -722.8709106445312, |
|
"loss": 0.4493, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.6658756732940674, |
|
"rewards/margins": 1.6795238256454468, |
|
"rewards/rejected": -3.3453993797302246, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.103466343106999e-06, |
|
"logits/chosen": -1.077728033065796, |
|
"logits/rejected": 0.1974649727344513, |
|
"logps/chosen": -636.1361083984375, |
|
"logps/rejected": -829.7213134765625, |
|
"loss": 0.5001, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.2163033485412598, |
|
"rewards/margins": 1.5475972890853882, |
|
"rewards/rejected": -3.7639007568359375, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.085872838241797e-06, |
|
"logits/chosen": -0.8451377749443054, |
|
"logits/rejected": -0.4800887703895569, |
|
"logps/chosen": -523.9595947265625, |
|
"logps/rejected": -813.6463012695312, |
|
"loss": 0.4193, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.937745451927185, |
|
"rewards/margins": 1.8354793787002563, |
|
"rewards/rejected": -3.7732245922088623, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.06814691345098e-06, |
|
"logits/chosen": -0.8073797225952148, |
|
"logits/rejected": -0.67634516954422, |
|
"logps/chosen": -499.55364990234375, |
|
"logps/rejected": -725.6375732421875, |
|
"loss": 0.5085, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.8341518640518188, |
|
"rewards/margins": 1.3492587804794312, |
|
"rewards/rejected": -3.18341064453125, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.050290048844171e-06, |
|
"logits/chosen": -1.3967281579971313, |
|
"logits/rejected": -0.1124715581536293, |
|
"logps/chosen": -586.6700439453125, |
|
"logps/rejected": -785.9407958984375, |
|
"loss": 0.4425, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.6672008037567139, |
|
"rewards/margins": 1.4039936065673828, |
|
"rewards/rejected": -3.0711944103240967, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.032303735464422e-06, |
|
"logits/chosen": -1.3250014781951904, |
|
"logits/rejected": -0.28036853671073914, |
|
"logps/chosen": -523.9191284179688, |
|
"logps/rejected": -757.8228759765625, |
|
"loss": 0.3506, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -1.4709548950195312, |
|
"rewards/margins": 1.7138077020645142, |
|
"rewards/rejected": -3.184762477874756, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.014189475163727e-06, |
|
"logits/chosen": -0.9671980142593384, |
|
"logits/rejected": -0.292216956615448, |
|
"logps/chosen": -559.5821533203125, |
|
"logps/rejected": -784.157958984375, |
|
"loss": 0.3887, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -1.7415062189102173, |
|
"rewards/margins": 1.629124641418457, |
|
"rewards/rejected": -3.370630979537964, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.995948780477605e-06, |
|
"logits/chosen": -1.1137468814849854, |
|
"logits/rejected": -0.3016485273838043, |
|
"logps/chosen": -586.1868286132812, |
|
"logps/rejected": -790.1680908203125, |
|
"loss": 0.4715, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.8678308725357056, |
|
"rewards/margins": 1.3062608242034912, |
|
"rewards/rejected": -3.1740918159484863, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.977583174498816e-06, |
|
"logits/chosen": -0.9699400067329407, |
|
"logits/rejected": -0.7510320544242859, |
|
"logps/chosen": -545.1305541992188, |
|
"logps/rejected": -793.4750366210938, |
|
"loss": 0.4399, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.7863260507583618, |
|
"rewards/margins": 1.481147050857544, |
|
"rewards/rejected": -3.267472743988037, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.959094190750172e-06, |
|
"logits/chosen": -1.3356729745864868, |
|
"logits/rejected": -0.6711053848266602, |
|
"logps/chosen": -574.8829956054688, |
|
"logps/rejected": -773.9825439453125, |
|
"loss": 0.3984, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.7231754064559937, |
|
"rewards/margins": 1.5199847221374512, |
|
"rewards/rejected": -3.2431602478027344, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.9404833730564975e-06, |
|
"logits/chosen": -1.2241183519363403, |
|
"logits/rejected": -0.7589890956878662, |
|
"logps/chosen": -524.2040405273438, |
|
"logps/rejected": -684.8192138671875, |
|
"loss": 0.4751, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.550222396850586, |
|
"rewards/margins": 1.2541849613189697, |
|
"rewards/rejected": -2.8044073581695557, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.921752275415712e-06, |
|
"logits/chosen": -1.2616220712661743, |
|
"logits/rejected": -0.6698473691940308, |
|
"logps/chosen": -494.5272521972656, |
|
"logps/rejected": -719.5067749023438, |
|
"loss": 0.407, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.5580322742462158, |
|
"rewards/margins": 1.4134986400604248, |
|
"rewards/rejected": -2.9715309143066406, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.902902461869079e-06, |
|
"logits/chosen": -0.9971591234207153, |
|
"logits/rejected": -0.4130152761936188, |
|
"logps/chosen": -407.35736083984375, |
|
"logps/rejected": -699.933837890625, |
|
"loss": 0.3878, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.3271167278289795, |
|
"rewards/margins": 1.8487539291381836, |
|
"rewards/rejected": -3.175870895385742, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.883935506370605e-06, |
|
"logits/chosen": -1.3185368776321411, |
|
"logits/rejected": -0.5751763582229614, |
|
"logps/chosen": -538.9137573242188, |
|
"logps/rejected": -727.714599609375, |
|
"loss": 0.3787, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.7377328872680664, |
|
"rewards/margins": 1.1966904401779175, |
|
"rewards/rejected": -2.9344232082366943, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.864852992655617e-06, |
|
"logits/chosen": -1.2166504859924316, |
|
"logits/rejected": -0.45289698243141174, |
|
"logps/chosen": -516.9674682617188, |
|
"logps/rejected": -733.4182739257812, |
|
"loss": 0.3926, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.6966655254364014, |
|
"rewards/margins": 1.5359889268875122, |
|
"rewards/rejected": -3.232654571533203, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.845656514108516e-06, |
|
"logits/chosen": -1.2518984079360962, |
|
"logits/rejected": -0.23703515529632568, |
|
"logps/chosen": -571.0328979492188, |
|
"logps/rejected": -839.0745239257812, |
|
"loss": 0.409, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.9428062438964844, |
|
"rewards/margins": 1.905398964881897, |
|
"rewards/rejected": -3.84820556640625, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.826347673629738e-06, |
|
"logits/chosen": -1.241381049156189, |
|
"logits/rejected": -0.424204021692276, |
|
"logps/chosen": -517.6029052734375, |
|
"logps/rejected": -811.5399169921875, |
|
"loss": 0.4235, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.9763988256454468, |
|
"rewards/margins": 1.8420568704605103, |
|
"rewards/rejected": -3.818455457687378, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.8069280835019062e-06, |
|
"logits/chosen": -1.3552948236465454, |
|
"logits/rejected": -0.24583733081817627, |
|
"logps/chosen": -539.5227661132812, |
|
"logps/rejected": -833.4625244140625, |
|
"loss": 0.4193, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.008434295654297, |
|
"rewards/margins": 2.0153846740722656, |
|
"rewards/rejected": -4.0238189697265625, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_logits/chosen": 0.0019010701216757298, |
|
"eval_logits/rejected": 0.9998253583908081, |
|
"eval_logps/chosen": -544.3021240234375, |
|
"eval_logps/rejected": -767.47119140625, |
|
"eval_loss": 0.44199585914611816, |
|
"eval_rewards/accuracies": 0.7839999794960022, |
|
"eval_rewards/chosen": -1.8864127397537231, |
|
"eval_rewards/margins": 1.5982747077941895, |
|
"eval_rewards/rejected": -3.484687566757202, |
|
"eval_runtime": 1381.698, |
|
"eval_samples_per_second": 1.447, |
|
"eval_steps_per_second": 0.362, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7873993652552077e-06, |
|
"logits/chosen": -1.0648901462554932, |
|
"logits/rejected": -0.4144531786441803, |
|
"logps/chosen": -475.7496643066406, |
|
"logps/rejected": -861.4631958007812, |
|
"loss": 0.3316, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -1.5341570377349854, |
|
"rewards/margins": 2.4849419593811035, |
|
"rewards/rejected": -4.019099235534668, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7677631495319953e-06, |
|
"logits/chosen": -1.0717235803604126, |
|
"logits/rejected": -0.44138726592063904, |
|
"logps/chosen": -545.8651733398438, |
|
"logps/rejected": -788.2182006835938, |
|
"loss": 0.4331, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.6811964511871338, |
|
"rewards/margins": 1.6335046291351318, |
|
"rewards/rejected": -3.3147010803222656, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.748021075950633e-06, |
|
"logits/chosen": -1.231894850730896, |
|
"logits/rejected": -0.649477481842041, |
|
"logps/chosen": -472.5684509277344, |
|
"logps/rejected": -648.9671630859375, |
|
"loss": 0.4403, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.6376155614852905, |
|
"rewards/margins": 1.3435395956039429, |
|
"rewards/rejected": -2.9811549186706543, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7281747929685824e-06, |
|
"logits/chosen": -0.8811131715774536, |
|
"logits/rejected": -0.19349880516529083, |
|
"logps/chosen": -556.5198364257812, |
|
"logps/rejected": -868.0969848632812, |
|
"loss": 0.3711, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -1.9225609302520752, |
|
"rewards/margins": 2.121070384979248, |
|
"rewards/rejected": -4.043631076812744, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.7082259577447604e-06, |
|
"logits/chosen": -1.3660091161727905, |
|
"logits/rejected": -0.37334832549095154, |
|
"logps/chosen": -508.5003356933594, |
|
"logps/rejected": -700.1402587890625, |
|
"loss": 0.4505, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.6233818531036377, |
|
"rewards/margins": 1.5405502319335938, |
|
"rewards/rejected": -3.1639320850372314, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.6881762360011688e-06, |
|
"logits/chosen": -0.9465176463127136, |
|
"logits/rejected": -0.1583428531885147, |
|
"logps/chosen": -568.060302734375, |
|
"logps/rejected": -805.0638427734375, |
|
"loss": 0.3998, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.06211256980896, |
|
"rewards/margins": 1.7412744760513306, |
|
"rewards/rejected": -3.80338716506958, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.668027301883802e-06, |
|
"logits/chosen": -0.6530407667160034, |
|
"logits/rejected": 0.17987249791622162, |
|
"logps/chosen": -569.39013671875, |
|
"logps/rejected": -819.4601440429688, |
|
"loss": 0.3991, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.138000726699829, |
|
"rewards/margins": 1.7371118068695068, |
|
"rewards/rejected": -3.8751120567321777, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.64778083782286e-06, |
|
"logits/chosen": -1.1196366548538208, |
|
"logits/rejected": 0.07997065782546997, |
|
"logps/chosen": -588.8242797851562, |
|
"logps/rejected": -888.84619140625, |
|
"loss": 0.3689, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -2.009155750274658, |
|
"rewards/margins": 2.2112624645233154, |
|
"rewards/rejected": -4.220418453216553, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.627438534392268e-06, |
|
"logits/chosen": -1.2351372241973877, |
|
"logits/rejected": -0.21363726258277893, |
|
"logps/chosen": -565.1137084960938, |
|
"logps/rejected": -799.2964477539062, |
|
"loss": 0.4302, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.9956506490707397, |
|
"rewards/margins": 1.7617241144180298, |
|
"rewards/rejected": -3.7573745250701904, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.607002090168506e-06, |
|
"logits/chosen": -1.1077851057052612, |
|
"logits/rejected": -0.3146376609802246, |
|
"logps/chosen": -519.68505859375, |
|
"logps/rejected": -786.7335815429688, |
|
"loss": 0.3644, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.6840426921844482, |
|
"rewards/margins": 1.7921777963638306, |
|
"rewards/rejected": -3.4762203693389893, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.586473211588787e-06, |
|
"logits/chosen": -1.0645841360092163, |
|
"logits/rejected": 0.0885920599102974, |
|
"logps/chosen": -530.0689697265625, |
|
"logps/rejected": -746.6349487304688, |
|
"loss": 0.4293, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.7966455221176147, |
|
"rewards/margins": 1.5383833646774292, |
|
"rewards/rejected": -3.335028886795044, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.5658536128085623e-06, |
|
"logits/chosen": -0.8512083292007446, |
|
"logits/rejected": -0.07845296710729599, |
|
"logps/chosen": -522.5213623046875, |
|
"logps/rejected": -783.8287353515625, |
|
"loss": 0.4465, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.6679248809814453, |
|
"rewards/margins": 1.7967729568481445, |
|
"rewards/rejected": -3.464698076248169, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.545145015558399e-06, |
|
"logits/chosen": -1.1375799179077148, |
|
"logits/rejected": -0.7033450603485107, |
|
"logps/chosen": -488.27825927734375, |
|
"logps/rejected": -743.138427734375, |
|
"loss": 0.4717, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.5062588453292847, |
|
"rewards/margins": 1.479614019393921, |
|
"rewards/rejected": -2.985872983932495, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.5243491490002056e-06, |
|
"logits/chosen": -1.122290849685669, |
|
"logits/rejected": -0.7316358685493469, |
|
"logps/chosen": -507.69036865234375, |
|
"logps/rejected": -690.9684448242188, |
|
"loss": 0.4651, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.5151287317276, |
|
"rewards/margins": 1.2003190517425537, |
|
"rewards/rejected": -2.7154476642608643, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.503467749582857e-06, |
|
"logits/chosen": -1.7979711294174194, |
|
"logits/rejected": -0.23376531898975372, |
|
"logps/chosen": -447.5152282714844, |
|
"logps/rejected": -644.4968872070312, |
|
"loss": 0.4207, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.3395702838897705, |
|
"rewards/margins": 1.2873914241790771, |
|
"rewards/rejected": -2.6269614696502686, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.4825025608971947e-06, |
|
"logits/chosen": -1.1949323415756226, |
|
"logits/rejected": -0.4591255784034729, |
|
"logps/chosen": -531.0167846679688, |
|
"logps/rejected": -708.4334716796875, |
|
"loss": 0.4157, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.6559035778045654, |
|
"rewards/margins": 1.3914124965667725, |
|
"rewards/rejected": -3.047316074371338, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4614553335304407e-06, |
|
"logits/chosen": -0.9112270474433899, |
|
"logits/rejected": -0.7079882025718689, |
|
"logps/chosen": -477.55645751953125, |
|
"logps/rejected": -665.2545166015625, |
|
"loss": 0.4351, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.638381004333496, |
|
"rewards/margins": 0.9713879823684692, |
|
"rewards/rejected": -2.609769105911255, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4403278249200222e-06, |
|
"logits/chosen": -1.1047728061676025, |
|
"logits/rejected": -0.209273099899292, |
|
"logps/chosen": -513.626708984375, |
|
"logps/rejected": -751.2394409179688, |
|
"loss": 0.3962, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -1.6597833633422852, |
|
"rewards/margins": 1.5487313270568848, |
|
"rewards/rejected": -3.20851469039917, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4191217992068293e-06, |
|
"logits/chosen": -1.1829806566238403, |
|
"logits/rejected": 0.04266662523150444, |
|
"logps/chosen": -560.150634765625, |
|
"logps/rejected": -809.93896484375, |
|
"loss": 0.4279, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.8385143280029297, |
|
"rewards/margins": 1.672101378440857, |
|
"rewards/rejected": -3.510615825653076, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.3978390270879056e-06, |
|
"logits/chosen": -1.1551451683044434, |
|
"logits/rejected": -0.16915690898895264, |
|
"logps/chosen": -546.1964111328125, |
|
"logps/rejected": -733.3271484375, |
|
"loss": 0.4386, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.6938936710357666, |
|
"rewards/margins": 1.5033698081970215, |
|
"rewards/rejected": -3.197263240814209, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3764812856685995e-06, |
|
"logits/chosen": -1.0993391275405884, |
|
"logits/rejected": -0.748005211353302, |
|
"logps/chosen": -503.6968688964844, |
|
"logps/rejected": -733.4053955078125, |
|
"loss": 0.4426, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.7742856740951538, |
|
"rewards/margins": 1.4196711778640747, |
|
"rewards/rejected": -3.1939570903778076, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3550503583141726e-06, |
|
"logits/chosen": -1.1645628213882446, |
|
"logits/rejected": -0.29861804842948914, |
|
"logps/chosen": -516.9429931640625, |
|
"logps/rejected": -731.75634765625, |
|
"loss": 0.4486, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.7132374048233032, |
|
"rewards/margins": 1.4840338230133057, |
|
"rewards/rejected": -3.1972713470458984, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3335480345008907e-06, |
|
"logits/chosen": -1.0770137310028076, |
|
"logits/rejected": -0.3164665400981903, |
|
"logps/chosen": -475.98345947265625, |
|
"logps/rejected": -717.9310302734375, |
|
"loss": 0.3996, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.5537554025650024, |
|
"rewards/margins": 1.6571037769317627, |
|
"rewards/rejected": -3.2108588218688965, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.3119761096666055e-06, |
|
"logits/chosen": -1.5323493480682373, |
|
"logits/rejected": -0.1781812459230423, |
|
"logps/chosen": -540.9231567382812, |
|
"logps/rejected": -732.0032958984375, |
|
"loss": 0.3972, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.7525060176849365, |
|
"rewards/margins": 1.5016968250274658, |
|
"rewards/rejected": -3.254202365875244, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.290336385060832e-06, |
|
"logits/chosen": -1.1251745223999023, |
|
"logits/rejected": -0.11602558940649033, |
|
"logps/chosen": -634.4666748046875, |
|
"logps/rejected": -833.8592529296875, |
|
"loss": 0.3915, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.158566951751709, |
|
"rewards/margins": 1.6248620748519897, |
|
"rewards/rejected": -3.783428907394409, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.268630667594348e-06, |
|
"logits/chosen": -0.6574426889419556, |
|
"logits/rejected": -0.43358176946640015, |
|
"logps/chosen": -525.9124755859375, |
|
"logps/rejected": -769.3137817382812, |
|
"loss": 0.4214, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.048187017440796, |
|
"rewards/margins": 1.6054658889770508, |
|
"rewards/rejected": -3.6536529064178467, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2468607696883147e-06, |
|
"logits/chosen": -1.0191020965576172, |
|
"logits/rejected": -0.8884264230728149, |
|
"logps/chosen": -542.0821533203125, |
|
"logps/rejected": -717.0391845703125, |
|
"loss": 0.439, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.9124071598052979, |
|
"rewards/margins": 1.0934618711471558, |
|
"rewards/rejected": -3.0058693885803223, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.225028509122944e-06, |
|
"logits/chosen": -0.6006935834884644, |
|
"logits/rejected": -0.7735892534255981, |
|
"logps/chosen": -506.019287109375, |
|
"logps/rejected": -766.4857177734375, |
|
"loss": 0.4162, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.817831039428711, |
|
"rewards/margins": 1.61776602268219, |
|
"rewards/rejected": -3.4355969429016113, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.2031357088857083e-06, |
|
"logits/chosen": -1.139946699142456, |
|
"logits/rejected": -0.35632461309432983, |
|
"logps/chosen": -478.1253967285156, |
|
"logps/rejected": -795.1430053710938, |
|
"loss": 0.3478, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.6329262256622314, |
|
"rewards/margins": 2.2241363525390625, |
|
"rewards/rejected": -3.857062816619873, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.181184197019127e-06, |
|
"logits/chosen": -1.2290761470794678, |
|
"logits/rejected": -0.23889155685901642, |
|
"logps/chosen": -565.7088623046875, |
|
"logps/rejected": -842.8005981445312, |
|
"loss": 0.409, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.107991933822632, |
|
"rewards/margins": 2.049304485321045, |
|
"rewards/rejected": -4.157296180725098, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_logits/chosen": 0.5340744853019714, |
|
"eval_logits/rejected": 1.4875802993774414, |
|
"eval_logps/chosen": -561.5723266601562, |
|
"eval_logps/rejected": -791.2130126953125, |
|
"eval_loss": 0.43651697039604187, |
|
"eval_rewards/accuracies": 0.7919999957084656, |
|
"eval_rewards/chosen": -2.059115409851074, |
|
"eval_rewards/margins": 1.6629897356033325, |
|
"eval_rewards/rejected": -3.7221052646636963, |
|
"eval_runtime": 1381.695, |
|
"eval_samples_per_second": 1.447, |
|
"eval_steps_per_second": 0.362, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.159175806468126e-06, |
|
"logits/chosen": -1.0473666191101074, |
|
"logits/rejected": -0.06902176141738892, |
|
"logps/chosen": -595.341552734375, |
|
"logps/rejected": -831.9974365234375, |
|
"loss": 0.4152, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.9551061391830444, |
|
"rewards/margins": 1.7293351888656616, |
|
"rewards/rejected": -3.684441328048706, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.1371123749269804e-06, |
|
"logits/chosen": -1.026745319366455, |
|
"logits/rejected": -0.4049917757511139, |
|
"logps/chosen": -565.2854614257812, |
|
"logps/rejected": -764.7696533203125, |
|
"loss": 0.4013, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.9236133098602295, |
|
"rewards/margins": 1.5329084396362305, |
|
"rewards/rejected": -3.456521511077881, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.114995744685877e-06, |
|
"logits/chosen": -0.9977224469184875, |
|
"logits/rejected": -0.40141773223876953, |
|
"logps/chosen": -466.9925842285156, |
|
"logps/rejected": -759.6148071289062, |
|
"loss": 0.4686, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.7426468133926392, |
|
"rewards/margins": 1.7171356678009033, |
|
"rewards/rejected": -3.459782361984253, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.0928277624770743e-06, |
|
"logits/chosen": -1.512317180633545, |
|
"logits/rejected": -0.05047481134533882, |
|
"logps/chosen": -557.8958129882812, |
|
"logps/rejected": -753.0090942382812, |
|
"loss": 0.4698, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.98238205909729, |
|
"rewards/margins": 1.4256618022918701, |
|
"rewards/rejected": -3.408043622970581, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.070610279320708e-06, |
|
"logits/chosen": -1.1659696102142334, |
|
"logits/rejected": -0.22102081775665283, |
|
"logps/chosen": -540.2110595703125, |
|
"logps/rejected": -764.9635620117188, |
|
"loss": 0.3463, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -1.8047651052474976, |
|
"rewards/margins": 1.6794044971466064, |
|
"rewards/rejected": -3.4841697216033936, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.0483451503702264e-06, |
|
"logits/chosen": -0.9429510831832886, |
|
"logits/rejected": -0.374999463558197, |
|
"logps/chosen": -633.7833862304688, |
|
"logps/rejected": -881.3572387695312, |
|
"loss": 0.4, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -2.2336530685424805, |
|
"rewards/margins": 1.6140598058700562, |
|
"rewards/rejected": -3.847712993621826, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.0260342347574916e-06, |
|
"logits/chosen": -0.9639546275138855, |
|
"logits/rejected": -0.8156697154045105, |
|
"logps/chosen": -493.8570861816406, |
|
"logps/rejected": -728.9312744140625, |
|
"loss": 0.3857, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.7304465770721436, |
|
"rewards/margins": 1.2347511053085327, |
|
"rewards/rejected": -2.965198040008545, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.0036793954375358e-06, |
|
"logits/chosen": -1.5436639785766602, |
|
"logits/rejected": 0.08726786822080612, |
|
"logps/chosen": -512.5186157226562, |
|
"logps/rejected": -664.79296875, |
|
"loss": 0.4216, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.7313350439071655, |
|
"rewards/margins": 1.4134327173233032, |
|
"rewards/rejected": -3.144767999649048, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.981282499033009e-06, |
|
"logits/chosen": -0.6750370264053345, |
|
"logits/rejected": -0.8198210597038269, |
|
"logps/chosen": -542.04052734375, |
|
"logps/rejected": -810.6235961914062, |
|
"loss": 0.4386, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -2.0354888439178467, |
|
"rewards/margins": 1.7842447757720947, |
|
"rewards/rejected": -3.8197338581085205, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9588454156783163e-06, |
|
"logits/chosen": -1.0820724964141846, |
|
"logits/rejected": -0.7213941812515259, |
|
"logps/chosen": -541.3480224609375, |
|
"logps/rejected": -766.2906494140625, |
|
"loss": 0.4257, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.084608554840088, |
|
"rewards/margins": 1.3346532583236694, |
|
"rewards/rejected": -3.4192614555358887, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9363700188634597e-06, |
|
"logits/chosen": -1.2453614473342896, |
|
"logits/rejected": -0.5085484385490417, |
|
"logps/chosen": -540.5175170898438, |
|
"logps/rejected": -765.7025756835938, |
|
"loss": 0.4321, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.895167589187622, |
|
"rewards/margins": 1.5056030750274658, |
|
"rewards/rejected": -3.4007697105407715, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9138581852776053e-06, |
|
"logits/chosen": -1.1196304559707642, |
|
"logits/rejected": 0.026494156569242477, |
|
"logps/chosen": -559.1360473632812, |
|
"logps/rejected": -778.654296875, |
|
"loss": 0.4461, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.9397118091583252, |
|
"rewards/margins": 1.5184619426727295, |
|
"rewards/rejected": -3.4581737518310547, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8913117946523805e-06, |
|
"logits/chosen": -1.2101647853851318, |
|
"logits/rejected": 0.05119786784052849, |
|
"logps/chosen": -497.8490295410156, |
|
"logps/rejected": -711.6702270507812, |
|
"loss": 0.4391, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.8607673645019531, |
|
"rewards/margins": 1.6270701885223389, |
|
"rewards/rejected": -3.487837314605713, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8687327296049126e-06, |
|
"logits/chosen": -0.9110754132270813, |
|
"logits/rejected": -0.3987257182598114, |
|
"logps/chosen": -488.52056884765625, |
|
"logps/rejected": -814.98388671875, |
|
"loss": 0.385, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.5353851318359375, |
|
"rewards/margins": 2.019871711730957, |
|
"rewards/rejected": -3.5552570819854736, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8461228754806376e-06, |
|
"logits/chosen": -1.2645906209945679, |
|
"logits/rejected": 0.05890879034996033, |
|
"logps/chosen": -571.50146484375, |
|
"logps/rejected": -764.4017944335938, |
|
"loss": 0.3804, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.847496747970581, |
|
"rewards/margins": 1.488777995109558, |
|
"rewards/rejected": -3.3362746238708496, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.823484120195865e-06, |
|
"logits/chosen": -1.035412311553955, |
|
"logits/rejected": 0.14751215279102325, |
|
"logps/chosen": -568.7293090820312, |
|
"logps/rejected": -764.1280517578125, |
|
"loss": 0.3716, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.9270780086517334, |
|
"rewards/margins": 1.58747398853302, |
|
"rewards/rejected": -3.514551877975464, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.8008183540801486e-06, |
|
"logits/chosen": -1.152499794960022, |
|
"logits/rejected": -0.26493799686431885, |
|
"logps/chosen": -610.4190063476562, |
|
"logps/rejected": -777.2071533203125, |
|
"loss": 0.3893, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.9002234935760498, |
|
"rewards/margins": 1.6286661624908447, |
|
"rewards/rejected": -3.5288894176483154, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7781274697184353e-06, |
|
"logits/chosen": -1.2689648866653442, |
|
"logits/rejected": 0.2965567111968994, |
|
"logps/chosen": -519.8406982421875, |
|
"logps/rejected": -776.2745971679688, |
|
"loss": 0.451, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.7539297342300415, |
|
"rewards/margins": 1.7792565822601318, |
|
"rewards/rejected": -3.533186435699463, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7554133617930397e-06, |
|
"logits/chosen": -1.1502325534820557, |
|
"logits/rejected": -0.16516944766044617, |
|
"logps/chosen": -586.9942626953125, |
|
"logps/rejected": -799.5856323242188, |
|
"loss": 0.3986, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.9755744934082031, |
|
"rewards/margins": 1.5421812534332275, |
|
"rewards/rejected": -3.5177555084228516, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7326779269254363e-06, |
|
"logits/chosen": -1.2239593267440796, |
|
"logits/rejected": -0.29785847663879395, |
|
"logps/chosen": -577.9757080078125, |
|
"logps/rejected": -780.0157470703125, |
|
"loss": 0.4163, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.8510382175445557, |
|
"rewards/margins": 1.496047019958496, |
|
"rewards/rejected": -3.3470852375030518, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.7099230635178954e-06, |
|
"logits/chosen": -0.8359963297843933, |
|
"logits/rejected": -0.609653115272522, |
|
"logps/chosen": -522.3953857421875, |
|
"logps/rejected": -836.3997192382812, |
|
"loss": 0.3754, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.8302185535430908, |
|
"rewards/margins": 1.9239788055419922, |
|
"rewards/rejected": -3.754197359085083, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6871506715949608e-06, |
|
"logits/chosen": -1.2713494300842285, |
|
"logits/rejected": 0.4311772286891937, |
|
"logps/chosen": -510.71942138671875, |
|
"logps/rejected": -779.8484497070312, |
|
"loss": 0.2961, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.6959383487701416, |
|
"rewards/margins": 1.9984363317489624, |
|
"rewards/rejected": -3.6943740844726562, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6643626526448063e-06, |
|
"logits/chosen": -0.745600700378418, |
|
"logits/rejected": -0.04406242445111275, |
|
"logps/chosen": -545.650390625, |
|
"logps/rejected": -726.6231689453125, |
|
"loss": 0.4021, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.117701768875122, |
|
"rewards/margins": 1.459877848625183, |
|
"rewards/rejected": -3.5775794982910156, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6415609094604562e-06, |
|
"logits/chosen": -0.6423822641372681, |
|
"logits/rejected": -0.5382918119430542, |
|
"logps/chosen": -555.1259765625, |
|
"logps/rejected": -854.6931762695312, |
|
"loss": 0.4133, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -2.1485462188720703, |
|
"rewards/margins": 1.8231878280639648, |
|
"rewards/rejected": -3.9717342853546143, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.618747345980904e-06, |
|
"logits/chosen": -0.942598819732666, |
|
"logits/rejected": -0.03942962735891342, |
|
"logps/chosen": -508.3501892089844, |
|
"logps/rejected": -809.349365234375, |
|
"loss": 0.3591, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.922590970993042, |
|
"rewards/margins": 2.0935001373291016, |
|
"rewards/rejected": -4.016091346740723, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.595923867132136e-06, |
|
"logits/chosen": -0.9166833162307739, |
|
"logits/rejected": 0.08796543627977371, |
|
"logps/chosen": -561.3500366210938, |
|
"logps/rejected": -813.5736083984375, |
|
"loss": 0.3879, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.1458635330200195, |
|
"rewards/margins": 1.7413724660873413, |
|
"rewards/rejected": -3.887235641479492, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5730923786680672e-06, |
|
"logits/chosen": -0.8908794522285461, |
|
"logits/rejected": 0.45012766122817993, |
|
"logps/chosen": -528.106689453125, |
|
"logps/rejected": -768.5990600585938, |
|
"loss": 0.3902, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.0598998069763184, |
|
"rewards/margins": 1.840790033340454, |
|
"rewards/rejected": -3.9006900787353516, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5502547870114137e-06, |
|
"logits/chosen": -0.44030576944351196, |
|
"logits/rejected": 0.27115920186042786, |
|
"logps/chosen": -541.02392578125, |
|
"logps/rejected": -866.5523681640625, |
|
"loss": 0.4173, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.1856839656829834, |
|
"rewards/margins": 2.092510223388672, |
|
"rewards/rejected": -4.278193473815918, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.527412999094507e-06, |
|
"logits/chosen": -0.960748553276062, |
|
"logits/rejected": 0.4776690602302551, |
|
"logps/chosen": -641.7643432617188, |
|
"logps/rejected": -842.5614013671875, |
|
"loss": 0.4317, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.455843448638916, |
|
"rewards/margins": 1.5613322257995605, |
|
"rewards/rejected": -4.017176151275635, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.504568922200064e-06, |
|
"logits/chosen": -1.032212495803833, |
|
"logits/rejected": 0.38921135663986206, |
|
"logps/chosen": -526.1879272460938, |
|
"logps/rejected": -780.3800659179688, |
|
"loss": 0.4037, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.8752429485321045, |
|
"rewards/margins": 2.1657333374023438, |
|
"rewards/rejected": -4.040976524353027, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_logits/chosen": 0.9488687515258789, |
|
"eval_logits/rejected": 1.9484919309616089, |
|
"eval_logps/chosen": -568.4110107421875, |
|
"eval_logps/rejected": -807.3529052734375, |
|
"eval_loss": 0.43342798948287964, |
|
"eval_rewards/accuracies": 0.796999990940094, |
|
"eval_rewards/chosen": -2.127501964569092, |
|
"eval_rewards/margins": 1.7560021877288818, |
|
"eval_rewards/rejected": -3.8835039138793945, |
|
"eval_runtime": 1369.4586, |
|
"eval_samples_per_second": 1.46, |
|
"eval_steps_per_second": 0.365, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.4817244638019333e-06, |
|
"logits/chosen": -0.7853974103927612, |
|
"logits/rejected": 0.26276087760925293, |
|
"logps/chosen": -576.4677734375, |
|
"logps/rejected": -882.1696166992188, |
|
"loss": 0.4205, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.413973331451416, |
|
"rewards/margins": 1.9889461994171143, |
|
"rewards/rejected": -4.402919292449951, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.4588815314058155e-06, |
|
"logits/chosen": -0.7402883768081665, |
|
"logits/rejected": 0.13213138282299042, |
|
"logps/chosen": -515.807373046875, |
|
"logps/rejected": -840.2550048828125, |
|
"loss": 0.4364, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.9831836223602295, |
|
"rewards/margins": 2.3909099102020264, |
|
"rewards/rejected": -4.374093532562256, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4360420323899922e-06, |
|
"logits/chosen": -0.9780701398849487, |
|
"logits/rejected": -0.47581759095191956, |
|
"logps/chosen": -536.0255126953125, |
|
"logps/rejected": -821.7635498046875, |
|
"loss": 0.4016, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.8594684600830078, |
|
"rewards/margins": 1.7201998233795166, |
|
"rewards/rejected": -3.5796680450439453, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4132078738460585e-06, |
|
"logits/chosen": -0.7851123809814453, |
|
"logits/rejected": -0.3619709610939026, |
|
"logps/chosen": -536.1060791015625, |
|
"logps/rejected": -812.8576049804688, |
|
"loss": 0.3879, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.774129867553711, |
|
"rewards/margins": 1.8565963506698608, |
|
"rewards/rejected": -3.6307265758514404, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.3903809624196826e-06, |
|
"logits/chosen": -1.0001862049102783, |
|
"logits/rejected": -0.3588159680366516, |
|
"logps/chosen": -524.4048461914062, |
|
"logps/rejected": -757.30224609375, |
|
"loss": 0.4045, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.6777470111846924, |
|
"rewards/margins": 1.4243619441986084, |
|
"rewards/rejected": -3.10210919380188, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3675632041513978e-06, |
|
"logits/chosen": -0.8742032051086426, |
|
"logits/rejected": 0.1250528246164322, |
|
"logps/chosen": -591.0662841796875, |
|
"logps/rejected": -873.0941162109375, |
|
"loss": 0.3503, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.1168835163116455, |
|
"rewards/margins": 1.8603336811065674, |
|
"rewards/rejected": -3.9772167205810547, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3447565043174533e-06, |
|
"logits/chosen": -0.9236310124397278, |
|
"logits/rejected": -0.3211767077445984, |
|
"logps/chosen": -604.6006469726562, |
|
"logps/rejected": -848.4435424804688, |
|
"loss": 0.3555, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.398642063140869, |
|
"rewards/margins": 1.6593701839447021, |
|
"rewards/rejected": -4.058012962341309, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.321962767270724e-06, |
|
"logits/chosen": -0.4545938968658447, |
|
"logits/rejected": -0.04614262655377388, |
|
"logps/chosen": -522.8587646484375, |
|
"logps/rejected": -840.7001953125, |
|
"loss": 0.424, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.117671012878418, |
|
"rewards/margins": 2.160691499710083, |
|
"rewards/rejected": -4.278363227844238, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.299183896281692e-06, |
|
"logits/chosen": -0.8900741338729858, |
|
"logits/rejected": 0.11245179176330566, |
|
"logps/chosen": -621.9942626953125, |
|
"logps/rejected": -912.0670776367188, |
|
"loss": 0.3948, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.5180599689483643, |
|
"rewards/margins": 2.2509348392486572, |
|
"rewards/rejected": -4.768994331359863, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2764217933795297e-06, |
|
"logits/chosen": -1.2973425388336182, |
|
"logits/rejected": 0.3502582907676697, |
|
"logps/chosen": -648.1968994140625, |
|
"logps/rejected": -849.2420043945312, |
|
"loss": 0.4569, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.5464279651641846, |
|
"rewards/margins": 1.8135731220245361, |
|
"rewards/rejected": -4.3600006103515625, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2536783591932786e-06, |
|
"logits/chosen": -1.0717593431472778, |
|
"logits/rejected": -0.04763598367571831, |
|
"logps/chosen": -564.8670654296875, |
|
"logps/rejected": -862.4423828125, |
|
"loss": 0.383, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.1421093940734863, |
|
"rewards/margins": 2.115607976913452, |
|
"rewards/rejected": -4.257718086242676, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.230955492793149e-06, |
|
"logits/chosen": -0.6327053308486938, |
|
"logits/rejected": -0.22368088364601135, |
|
"logps/chosen": -552.7775268554688, |
|
"logps/rejected": -753.1697998046875, |
|
"loss": 0.4299, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.939135193824768, |
|
"rewards/margins": 1.3090981245040894, |
|
"rewards/rejected": -3.2482333183288574, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.208255091531947e-06, |
|
"logits/chosen": -0.9401483535766602, |
|
"logits/rejected": 0.13758346438407898, |
|
"logps/chosen": -531.6614990234375, |
|
"logps/rejected": -746.9278564453125, |
|
"loss": 0.4358, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.8913850784301758, |
|
"rewards/margins": 1.5273140668869019, |
|
"rewards/rejected": -3.418699264526367, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1855790508866435e-06, |
|
"logits/chosen": -1.4784080982208252, |
|
"logits/rejected": 0.4357272982597351, |
|
"logps/chosen": -597.9056396484375, |
|
"logps/rejected": -766.9682006835938, |
|
"loss": 0.4395, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.0671706199645996, |
|
"rewards/margins": 1.5233346223831177, |
|
"rewards/rejected": -3.5905051231384277, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.162929264300107e-06, |
|
"logits/chosen": -1.0196201801300049, |
|
"logits/rejected": 0.6838423013687134, |
|
"logps/chosen": -581.2928466796875, |
|
"logps/rejected": -801.315185546875, |
|
"loss": 0.4091, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.0828678607940674, |
|
"rewards/margins": 1.6539685726165771, |
|
"rewards/rejected": -3.7368361949920654, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1403076230230006e-06, |
|
"logits/chosen": -0.6144061088562012, |
|
"logits/rejected": -0.3462333083152771, |
|
"logps/chosen": -555.2684326171875, |
|
"logps/rejected": -745.2354736328125, |
|
"loss": 0.4423, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.8994195461273193, |
|
"rewards/margins": 1.2702502012252808, |
|
"rewards/rejected": -3.1696696281433105, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.11771601595586e-06, |
|
"logits/chosen": -1.3248759508132935, |
|
"logits/rejected": -0.06512956321239471, |
|
"logps/chosen": -526.8836669921875, |
|
"logps/rejected": -727.4069213867188, |
|
"loss": 0.4316, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.7024120092391968, |
|
"rewards/margins": 1.4804304838180542, |
|
"rewards/rejected": -3.182842493057251, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0951563294913737e-06, |
|
"logits/chosen": -0.8413470387458801, |
|
"logits/rejected": -0.06420852988958359, |
|
"logps/chosen": -514.109375, |
|
"logps/rejected": -770.6260986328125, |
|
"loss": 0.3656, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.8018481731414795, |
|
"rewards/margins": 1.71941339969635, |
|
"rewards/rejected": -3.5212619304656982, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0726304473568693e-06, |
|
"logits/chosen": -1.5680863857269287, |
|
"logits/rejected": 0.14398300647735596, |
|
"logps/chosen": -554.5525512695312, |
|
"logps/rejected": -771.2307739257812, |
|
"loss": 0.4331, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.7925901412963867, |
|
"rewards/margins": 1.641396164894104, |
|
"rewards/rejected": -3.433986186981201, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.050140250457023e-06, |
|
"logits/chosen": -1.147033452987671, |
|
"logits/rejected": -0.169643372297287, |
|
"logps/chosen": -544.5403442382812, |
|
"logps/rejected": -771.8599243164062, |
|
"loss": 0.411, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.702807068824768, |
|
"rewards/margins": 1.5390836000442505, |
|
"rewards/rejected": -3.2418906688690186, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0276876167168042e-06, |
|
"logits/chosen": -1.0450040102005005, |
|
"logits/rejected": -0.5411997437477112, |
|
"logps/chosen": -464.33636474609375, |
|
"logps/rejected": -644.3059692382812, |
|
"loss": 0.4458, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.661592721939087, |
|
"rewards/margins": 1.0601894855499268, |
|
"rewards/rejected": -2.7217824459075928, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.0052744209246682e-06, |
|
"logits/chosen": -0.9377092123031616, |
|
"logits/rejected": -0.40263956785202026, |
|
"logps/chosen": -498.0686950683594, |
|
"logps/rejected": -739.6976318359375, |
|
"loss": 0.4114, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.7086822986602783, |
|
"rewards/margins": 1.4424405097961426, |
|
"rewards/rejected": -3.151122570037842, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9829025345760127e-06, |
|
"logits/chosen": -0.7755551338195801, |
|
"logits/rejected": -0.34596508741378784, |
|
"logps/chosen": -440.62872314453125, |
|
"logps/rejected": -650.15478515625, |
|
"loss": 0.3539, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -1.3974610567092896, |
|
"rewards/margins": 1.319804310798645, |
|
"rewards/rejected": -2.7172653675079346, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9605738257169115e-06, |
|
"logits/chosen": -1.3644423484802246, |
|
"logits/rejected": -0.3873172700405121, |
|
"logps/chosen": -490.96466064453125, |
|
"logps/rejected": -707.909423828125, |
|
"loss": 0.3942, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.6896295547485352, |
|
"rewards/margins": 1.3346831798553467, |
|
"rewards/rejected": -3.0243124961853027, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9382901587881275e-06, |
|
"logits/chosen": -1.3390328884124756, |
|
"logits/rejected": -0.3258362114429474, |
|
"logps/chosen": -560.3353271484375, |
|
"logps/rejected": -710.94091796875, |
|
"loss": 0.4555, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.788905143737793, |
|
"rewards/margins": 1.1810920238494873, |
|
"rewards/rejected": -2.9699971675872803, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.916053394469437e-06, |
|
"logits/chosen": -1.2866407632827759, |
|
"logits/rejected": -0.09522955119609833, |
|
"logps/chosen": -534.5072021484375, |
|
"logps/rejected": -711.5755004882812, |
|
"loss": 0.3582, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -1.719448447227478, |
|
"rewards/margins": 1.4744646549224854, |
|
"rewards/rejected": -3.193912982940674, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8938653895242604e-06, |
|
"logits/chosen": -1.3210828304290771, |
|
"logits/rejected": 0.18320707976818085, |
|
"logps/chosen": -570.3659057617188, |
|
"logps/rejected": -748.9262084960938, |
|
"loss": 0.38, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.9527915716171265, |
|
"rewards/margins": 1.4824120998382568, |
|
"rewards/rejected": -3.4352035522460938, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8717279966446267e-06, |
|
"logits/chosen": -0.6077791452407837, |
|
"logits/rejected": -0.07136712223291397, |
|
"logps/chosen": -561.3985595703125, |
|
"logps/rejected": -833.6058349609375, |
|
"loss": 0.4602, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.8687776327133179, |
|
"rewards/margins": 2.1088478565216064, |
|
"rewards/rejected": -3.9776253700256348, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8496430642964698e-06, |
|
"logits/chosen": -0.6436141133308411, |
|
"logits/rejected": -0.7390815615653992, |
|
"logps/chosen": -535.98291015625, |
|
"logps/rejected": -785.1193237304688, |
|
"loss": 0.4384, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.8680446147918701, |
|
"rewards/margins": 1.4708973169326782, |
|
"rewards/rejected": -3.338942050933838, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.827612436565286e-06, |
|
"logits/chosen": -0.7472543716430664, |
|
"logits/rejected": -0.552331805229187, |
|
"logps/chosen": -512.9714965820312, |
|
"logps/rejected": -776.1787109375, |
|
"loss": 0.3829, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.701409101486206, |
|
"rewards/margins": 1.6592018604278564, |
|
"rewards/rejected": -3.3606104850769043, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_logits/chosen": 0.5047381520271301, |
|
"eval_logits/rejected": 1.5421233177185059, |
|
"eval_logps/chosen": -543.5669555664062, |
|
"eval_logps/rejected": -768.0193481445312, |
|
"eval_loss": 0.4248420000076294, |
|
"eval_rewards/accuracies": 0.8009999990463257, |
|
"eval_rewards/chosen": -1.879061222076416, |
|
"eval_rewards/margins": 1.611107587814331, |
|
"eval_rewards/rejected": -3.490169048309326, |
|
"eval_runtime": 1378.4799, |
|
"eval_samples_per_second": 1.451, |
|
"eval_steps_per_second": 0.363, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8056379530021492e-06, |
|
"logits/chosen": -1.644568681716919, |
|
"logits/rejected": 0.2553193271160126, |
|
"logps/chosen": -535.88134765625, |
|
"logps/rejected": -778.2227783203125, |
|
"loss": 0.3769, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.729379415512085, |
|
"rewards/margins": 1.8933357000350952, |
|
"rewards/rejected": -3.6227145195007324, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.7837214484701154e-06, |
|
"logits/chosen": -1.0312998294830322, |
|
"logits/rejected": -0.9269927144050598, |
|
"logps/chosen": -492.1142578125, |
|
"logps/rejected": -759.744140625, |
|
"loss": 0.3937, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -1.6029274463653564, |
|
"rewards/margins": 1.807284951210022, |
|
"rewards/rejected": -3.410212278366089, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7618647529910043e-06, |
|
"logits/chosen": -1.2176530361175537, |
|
"logits/rejected": 0.223758265376091, |
|
"logps/chosen": -529.0576171875, |
|
"logps/rejected": -756.2348022460938, |
|
"loss": 0.4664, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.851143479347229, |
|
"rewards/margins": 1.5376180410385132, |
|
"rewards/rejected": -3.388761520385742, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7400696915925996e-06, |
|
"logits/chosen": -1.2333937883377075, |
|
"logits/rejected": -0.22815477848052979, |
|
"logps/chosen": -530.9912719726562, |
|
"logps/rejected": -742.2661743164062, |
|
"loss": 0.396, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.743680715560913, |
|
"rewards/margins": 1.500349521636963, |
|
"rewards/rejected": -3.244030475616455, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.718338084156254e-06, |
|
"logits/chosen": -1.519798994064331, |
|
"logits/rejected": -0.11792447417974472, |
|
"logps/chosen": -558.024658203125, |
|
"logps/rejected": -786.2930908203125, |
|
"loss": 0.4233, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.6490389108657837, |
|
"rewards/margins": 1.6921495199203491, |
|
"rewards/rejected": -3.341188430786133, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.6966717452649372e-06, |
|
"logits/chosen": -1.2660577297210693, |
|
"logits/rejected": -0.0037010847590863705, |
|
"logps/chosen": -472.52471923828125, |
|
"logps/rejected": -651.4722900390625, |
|
"loss": 0.4358, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.6838678121566772, |
|
"rewards/margins": 1.2879998683929443, |
|
"rewards/rejected": -2.971867561340332, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6750724840517103e-06, |
|
"logits/chosen": -1.2322640419006348, |
|
"logits/rejected": -0.4316504895687103, |
|
"logps/chosen": -495.26544189453125, |
|
"logps/rejected": -720.3032836914062, |
|
"loss": 0.4283, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.5014667510986328, |
|
"rewards/margins": 1.5642062425613403, |
|
"rewards/rejected": -3.0656726360321045, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6535421040486686e-06, |
|
"logits/chosen": -0.9225546717643738, |
|
"logits/rejected": 0.307370662689209, |
|
"logps/chosen": -537.8445434570312, |
|
"logps/rejected": -764.1954345703125, |
|
"loss": 0.4295, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.8402236700057983, |
|
"rewards/margins": 1.5403947830200195, |
|
"rewards/rejected": -3.3806185722351074, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6320824030363458e-06, |
|
"logits/chosen": -0.8234320878982544, |
|
"logits/rejected": -0.4902985095977783, |
|
"logps/chosen": -521.9378662109375, |
|
"logps/rejected": -800.2440185546875, |
|
"loss": 0.3775, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.6927446126937866, |
|
"rewards/margins": 1.8545596599578857, |
|
"rewards/rejected": -3.547304630279541, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6106951728936028e-06, |
|
"logits/chosen": -0.9694339036941528, |
|
"logits/rejected": -0.04356659576296806, |
|
"logps/chosen": -550.1474609375, |
|
"logps/rejected": -714.2723999023438, |
|
"loss": 0.3934, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.6208375692367554, |
|
"rewards/margins": 1.5635387897491455, |
|
"rewards/rejected": -3.1843764781951904, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5893821994479996e-06, |
|
"logits/chosen": -1.3845258951187134, |
|
"logits/rejected": 0.13586857914924622, |
|
"logps/chosen": -514.47509765625, |
|
"logps/rejected": -733.2557373046875, |
|
"loss": 0.4431, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.7695598602294922, |
|
"rewards/margins": 1.6184390783309937, |
|
"rewards/rejected": -3.3879990577697754, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5681452623266868e-06, |
|
"logits/chosen": -0.6911166310310364, |
|
"logits/rejected": -0.19654271006584167, |
|
"logps/chosen": -473.919189453125, |
|
"logps/rejected": -720.5162963867188, |
|
"loss": 0.4099, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.6180083751678467, |
|
"rewards/margins": 1.4277281761169434, |
|
"rewards/rejected": -3.04573655128479, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5469861348078014e-06, |
|
"logits/chosen": -1.3716986179351807, |
|
"logits/rejected": -0.06388586759567261, |
|
"logps/chosen": -564.1226196289062, |
|
"logps/rejected": -798.0968017578125, |
|
"loss": 0.4469, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.9020678997039795, |
|
"rewards/margins": 1.5944623947143555, |
|
"rewards/rejected": -3.496530532836914, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5259065836724035e-06, |
|
"logits/chosen": -0.8546286821365356, |
|
"logits/rejected": -0.3107864260673523, |
|
"logps/chosen": -550.43017578125, |
|
"logps/rejected": -754.4808349609375, |
|
"loss": 0.3968, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.006619453430176, |
|
"rewards/margins": 1.325626015663147, |
|
"rewards/rejected": -3.3322455883026123, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.5049083690569456e-06, |
|
"logits/chosen": -1.1054461002349854, |
|
"logits/rejected": -0.2836257517337799, |
|
"logps/chosen": -498.7848205566406, |
|
"logps/rejected": -744.7251586914062, |
|
"loss": 0.4576, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.7602741718292236, |
|
"rewards/margins": 1.6243937015533447, |
|
"rewards/rejected": -3.3846676349639893, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4839932443063057e-06, |
|
"logits/chosen": -1.052145004272461, |
|
"logits/rejected": -0.4817884564399719, |
|
"logps/chosen": -500.67022705078125, |
|
"logps/rejected": -772.7791748046875, |
|
"loss": 0.4111, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.7564786672592163, |
|
"rewards/margins": 1.9239161014556885, |
|
"rewards/rejected": -3.6803946495056152, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4631629558273803e-06, |
|
"logits/chosen": -1.1972812414169312, |
|
"logits/rejected": -0.44412803649902344, |
|
"logps/chosen": -612.2957153320312, |
|
"logps/rejected": -818.8905029296875, |
|
"loss": 0.4121, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.9536006450653076, |
|
"rewards/margins": 1.6227566003799438, |
|
"rewards/rejected": -3.576357364654541, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.4424192429432657e-06, |
|
"logits/chosen": -1.1940516233444214, |
|
"logits/rejected": -0.6190989017486572, |
|
"logps/chosen": -545.1439208984375, |
|
"logps/rejected": -734.8392333984375, |
|
"loss": 0.5085, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.8257439136505127, |
|
"rewards/margins": 1.1313966512680054, |
|
"rewards/rejected": -2.9571404457092285, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.421763837748016e-06, |
|
"logits/chosen": -0.8670721054077148, |
|
"logits/rejected": 0.19919352233409882, |
|
"logps/chosen": -536.2199096679688, |
|
"logps/rejected": -726.8831787109375, |
|
"loss": 0.3427, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.5812265872955322, |
|
"rewards/margins": 1.5793380737304688, |
|
"rewards/rejected": -3.160564422607422, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.401198464962021e-06, |
|
"logits/chosen": -0.7592190504074097, |
|
"logits/rejected": -0.6002682447433472, |
|
"logps/chosen": -536.698974609375, |
|
"logps/rejected": -750.5850219726562, |
|
"loss": 0.4207, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.7439777851104736, |
|
"rewards/margins": 1.444570779800415, |
|
"rewards/rejected": -3.1885488033294678, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.3807248417879896e-06, |
|
"logits/chosen": -0.9805696606636047, |
|
"logits/rejected": -0.1842125803232193, |
|
"logps/chosen": -496.516845703125, |
|
"logps/rejected": -714.7666015625, |
|
"loss": 0.4135, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.775261640548706, |
|
"rewards/margins": 1.5725538730621338, |
|
"rewards/rejected": -3.347815752029419, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3603446777675665e-06, |
|
"logits/chosen": -1.047123670578003, |
|
"logits/rejected": -0.30356377363204956, |
|
"logps/chosen": -510.2223205566406, |
|
"logps/rejected": -704.94091796875, |
|
"loss": 0.4062, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -1.7939850091934204, |
|
"rewards/margins": 1.3114802837371826, |
|
"rewards/rejected": -3.1054649353027344, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3400596746385817e-06, |
|
"logits/chosen": -1.089388370513916, |
|
"logits/rejected": -0.30947428941726685, |
|
"logps/chosen": -532.441650390625, |
|
"logps/rejected": -787.63720703125, |
|
"loss": 0.3845, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -1.7372934818267822, |
|
"rewards/margins": 1.8088710308074951, |
|
"rewards/rejected": -3.5461642742156982, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3198715261929587e-06, |
|
"logits/chosen": -0.878930389881134, |
|
"logits/rejected": -0.4860079884529114, |
|
"logps/chosen": -504.37701416015625, |
|
"logps/rejected": -719.3287963867188, |
|
"loss": 0.4429, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.7471263408660889, |
|
"rewards/margins": 1.4954102039337158, |
|
"rewards/rejected": -3.2425365447998047, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.2997819181352823e-06, |
|
"logits/chosen": -1.1908671855926514, |
|
"logits/rejected": 0.06708762049674988, |
|
"logps/chosen": -486.3038024902344, |
|
"logps/rejected": -774.8641357421875, |
|
"loss": 0.3094, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -1.6818081140518188, |
|
"rewards/margins": 2.000152587890625, |
|
"rewards/rejected": -3.6819605827331543, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2797925279420454e-06, |
|
"logits/chosen": -0.9907468557357788, |
|
"logits/rejected": -0.3420366942882538, |
|
"logps/chosen": -546.3676147460938, |
|
"logps/rejected": -735.4510498046875, |
|
"loss": 0.3966, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.9783893823623657, |
|
"rewards/margins": 1.4355647563934326, |
|
"rewards/rejected": -3.413954257965088, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2599050247215764e-06, |
|
"logits/chosen": -1.4948866367340088, |
|
"logits/rejected": 0.49219974875450134, |
|
"logps/chosen": -573.19140625, |
|
"logps/rejected": -743.2158203125, |
|
"loss": 0.4396, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.086617946624756, |
|
"rewards/margins": 1.3520699739456177, |
|
"rewards/rejected": -3.438687801361084, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2401210690746705e-06, |
|
"logits/chosen": -0.823261559009552, |
|
"logits/rejected": -0.15638458728790283, |
|
"logps/chosen": -535.7666625976562, |
|
"logps/rejected": -808.8283081054688, |
|
"loss": 0.4014, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.9818382263183594, |
|
"rewards/margins": 1.6475715637207031, |
|
"rewards/rejected": -3.629409074783325, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2204423129559306e-06, |
|
"logits/chosen": -0.9111806154251099, |
|
"logits/rejected": -0.8399251699447632, |
|
"logps/chosen": -543.661865234375, |
|
"logps/rejected": -744.8538818359375, |
|
"loss": 0.4439, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.9010549783706665, |
|
"rewards/margins": 1.2256118059158325, |
|
"rewards/rejected": -3.126666784286499, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.20087039953583e-06, |
|
"logits/chosen": -1.051735281944275, |
|
"logits/rejected": -0.22251495718955994, |
|
"logps/chosen": -584.2539672851562, |
|
"logps/rejected": -825.45703125, |
|
"loss": 0.47, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.9151395559310913, |
|
"rewards/margins": 1.5970687866210938, |
|
"rewards/rejected": -3.5122084617614746, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_logits/chosen": 0.5343354940414429, |
|
"eval_logits/rejected": 1.5151687860488892, |
|
"eval_logps/chosen": -541.308837890625, |
|
"eval_logps/rejected": -759.2698974609375, |
|
"eval_loss": 0.4210600256919861, |
|
"eval_rewards/accuracies": 0.8029999732971191, |
|
"eval_rewards/chosen": -1.8564802408218384, |
|
"eval_rewards/margins": 1.5461931228637695, |
|
"eval_rewards/rejected": -3.4026734828948975, |
|
"eval_runtime": 1376.5386, |
|
"eval_samples_per_second": 1.453, |
|
"eval_steps_per_second": 0.363, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.181406963063507e-06, |
|
"logits/chosen": -1.0936121940612793, |
|
"logits/rejected": -0.3421854078769684, |
|
"logps/chosen": -551.6010131835938, |
|
"logps/rejected": -727.1361083984375, |
|
"loss": 0.523, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.9586423635482788, |
|
"rewards/margins": 1.3370798826217651, |
|
"rewards/rejected": -3.295722484588623, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1620536287303052e-06, |
|
"logits/chosen": -1.2988653182983398, |
|
"logits/rejected": -0.4047287404537201, |
|
"logps/chosen": -515.3692626953125, |
|
"logps/rejected": -714.9964599609375, |
|
"loss": 0.4243, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -1.6312452554702759, |
|
"rewards/margins": 1.321842074394226, |
|
"rewards/rejected": -2.953087091445923, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1428120125340717e-06, |
|
"logits/chosen": -1.1262309551239014, |
|
"logits/rejected": 0.1220148354768753, |
|
"logps/chosen": -560.1183471679688, |
|
"logps/rejected": -755.31103515625, |
|
"loss": 0.4397, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.7473366260528564, |
|
"rewards/margins": 1.4563024044036865, |
|
"rewards/rejected": -3.203639268875122, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.123683721144223e-06, |
|
"logits/chosen": -1.1425328254699707, |
|
"logits/rejected": -0.3113950788974762, |
|
"logps/chosen": -507.19598388671875, |
|
"logps/rejected": -742.7738037109375, |
|
"loss": 0.3137, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -1.6102272272109985, |
|
"rewards/margins": 1.5865800380706787, |
|
"rewards/rejected": -3.196807384490967, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.1046703517675848e-06, |
|
"logits/chosen": -1.2373132705688477, |
|
"logits/rejected": -0.09322497248649597, |
|
"logps/chosen": -504.95501708984375, |
|
"logps/rejected": -680.431396484375, |
|
"loss": 0.4333, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.792452096939087, |
|
"rewards/margins": 1.1824333667755127, |
|
"rewards/rejected": -2.9748852252960205, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.085773492015028e-06, |
|
"logits/chosen": -0.8464914560317993, |
|
"logits/rejected": -0.11784086376428604, |
|
"logps/chosen": -549.912841796875, |
|
"logps/rejected": -770.5538330078125, |
|
"loss": 0.4082, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.764224648475647, |
|
"rewards/margins": 1.4827383756637573, |
|
"rewards/rejected": -3.2469630241394043, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.0669947197689034e-06, |
|
"logits/chosen": -0.7767351269721985, |
|
"logits/rejected": 0.08518421649932861, |
|
"logps/chosen": -487.90399169921875, |
|
"logps/rejected": -754.19287109375, |
|
"loss": 0.3626, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.7886440753936768, |
|
"rewards/margins": 1.6782163381576538, |
|
"rewards/rejected": -3.466860294342041, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.048335603051291e-06, |
|
"logits/chosen": -0.9023948907852173, |
|
"logits/rejected": 0.5675928592681885, |
|
"logps/chosen": -573.4138793945312, |
|
"logps/rejected": -760.1019897460938, |
|
"loss": 0.3304, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.9305530786514282, |
|
"rewards/margins": 1.7111543416976929, |
|
"rewards/rejected": -3.641707181930542, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0297976998930665e-06, |
|
"logits/chosen": -1.0089476108551025, |
|
"logits/rejected": 0.039520103484392166, |
|
"logps/chosen": -544.458251953125, |
|
"logps/rejected": -781.134521484375, |
|
"loss": 0.4407, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.9217383861541748, |
|
"rewards/margins": 1.6432183980941772, |
|
"rewards/rejected": -3.5649566650390625, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0113825582038078e-06, |
|
"logits/chosen": -0.9111200571060181, |
|
"logits/rejected": 0.09320324659347534, |
|
"logps/chosen": -485.19464111328125, |
|
"logps/rejected": -772.6851806640625, |
|
"loss": 0.3662, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -1.6882908344268799, |
|
"rewards/margins": 1.9764955043792725, |
|
"rewards/rejected": -3.6647861003875732, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.930917156425477e-07, |
|
"logits/chosen": -1.3273289203643799, |
|
"logits/rejected": 0.12840789556503296, |
|
"logps/chosen": -539.8751220703125, |
|
"logps/rejected": -770.5255126953125, |
|
"loss": 0.4417, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.8574135303497314, |
|
"rewards/margins": 1.6678911447525024, |
|
"rewards/rejected": -3.525304079055786, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.749266994893756e-07, |
|
"logits/chosen": -1.2743171453475952, |
|
"logits/rejected": -0.29028937220573425, |
|
"logps/chosen": -505.72515869140625, |
|
"logps/rejected": -708.1859130859375, |
|
"loss": 0.3927, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.6994062662124634, |
|
"rewards/margins": 1.5197365283966064, |
|
"rewards/rejected": -3.2191429138183594, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.56889026517913e-07, |
|
"logits/chosen": -1.2260441780090332, |
|
"logits/rejected": 0.2879168689250946, |
|
"logps/chosen": -580.123291015625, |
|
"logps/rejected": -747.0691528320312, |
|
"loss": 0.3703, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.0501606464385986, |
|
"rewards/margins": 1.359299659729004, |
|
"rewards/rejected": -3.4094605445861816, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.389802028686617e-07, |
|
"logits/chosen": -1.1472103595733643, |
|
"logits/rejected": 0.685859203338623, |
|
"logps/chosen": -564.8762817382812, |
|
"logps/rejected": -833.5897216796875, |
|
"loss": 0.3696, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.9314100742340088, |
|
"rewards/margins": 2.114525318145752, |
|
"rewards/rejected": -4.04593563079834, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.212017239232427e-07, |
|
"logits/chosen": -1.1055911779403687, |
|
"logits/rejected": 0.17128732800483704, |
|
"logps/chosen": -461.57757568359375, |
|
"logps/rejected": -762.3133544921875, |
|
"loss": 0.3902, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.596985101699829, |
|
"rewards/margins": 1.919716477394104, |
|
"rewards/rejected": -3.5167019367218018, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.03555074179533e-07, |
|
"logits/chosen": -0.5496357679367065, |
|
"logits/rejected": -0.5428146123886108, |
|
"logps/chosen": -548.8982543945312, |
|
"logps/rejected": -794.3575439453125, |
|
"loss": 0.4303, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.8840789794921875, |
|
"rewards/margins": 1.6221778392791748, |
|
"rewards/rejected": -3.506256580352783, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.860417271277067e-07, |
|
"logits/chosen": -1.2351821660995483, |
|
"logits/rejected": -0.3069414794445038, |
|
"logps/chosen": -512.6089477539062, |
|
"logps/rejected": -765.6232299804688, |
|
"loss": 0.3669, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.8200585842132568, |
|
"rewards/margins": 1.858338713645935, |
|
"rewards/rejected": -3.6783974170684814, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.686631451272029e-07, |
|
"logits/chosen": -0.9176104664802551, |
|
"logits/rejected": 0.11124134063720703, |
|
"logps/chosen": -530.0169067382812, |
|
"logps/rejected": -772.3027954101562, |
|
"loss": 0.3666, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.805767297744751, |
|
"rewards/margins": 1.8668705224990845, |
|
"rewards/rejected": -3.672637462615967, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.514207792846168e-07, |
|
"logits/chosen": -0.8543869853019714, |
|
"logits/rejected": -0.33468276262283325, |
|
"logps/chosen": -563.6397705078125, |
|
"logps/rejected": -726.9634399414062, |
|
"loss": 0.3891, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.042240619659424, |
|
"rewards/margins": 1.2924143075942993, |
|
"rewards/rejected": -3.3346545696258545, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.343160693325356e-07, |
|
"logits/chosen": -0.5689171552658081, |
|
"logits/rejected": -0.3701861500740051, |
|
"logps/chosen": -522.55078125, |
|
"logps/rejected": -799.8737182617188, |
|
"loss": 0.3666, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.9046812057495117, |
|
"rewards/margins": 1.847630262374878, |
|
"rewards/rejected": -3.7523112297058105, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.173504435093174e-07, |
|
"logits/chosen": -1.1127324104309082, |
|
"logits/rejected": 0.49559181928634644, |
|
"logps/chosen": -580.339599609375, |
|
"logps/rejected": -790.05419921875, |
|
"loss": 0.4036, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -2.1893162727355957, |
|
"rewards/margins": 1.816868543624878, |
|
"rewards/rejected": -4.0061845779418945, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.00525318439836e-07, |
|
"logits/chosen": -1.1705108880996704, |
|
"logits/rejected": 0.1877966821193695, |
|
"logps/chosen": -577.6532592773438, |
|
"logps/rejected": -775.518310546875, |
|
"loss": 0.5292, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.123340129852295, |
|
"rewards/margins": 1.601309061050415, |
|
"rewards/rejected": -3.724648952484131, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.838420990171927e-07, |
|
"logits/chosen": -0.675153374671936, |
|
"logits/rejected": -0.15449100732803345, |
|
"logps/chosen": -529.3169555664062, |
|
"logps/rejected": -849.1887817382812, |
|
"loss": 0.3346, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -1.992500901222229, |
|
"rewards/margins": 2.1089320182800293, |
|
"rewards/rejected": -4.101432800292969, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.673021782854084e-07, |
|
"logits/chosen": -1.0276451110839844, |
|
"logits/rejected": 0.12752141058444977, |
|
"logps/chosen": -569.2623901367188, |
|
"logps/rejected": -797.4970703125, |
|
"loss": 0.4235, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.9917293787002563, |
|
"rewards/margins": 1.7630789279937744, |
|
"rewards/rejected": -3.7548089027404785, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.509069373231039e-07, |
|
"logits/chosen": -0.532359778881073, |
|
"logits/rejected": -0.37800487875938416, |
|
"logps/chosen": -572.4589233398438, |
|
"logps/rejected": -849.5089111328125, |
|
"loss": 0.4203, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -2.19490385055542, |
|
"rewards/margins": 1.7656660079956055, |
|
"rewards/rejected": -3.9605698585510254, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.346577451281822e-07, |
|
"logits/chosen": -0.9249979853630066, |
|
"logits/rejected": 0.5974918603897095, |
|
"logps/chosen": -583.9189453125, |
|
"logps/rejected": -770.7642822265625, |
|
"loss": 0.4452, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.059628486633301, |
|
"rewards/margins": 1.6830648183822632, |
|
"rewards/rejected": -3.7426934242248535, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.185559585035138e-07, |
|
"logits/chosen": -0.7646081447601318, |
|
"logits/rejected": -0.08741030842065811, |
|
"logps/chosen": -494.5181579589844, |
|
"logps/rejected": -819.3825073242188, |
|
"loss": 0.4152, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.7815096378326416, |
|
"rewards/margins": 2.074185848236084, |
|
"rewards/rejected": -3.8556952476501465, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.026029219436504e-07, |
|
"logits/chosen": -1.0040438175201416, |
|
"logits/rejected": -0.2370959222316742, |
|
"logps/chosen": -547.5843505859375, |
|
"logps/rejected": -775.7852783203125, |
|
"loss": 0.3377, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.8116861581802368, |
|
"rewards/margins": 1.7394940853118896, |
|
"rewards/rejected": -3.551180362701416, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.867999675225523e-07, |
|
"logits/chosen": -0.8310391306877136, |
|
"logits/rejected": -0.02760641649365425, |
|
"logps/chosen": -590.3953857421875, |
|
"logps/rejected": -830.1414184570312, |
|
"loss": 0.3744, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.0531251430511475, |
|
"rewards/margins": 1.790724515914917, |
|
"rewards/rejected": -3.843848705291748, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.711484147823663e-07, |
|
"logits/chosen": -1.0045768022537231, |
|
"logits/rejected": -0.32932716608047485, |
|
"logps/chosen": -519.6864013671875, |
|
"logps/rejected": -840.6529541015625, |
|
"loss": 0.3769, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -1.8191581964492798, |
|
"rewards/margins": 1.9201765060424805, |
|
"rewards/rejected": -3.7393341064453125, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_logits/chosen": 0.532638430595398, |
|
"eval_logits/rejected": 1.514159917831421, |
|
"eval_logps/chosen": -547.6463012695312, |
|
"eval_logps/rejected": -772.1762084960938, |
|
"eval_loss": 0.4205494523048401, |
|
"eval_rewards/accuracies": 0.8009999990463257, |
|
"eval_rewards/chosen": -1.919854760169983, |
|
"eval_rewards/margins": 1.6118818521499634, |
|
"eval_rewards/rejected": -3.5317368507385254, |
|
"eval_runtime": 1383.4868, |
|
"eval_samples_per_second": 1.446, |
|
"eval_steps_per_second": 0.361, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.556495706232413e-07, |
|
"logits/chosen": -1.1887633800506592, |
|
"logits/rejected": 0.10280628502368927, |
|
"logps/chosen": -592.3970947265625, |
|
"logps/rejected": -802.2684326171875, |
|
"loss": 0.4594, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.981632947921753, |
|
"rewards/margins": 1.690386414527893, |
|
"rewards/rejected": -3.6720194816589355, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.403047291942057e-07, |
|
"logits/chosen": -1.0731422901153564, |
|
"logits/rejected": -0.11721036583185196, |
|
"logps/chosen": -500.95733642578125, |
|
"logps/rejected": -810.4306030273438, |
|
"loss": 0.3202, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -1.6846420764923096, |
|
"rewards/margins": 2.057126522064209, |
|
"rewards/rejected": -3.7417690753936768, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.251151717851023e-07, |
|
"logits/chosen": -0.5943460464477539, |
|
"logits/rejected": -0.3044959008693695, |
|
"logps/chosen": -552.7174682617188, |
|
"logps/rejected": -838.6080322265625, |
|
"loss": 0.3992, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.064213275909424, |
|
"rewards/margins": 1.780678153038025, |
|
"rewards/rejected": -3.844891309738159, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 6.100821667196041e-07, |
|
"logits/chosen": -0.9400644302368164, |
|
"logits/rejected": -0.20678548514842987, |
|
"logps/chosen": -539.8009033203125, |
|
"logps/rejected": -764.1534423828125, |
|
"loss": 0.3418, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.653412103652954, |
|
"rewards/margins": 1.8391025066375732, |
|
"rewards/rejected": -3.4925143718719482, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.952069692493062e-07, |
|
"logits/chosen": -0.7938990592956543, |
|
"logits/rejected": -0.40649691224098206, |
|
"logps/chosen": -596.44873046875, |
|
"logps/rejected": -842.193359375, |
|
"loss": 0.4439, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.0670700073242188, |
|
"rewards/margins": 1.7952619791030884, |
|
"rewards/rejected": -3.8623321056365967, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.80490821448918e-07, |
|
"logits/chosen": -0.9244669079780579, |
|
"logits/rejected": -0.5335060358047485, |
|
"logps/chosen": -596.2068481445312, |
|
"logps/rejected": -874.0540161132812, |
|
"loss": 0.391, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.2196078300476074, |
|
"rewards/margins": 1.8115314245224, |
|
"rewards/rejected": -4.031139373779297, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.659349521125459e-07, |
|
"logits/chosen": -0.6347008347511292, |
|
"logits/rejected": -0.5196251273155212, |
|
"logps/chosen": -535.7493286132812, |
|
"logps/rejected": -856.0029296875, |
|
"loss": 0.4562, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.9471817016601562, |
|
"rewards/margins": 1.7889111042022705, |
|
"rewards/rejected": -3.7360928058624268, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.5154057665109e-07, |
|
"logits/chosen": -1.1485843658447266, |
|
"logits/rejected": 0.004442277364432812, |
|
"logps/chosen": -522.5572509765625, |
|
"logps/rejected": -784.026123046875, |
|
"loss": 0.3419, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.899583101272583, |
|
"rewards/margins": 1.6547266244888306, |
|
"rewards/rejected": -3.554309844970703, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.373088969907586e-07, |
|
"logits/chosen": -0.7392430901527405, |
|
"logits/rejected": -0.4351174235343933, |
|
"logps/chosen": -475.52294921875, |
|
"logps/rejected": -764.8416748046875, |
|
"loss": 0.3467, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -1.6972386837005615, |
|
"rewards/margins": 1.8677946329116821, |
|
"rewards/rejected": -3.565033435821533, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.23241101472709e-07, |
|
"logits/chosen": -0.841932475566864, |
|
"logits/rejected": -0.11554646492004395, |
|
"logps/chosen": -619.42724609375, |
|
"logps/rejected": -848.4366455078125, |
|
"loss": 0.3589, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.211543560028076, |
|
"rewards/margins": 1.8801252841949463, |
|
"rewards/rejected": -4.091668605804443, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.09338364753818e-07, |
|
"logits/chosen": -1.0390411615371704, |
|
"logits/rejected": 0.6153230667114258, |
|
"logps/chosen": -606.2908325195312, |
|
"logps/rejected": -770.7852783203125, |
|
"loss": 0.5322, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.2032546997070312, |
|
"rewards/margins": 1.4495493173599243, |
|
"rewards/rejected": -3.652804136276245, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.956018477086005e-07, |
|
"logits/chosen": -0.8461894989013672, |
|
"logits/rejected": -0.3413206934928894, |
|
"logps/chosen": -537.2196655273438, |
|
"logps/rejected": -825.6803588867188, |
|
"loss": 0.3784, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.8306541442871094, |
|
"rewards/margins": 1.9641211032867432, |
|
"rewards/rejected": -3.7947754859924316, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.820326973322764e-07, |
|
"logits/chosen": -0.8142082095146179, |
|
"logits/rejected": -0.10756425559520721, |
|
"logps/chosen": -565.7686157226562, |
|
"logps/rejected": -817.3389892578125, |
|
"loss": 0.4628, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.146563768386841, |
|
"rewards/margins": 1.795789122581482, |
|
"rewards/rejected": -3.9423530101776123, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.686320466449981e-07, |
|
"logits/chosen": -0.9902013540267944, |
|
"logits/rejected": -0.15123017132282257, |
|
"logps/chosen": -511.6470642089844, |
|
"logps/rejected": -815.7200317382812, |
|
"loss": 0.3672, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -1.8265291452407837, |
|
"rewards/margins": 2.014096975326538, |
|
"rewards/rejected": -3.8406262397766113, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.554010145972418e-07, |
|
"logits/chosen": -0.8389061689376831, |
|
"logits/rejected": -0.14444035291671753, |
|
"logps/chosen": -551.7734375, |
|
"logps/rejected": -798.772705078125, |
|
"loss": 0.4095, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.063991069793701, |
|
"rewards/margins": 1.5325976610183716, |
|
"rewards/rejected": -3.596588611602783, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.4234070597637455e-07, |
|
"logits/chosen": -0.8169253468513489, |
|
"logits/rejected": 0.03178207948803902, |
|
"logps/chosen": -529.4456787109375, |
|
"logps/rejected": -754.3330078125, |
|
"loss": 0.3978, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.110259771347046, |
|
"rewards/margins": 1.4216582775115967, |
|
"rewards/rejected": -3.5319180488586426, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.2945221131440783e-07, |
|
"logits/chosen": -0.7767388224601746, |
|
"logits/rejected": -0.2466239631175995, |
|
"logps/chosen": -586.1234130859375, |
|
"logps/rejected": -792.2913818359375, |
|
"loss": 0.4286, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -2.040156602859497, |
|
"rewards/margins": 1.661505937576294, |
|
"rewards/rejected": -3.701662063598633, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.167366067969381e-07, |
|
"logits/chosen": -0.8122004270553589, |
|
"logits/rejected": -0.4158555567264557, |
|
"logps/chosen": -579.0358276367188, |
|
"logps/rejected": -846.17626953125, |
|
"loss": 0.3993, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.079592227935791, |
|
"rewards/margins": 1.6793367862701416, |
|
"rewards/rejected": -3.758929491043091, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.041949541732826e-07, |
|
"logits/chosen": -1.0485798120498657, |
|
"logits/rejected": -0.36888834834098816, |
|
"logps/chosen": -630.3906860351562, |
|
"logps/rejected": -814.126953125, |
|
"loss": 0.4027, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -2.191230535507202, |
|
"rewards/margins": 1.4397324323654175, |
|
"rewards/rejected": -3.63096284866333, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.9182830066782614e-07, |
|
"logits/chosen": -0.8572107553482056, |
|
"logits/rejected": -0.33156412839889526, |
|
"logps/chosen": -506.94781494140625, |
|
"logps/rejected": -776.4071044921875, |
|
"loss": 0.348, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -1.8441927433013916, |
|
"rewards/margins": 1.8142344951629639, |
|
"rewards/rejected": -3.6584274768829346, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.796376788925771e-07, |
|
"logits/chosen": -1.5727512836456299, |
|
"logits/rejected": 0.38068026304244995, |
|
"logps/chosen": -552.1102905273438, |
|
"logps/rejected": -777.140869140625, |
|
"loss": 0.4251, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.8679554462432861, |
|
"rewards/margins": 1.6639436483383179, |
|
"rewards/rejected": -3.5318992137908936, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.676241067609465e-07, |
|
"logits/chosen": -0.8122023344039917, |
|
"logits/rejected": 0.24542848765850067, |
|
"logps/chosen": -533.0785522460938, |
|
"logps/rejected": -790.8440551757812, |
|
"loss": 0.3919, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.8818871974945068, |
|
"rewards/margins": 1.9045559167861938, |
|
"rewards/rejected": -3.7864432334899902, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.5578858740274976e-07, |
|
"logits/chosen": -1.0608148574829102, |
|
"logits/rejected": -0.1883460134267807, |
|
"logps/chosen": -546.2855224609375, |
|
"logps/rejected": -784.9697265625, |
|
"loss": 0.425, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.9488483667373657, |
|
"rewards/margins": 1.45414400100708, |
|
"rewards/rejected": -3.4029927253723145, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.44132109080447e-07, |
|
"logits/chosen": -1.0673831701278687, |
|
"logits/rejected": -0.0653887614607811, |
|
"logps/chosen": -512.1033935546875, |
|
"logps/rejected": -733.269287109375, |
|
"loss": 0.4068, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.7813838720321655, |
|
"rewards/margins": 1.5279889106750488, |
|
"rewards/rejected": -3.309372663497925, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.3265564510662344e-07, |
|
"logits/chosen": -1.1314384937286377, |
|
"logits/rejected": -0.5283291935920715, |
|
"logps/chosen": -552.1278076171875, |
|
"logps/rejected": -818.47509765625, |
|
"loss": 0.4451, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.8823707103729248, |
|
"rewards/margins": 1.676279067993164, |
|
"rewards/rejected": -3.558649778366089, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.213601537627195e-07, |
|
"logits/chosen": -0.9113371968269348, |
|
"logits/rejected": -0.332157701253891, |
|
"logps/chosen": -565.0186767578125, |
|
"logps/rejected": -805.5143432617188, |
|
"loss": 0.4066, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.2068862915039062, |
|
"rewards/margins": 1.6060466766357422, |
|
"rewards/rejected": -3.8129334449768066, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.1024657821901063e-07, |
|
"logits/chosen": -0.7972087264060974, |
|
"logits/rejected": -0.6192452311515808, |
|
"logps/chosen": -460.00244140625, |
|
"logps/rejected": -661.6898193359375, |
|
"loss": 0.4204, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.722604513168335, |
|
"rewards/margins": 1.273694396018982, |
|
"rewards/rejected": -2.9962992668151855, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.9931584645585654e-07, |
|
"logits/chosen": -1.2535960674285889, |
|
"logits/rejected": 0.02778279222548008, |
|
"logps/chosen": -465.5445861816406, |
|
"logps/rejected": -718.2966918945312, |
|
"loss": 0.3941, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.620823860168457, |
|
"rewards/margins": 1.7875875234603882, |
|
"rewards/rejected": -3.4084110260009766, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.885688711862136e-07, |
|
"logits/chosen": -0.6595426201820374, |
|
"logits/rejected": -0.6082831621170044, |
|
"logps/chosen": -518.3923950195312, |
|
"logps/rejected": -844.9713745117188, |
|
"loss": 0.3628, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.020922899246216, |
|
"rewards/margins": 2.008241653442383, |
|
"rewards/rejected": -4.0291643142700195, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.7800654977942486e-07, |
|
"logits/chosen": -1.1551368236541748, |
|
"logits/rejected": 0.04624384641647339, |
|
"logps/chosen": -494.8173828125, |
|
"logps/rejected": -759.0722045898438, |
|
"loss": 0.3921, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.8496835231781006, |
|
"rewards/margins": 1.606042504310608, |
|
"rewards/rejected": -3.455725908279419, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_logits/chosen": 0.5531209707260132, |
|
"eval_logits/rejected": 1.5286740064620972, |
|
"eval_logps/chosen": -559.9616088867188, |
|
"eval_logps/rejected": -791.3992309570312, |
|
"eval_loss": 0.4215858280658722, |
|
"eval_rewards/accuracies": 0.8050000071525574, |
|
"eval_rewards/chosen": -2.043008327484131, |
|
"eval_rewards/margins": 1.680959939956665, |
|
"eval_rewards/rejected": -3.723968267440796, |
|
"eval_runtime": 1375.3072, |
|
"eval_samples_per_second": 1.454, |
|
"eval_steps_per_second": 0.364, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.6762976418628797e-07, |
|
"logits/chosen": -0.6151852011680603, |
|
"logits/rejected": -0.4355131983757019, |
|
"logps/chosen": -569.3573608398438, |
|
"logps/rejected": -845.0255737304688, |
|
"loss": 0.3994, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.9695743322372437, |
|
"rewards/margins": 1.774019479751587, |
|
"rewards/rejected": -3.743593692779541, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.5743938086541354e-07, |
|
"logits/chosen": -1.1306208372116089, |
|
"logits/rejected": -0.369152694940567, |
|
"logps/chosen": -528.9949951171875, |
|
"logps/rejected": -786.5325927734375, |
|
"loss": 0.4035, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.8521554470062256, |
|
"rewards/margins": 2.019233226776123, |
|
"rewards/rejected": -3.8713886737823486, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.4743625071087574e-07, |
|
"logits/chosen": -0.9898750185966492, |
|
"logits/rejected": -0.0026702166069298983, |
|
"logps/chosen": -589.635498046875, |
|
"logps/rejected": -785.4796752929688, |
|
"loss": 0.481, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.26185941696167, |
|
"rewards/margins": 1.3372209072113037, |
|
"rewards/rejected": -3.5990803241729736, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.3762120898116498e-07, |
|
"logits/chosen": -1.0256075859069824, |
|
"logits/rejected": -0.4684371054172516, |
|
"logps/chosen": -513.4962158203125, |
|
"logps/rejected": -773.9256591796875, |
|
"loss": 0.489, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.8556444644927979, |
|
"rewards/margins": 1.662695288658142, |
|
"rewards/rejected": -3.5183398723602295, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.2799507522944048e-07, |
|
"logits/chosen": -1.2172832489013672, |
|
"logits/rejected": -0.7886725068092346, |
|
"logps/chosen": -603.7682495117188, |
|
"logps/rejected": -839.2596435546875, |
|
"loss": 0.3825, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.133934259414673, |
|
"rewards/margins": 1.56403386592865, |
|
"rewards/rejected": -3.6979682445526123, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.1855865323510056e-07, |
|
"logits/chosen": -0.9375013113021851, |
|
"logits/rejected": -0.4790850281715393, |
|
"logps/chosen": -529.7852783203125, |
|
"logps/rejected": -789.4864501953125, |
|
"loss": 0.3798, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -1.7933248281478882, |
|
"rewards/margins": 1.7892353534698486, |
|
"rewards/rejected": -3.5825603008270264, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.0931273093666575e-07, |
|
"logits/chosen": -1.0546633005142212, |
|
"logits/rejected": 0.11733438819646835, |
|
"logps/chosen": -566.7720947265625, |
|
"logps/rejected": -769.4909057617188, |
|
"loss": 0.4561, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.1270699501037598, |
|
"rewards/margins": 1.5025631189346313, |
|
"rewards/rejected": -3.6296334266662598, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.002580803659873e-07, |
|
"logits/chosen": -1.0718940496444702, |
|
"logits/rejected": -0.24651813507080078, |
|
"logps/chosen": -563.5213012695312, |
|
"logps/rejected": -746.7576904296875, |
|
"loss": 0.4189, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.0106587409973145, |
|
"rewards/margins": 1.5113112926483154, |
|
"rewards/rejected": -3.52197003364563, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.913954575837826e-07, |
|
"logits/chosen": -1.211354374885559, |
|
"logits/rejected": -0.7178353071212769, |
|
"logps/chosen": -547.396728515625, |
|
"logps/rejected": -785.0447998046875, |
|
"loss": 0.3956, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.859985113143921, |
|
"rewards/margins": 1.6019798517227173, |
|
"rewards/rejected": -3.4619648456573486, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.827256026165028e-07, |
|
"logits/chosen": -1.3360965251922607, |
|
"logits/rejected": -0.5832281112670898, |
|
"logps/chosen": -572.5958251953125, |
|
"logps/rejected": -827.7864379882812, |
|
"loss": 0.4079, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.8811748027801514, |
|
"rewards/margins": 1.826063871383667, |
|
"rewards/rejected": -3.7072386741638184, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.7424923939454274e-07, |
|
"logits/chosen": -1.0015101432800293, |
|
"logits/rejected": -0.4501993656158447, |
|
"logps/chosen": -516.7034912109375, |
|
"logps/rejected": -737.7506713867188, |
|
"loss": 0.3917, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.871683120727539, |
|
"rewards/margins": 1.2368314266204834, |
|
"rewards/rejected": -3.1085145473480225, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.6596707569179304e-07, |
|
"logits/chosen": -1.220655918121338, |
|
"logits/rejected": -0.36421042680740356, |
|
"logps/chosen": -664.6968994140625, |
|
"logps/rejected": -811.2049560546875, |
|
"loss": 0.5067, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.4624738693237305, |
|
"rewards/margins": 1.2412056922912598, |
|
"rewards/rejected": -3.7036795616149902, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.578798030665385e-07, |
|
"logits/chosen": -0.7742003202438354, |
|
"logits/rejected": -0.1663471907377243, |
|
"logps/chosen": -584.2454833984375, |
|
"logps/rejected": -789.2788696289062, |
|
"loss": 0.4325, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.159472703933716, |
|
"rewards/margins": 1.4603602886199951, |
|
"rewards/rejected": -3.619832992553711, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.499880968037165e-07, |
|
"logits/chosen": -0.9450448155403137, |
|
"logits/rejected": -0.24974000453948975, |
|
"logps/chosen": -543.8734130859375, |
|
"logps/rejected": -776.0538330078125, |
|
"loss": 0.3894, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.0840260982513428, |
|
"rewards/margins": 1.4753036499023438, |
|
"rewards/rejected": -3.5593299865722656, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.4229261585852805e-07, |
|
"logits/chosen": -1.1183446645736694, |
|
"logits/rejected": -0.8927680253982544, |
|
"logps/chosen": -516.3631591796875, |
|
"logps/rejected": -750.56884765625, |
|
"loss": 0.4428, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.7409288883209229, |
|
"rewards/margins": 1.517566442489624, |
|
"rewards/rejected": -3.258495330810547, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.3479400280141886e-07, |
|
"logits/chosen": -1.0318033695220947, |
|
"logits/rejected": -0.8086441159248352, |
|
"logps/chosen": -516.8532104492188, |
|
"logps/rejected": -741.1300659179688, |
|
"loss": 0.3884, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.8979904651641846, |
|
"rewards/margins": 1.477073311805725, |
|
"rewards/rejected": -3.37506365776062, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.2749288376442044e-07, |
|
"logits/chosen": -0.9179407358169556, |
|
"logits/rejected": -0.33625102043151855, |
|
"logps/chosen": -536.0686645507812, |
|
"logps/rejected": -847.4317626953125, |
|
"loss": 0.3721, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.9626020193099976, |
|
"rewards/margins": 1.956992745399475, |
|
"rewards/rejected": -3.9195950031280518, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.203898683888713e-07, |
|
"logits/chosen": -1.2370408773422241, |
|
"logits/rejected": -0.602063775062561, |
|
"logps/chosen": -547.3104858398438, |
|
"logps/rejected": -757.8153076171875, |
|
"loss": 0.4135, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.9618017673492432, |
|
"rewards/margins": 1.5197532176971436, |
|
"rewards/rejected": -3.4815547466278076, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.1348554977451132e-07, |
|
"logits/chosen": -0.8971865773200989, |
|
"logits/rejected": -0.45237869024276733, |
|
"logps/chosen": -549.7821044921875, |
|
"logps/rejected": -819.00439453125, |
|
"loss": 0.422, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.9521732330322266, |
|
"rewards/margins": 1.8261715173721313, |
|
"rewards/rejected": -3.7783446311950684, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.0678050442995802e-07, |
|
"logits/chosen": -0.5732991099357605, |
|
"logits/rejected": -0.1439836025238037, |
|
"logps/chosen": -517.4790649414062, |
|
"logps/rejected": -790.7830810546875, |
|
"loss": 0.3695, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.7863785028457642, |
|
"rewards/margins": 1.7641347646713257, |
|
"rewards/rejected": -3.550513505935669, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.0027529222456755e-07, |
|
"logits/chosen": -0.8079764246940613, |
|
"logits/rejected": -0.43154460191726685, |
|
"logps/chosen": -505.2015075683594, |
|
"logps/rejected": -735.8001708984375, |
|
"loss": 0.4543, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.861318588256836, |
|
"rewards/margins": 1.397134780883789, |
|
"rewards/rejected": -3.258453369140625, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.397045634168766e-08, |
|
"logits/chosen": -1.5205557346343994, |
|
"logits/rejected": -0.21061238646507263, |
|
"logps/chosen": -541.4683837890625, |
|
"logps/rejected": -778.14453125, |
|
"loss": 0.3821, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.8572975397109985, |
|
"rewards/margins": 1.6669906377792358, |
|
"rewards/rejected": -3.5242881774902344, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 8.78665232332998e-08, |
|
"logits/chosen": -1.0609691143035889, |
|
"logits/rejected": -0.4593663811683655, |
|
"logps/chosen": -509.010986328125, |
|
"logps/rejected": -796.3077392578125, |
|
"loss": 0.363, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.9296966791152954, |
|
"rewards/margins": 1.6614776849746704, |
|
"rewards/rejected": -3.591174364089966, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 8.196400257606208e-08, |
|
"logits/chosen": -0.8713966608047485, |
|
"logits/rejected": -0.9145506024360657, |
|
"logps/chosen": -527.7672729492188, |
|
"logps/rejected": -777.5648193359375, |
|
"loss": 0.4802, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.896409273147583, |
|
"rewards/margins": 1.6549888849258423, |
|
"rewards/rejected": -3.551398515701294, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.626338722875076e-08, |
|
"logits/chosen": -1.1116408109664917, |
|
"logits/rejected": -0.13830144703388214, |
|
"logps/chosen": -569.3795166015625, |
|
"logps/rejected": -845.25830078125, |
|
"loss": 0.3774, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.9394264221191406, |
|
"rewards/margins": 2.049816131591797, |
|
"rewards/rejected": -3.9892425537109375, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.076515319110688e-08, |
|
"logits/chosen": -0.9675251841545105, |
|
"logits/rejected": -0.38351327180862427, |
|
"logps/chosen": -519.7999267578125, |
|
"logps/rejected": -768.5065307617188, |
|
"loss": 0.3933, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.9502222537994385, |
|
"rewards/margins": 1.693610429763794, |
|
"rewards/rejected": -3.6438324451446533, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 6.54697595640899e-08, |
|
"logits/chosen": -0.8377977609634399, |
|
"logits/rejected": -0.5262192487716675, |
|
"logps/chosen": -530.46728515625, |
|
"logps/rejected": -769.6790771484375, |
|
"loss": 0.4231, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.032832622528076, |
|
"rewards/margins": 1.6006113290786743, |
|
"rewards/rejected": -3.633444309234619, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.037764851154426e-08, |
|
"logits/chosen": -0.985633373260498, |
|
"logits/rejected": 0.16631217300891876, |
|
"logps/chosen": -595.2657470703125, |
|
"logps/rejected": -853.1846923828125, |
|
"loss": 0.3474, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -1.9297056198120117, |
|
"rewards/margins": 1.8593957424163818, |
|
"rewards/rejected": -3.7891018390655518, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.548924522327748e-08, |
|
"logits/chosen": -1.016361951828003, |
|
"logits/rejected": -0.7056697607040405, |
|
"logps/chosen": -516.3272705078125, |
|
"logps/rejected": -732.7681274414062, |
|
"loss": 0.4643, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.8100614547729492, |
|
"rewards/margins": 1.4326629638671875, |
|
"rewards/rejected": -3.242724657058716, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.0804957879556915e-08, |
|
"logits/chosen": -1.4801714420318604, |
|
"logits/rejected": -0.21010088920593262, |
|
"logps/chosen": -538.2376708984375, |
|
"logps/rejected": -716.570556640625, |
|
"loss": 0.4249, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.8412452936172485, |
|
"rewards/margins": 1.4520995616912842, |
|
"rewards/rejected": -3.2933449745178223, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_logits/chosen": 0.3916805386543274, |
|
"eval_logits/rejected": 1.3532880544662476, |
|
"eval_logps/chosen": -551.5703735351562, |
|
"eval_logps/rejected": -777.8283081054688, |
|
"eval_loss": 0.4203905165195465, |
|
"eval_rewards/accuracies": 0.800000011920929, |
|
"eval_rewards/chosen": -1.9590957164764404, |
|
"eval_rewards/margins": 1.6291632652282715, |
|
"eval_rewards/rejected": -3.588258981704712, |
|
"eval_runtime": 1380.3242, |
|
"eval_samples_per_second": 1.449, |
|
"eval_steps_per_second": 0.362, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.632517761702815e-08, |
|
"logits/chosen": -1.3528892993927002, |
|
"logits/rejected": 0.41183796525001526, |
|
"logps/chosen": -596.6740112304688, |
|
"logps/rejected": -815.2229614257812, |
|
"loss": 0.3384, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.033761739730835, |
|
"rewards/margins": 1.7981764078140259, |
|
"rewards/rejected": -3.8319382667541504, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.205027849605359e-08, |
|
"logits/chosen": -0.6885486245155334, |
|
"logits/rejected": -0.6011554002761841, |
|
"logps/chosen": -501.90008544921875, |
|
"logps/rejected": -728.01416015625, |
|
"loss": 0.3797, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.8948495388031006, |
|
"rewards/margins": 1.430154800415039, |
|
"rewards/rejected": -3.3250045776367188, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.798061746947995e-08, |
|
"logits/chosen": -1.1346218585968018, |
|
"logits/rejected": -0.2788551449775696, |
|
"logps/chosen": -572.7951049804688, |
|
"logps/rejected": -818.1143798828125, |
|
"loss": 0.3816, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.859471321105957, |
|
"rewards/margins": 1.9316068887710571, |
|
"rewards/rejected": -3.7910780906677246, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.411653435283158e-08, |
|
"logits/chosen": -1.2903286218643188, |
|
"logits/rejected": -0.4007846713066101, |
|
"logps/chosen": -612.4331665039062, |
|
"logps/rejected": -790.9478759765625, |
|
"loss": 0.4783, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.9661962985992432, |
|
"rewards/margins": 1.4086124897003174, |
|
"rewards/rejected": -3.3748087882995605, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.04583517959367e-08, |
|
"logits/chosen": -1.4373764991760254, |
|
"logits/rejected": -0.18271857500076294, |
|
"logps/chosen": -567.571533203125, |
|
"logps/rejected": -814.9035034179688, |
|
"loss": 0.372, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.9404630661010742, |
|
"rewards/margins": 1.8180534839630127, |
|
"rewards/rejected": -3.758516788482666, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.7006375255985984e-08, |
|
"logits/chosen": -0.8399646878242493, |
|
"logits/rejected": -0.41886812448501587, |
|
"logps/chosen": -487.2755432128906, |
|
"logps/rejected": -792.0075073242188, |
|
"loss": 0.3617, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.7799431085586548, |
|
"rewards/margins": 2.1151885986328125, |
|
"rewards/rejected": -3.895132064819336, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.3760892972027328e-08, |
|
"logits/chosen": -0.7552706003189087, |
|
"logits/rejected": -0.37330105900764465, |
|
"logps/chosen": -550.1450805664062, |
|
"logps/rejected": -821.5558471679688, |
|
"loss": 0.4513, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -2.0513758659362793, |
|
"rewards/margins": 1.6616461277008057, |
|
"rewards/rejected": -3.713021755218506, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.072217594089765e-08, |
|
"logits/chosen": -1.048200249671936, |
|
"logits/rejected": -0.7420127987861633, |
|
"logps/chosen": -548.27587890625, |
|
"logps/rejected": -818.7357788085938, |
|
"loss": 0.4605, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.8455013036727905, |
|
"rewards/margins": 1.6794688701629639, |
|
"rewards/rejected": -3.524970293045044, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.789047789459375e-08, |
|
"logits/chosen": -0.8459684252738953, |
|
"logits/rejected": -0.16287431120872498, |
|
"logps/chosen": -508.4056091308594, |
|
"logps/rejected": -766.1015625, |
|
"loss": 0.3671, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.6832281351089478, |
|
"rewards/margins": 1.7482646703720093, |
|
"rewards/rejected": -3.431492567062378, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.5266035279088708e-08, |
|
"logits/chosen": -0.6956599354743958, |
|
"logits/rejected": -0.6482317447662354, |
|
"logps/chosen": -574.1497802734375, |
|
"logps/rejected": -854.5062255859375, |
|
"loss": 0.3785, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.2222962379455566, |
|
"rewards/margins": 1.6956876516342163, |
|
"rewards/rejected": -3.9179844856262207, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.2849067234584623e-08, |
|
"logits/chosen": -0.6731444001197815, |
|
"logits/rejected": -0.2936319410800934, |
|
"logps/chosen": -508.5874938964844, |
|
"logps/rejected": -779.5755004882812, |
|
"loss": 0.3793, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -1.7908990383148193, |
|
"rewards/margins": 1.8119395971298218, |
|
"rewards/rejected": -3.6028385162353516, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.0639775577218625e-08, |
|
"logits/chosen": -1.1039248704910278, |
|
"logits/rejected": -0.3895350396633148, |
|
"logps/chosen": -532.2550048828125, |
|
"logps/rejected": -799.4228515625, |
|
"loss": 0.3993, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.914385437965393, |
|
"rewards/margins": 1.754346489906311, |
|
"rewards/rejected": -3.668731689453125, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 8.638344782207486e-09, |
|
"logits/chosen": -1.1005773544311523, |
|
"logits/rejected": -0.48734521865844727, |
|
"logps/chosen": -638.8570556640625, |
|
"logps/rejected": -876.8406372070312, |
|
"loss": 0.441, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.3087477684020996, |
|
"rewards/margins": 1.5587693452835083, |
|
"rewards/rejected": -3.8675169944763184, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.84494196844715e-09, |
|
"logits/chosen": -1.2625192403793335, |
|
"logits/rejected": -0.4201609194278717, |
|
"logps/chosen": -579.9595336914062, |
|
"logps/rejected": -743.4432373046875, |
|
"loss": 0.4261, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.0454494953155518, |
|
"rewards/margins": 1.329736351966858, |
|
"rewards/rejected": -3.37518572807312, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5.259716884556121e-09, |
|
"logits/chosen": -1.0971883535385132, |
|
"logits/rejected": 0.13197267055511475, |
|
"logps/chosen": -497.7351989746094, |
|
"logps/rejected": -844.2490234375, |
|
"loss": 0.3651, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -1.8625205755233765, |
|
"rewards/margins": 2.320369243621826, |
|
"rewards/rejected": -4.182889461517334, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.882801896372967e-09, |
|
"logits/chosen": -1.2590482234954834, |
|
"logits/rejected": 0.06436818838119507, |
|
"logps/chosen": -604.3903198242188, |
|
"logps/rejected": -838.1032104492188, |
|
"loss": 0.3897, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.1124839782714844, |
|
"rewards/margins": 1.6700093746185303, |
|
"rewards/rejected": -3.7824935913085938, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.7143119759026614e-09, |
|
"logits/chosen": -0.8534577488899231, |
|
"logits/rejected": -0.31645748019218445, |
|
"logps/chosen": -536.9246215820312, |
|
"logps/rejected": -888.1343994140625, |
|
"loss": 0.2884, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.8014726638793945, |
|
"rewards/margins": 2.4648687839508057, |
|
"rewards/rejected": -4.266341209411621, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.754344691717591e-09, |
|
"logits/chosen": -1.0792902708053589, |
|
"logits/rejected": -0.5682160258293152, |
|
"logps/chosen": -508.158447265625, |
|
"logps/rejected": -773.3580932617188, |
|
"loss": 0.3699, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.8245254755020142, |
|
"rewards/margins": 1.8749605417251587, |
|
"rewards/rejected": -3.6994857788085938, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.0029802008096335e-09, |
|
"logits/chosen": -0.6716892719268799, |
|
"logits/rejected": -0.40372976660728455, |
|
"logps/chosen": -489.3829040527344, |
|
"logps/rejected": -723.220703125, |
|
"loss": 0.4356, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.7527339458465576, |
|
"rewards/margins": 1.6192394495010376, |
|
"rewards/rejected": -3.371973752975464, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.602812418974534e-10, |
|
"logits/chosen": -0.8607047200202942, |
|
"logits/rejected": -0.9630060195922852, |
|
"logps/chosen": -502.1089782714844, |
|
"logps/rejected": -731.4326171875, |
|
"loss": 0.4389, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.8323230743408203, |
|
"rewards/margins": 1.3677552938461304, |
|
"rewards/rejected": -3.200078248977661, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.2629313018819312e-10, |
|
"logits/chosen": -0.9664648175239563, |
|
"logits/rejected": 0.03846656158566475, |
|
"logps/chosen": -550.194091796875, |
|
"logps/rejected": -774.918212890625, |
|
"loss": 0.385, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.9849185943603516, |
|
"rewards/margins": 1.5859264135360718, |
|
"rewards/rejected": -3.5708446502685547, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.0437535929996855e-12, |
|
"logits/chosen": -1.1103519201278687, |
|
"logits/rejected": -0.32411596179008484, |
|
"logps/chosen": -598.4796752929688, |
|
"logps/rejected": -901.5104370117188, |
|
"loss": 0.4032, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -2.1410813331604004, |
|
"rewards/margins": 1.9627602100372314, |
|
"rewards/rejected": -4.103841304779053, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 3821, |
|
"total_flos": 0.0, |
|
"train_loss": 0.43769783746998087, |
|
"train_runtime": 91189.3347, |
|
"train_samples_per_second": 0.67, |
|
"train_steps_per_second": 0.042 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 3821, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 400, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|