|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9984301412872841, |
|
"eval_steps": 1000, |
|
"global_step": 477, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.0416666666666666e-08, |
|
"logits/chosen": -2.7851884365081787, |
|
"logits/rejected": -2.649298906326294, |
|
"logps/chosen": -296.01092529296875, |
|
"logps/rejected": -290.09039306640625, |
|
"loss": 0.7406, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.083333333333333e-08, |
|
"logits/chosen": -2.681438446044922, |
|
"logits/rejected": -2.7385971546173096, |
|
"logps/chosen": -339.8030090332031, |
|
"logps/rejected": -247.9777374267578, |
|
"loss": 0.829, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.166666666666666e-08, |
|
"logits/chosen": -2.6828837394714355, |
|
"logits/rejected": -2.717893123626709, |
|
"logps/chosen": -246.95411682128906, |
|
"logps/rejected": -230.5216522216797, |
|
"loss": 0.7515, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -1.3528901035897434e-05, |
|
"rewards/margins": -0.00024310783192049712, |
|
"rewards/rejected": 0.00022957894543651491, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6.25e-08, |
|
"logits/chosen": -2.6765449047088623, |
|
"logits/rejected": -2.7000393867492676, |
|
"logps/chosen": -300.23077392578125, |
|
"logps/rejected": -223.28292846679688, |
|
"loss": 0.8154, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -9.068308281712234e-05, |
|
"rewards/margins": -0.00012977616279385984, |
|
"rewards/rejected": 3.9093021769076586e-05, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.333333333333333e-08, |
|
"logits/chosen": -2.599257469177246, |
|
"logits/rejected": -2.6808290481567383, |
|
"logps/chosen": -276.9845886230469, |
|
"logps/rejected": -264.2096252441406, |
|
"loss": 0.7941, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.0009412524523213506, |
|
"rewards/margins": -0.00016639442765153944, |
|
"rewards/rejected": 0.0011076468508690596, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0416666666666667e-07, |
|
"logits/chosen": -2.6191229820251465, |
|
"logits/rejected": -2.5384933948516846, |
|
"logps/chosen": -259.14239501953125, |
|
"logps/rejected": -247.5746307373047, |
|
"loss": 0.8451, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.002248743548989296, |
|
"rewards/margins": 0.00199419679120183, |
|
"rewards/rejected": 0.0002545467286836356, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.25e-07, |
|
"logits/chosen": -2.6755356788635254, |
|
"logits/rejected": -2.6597650051116943, |
|
"logps/chosen": -240.25408935546875, |
|
"logps/rejected": -215.57801818847656, |
|
"loss": 0.7591, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": 0.003154001198709011, |
|
"rewards/margins": 0.0002609168004710227, |
|
"rewards/rejected": 0.002893084194511175, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.4583333333333335e-07, |
|
"logits/chosen": -2.7294678688049316, |
|
"logits/rejected": -2.6904141902923584, |
|
"logps/chosen": -315.6171875, |
|
"logps/rejected": -283.3052978515625, |
|
"loss": 0.7765, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": 0.007693930994719267, |
|
"rewards/margins": 0.0004415067960508168, |
|
"rewards/rejected": 0.007252424024045467, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.6666666666666665e-07, |
|
"logits/chosen": -2.674433469772339, |
|
"logits/rejected": -2.635763168334961, |
|
"logps/chosen": -280.9161071777344, |
|
"logps/rejected": -286.4499816894531, |
|
"loss": 0.7959, |
|
"rewards/accuracies": 0.390625, |
|
"rewards/chosen": 0.010045092552900314, |
|
"rewards/margins": -0.0013794752303510904, |
|
"rewards/rejected": 0.011424567550420761, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.875e-07, |
|
"logits/chosen": -2.6775548458099365, |
|
"logits/rejected": -2.6488728523254395, |
|
"logps/chosen": -281.817138671875, |
|
"logps/rejected": -226.1727294921875, |
|
"loss": 0.7651, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": 0.021169912070035934, |
|
"rewards/margins": 0.003915595356374979, |
|
"rewards/rejected": 0.017254315316677094, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"logits/chosen": -2.6540913581848145, |
|
"logits/rejected": -2.652208089828491, |
|
"logps/chosen": -255.7197265625, |
|
"logps/rejected": -211.753662109375, |
|
"loss": 0.812, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.031227679923176765, |
|
"rewards/margins": 0.004203509539365768, |
|
"rewards/rejected": 0.027024172246456146, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.2916666666666663e-07, |
|
"logits/chosen": -2.7285475730895996, |
|
"logits/rejected": -2.690129280090332, |
|
"logps/chosen": -283.28863525390625, |
|
"logps/rejected": -303.4035339355469, |
|
"loss": 0.8139, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.04402179270982742, |
|
"rewards/margins": 0.0007559025543741882, |
|
"rewards/rejected": 0.04326589033007622, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.5e-07, |
|
"logits/chosen": -2.625800609588623, |
|
"logits/rejected": -2.6034979820251465, |
|
"logps/chosen": -281.58795166015625, |
|
"logps/rejected": -234.80442810058594, |
|
"loss": 0.9627, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": 0.063897043466568, |
|
"rewards/margins": 0.006137948948889971, |
|
"rewards/rejected": 0.05775909498333931, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.708333333333333e-07, |
|
"logits/chosen": -2.7099432945251465, |
|
"logits/rejected": -2.715182304382324, |
|
"logps/chosen": -313.54315185546875, |
|
"logps/rejected": -269.708984375, |
|
"loss": 0.7945, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.09343694150447845, |
|
"rewards/margins": 0.011352106928825378, |
|
"rewards/rejected": 0.08208482712507248, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.916666666666667e-07, |
|
"logits/chosen": -2.683866024017334, |
|
"logits/rejected": -2.666459083557129, |
|
"logps/chosen": -289.9292297363281, |
|
"logps/rejected": -266.2529602050781, |
|
"loss": 0.963, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.11043840646743774, |
|
"rewards/margins": 0.009698072448372841, |
|
"rewards/rejected": 0.10074033588171005, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -2.639746904373169, |
|
"logits/rejected": -2.635244369506836, |
|
"logps/chosen": -250.9815673828125, |
|
"logps/rejected": -256.66644287109375, |
|
"loss": 0.782, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": 0.11398710310459137, |
|
"rewards/margins": 0.006070964969694614, |
|
"rewards/rejected": 0.10791613161563873, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.333333333333333e-07, |
|
"logits/chosen": -2.6974880695343018, |
|
"logits/rejected": -2.791822910308838, |
|
"logps/chosen": -311.99462890625, |
|
"logps/rejected": -263.93084716796875, |
|
"loss": 0.7708, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.13753816485404968, |
|
"rewards/margins": 0.02569321170449257, |
|
"rewards/rejected": 0.11184494942426682, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.541666666666667e-07, |
|
"logits/chosen": -2.683668613433838, |
|
"logits/rejected": -2.696622610092163, |
|
"logps/chosen": -290.19091796875, |
|
"logps/rejected": -280.1837158203125, |
|
"loss": 0.8665, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": 0.13525506854057312, |
|
"rewards/margins": 0.01829436421394348, |
|
"rewards/rejected": 0.11696070432662964, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.75e-07, |
|
"logits/chosen": -2.703218698501587, |
|
"logits/rejected": -2.66160249710083, |
|
"logps/chosen": -331.1356201171875, |
|
"logps/rejected": -277.3096618652344, |
|
"loss": 0.8742, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.14532874524593353, |
|
"rewards/margins": 0.02477584406733513, |
|
"rewards/rejected": 0.1205528974533081, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.958333333333333e-07, |
|
"logits/chosen": -2.605548620223999, |
|
"logits/rejected": -2.507476806640625, |
|
"logps/chosen": -275.4030456542969, |
|
"logps/rejected": -268.85137939453125, |
|
"loss": 0.784, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": 0.14142535626888275, |
|
"rewards/margins": 0.016934942454099655, |
|
"rewards/rejected": 0.1244904026389122, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -2.599735736846924, |
|
"logits/rejected": -2.6093332767486572, |
|
"logps/chosen": -259.8871154785156, |
|
"logps/rejected": -238.1406707763672, |
|
"loss": 0.7943, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": 0.1309652328491211, |
|
"rewards/margins": 0.03087472915649414, |
|
"rewards/rejected": 0.10009051114320755, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.375e-07, |
|
"logits/chosen": -2.678443670272827, |
|
"logits/rejected": -2.6648781299591064, |
|
"logps/chosen": -245.9468994140625, |
|
"logps/rejected": -246.77980041503906, |
|
"loss": 0.7385, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": 0.12853887677192688, |
|
"rewards/margins": 0.01891843043267727, |
|
"rewards/rejected": 0.10962046682834625, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.5833333333333327e-07, |
|
"logits/chosen": -2.603628158569336, |
|
"logits/rejected": -2.5344913005828857, |
|
"logps/chosen": -331.3908996582031, |
|
"logps/rejected": -259.43768310546875, |
|
"loss": 0.7334, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": 0.17403613030910492, |
|
"rewards/margins": 0.08045163750648499, |
|
"rewards/rejected": 0.09358450770378113, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.791666666666667e-07, |
|
"logits/chosen": -2.6753909587860107, |
|
"logits/rejected": -2.6965551376342773, |
|
"logps/chosen": -255.45181274414062, |
|
"logps/rejected": -280.82305908203125, |
|
"loss": 0.7035, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": 0.1322551965713501, |
|
"rewards/margins": 0.049186140298843384, |
|
"rewards/rejected": 0.0830690786242485, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": -2.5472424030303955, |
|
"logits/rejected": -2.5995569229125977, |
|
"logps/chosen": -269.2645568847656, |
|
"logps/rejected": -236.20904541015625, |
|
"loss": 0.7093, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.11034946143627167, |
|
"rewards/margins": 0.03396361321210861, |
|
"rewards/rejected": 0.07638585567474365, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999731868769026e-07, |
|
"logits/chosen": -2.683974266052246, |
|
"logits/rejected": -2.6413497924804688, |
|
"logps/chosen": -319.9888916015625, |
|
"logps/rejected": -328.697265625, |
|
"loss": 0.938, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": 0.1664372831583023, |
|
"rewards/margins": 0.0846775621175766, |
|
"rewards/rejected": 0.08175972104072571, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.998927532591591e-07, |
|
"logits/chosen": -2.695955514907837, |
|
"logits/rejected": -2.660606861114502, |
|
"logps/chosen": -229.35089111328125, |
|
"logps/rejected": -203.45083618164062, |
|
"loss": 0.7139, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.10423125326633453, |
|
"rewards/margins": 0.0388810932636261, |
|
"rewards/rejected": 0.06535016745328903, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.997587164001815e-07, |
|
"logits/chosen": -2.6461551189422607, |
|
"logits/rejected": -2.5729639530181885, |
|
"logps/chosen": -267.51885986328125, |
|
"logps/rejected": -215.0835723876953, |
|
"loss": 0.7405, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.12834982573986053, |
|
"rewards/margins": 0.09162335097789764, |
|
"rewards/rejected": 0.03672647103667259, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.99571105051544e-07, |
|
"logits/chosen": -2.5733587741851807, |
|
"logits/rejected": -2.5592525005340576, |
|
"logps/chosen": -275.8002014160156, |
|
"logps/rejected": -224.9879150390625, |
|
"loss": 0.7809, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": 0.12791308760643005, |
|
"rewards/margins": 0.06309904903173447, |
|
"rewards/rejected": 0.0648140236735344, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.993299594568162e-07, |
|
"logits/chosen": -2.6415131092071533, |
|
"logits/rejected": -2.591508626937866, |
|
"logps/chosen": -280.14813232421875, |
|
"logps/rejected": -254.98541259765625, |
|
"loss": 1.0116, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": 0.12386615574359894, |
|
"rewards/margins": 0.06462900340557098, |
|
"rewards/rejected": 0.05923713743686676, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.990353313429303e-07, |
|
"logits/chosen": -2.635352611541748, |
|
"logits/rejected": -2.581376314163208, |
|
"logps/chosen": -300.7115173339844, |
|
"logps/rejected": -253.92111206054688, |
|
"loss": 0.7298, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": 0.12783905863761902, |
|
"rewards/margins": 0.11859284341335297, |
|
"rewards/rejected": 0.00924623105674982, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.986872839090852e-07, |
|
"logits/chosen": -2.7381272315979004, |
|
"logits/rejected": -2.6615114212036133, |
|
"logps/chosen": -310.81658935546875, |
|
"logps/rejected": -246.3937530517578, |
|
"loss": 0.7147, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": 0.08360395580530167, |
|
"rewards/margins": 0.0849369466304779, |
|
"rewards/rejected": -0.0013329838402569294, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.982858918131906e-07, |
|
"logits/chosen": -2.7732505798339844, |
|
"logits/rejected": -2.7643208503723145, |
|
"logps/chosen": -309.32196044921875, |
|
"logps/rejected": -286.745361328125, |
|
"loss": 0.7717, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": 0.10828227549791336, |
|
"rewards/margins": 0.12604959309101105, |
|
"rewards/rejected": -0.01776731386780739, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.978312411558517e-07, |
|
"logits/chosen": -2.7538211345672607, |
|
"logits/rejected": -2.766498565673828, |
|
"logps/chosen": -260.09765625, |
|
"logps/rejected": -237.29440307617188, |
|
"loss": 0.7561, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": 0.09531789273023605, |
|
"rewards/margins": 0.13862968981266022, |
|
"rewards/rejected": -0.043311797082424164, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.97323429461901e-07, |
|
"logits/chosen": -2.736222505569458, |
|
"logits/rejected": -2.6993401050567627, |
|
"logps/chosen": -218.64276123046875, |
|
"logps/rejected": -247.08609008789062, |
|
"loss": 0.7268, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": 0.102709099650383, |
|
"rewards/margins": 0.15037541091442108, |
|
"rewards/rejected": -0.047666311264038086, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.967625656594781e-07, |
|
"logits/chosen": -2.6768088340759277, |
|
"logits/rejected": -2.720437526702881, |
|
"logps/chosen": -255.47183227539062, |
|
"logps/rejected": -234.1626739501953, |
|
"loss": 0.7297, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.06858564168214798, |
|
"rewards/margins": 0.10109753906726837, |
|
"rewards/rejected": -0.032511889934539795, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.961487700566646e-07, |
|
"logits/chosen": -2.685685157775879, |
|
"logits/rejected": -2.728847026824951, |
|
"logps/chosen": -277.7132873535156, |
|
"logps/rejected": -273.0968017578125, |
|
"loss": 0.843, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.08615937829017639, |
|
"rewards/margins": 0.09151909500360489, |
|
"rewards/rejected": -0.0053597185760736465, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.954821743156767e-07, |
|
"logits/chosen": -2.749175548553467, |
|
"logits/rejected": -2.7879812717437744, |
|
"logps/chosen": -277.5850524902344, |
|
"logps/rejected": -289.2405090332031, |
|
"loss": 0.7339, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.037747230380773544, |
|
"rewards/margins": 0.07498619705438614, |
|
"rewards/rejected": -0.03723897039890289, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.947629214246236e-07, |
|
"logits/chosen": -2.7705671787261963, |
|
"logits/rejected": -2.763953447341919, |
|
"logps/chosen": -252.2857666015625, |
|
"logps/rejected": -292.7469177246094, |
|
"loss": 0.7657, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": 0.017970919609069824, |
|
"rewards/margins": 0.09476744383573532, |
|
"rewards/rejected": -0.0767965242266655, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.939911656668361e-07, |
|
"logits/chosen": -2.682727336883545, |
|
"logits/rejected": -2.6415486335754395, |
|
"logps/chosen": -290.85369873046875, |
|
"logps/rejected": -280.05572509765625, |
|
"loss": 0.7518, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": 0.0839323028922081, |
|
"rewards/margins": 0.10557042807340622, |
|
"rewards/rejected": -0.02163812704384327, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.93167072587771e-07, |
|
"logits/chosen": -2.7483415603637695, |
|
"logits/rejected": -2.7103254795074463, |
|
"logps/chosen": -307.75933837890625, |
|
"logps/rejected": -324.3096923828125, |
|
"loss": 0.7522, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.03351260721683502, |
|
"rewards/margins": 0.10701370239257812, |
|
"rewards/rejected": -0.0735010951757431, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.922908189595017e-07, |
|
"logits/chosen": -2.8289828300476074, |
|
"logits/rejected": -2.8077309131622314, |
|
"logps/chosen": -252.59323120117188, |
|
"logps/rejected": -278.0732727050781, |
|
"loss": 0.7646, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": 0.07634499669075012, |
|
"rewards/margins": 0.1510338932275772, |
|
"rewards/rejected": -0.0746888816356659, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.913625927427995e-07, |
|
"logits/chosen": -2.673996686935425, |
|
"logits/rejected": -2.7177605628967285, |
|
"logps/chosen": -283.3115234375, |
|
"logps/rejected": -235.6822967529297, |
|
"loss": 0.7361, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": 0.027728475630283356, |
|
"rewards/margins": 0.19945986568927765, |
|
"rewards/rejected": -0.1717313826084137, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.903825930468148e-07, |
|
"logits/chosen": -2.790689468383789, |
|
"logits/rejected": -2.8265206813812256, |
|
"logps/chosen": -271.7046813964844, |
|
"logps/rejected": -289.9587097167969, |
|
"loss": 0.7311, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": 0.07139424234628677, |
|
"rewards/margins": 0.22358588874340057, |
|
"rewards/rejected": -0.152191624045372, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.893510300863676e-07, |
|
"logits/chosen": -2.6858608722686768, |
|
"logits/rejected": -2.6849381923675537, |
|
"logps/chosen": -234.48670959472656, |
|
"logps/rejected": -243.48057556152344, |
|
"loss": 0.6433, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": 0.03533104807138443, |
|
"rewards/margins": 0.19699391722679138, |
|
"rewards/rejected": -0.16166287660598755, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.882681251368548e-07, |
|
"logits/chosen": -2.6847596168518066, |
|
"logits/rejected": -2.627260684967041, |
|
"logps/chosen": -242.67559814453125, |
|
"logps/rejected": -233.02520751953125, |
|
"loss": 0.7216, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.0019026286900043488, |
|
"rewards/margins": 0.2595129609107971, |
|
"rewards/rejected": -0.26141557097435, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.871341104867864e-07, |
|
"logits/chosen": -2.861926794052124, |
|
"logits/rejected": -2.8458104133605957, |
|
"logps/chosen": -313.4859619140625, |
|
"logps/rejected": -299.7122497558594, |
|
"loss": 0.6911, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.011806448921561241, |
|
"rewards/margins": 0.1803116649389267, |
|
"rewards/rejected": -0.1921180933713913, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.859492293879573e-07, |
|
"logits/chosen": -2.814488410949707, |
|
"logits/rejected": -2.763936758041382, |
|
"logps/chosen": -251.5836181640625, |
|
"logps/rejected": -268.69342041015625, |
|
"loss": 0.6986, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": 0.0034225257113575935, |
|
"rewards/margins": 0.2998045086860657, |
|
"rewards/rejected": -0.29638200998306274, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.847137360032699e-07, |
|
"logits/chosen": -2.740324020385742, |
|
"logits/rejected": -2.6555376052856445, |
|
"logps/chosen": -267.32647705078125, |
|
"logps/rejected": -311.2066955566406, |
|
"loss": 0.6782, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 0.04199559986591339, |
|
"rewards/margins": 0.22325468063354492, |
|
"rewards/rejected": -0.18125906586647034, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.834278953522137e-07, |
|
"logits/chosen": -2.788088798522949, |
|
"logits/rejected": -2.769721508026123, |
|
"logps/chosen": -259.531005859375, |
|
"logps/rejected": -282.8291931152344, |
|
"loss": 0.6506, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.01584019511938095, |
|
"rewards/margins": 0.3319249749183655, |
|
"rewards/rejected": -0.347765177488327, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.820919832540181e-07, |
|
"logits/chosen": -2.8391647338867188, |
|
"logits/rejected": -2.7747175693511963, |
|
"logps/chosen": -323.83184814453125, |
|
"logps/rejected": -294.0677795410156, |
|
"loss": 0.7505, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -0.0377802811563015, |
|
"rewards/margins": 0.21989765763282776, |
|
"rewards/rejected": -0.25767797231674194, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.807062862684873e-07, |
|
"logits/chosen": -2.8058762550354004, |
|
"logits/rejected": -2.7266106605529785, |
|
"logps/chosen": -286.4516906738281, |
|
"logps/rejected": -269.9130554199219, |
|
"loss": 0.7046, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -0.2667284309864044, |
|
"rewards/margins": 0.10346898436546326, |
|
"rewards/rejected": -0.3701974153518677, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.792711016345321e-07, |
|
"logits/chosen": -2.8397815227508545, |
|
"logits/rejected": -2.845283031463623, |
|
"logps/chosen": -359.9697570800781, |
|
"logps/rejected": -326.89508056640625, |
|
"loss": 0.7683, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.029000351205468178, |
|
"rewards/margins": 0.4069358706474304, |
|
"rewards/rejected": -0.43593621253967285, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.777867372064105e-07, |
|
"logits/chosen": -2.812641143798828, |
|
"logits/rejected": -2.7523622512817383, |
|
"logps/chosen": -313.5459289550781, |
|
"logps/rejected": -299.8595275878906, |
|
"loss": 0.7817, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.13160283863544464, |
|
"rewards/margins": 0.31865066289901733, |
|
"rewards/rejected": -0.4502534866333008, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.7625351138769166e-07, |
|
"logits/chosen": -2.7898976802825928, |
|
"logits/rejected": -2.7862889766693115, |
|
"logps/chosen": -327.0060119628906, |
|
"logps/rejected": -322.96783447265625, |
|
"loss": 0.7708, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.08684093505144119, |
|
"rewards/margins": 0.4070696532726288, |
|
"rewards/rejected": -0.49391061067581177, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.7467175306295647e-07, |
|
"logits/chosen": -2.6663591861724854, |
|
"logits/rejected": -2.5961318016052246, |
|
"logps/chosen": -261.4271240234375, |
|
"logps/rejected": -309.5064697265625, |
|
"loss": 0.7365, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.0995909571647644, |
|
"rewards/margins": 0.18599346280097961, |
|
"rewards/rejected": -0.2855844497680664, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.7304180152725024e-07, |
|
"logits/chosen": -2.7205381393432617, |
|
"logits/rejected": -2.632664680480957, |
|
"logps/chosen": -324.07098388671875, |
|
"logps/rejected": -303.51422119140625, |
|
"loss": 0.6995, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.09924812614917755, |
|
"rewards/margins": 0.3877854347229004, |
|
"rewards/rejected": -0.48703351616859436, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.7136400641330245e-07, |
|
"logits/chosen": -2.7004384994506836, |
|
"logits/rejected": -2.6967031955718994, |
|
"logps/chosen": -273.4053039550781, |
|
"logps/rejected": -285.5749206542969, |
|
"loss": 0.6863, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.04174273461103439, |
|
"rewards/margins": 0.429397314786911, |
|
"rewards/rejected": -0.471140056848526, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.6963872761652834e-07, |
|
"logits/chosen": -2.7354209423065186, |
|
"logits/rejected": -2.6918838024139404, |
|
"logps/chosen": -313.76708984375, |
|
"logps/rejected": -291.7537536621094, |
|
"loss": 0.7426, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -0.0618835911154747, |
|
"rewards/margins": 0.23076358437538147, |
|
"rewards/rejected": -0.29264718294143677, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.6786633521783005e-07, |
|
"logits/chosen": -2.6053059101104736, |
|
"logits/rejected": -2.5342700481414795, |
|
"logps/chosen": -244.05995178222656, |
|
"logps/rejected": -259.55023193359375, |
|
"loss": 0.7067, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.07344765961170197, |
|
"rewards/margins": 0.4161744713783264, |
|
"rewards/rejected": -0.34272682666778564, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.6604720940421207e-07, |
|
"logits/chosen": -2.674229145050049, |
|
"logits/rejected": -2.6460635662078857, |
|
"logps/chosen": -293.4000244140625, |
|
"logps/rejected": -237.8984375, |
|
"loss": 0.6553, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.08343453705310822, |
|
"rewards/margins": 0.23878370225429535, |
|
"rewards/rejected": -0.32221823930740356, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.6418174038722924e-07, |
|
"logits/chosen": -2.7431387901306152, |
|
"logits/rejected": -2.6589853763580322, |
|
"logps/chosen": -258.30517578125, |
|
"logps/rejected": -337.9000244140625, |
|
"loss": 0.7977, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -0.044009700417518616, |
|
"rewards/margins": 0.3760453462600708, |
|
"rewards/rejected": -0.42005497217178345, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.6227032831928483e-07, |
|
"logits/chosen": -2.644824981689453, |
|
"logits/rejected": -2.6608684062957764, |
|
"logps/chosen": -300.009033203125, |
|
"logps/rejected": -324.9970397949219, |
|
"loss": 0.6938, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -0.11674674600362778, |
|
"rewards/margins": 0.20092640817165375, |
|
"rewards/rejected": -0.31767311692237854, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.603133832077953e-07, |
|
"logits/chosen": -2.586251974105835, |
|
"logits/rejected": -2.579716920852661, |
|
"logps/chosen": -283.69183349609375, |
|
"logps/rejected": -308.4037780761719, |
|
"loss": 0.6959, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -0.10065947473049164, |
|
"rewards/margins": 0.31346243619918823, |
|
"rewards/rejected": -0.4141218662261963, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5831132482724193e-07, |
|
"logits/chosen": -2.683396577835083, |
|
"logits/rejected": -2.652515172958374, |
|
"logps/chosen": -294.0975646972656, |
|
"logps/rejected": -284.01007080078125, |
|
"loss": 0.6737, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.09628719091415405, |
|
"rewards/margins": 0.3271844983100891, |
|
"rewards/rejected": -0.4234716594219208, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5626458262912735e-07, |
|
"logits/chosen": -2.662360191345215, |
|
"logits/rejected": -2.656447410583496, |
|
"logps/chosen": -292.4847412109375, |
|
"logps/rejected": -313.1526184082031, |
|
"loss": 0.7135, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.13693922758102417, |
|
"rewards/margins": 0.4902416467666626, |
|
"rewards/rejected": -0.6271809339523315, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.541735956498554e-07, |
|
"logits/chosen": -2.7437210083007812, |
|
"logits/rejected": -2.72233247756958, |
|
"logps/chosen": -291.6712646484375, |
|
"logps/rejected": -298.3407287597656, |
|
"loss": 0.6636, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": 0.008928142488002777, |
|
"rewards/margins": 0.6095040440559387, |
|
"rewards/rejected": -0.6005759239196777, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.520388124165564e-07, |
|
"logits/chosen": -2.68888521194458, |
|
"logits/rejected": -2.673058271408081, |
|
"logps/chosen": -303.44732666015625, |
|
"logps/rejected": -304.8455810546875, |
|
"loss": 0.7579, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.08947059512138367, |
|
"rewards/margins": 0.37194037437438965, |
|
"rewards/rejected": -0.4614109992980957, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.498606908508753e-07, |
|
"logits/chosen": -2.6336781978607178, |
|
"logits/rejected": -2.641366958618164, |
|
"logps/chosen": -310.1975402832031, |
|
"logps/rejected": -321.4742736816406, |
|
"loss": 0.7511, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.1359916627407074, |
|
"rewards/margins": 0.3668138384819031, |
|
"rewards/rejected": -0.5028054714202881, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.476396981707453e-07, |
|
"logits/chosen": -2.741149425506592, |
|
"logits/rejected": -2.744832992553711, |
|
"logps/chosen": -283.7612609863281, |
|
"logps/rejected": -319.18011474609375, |
|
"loss": 0.7273, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.14727121591567993, |
|
"rewards/margins": 0.3416295051574707, |
|
"rewards/rejected": -0.48890072107315063, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.453763107901675e-07, |
|
"logits/chosen": -2.769141674041748, |
|
"logits/rejected": -2.6976804733276367, |
|
"logps/chosen": -343.0176086425781, |
|
"logps/rejected": -339.2708740234375, |
|
"loss": 0.684, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.13109339773654938, |
|
"rewards/margins": 0.3063846528530121, |
|
"rewards/rejected": -0.43747806549072266, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.4307101421701755e-07, |
|
"logits/chosen": -2.628458023071289, |
|
"logits/rejected": -2.610029458999634, |
|
"logps/chosen": -299.93060302734375, |
|
"logps/rejected": -290.9799499511719, |
|
"loss": 0.6642, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.17024263739585876, |
|
"rewards/margins": 0.28561675548553467, |
|
"rewards/rejected": -0.45585939288139343, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.4072430294890166e-07, |
|
"logits/chosen": -2.5924410820007324, |
|
"logits/rejected": -2.5785272121429443, |
|
"logps/chosen": -262.089599609375, |
|
"logps/rejected": -269.3330383300781, |
|
"loss": 0.6441, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": 0.034515030682086945, |
|
"rewards/margins": 0.3341864347457886, |
|
"rewards/rejected": -0.2996714115142822, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.3833668036708483e-07, |
|
"logits/chosen": -2.625112771987915, |
|
"logits/rejected": -2.6584105491638184, |
|
"logps/chosen": -306.502685546875, |
|
"logps/rejected": -345.8301086425781, |
|
"loss": 0.6941, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -0.10175588726997375, |
|
"rewards/margins": 0.30893003940582275, |
|
"rewards/rejected": -0.4106859564781189, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.3590865862851263e-07, |
|
"logits/chosen": -2.617330551147461, |
|
"logits/rejected": -2.5643858909606934, |
|
"logps/chosen": -298.26141357421875, |
|
"logps/rejected": -267.2621154785156, |
|
"loss": 0.6176, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.04226139560341835, |
|
"rewards/margins": 0.45013248920440674, |
|
"rewards/rejected": -0.49239394068717957, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.3344075855595097e-07, |
|
"logits/chosen": -2.4896128177642822, |
|
"logits/rejected": -2.4251513481140137, |
|
"logps/chosen": -294.1082763671875, |
|
"logps/rejected": -305.47332763671875, |
|
"loss": 0.6897, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -0.11761671304702759, |
|
"rewards/margins": 0.43049168586730957, |
|
"rewards/rejected": -0.5481083989143372, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.309335095262675e-07, |
|
"logits/chosen": -2.3300864696502686, |
|
"logits/rejected": -2.2978248596191406, |
|
"logps/chosen": -284.1370849609375, |
|
"logps/rejected": -352.7837219238281, |
|
"loss": 0.695, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -0.26125186681747437, |
|
"rewards/margins": 0.5214322209358215, |
|
"rewards/rejected": -0.7826840877532959, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.2838744935687716e-07, |
|
"logits/chosen": -2.383730888366699, |
|
"logits/rejected": -2.2686848640441895, |
|
"logps/chosen": -285.3652038574219, |
|
"logps/rejected": -313.19207763671875, |
|
"loss": 0.6355, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.1003919169306755, |
|
"rewards/margins": 0.49685102701187134, |
|
"rewards/rejected": -0.5972429513931274, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.258031241903777e-07, |
|
"logits/chosen": -2.2691562175750732, |
|
"logits/rejected": -2.1100411415100098, |
|
"logps/chosen": -295.82708740234375, |
|
"logps/rejected": -299.9715881347656, |
|
"loss": 0.6957, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.07401493936777115, |
|
"rewards/margins": 0.35916173458099365, |
|
"rewards/rejected": -0.4331766366958618, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.2318108837739986e-07, |
|
"logits/chosen": -2.101970672607422, |
|
"logits/rejected": -2.052926540374756, |
|
"logps/chosen": -275.3291931152344, |
|
"logps/rejected": -346.45135498046875, |
|
"loss": 0.7169, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.17267772555351257, |
|
"rewards/margins": 0.42162394523620605, |
|
"rewards/rejected": -0.594301700592041, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.2052190435769554e-07, |
|
"logits/chosen": -2.161992073059082, |
|
"logits/rejected": -2.0577569007873535, |
|
"logps/chosen": -329.9812316894531, |
|
"logps/rejected": -323.7291259765625, |
|
"loss": 0.6115, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.0013326676562428474, |
|
"rewards/margins": 0.4901263415813446, |
|
"rewards/rejected": -0.4914590120315552, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.1782614253949255e-07, |
|
"logits/chosen": -1.8093454837799072, |
|
"logits/rejected": -1.6997994184494019, |
|
"logps/chosen": -252.4735565185547, |
|
"logps/rejected": -264.23297119140625, |
|
"loss": 0.7009, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.2441805750131607, |
|
"rewards/margins": 0.25481969118118286, |
|
"rewards/rejected": -0.49900031089782715, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.1509438117713863e-07, |
|
"logits/chosen": -1.5648189783096313, |
|
"logits/rejected": -1.543358325958252, |
|
"logps/chosen": -320.0599060058594, |
|
"logps/rejected": -306.7493896484375, |
|
"loss": 0.6848, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.039286356419324875, |
|
"rewards/margins": 0.518799901008606, |
|
"rewards/rejected": -0.47951358556747437, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.123272062470633e-07, |
|
"logits/chosen": -1.3095282316207886, |
|
"logits/rejected": -1.038232684135437, |
|
"logps/chosen": -291.6402282714844, |
|
"logps/rejected": -298.0380554199219, |
|
"loss": 0.6891, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.02619929611682892, |
|
"rewards/margins": 0.43957069516181946, |
|
"rewards/rejected": -0.4657699763774872, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.0952521132208267e-07, |
|
"logits/chosen": -0.8358496427536011, |
|
"logits/rejected": -0.8311337828636169, |
|
"logps/chosen": -288.705078125, |
|
"logps/rejected": -253.3335723876953, |
|
"loss": 0.6821, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.1505282074213028, |
|
"rewards/margins": 0.26645365357398987, |
|
"rewards/rejected": -0.41698184609413147, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.0668899744407567e-07, |
|
"logits/chosen": -0.6292511820793152, |
|
"logits/rejected": 0.11955951899290085, |
|
"logps/chosen": -217.69808959960938, |
|
"logps/rejected": -266.32525634765625, |
|
"loss": 0.7194, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.19091403484344482, |
|
"rewards/margins": 0.39874905347824097, |
|
"rewards/rejected": -0.589663028717041, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.0381917299505686e-07, |
|
"logits/chosen": -0.4040292203426361, |
|
"logits/rejected": -0.05061528459191322, |
|
"logps/chosen": -281.4028015136719, |
|
"logps/rejected": -348.06854248046875, |
|
"loss": 0.6614, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.022851143032312393, |
|
"rewards/margins": 0.5195820331573486, |
|
"rewards/rejected": -0.5424332022666931, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.00916353566676e-07, |
|
"logits/chosen": 0.4304525554180145, |
|
"logits/rejected": 0.37021252512931824, |
|
"logps/chosen": -268.30401611328125, |
|
"logps/rejected": -340.36376953125, |
|
"loss": 0.6454, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.2065473347902298, |
|
"rewards/margins": 0.4695810079574585, |
|
"rewards/rejected": -0.6761283874511719, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.979811618281705e-07, |
|
"logits/chosen": 0.9382424354553223, |
|
"logits/rejected": -0.011063352227210999, |
|
"logps/chosen": -303.7531433105469, |
|
"logps/rejected": -319.4146423339844, |
|
"loss": 0.7602, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.31537631154060364, |
|
"rewards/margins": 0.3811299502849579, |
|
"rewards/rejected": -0.6965062618255615, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.9501422739279953e-07, |
|
"logits/chosen": 0.4460795521736145, |
|
"logits/rejected": 0.570868968963623, |
|
"logps/chosen": -307.8954772949219, |
|
"logps/rejected": -339.8791198730469, |
|
"loss": 0.6882, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.10756506770849228, |
|
"rewards/margins": 0.5310941934585571, |
|
"rewards/rejected": -0.6386592388153076, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.920161866827889e-07, |
|
"logits/chosen": 0.07825072109699249, |
|
"logits/rejected": 0.6348622441291809, |
|
"logps/chosen": -309.1721496582031, |
|
"logps/rejected": -288.8408203125, |
|
"loss": 0.6567, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.1812726855278015, |
|
"rewards/margins": 0.5873675346374512, |
|
"rewards/rejected": -0.7686402797698975, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.889876827928156e-07, |
|
"logits/chosen": 0.37462693452835083, |
|
"logits/rejected": 0.3019832670688629, |
|
"logps/chosen": -253.56881713867188, |
|
"logps/rejected": -316.3982238769531, |
|
"loss": 0.5877, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.04077187180519104, |
|
"rewards/margins": 0.5590379238128662, |
|
"rewards/rejected": -0.5182660222053528, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.859293653520604e-07, |
|
"logits/chosen": 0.6516929268836975, |
|
"logits/rejected": 0.2800888419151306, |
|
"logps/chosen": -276.3404541015625, |
|
"logps/rejected": -317.9354248046875, |
|
"loss": 0.7184, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.1999325156211853, |
|
"rewards/margins": 0.5805739164352417, |
|
"rewards/rejected": -0.780506432056427, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.828418903848593e-07, |
|
"logits/chosen": -0.025208771228790283, |
|
"logits/rejected": 0.8900982737541199, |
|
"logps/chosen": -265.7961120605469, |
|
"logps/rejected": -337.1889343261719, |
|
"loss": 0.6888, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.3380471467971802, |
|
"rewards/margins": 0.5418481826782227, |
|
"rewards/rejected": -0.8798953294754028, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.797259201699833e-07, |
|
"logits/chosen": 0.05765300244092941, |
|
"logits/rejected": 0.576537013053894, |
|
"logps/chosen": -331.6053466796875, |
|
"logps/rejected": -351.186767578125, |
|
"loss": 0.6485, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.3514668941497803, |
|
"rewards/margins": 0.43950024247169495, |
|
"rewards/rejected": -0.7909672260284424, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.765821230985757e-07, |
|
"logits/chosen": 0.3566046953201294, |
|
"logits/rejected": 0.24242231249809265, |
|
"logps/chosen": -333.481689453125, |
|
"logps/rejected": -358.510986328125, |
|
"loss": 0.6358, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.3405124843120575, |
|
"rewards/margins": 0.591222882270813, |
|
"rewards/rejected": -0.9317353963851929, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.734111735307796e-07, |
|
"logits/chosen": 0.5186017155647278, |
|
"logits/rejected": 0.6986968517303467, |
|
"logps/chosen": -291.35626220703125, |
|
"logps/rejected": -325.29412841796875, |
|
"loss": 0.6586, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.3297085464000702, |
|
"rewards/margins": 0.641376256942749, |
|
"rewards/rejected": -0.9710848331451416, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.7021375165108377e-07, |
|
"logits/chosen": 0.40191513299942017, |
|
"logits/rejected": 0.24818360805511475, |
|
"logps/chosen": -289.5572509765625, |
|
"logps/rejected": -323.7060241699219, |
|
"loss": 0.6506, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.2648981511592865, |
|
"rewards/margins": 0.49969348311424255, |
|
"rewards/rejected": -0.7645915746688843, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.6699054332241985e-07, |
|
"logits/chosen": -0.4896765351295471, |
|
"logits/rejected": -0.1318652182817459, |
|
"logps/chosen": -350.1439208984375, |
|
"logps/rejected": -288.9870910644531, |
|
"loss": 0.7068, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.22379332780838013, |
|
"rewards/margins": 0.44880181550979614, |
|
"rewards/rejected": -0.6725951433181763, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.6374223993904124e-07, |
|
"logits/chosen": -0.05389963835477829, |
|
"logits/rejected": 0.3870914578437805, |
|
"logps/chosen": -330.797119140625, |
|
"logps/rejected": -357.25347900390625, |
|
"loss": 0.6192, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.20667432248592377, |
|
"rewards/margins": 0.6827113032341003, |
|
"rewards/rejected": -0.8893855810165405, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.604695382782159e-07, |
|
"logits/chosen": -0.1867658495903015, |
|
"logits/rejected": 0.04192525893449783, |
|
"logps/chosen": -306.168212890625, |
|
"logps/rejected": -340.8019104003906, |
|
"loss": 0.594, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.1515503227710724, |
|
"rewards/margins": 0.3953551650047302, |
|
"rewards/rejected": -0.546905517578125, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.571731403507635e-07, |
|
"logits/chosen": -0.3911605179309845, |
|
"logits/rejected": 0.24688732624053955, |
|
"logps/chosen": -315.493408203125, |
|
"logps/rejected": -362.08209228515625, |
|
"loss": 0.667, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.19705705344676971, |
|
"rewards/margins": 0.5905351638793945, |
|
"rewards/rejected": -0.7875921726226807, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.5385375325047163e-07, |
|
"logits/chosen": 0.018862135708332062, |
|
"logits/rejected": 0.042824022471904755, |
|
"logps/chosen": -346.4378662109375, |
|
"logps/rejected": -371.5933837890625, |
|
"loss": 0.7231, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -0.16377538442611694, |
|
"rewards/margins": 0.5193297863006592, |
|
"rewards/rejected": -0.6831051707267761, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.505120890024195e-07, |
|
"logits/chosen": -0.5048182010650635, |
|
"logits/rejected": 0.798430323600769, |
|
"logps/chosen": -311.4440002441406, |
|
"logps/rejected": -332.12078857421875, |
|
"loss": 0.5901, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -0.1354539841413498, |
|
"rewards/margins": 0.5432121753692627, |
|
"rewards/rejected": -0.6786662340164185, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4714886441024573e-07, |
|
"logits/chosen": 0.18471813201904297, |
|
"logits/rejected": 0.5488951206207275, |
|
"logps/chosen": -312.63348388671875, |
|
"logps/rejected": -304.498291015625, |
|
"loss": 0.6626, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.16734659671783447, |
|
"rewards/margins": 0.5686672925949097, |
|
"rewards/rejected": -0.7360140085220337, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4376480090239047e-07, |
|
"logits/chosen": 0.2744285464286804, |
|
"logits/rejected": 0.882749080657959, |
|
"logps/chosen": -260.8684997558594, |
|
"logps/rejected": -292.6208190917969, |
|
"loss": 0.7122, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -0.3560115098953247, |
|
"rewards/margins": 0.36396852135658264, |
|
"rewards/rejected": -0.719980001449585, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.403606243773448e-07, |
|
"logits/chosen": 0.10863735526800156, |
|
"logits/rejected": 0.9924963116645813, |
|
"logps/chosen": -277.6872863769531, |
|
"logps/rejected": -296.84375, |
|
"loss": 0.5924, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.19410894811153412, |
|
"rewards/margins": 0.49725741147994995, |
|
"rewards/rejected": -0.6913663744926453, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3693706504794243e-07, |
|
"logits/chosen": -0.0544244647026062, |
|
"logits/rejected": 0.6754695773124695, |
|
"logps/chosen": -276.8150634765625, |
|
"logps/rejected": -309.6656494140625, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.18632037937641144, |
|
"rewards/margins": 0.4902322292327881, |
|
"rewards/rejected": -0.6765525937080383, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.334948572847253e-07, |
|
"logits/chosen": 0.12526825070381165, |
|
"logits/rejected": 0.2164376825094223, |
|
"logps/chosen": -288.12420654296875, |
|
"logps/rejected": -301.5098876953125, |
|
"loss": 0.644, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.2477717101573944, |
|
"rewards/margins": 0.510245680809021, |
|
"rewards/rejected": -0.7580174207687378, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.300347394584172e-07, |
|
"logits/chosen": -0.517288863658905, |
|
"logits/rejected": -0.44046011567115784, |
|
"logps/chosen": -271.6811218261719, |
|
"logps/rejected": -325.8108215332031, |
|
"loss": 0.6557, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.2316058874130249, |
|
"rewards/margins": 0.4710268974304199, |
|
"rewards/rejected": -0.7026327848434448, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.265574537815398e-07, |
|
"logits/chosen": -0.6968811750411987, |
|
"logits/rejected": -0.789453387260437, |
|
"logps/chosen": -290.6319580078125, |
|
"logps/rejected": -339.7631530761719, |
|
"loss": 0.635, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.15982553362846375, |
|
"rewards/margins": 0.5405714511871338, |
|
"rewards/rejected": -0.7003970146179199, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.230637461492043e-07, |
|
"logits/chosen": -0.4320661425590515, |
|
"logits/rejected": -0.4025118052959442, |
|
"logps/chosen": -280.6953430175781, |
|
"logps/rejected": -280.9771728515625, |
|
"loss": 0.6636, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.15578195452690125, |
|
"rewards/margins": 0.5092009902000427, |
|
"rewards/rejected": -0.6649829745292664, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.1955436597911315e-07, |
|
"logits/chosen": -0.9900290369987488, |
|
"logits/rejected": -0.5774145126342773, |
|
"logps/chosen": -257.3419494628906, |
|
"logps/rejected": -318.7354431152344, |
|
"loss": 0.6765, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": 0.020992910489439964, |
|
"rewards/margins": 0.5579670071601868, |
|
"rewards/rejected": -0.5369741320610046, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.160300660508064e-07, |
|
"logits/chosen": -1.4757821559906006, |
|
"logits/rejected": -0.815751314163208, |
|
"logps/chosen": -307.86767578125, |
|
"logps/rejected": -318.9004821777344, |
|
"loss": 0.661, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.030079301446676254, |
|
"rewards/margins": 0.5298373103141785, |
|
"rewards/rejected": -0.5599166750907898, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.1249160234418644e-07, |
|
"logits/chosen": -0.9963082671165466, |
|
"logits/rejected": -0.20200899243354797, |
|
"logps/chosen": -275.8343200683594, |
|
"logps/rejected": -320.10943603515625, |
|
"loss": 0.6312, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.09299668669700623, |
|
"rewards/margins": 0.47309738397598267, |
|
"rewards/rejected": -0.5660940408706665, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.0893973387735683e-07, |
|
"logits/chosen": -0.29111677408218384, |
|
"logits/rejected": -0.016862310469150543, |
|
"logps/chosen": -243.76763916015625, |
|
"logps/rejected": -311.628662109375, |
|
"loss": 0.6769, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -0.1675092577934265, |
|
"rewards/margins": 0.49733197689056396, |
|
"rewards/rejected": -0.6648411750793457, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.05375222543809e-07, |
|
"logits/chosen": -0.8505387306213379, |
|
"logits/rejected": -0.23501801490783691, |
|
"logps/chosen": -294.7572021484375, |
|
"logps/rejected": -310.2570495605469, |
|
"loss": 0.7179, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.1568153351545334, |
|
"rewards/margins": 0.5180144906044006, |
|
"rewards/rejected": -0.6748298406600952, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.017988329489923e-07, |
|
"logits/chosen": -0.2259301245212555, |
|
"logits/rejected": 0.060025911778211594, |
|
"logps/chosen": -325.3514099121094, |
|
"logps/rejected": -357.01263427734375, |
|
"loss": 0.7001, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.1160479336977005, |
|
"rewards/margins": 0.5354103446006775, |
|
"rewards/rejected": -0.6514582633972168, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.9821133224630223e-07, |
|
"logits/chosen": -0.24249279499053955, |
|
"logits/rejected": 0.2804643511772156, |
|
"logps/chosen": -306.503662109375, |
|
"logps/rejected": -328.2669372558594, |
|
"loss": 0.6416, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.3325122892856598, |
|
"rewards/margins": 0.5438024401664734, |
|
"rewards/rejected": -0.8763147592544556, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.946134899725226e-07, |
|
"logits/chosen": -0.21587029099464417, |
|
"logits/rejected": 0.22806911170482635, |
|
"logps/chosen": -272.1225891113281, |
|
"logps/rejected": -317.8914489746094, |
|
"loss": 0.7015, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.2932060956954956, |
|
"rewards/margins": 0.544987142086029, |
|
"rewards/rejected": -0.8381932377815247, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.910060778827554e-07, |
|
"logits/chosen": 0.44503313302993774, |
|
"logits/rejected": 0.7824755311012268, |
|
"logps/chosen": -242.06944274902344, |
|
"logps/rejected": -281.43572998046875, |
|
"loss": 0.6554, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.23596498370170593, |
|
"rewards/margins": 0.4384329915046692, |
|
"rewards/rejected": -0.6743979454040527, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.873898697848762e-07, |
|
"logits/chosen": 0.17147797346115112, |
|
"logits/rejected": 0.5953517556190491, |
|
"logps/chosen": -300.029296875, |
|
"logps/rejected": -331.022216796875, |
|
"loss": 0.6662, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.23151984810829163, |
|
"rewards/margins": 0.441323846578598, |
|
"rewards/rejected": -0.6728437542915344, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.837656413735479e-07, |
|
"logits/chosen": 0.24648509919643402, |
|
"logits/rejected": 0.7520670294761658, |
|
"logps/chosen": -287.30377197265625, |
|
"logps/rejected": -344.3001708984375, |
|
"loss": 0.6139, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.20726029574871063, |
|
"rewards/margins": 0.7070189118385315, |
|
"rewards/rejected": -0.9142792820930481, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.801341700638307e-07, |
|
"logits/chosen": 0.4327443242073059, |
|
"logits/rejected": 0.28958457708358765, |
|
"logps/chosen": -314.9183349609375, |
|
"logps/rejected": -296.54376220703125, |
|
"loss": 0.6514, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.24840864539146423, |
|
"rewards/margins": 0.4947468638420105, |
|
"rewards/rejected": -0.7431554794311523, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7649623482442274e-07, |
|
"logits/chosen": 0.5047562718391418, |
|
"logits/rejected": 1.313357949256897, |
|
"logps/chosen": -284.20257568359375, |
|
"logps/rejected": -354.37872314453125, |
|
"loss": 0.6634, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.3625330328941345, |
|
"rewards/margins": 0.5466379523277283, |
|
"rewards/rejected": -0.9091709852218628, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7285261601056697e-07, |
|
"logits/chosen": 0.19208696484565735, |
|
"logits/rejected": 0.3003866970539093, |
|
"logps/chosen": -306.0182189941406, |
|
"logps/rejected": -343.263671875, |
|
"loss": 0.6481, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.1929151564836502, |
|
"rewards/margins": 0.7904263138771057, |
|
"rewards/rejected": -0.9833414554595947, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.692040951966617e-07, |
|
"logits/chosen": 0.5424776077270508, |
|
"logits/rejected": 0.7004806995391846, |
|
"logps/chosen": -312.521728515625, |
|
"logps/rejected": -328.9450378417969, |
|
"loss": 0.5763, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.0869397521018982, |
|
"rewards/margins": 0.742694616317749, |
|
"rewards/rejected": -0.8296343088150024, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.655514550086086e-07, |
|
"logits/chosen": 0.3900845944881439, |
|
"logits/rejected": 1.1437915563583374, |
|
"logps/chosen": -270.412353515625, |
|
"logps/rejected": -285.83740234375, |
|
"loss": 0.6635, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.039971936494112015, |
|
"rewards/margins": 0.6791943907737732, |
|
"rewards/rejected": -0.6392224431037903, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.618954789559356e-07, |
|
"logits/chosen": -0.6006118059158325, |
|
"logits/rejected": 0.0894058495759964, |
|
"logps/chosen": -329.71051025390625, |
|
"logps/rejected": -360.0182800292969, |
|
"loss": 0.7222, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.2027689814567566, |
|
"rewards/margins": 0.3933987617492676, |
|
"rewards/rejected": -0.5961677432060242, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.582369512637302e-07, |
|
"logits/chosen": 0.022289007902145386, |
|
"logits/rejected": 0.7651575207710266, |
|
"logps/chosen": -239.86917114257812, |
|
"logps/rejected": -276.7130126953125, |
|
"loss": 0.6653, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": 0.07738906890153885, |
|
"rewards/margins": 0.4737430810928345, |
|
"rewards/rejected": -0.3963540196418762, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5457665670441937e-07, |
|
"logits/chosen": 0.08083158731460571, |
|
"logits/rejected": 0.5649360418319702, |
|
"logps/chosen": -276.38232421875, |
|
"logps/rejected": -330.9521789550781, |
|
"loss": 0.7062, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.10466089844703674, |
|
"rewards/margins": 0.6767792701721191, |
|
"rewards/rejected": -0.7814401984214783, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.509153804294318e-07, |
|
"logits/chosen": 0.3234708309173584, |
|
"logits/rejected": 0.7615450024604797, |
|
"logps/chosen": -269.09417724609375, |
|
"logps/rejected": -293.904052734375, |
|
"loss": 0.6256, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -0.07213997095823288, |
|
"rewards/margins": 0.43272051215171814, |
|
"rewards/rejected": -0.504860520362854, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.4725390780077905e-07, |
|
"logits/chosen": 0.46226564049720764, |
|
"logits/rejected": 0.5075880885124207, |
|
"logps/chosen": -314.9215087890625, |
|
"logps/rejected": -332.1287536621094, |
|
"loss": 0.7012, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.053367726504802704, |
|
"rewards/margins": 0.6606994867324829, |
|
"rewards/rejected": -0.7140672206878662, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.435930242225919e-07, |
|
"logits/chosen": 0.2664536237716675, |
|
"logits/rejected": 1.190063714981079, |
|
"logps/chosen": -302.0753479003906, |
|
"logps/rejected": -365.6280822753906, |
|
"loss": 0.6695, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.07531341910362244, |
|
"rewards/margins": 0.6120296120643616, |
|
"rewards/rejected": -0.6873430609703064, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.399335149726463e-07, |
|
"logits/chosen": 0.32580170035362244, |
|
"logits/rejected": 0.5576289892196655, |
|
"logps/chosen": -325.13726806640625, |
|
"logps/rejected": -381.2005615234375, |
|
"loss": 0.6745, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.10618472844362259, |
|
"rewards/margins": 0.5702216625213623, |
|
"rewards/rejected": -0.6764063835144043, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3627616503391812e-07, |
|
"logits/chosen": 0.29107674956321716, |
|
"logits/rejected": 0.5633612871170044, |
|
"logps/chosen": -277.61590576171875, |
|
"logps/rejected": -348.748291015625, |
|
"loss": 0.6212, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.12373210489749908, |
|
"rewards/margins": 0.578803300857544, |
|
"rewards/rejected": -0.7025353908538818, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3262175892620062e-07, |
|
"logits/chosen": 0.6590377688407898, |
|
"logits/rejected": 0.6782903075218201, |
|
"logps/chosen": -301.8876647949219, |
|
"logps/rejected": -348.36968994140625, |
|
"loss": 0.732, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.06287754327058792, |
|
"rewards/margins": 0.7469431757926941, |
|
"rewards/rejected": -0.8098206520080566, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.2897108053782e-07, |
|
"logits/chosen": 0.5858335494995117, |
|
"logits/rejected": 1.1848359107971191, |
|
"logps/chosen": -278.7359619140625, |
|
"logps/rejected": -322.556884765625, |
|
"loss": 0.6291, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.1839427649974823, |
|
"rewards/margins": 0.5925223231315613, |
|
"rewards/rejected": -0.7764650583267212, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2532491295748865e-07, |
|
"logits/chosen": 0.5746503472328186, |
|
"logits/rejected": 1.5114972591400146, |
|
"logps/chosen": -256.6669616699219, |
|
"logps/rejected": -291.76373291015625, |
|
"loss": 0.677, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.17329725623130798, |
|
"rewards/margins": 0.6033150553703308, |
|
"rewards/rejected": -0.7766122817993164, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2168403830632769e-07, |
|
"logits/chosen": 1.2941956520080566, |
|
"logits/rejected": 1.145210862159729, |
|
"logps/chosen": -253.27915954589844, |
|
"logps/rejected": -313.1325378417969, |
|
"loss": 0.63, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.18560369312763214, |
|
"rewards/margins": 0.5705838203430176, |
|
"rewards/rejected": -0.7561875581741333, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1804923757009882e-07, |
|
"logits/chosen": 0.9151768684387207, |
|
"logits/rejected": 1.2065914869308472, |
|
"logps/chosen": -335.712890625, |
|
"logps/rejected": -334.68817138671875, |
|
"loss": 0.5678, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.24476921558380127, |
|
"rewards/margins": 0.7057139873504639, |
|
"rewards/rejected": -0.9504832029342651, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1442129043167873e-07, |
|
"logits/chosen": 0.8291666507720947, |
|
"logits/rejected": 1.7541477680206299, |
|
"logps/chosen": -330.841064453125, |
|
"logps/rejected": -347.6577453613281, |
|
"loss": 0.6978, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.29731371998786926, |
|
"rewards/margins": 0.6203598380088806, |
|
"rewards/rejected": -0.917673647403717, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1080097510381294e-07, |
|
"logits/chosen": 0.8476366400718689, |
|
"logits/rejected": 1.6469848155975342, |
|
"logps/chosen": -290.6492004394531, |
|
"logps/rejected": -338.7218017578125, |
|
"loss": 0.7014, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.2652449905872345, |
|
"rewards/margins": 0.51957768201828, |
|
"rewards/rejected": -0.7848227024078369, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0718906816218595e-07, |
|
"logits/chosen": 1.3053678274154663, |
|
"logits/rejected": 1.4397318363189697, |
|
"logps/chosen": -312.151123046875, |
|
"logps/rejected": -353.7760009765625, |
|
"loss": 0.5969, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.3702758550643921, |
|
"rewards/margins": 0.655584454536438, |
|
"rewards/rejected": -1.02586030960083, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.035863443788411e-07, |
|
"logits/chosen": 1.1411147117614746, |
|
"logits/rejected": 1.3497332334518433, |
|
"logps/chosen": -334.48223876953125, |
|
"logps/rejected": -358.02911376953125, |
|
"loss": 0.6894, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.24735981225967407, |
|
"rewards/margins": 0.7227092981338501, |
|
"rewards/rejected": -0.9700690507888794, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9999357655598891e-07, |
|
"logits/chosen": 1.477621078491211, |
|
"logits/rejected": 2.1223461627960205, |
|
"logps/chosen": -277.4604187011719, |
|
"logps/rejected": -316.42803955078125, |
|
"loss": 0.6614, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5241543650627136, |
|
"rewards/margins": 0.5040414333343506, |
|
"rewards/rejected": -1.0281956195831299, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9641153536023642e-07, |
|
"logits/chosen": 0.708802342414856, |
|
"logits/rejected": 1.7905950546264648, |
|
"logps/chosen": -260.52166748046875, |
|
"logps/rejected": -309.6279602050781, |
|
"loss": 0.6517, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.22097410261631012, |
|
"rewards/margins": 0.6726060509681702, |
|
"rewards/rejected": -0.8935801982879639, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.928409891572757e-07, |
|
"logits/chosen": 0.6345776915550232, |
|
"logits/rejected": 1.2965819835662842, |
|
"logps/chosen": -296.1861572265625, |
|
"logps/rejected": -325.6953430175781, |
|
"loss": 0.6571, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.15726947784423828, |
|
"rewards/margins": 0.33030712604522705, |
|
"rewards/rejected": -0.48757660388946533, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8928270384706582e-07, |
|
"logits/chosen": 0.06032524257898331, |
|
"logits/rejected": 0.7254040241241455, |
|
"logps/chosen": -320.2947998046875, |
|
"logps/rejected": -417.827392578125, |
|
"loss": 0.823, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.05728043615818024, |
|
"rewards/margins": 0.4396788477897644, |
|
"rewards/rejected": -0.49695926904678345, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8573744269954297e-07, |
|
"logits/chosen": -0.6211626529693604, |
|
"logits/rejected": 0.5601530075073242, |
|
"logps/chosen": -387.068359375, |
|
"logps/rejected": -362.6905517578125, |
|
"loss": 0.7065, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": 0.007479578256607056, |
|
"rewards/margins": 0.5313453078269958, |
|
"rewards/rejected": -0.5238656997680664, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8220596619089573e-07, |
|
"logits/chosen": 0.012296931818127632, |
|
"logits/rejected": 0.47643163800239563, |
|
"logps/chosen": -278.79449462890625, |
|
"logps/rejected": -358.36175537109375, |
|
"loss": 0.6023, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -0.02508646249771118, |
|
"rewards/margins": 0.529964029788971, |
|
"rewards/rejected": -0.5550505518913269, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.7868903184043885e-07, |
|
"logits/chosen": 0.22885966300964355, |
|
"logits/rejected": 0.6849693655967712, |
|
"logps/chosen": -245.32431030273438, |
|
"logps/rejected": -260.42095947265625, |
|
"loss": 0.6808, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.2312414050102234, |
|
"rewards/margins": 0.5006843209266663, |
|
"rewards/rejected": -0.7319257855415344, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7518739404812155e-07, |
|
"logits/chosen": 0.10696160048246384, |
|
"logits/rejected": 0.900860071182251, |
|
"logps/chosen": -304.24365234375, |
|
"logps/rejected": -300.4913330078125, |
|
"loss": 0.6239, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": 0.06586351990699768, |
|
"rewards/margins": 0.6996278762817383, |
|
"rewards/rejected": -0.6337643265724182, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.717018039327053e-07, |
|
"logits/chosen": 0.37065258622169495, |
|
"logits/rejected": 1.0153319835662842, |
|
"logps/chosen": -274.7767639160156, |
|
"logps/rejected": -315.3498840332031, |
|
"loss": 0.6113, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.13434213399887085, |
|
"rewards/margins": 0.5673910975456238, |
|
"rewards/rejected": -0.7017332315444946, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.6823300917064458e-07, |
|
"logits/chosen": 0.0030569732189178467, |
|
"logits/rejected": 1.1464507579803467, |
|
"logps/chosen": -292.8100280761719, |
|
"logps/rejected": -329.35333251953125, |
|
"loss": 0.5978, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.3217441141605377, |
|
"rewards/margins": 0.6382726430892944, |
|
"rewards/rejected": -0.9600168466567993, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.647817538357072e-07, |
|
"logits/chosen": 0.3857354521751404, |
|
"logits/rejected": 1.0852301120758057, |
|
"logps/chosen": -329.72882080078125, |
|
"logps/rejected": -318.942138671875, |
|
"loss": 0.7355, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.1711738556623459, |
|
"rewards/margins": 0.6827853322029114, |
|
"rewards/rejected": -0.8539592027664185, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6134877823936607e-07, |
|
"logits/chosen": 0.8364083766937256, |
|
"logits/rejected": 1.8597906827926636, |
|
"logps/chosen": -264.943359375, |
|
"logps/rejected": -348.2406921386719, |
|
"loss": 0.5214, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.09745172411203384, |
|
"rewards/margins": 1.012548565864563, |
|
"rewards/rejected": -1.1100002527236938, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5793481877199943e-07, |
|
"logits/chosen": 0.5367469191551208, |
|
"logits/rejected": 1.0159227848052979, |
|
"logps/chosen": -320.8637390136719, |
|
"logps/rejected": -364.44586181640625, |
|
"loss": 0.6954, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.26059049367904663, |
|
"rewards/margins": 0.5368437170982361, |
|
"rewards/rejected": -0.7974342107772827, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5454060774493065e-07, |
|
"logits/chosen": 1.5845757722854614, |
|
"logits/rejected": 0.7877135872840881, |
|
"logps/chosen": -269.8088684082031, |
|
"logps/rejected": -316.855712890625, |
|
"loss": 0.6368, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.14322423934936523, |
|
"rewards/margins": 0.6599938869476318, |
|
"rewards/rejected": -0.8032180070877075, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.5116687323334464e-07, |
|
"logits/chosen": 0.47069552540779114, |
|
"logits/rejected": 1.2589174509048462, |
|
"logps/chosen": -289.40130615234375, |
|
"logps/rejected": -347.61376953125, |
|
"loss": 0.6596, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.15590085089206696, |
|
"rewards/margins": 0.6152348518371582, |
|
"rewards/rejected": -0.771135687828064, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.478143389201113e-07, |
|
"logits/chosen": 0.5592608451843262, |
|
"logits/rejected": 1.0087881088256836, |
|
"logps/chosen": -281.7051696777344, |
|
"logps/rejected": -335.6897277832031, |
|
"loss": 0.5898, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": 0.045632533729076385, |
|
"rewards/margins": 0.8765643835067749, |
|
"rewards/rejected": -0.8309319019317627, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4448372394055246e-07, |
|
"logits/chosen": -0.018945127725601196, |
|
"logits/rejected": 0.802770733833313, |
|
"logps/chosen": -328.23980712890625, |
|
"logps/rejected": -333.0008239746094, |
|
"loss": 0.6108, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.1781616359949112, |
|
"rewards/margins": 0.6047887206077576, |
|
"rewards/rejected": -0.7829503417015076, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.4117574272818386e-07, |
|
"logits/chosen": -0.5540781021118164, |
|
"logits/rejected": 0.035438332706689835, |
|
"logps/chosen": -317.64404296875, |
|
"logps/rejected": -343.2810974121094, |
|
"loss": 0.6491, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.0927032008767128, |
|
"rewards/margins": 0.4850231111049652, |
|
"rewards/rejected": -0.577726423740387, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.3789110486146468e-07, |
|
"logits/chosen": 0.01489407941699028, |
|
"logits/rejected": 0.62424635887146, |
|
"logps/chosen": -293.1672668457031, |
|
"logps/rejected": -329.47479248046875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.16148582100868225, |
|
"rewards/margins": 0.5662094950675964, |
|
"rewards/rejected": -0.7276952862739563, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3463051491159093e-07, |
|
"logits/chosen": 0.06474697589874268, |
|
"logits/rejected": -0.06373190879821777, |
|
"logps/chosen": -314.8403015136719, |
|
"logps/rejected": -331.58941650390625, |
|
"loss": 0.6393, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.08766806125640869, |
|
"rewards/margins": 0.6701959371566772, |
|
"rewards/rejected": -0.7578639984130859, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3139467229135998e-07, |
|
"logits/chosen": -0.7029250860214233, |
|
"logits/rejected": -0.01946442574262619, |
|
"logps/chosen": -334.6876220703125, |
|
"logps/rejected": -347.177734375, |
|
"loss": 0.63, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.16892662644386292, |
|
"rewards/margins": 0.5833321809768677, |
|
"rewards/rejected": -0.752258837223053, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.281842711051438e-07, |
|
"logits/chosen": -0.23442219197750092, |
|
"logits/rejected": 0.7187111973762512, |
|
"logps/chosen": -295.4104309082031, |
|
"logps/rejected": -344.3884582519531, |
|
"loss": 0.5594, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.11131522059440613, |
|
"rewards/margins": 0.7158193588256836, |
|
"rewards/rejected": -0.8271346092224121, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2500000000000005e-07, |
|
"logits/chosen": -0.23394931852817535, |
|
"logits/rejected": 0.5850412845611572, |
|
"logps/chosen": -316.489990234375, |
|
"logps/rejected": -380.6622009277344, |
|
"loss": 0.695, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.21047508716583252, |
|
"rewards/margins": 0.6145602464675903, |
|
"rewards/rejected": -0.8250353336334229, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2184254201795363e-07, |
|
"logits/chosen": 0.016806162893772125, |
|
"logits/rejected": 0.5278490781784058, |
|
"logps/chosen": -292.6138916015625, |
|
"logps/rejected": -314.90130615234375, |
|
"loss": 0.6083, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.21455985307693481, |
|
"rewards/margins": 0.4614453911781311, |
|
"rewards/rejected": -0.6760051846504211, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1871257444948096e-07, |
|
"logits/chosen": 0.26967114210128784, |
|
"logits/rejected": 0.47079068422317505, |
|
"logps/chosen": -297.1998291015625, |
|
"logps/rejected": -293.2596435546875, |
|
"loss": 0.744, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.2197016477584839, |
|
"rewards/margins": 0.3860716223716736, |
|
"rewards/rejected": -0.6057732105255127, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1561076868822755e-07, |
|
"logits/chosen": 0.01600978896021843, |
|
"logits/rejected": 0.6568925380706787, |
|
"logps/chosen": -254.8614044189453, |
|
"logps/rejected": -302.18023681640625, |
|
"loss": 0.6576, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.19405773282051086, |
|
"rewards/margins": 0.5532448887825012, |
|
"rewards/rejected": -0.7473027110099792, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.125377900869913e-07, |
|
"logits/chosen": -0.40245479345321655, |
|
"logits/rejected": 0.1633588671684265, |
|
"logps/chosen": -301.2410888671875, |
|
"logps/rejected": -386.735107421875, |
|
"loss": 0.6539, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.09816699475049973, |
|
"rewards/margins": 0.6523302793502808, |
|
"rewards/rejected": -0.7504973411560059, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.09494297815e-07, |
|
"logits/chosen": -0.19187738001346588, |
|
"logits/rejected": 1.2669118642807007, |
|
"logps/chosen": -223.18153381347656, |
|
"logps/rejected": -265.8275146484375, |
|
"loss": 0.6018, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.05469350516796112, |
|
"rewards/margins": 0.8093068599700928, |
|
"rewards/rejected": -0.8640003800392151, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.0648094471651722e-07, |
|
"logits/chosen": 0.06107745319604874, |
|
"logits/rejected": 1.0187910795211792, |
|
"logps/chosen": -298.7318420410156, |
|
"logps/rejected": -298.97607421875, |
|
"loss": 0.615, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.29442766308784485, |
|
"rewards/margins": 0.5528799295425415, |
|
"rewards/rejected": -0.8473076820373535, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0349837717080347e-07, |
|
"logits/chosen": 0.2968660891056061, |
|
"logits/rejected": 1.533956527709961, |
|
"logps/chosen": -308.34906005859375, |
|
"logps/rejected": -352.53582763671875, |
|
"loss": 0.7653, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.2456754446029663, |
|
"rewards/margins": 0.691182553768158, |
|
"rewards/rejected": -0.9368579983711243, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0054723495346482e-07, |
|
"logits/chosen": 0.6172692775726318, |
|
"logits/rejected": 1.4260393381118774, |
|
"logps/chosen": -255.27175903320312, |
|
"logps/rejected": -352.9705505371094, |
|
"loss": 0.5945, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.2834804654121399, |
|
"rewards/margins": 0.7214651107788086, |
|
"rewards/rejected": -1.0049455165863037, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.76281510992176e-08, |
|
"logits/chosen": 0.1523429900407791, |
|
"logits/rejected": 1.0697251558303833, |
|
"logps/chosen": -341.0992736816406, |
|
"logps/rejected": -314.7637023925781, |
|
"loss": 0.5933, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.2510679364204407, |
|
"rewards/margins": 0.7420140504837036, |
|
"rewards/rejected": -0.9930820465087891, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.474175176609956e-08, |
|
"logits/chosen": -0.027563797309994698, |
|
"logits/rejected": 0.730286717414856, |
|
"logps/chosen": -306.8202209472656, |
|
"logps/rejected": -328.6500244140625, |
|
"loss": 0.6264, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.21240682899951935, |
|
"rewards/margins": 0.526252269744873, |
|
"rewards/rejected": -0.7386590838432312, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.18886561011557e-08, |
|
"logits/chosen": 0.010053783655166626, |
|
"logits/rejected": 0.9134060144424438, |
|
"logps/chosen": -343.5555419921875, |
|
"logps/rejected": -344.8569030761719, |
|
"loss": 0.5614, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.1430027186870575, |
|
"rewards/margins": 0.8304474949836731, |
|
"rewards/rejected": -0.9734501838684082, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.906947610762825e-08, |
|
"logits/chosen": 0.48395290970802307, |
|
"logits/rejected": 0.8562638759613037, |
|
"logps/chosen": -303.1999816894531, |
|
"logps/rejected": -394.3343505859375, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.22828936576843262, |
|
"rewards/margins": 0.8299635648727417, |
|
"rewards/rejected": -1.0582529306411743, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.628481651367875e-08, |
|
"logits/chosen": 0.9650541543960571, |
|
"logits/rejected": 1.530713438987732, |
|
"logps/chosen": -317.75604248046875, |
|
"logps/rejected": -357.28875732421875, |
|
"loss": 0.569, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.3810655474662781, |
|
"rewards/margins": 0.5778117775917053, |
|
"rewards/rejected": -0.9588773250579834, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.353527464267104e-08, |
|
"logits/chosen": 0.648531436920166, |
|
"logits/rejected": 0.5499491095542908, |
|
"logps/chosen": -314.2027282714844, |
|
"logps/rejected": -314.53021240234375, |
|
"loss": 0.6003, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.2263791263103485, |
|
"rewards/margins": 0.5237205624580383, |
|
"rewards/rejected": -0.7500997185707092, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.082144028504231e-08, |
|
"logits/chosen": 0.57253497838974, |
|
"logits/rejected": 1.7951874732971191, |
|
"logps/chosen": -329.2058410644531, |
|
"logps/rejected": -354.29132080078125, |
|
"loss": 0.6177, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.18844135105609894, |
|
"rewards/margins": 0.7768537402153015, |
|
"rewards/rejected": -0.9652950763702393, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.814389557179016e-08, |
|
"logits/chosen": 0.21237725019454956, |
|
"logits/rejected": 1.0711424350738525, |
|
"logps/chosen": -355.3842468261719, |
|
"logps/rejected": -369.0294494628906, |
|
"loss": 0.6145, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.2611796259880066, |
|
"rewards/margins": 0.7826185822486877, |
|
"rewards/rejected": -1.0437982082366943, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.550321484960251e-08, |
|
"logits/chosen": 0.7930352687835693, |
|
"logits/rejected": 1.6684021949768066, |
|
"logps/chosen": -339.7545471191406, |
|
"logps/rejected": -346.7519226074219, |
|
"loss": 0.6387, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.27698060870170593, |
|
"rewards/margins": 0.8290520906448364, |
|
"rewards/rejected": -1.1060327291488647, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.289996455765748e-08, |
|
"logits/chosen": 0.9145855903625488, |
|
"logits/rejected": 1.413263201713562, |
|
"logps/chosen": -331.789794921875, |
|
"logps/rejected": -378.784912109375, |
|
"loss": 0.7024, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.29554906487464905, |
|
"rewards/margins": 0.6969149708747864, |
|
"rewards/rejected": -0.9924640655517578, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.033470310611945e-08, |
|
"logits/chosen": 0.3435116112232208, |
|
"logits/rejected": 0.8178513050079346, |
|
"logps/chosen": -300.52178955078125, |
|
"logps/rejected": -318.1594543457031, |
|
"loss": 0.5883, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -0.3312419652938843, |
|
"rewards/margins": 0.560550332069397, |
|
"rewards/rejected": -0.8917922973632812, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.780798075635675e-08, |
|
"logits/chosen": 0.1530955582857132, |
|
"logits/rejected": 1.2445425987243652, |
|
"logps/chosen": -338.9671936035156, |
|
"logps/rejected": -394.33905029296875, |
|
"loss": 0.6703, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.10936611145734787, |
|
"rewards/margins": 0.7992250919342041, |
|
"rewards/rejected": -0.9085911512374878, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.532033950290885e-08, |
|
"logits/chosen": 0.3828258514404297, |
|
"logits/rejected": -0.013105906546115875, |
|
"logps/chosen": -247.27374267578125, |
|
"logps/rejected": -330.0480041503906, |
|
"loss": 0.7178, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.16955241560935974, |
|
"rewards/margins": 0.5796515941619873, |
|
"rewards/rejected": -0.7492039799690247, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.28723129572247e-08, |
|
"logits/chosen": -0.008059903979301453, |
|
"logits/rejected": 0.4361591339111328, |
|
"logps/chosen": -293.2403259277344, |
|
"logps/rejected": -323.6603698730469, |
|
"loss": 0.6125, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.1709737777709961, |
|
"rewards/margins": 0.5192134380340576, |
|
"rewards/rejected": -0.6901871562004089, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 6.046442623320145e-08, |
|
"logits/chosen": 0.3876532316207886, |
|
"logits/rejected": 0.8840723037719727, |
|
"logps/chosen": -317.1882019042969, |
|
"logps/rejected": -368.5491943359375, |
|
"loss": 0.5847, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.15435317158699036, |
|
"rewards/margins": 0.7234021425247192, |
|
"rewards/rejected": -0.8777552843093872, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.809719583454414e-08, |
|
"logits/chosen": 0.1544257402420044, |
|
"logits/rejected": 0.5524401664733887, |
|
"logps/chosen": -320.5746154785156, |
|
"logps/rejected": -355.8383483886719, |
|
"loss": 0.6153, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -0.30984005331993103, |
|
"rewards/margins": 0.5343888998031616, |
|
"rewards/rejected": -0.844228982925415, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.57711295439732e-08, |
|
"logits/chosen": -0.23453757166862488, |
|
"logits/rejected": 0.108016237616539, |
|
"logps/chosen": -317.7830810546875, |
|
"logps/rejected": -392.8485107421875, |
|
"loss": 0.7964, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.26463451981544495, |
|
"rewards/margins": 0.3382280468940735, |
|
"rewards/rejected": -0.6028625965118408, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.3486726314303175e-08, |
|
"logits/chosen": -0.37305283546447754, |
|
"logits/rejected": 0.704928994178772, |
|
"logps/chosen": -298.06561279296875, |
|
"logps/rejected": -325.4785461425781, |
|
"loss": 0.6078, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.00762226153165102, |
|
"rewards/margins": 0.6488328576087952, |
|
"rewards/rejected": -0.6564551591873169, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.1244476161413806e-08, |
|
"logits/chosen": 0.41815757751464844, |
|
"logits/rejected": 1.4270755052566528, |
|
"logps/chosen": -233.82427978515625, |
|
"logps/rejected": -301.65826416015625, |
|
"loss": 0.6295, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.19570133090019226, |
|
"rewards/margins": 0.6876392960548401, |
|
"rewards/rejected": -0.8833405375480652, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.904486005914027e-08, |
|
"logits/chosen": -0.3861064612865448, |
|
"logits/rejected": 0.04176158830523491, |
|
"logps/chosen": -294.285400390625, |
|
"logps/rejected": -333.8177185058594, |
|
"loss": 0.6621, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.11257711052894592, |
|
"rewards/margins": 0.5992448329925537, |
|
"rewards/rejected": -0.7118219137191772, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.688834983610082e-08, |
|
"logits/chosen": 0.12746843695640564, |
|
"logits/rejected": -0.0012746751308441162, |
|
"logps/chosen": -273.5867919921875, |
|
"logps/rejected": -360.38873291015625, |
|
"loss": 0.6068, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.17792265117168427, |
|
"rewards/margins": 0.6180278658866882, |
|
"rewards/rejected": -0.7959505915641785, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.477540807448832e-08, |
|
"logits/chosen": -0.5536242127418518, |
|
"logits/rejected": 0.9800190925598145, |
|
"logps/chosen": -342.70782470703125, |
|
"logps/rejected": -373.4602355957031, |
|
"loss": 0.6496, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.16559717059135437, |
|
"rewards/margins": 0.689116358757019, |
|
"rewards/rejected": -0.8547135591506958, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.270648801084295e-08, |
|
"logits/chosen": 0.006685473024845123, |
|
"logits/rejected": 0.7440766096115112, |
|
"logps/chosen": -366.8838195800781, |
|
"logps/rejected": -356.4771728515625, |
|
"loss": 0.628, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.31455904245376587, |
|
"rewards/margins": 0.559075653553009, |
|
"rewards/rejected": -0.8736346364021301, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.0682033438831584e-08, |
|
"logits/chosen": -0.7373438477516174, |
|
"logits/rejected": 0.6611166000366211, |
|
"logps/chosen": -362.8245849609375, |
|
"logps/rejected": -376.4985046386719, |
|
"loss": 0.603, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.004348123446106911, |
|
"rewards/margins": 0.7439255714416504, |
|
"rewards/rejected": -0.7482736706733704, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.8702478614051345e-08, |
|
"logits/chosen": -0.5010265707969666, |
|
"logits/rejected": 0.7126686573028564, |
|
"logps/chosen": -333.6928405761719, |
|
"logps/rejected": -362.8348388671875, |
|
"loss": 0.5581, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.09581595659255981, |
|
"rewards/margins": 0.7684364914894104, |
|
"rewards/rejected": -0.8642523884773254, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.676824816087978e-08, |
|
"logits/chosen": 0.4850161671638489, |
|
"logits/rejected": 1.3615456819534302, |
|
"logps/chosen": -282.8792419433594, |
|
"logps/rejected": -272.2681579589844, |
|
"loss": 0.628, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.2777165174484253, |
|
"rewards/margins": 0.5231444835662842, |
|
"rewards/rejected": -0.8008609414100647, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.487975698139084e-08, |
|
"logits/chosen": 0.2012457251548767, |
|
"logits/rejected": 0.999991774559021, |
|
"logps/chosen": -308.87738037109375, |
|
"logps/rejected": -351.0428466796875, |
|
"loss": 0.6757, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.27788233757019043, |
|
"rewards/margins": 0.4390586316585541, |
|
"rewards/rejected": -0.7169409394264221, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.303741016635614e-08, |
|
"logits/chosen": 0.05412872135639191, |
|
"logits/rejected": 0.9229797124862671, |
|
"logps/chosen": -312.1148681640625, |
|
"logps/rejected": -305.1954650878906, |
|
"loss": 0.7495, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.04841495305299759, |
|
"rewards/margins": 0.6764097809791565, |
|
"rewards/rejected": -0.7248247861862183, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.12416029083514e-08, |
|
"logits/chosen": 0.9064224362373352, |
|
"logits/rejected": 0.9631800651550293, |
|
"logps/chosen": -251.423583984375, |
|
"logps/rejected": -331.3157958984375, |
|
"loss": 0.6625, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.3316621482372284, |
|
"rewards/margins": 0.5902231335639954, |
|
"rewards/rejected": -0.9218851923942566, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.9492720416985e-08, |
|
"logits/chosen": 0.768115222454071, |
|
"logits/rejected": 1.219833493232727, |
|
"logps/chosen": -290.3539733886719, |
|
"logps/rejected": -339.0583801269531, |
|
"loss": 0.6143, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.2172953188419342, |
|
"rewards/margins": 0.7515923380851746, |
|
"rewards/rejected": -0.9688878059387207, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.7791137836269158e-08, |
|
"logits/chosen": -0.08727936446666718, |
|
"logits/rejected": 1.5329773426055908, |
|
"logps/chosen": -255.40463256835938, |
|
"logps/rejected": -344.9212951660156, |
|
"loss": 0.6984, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.2787282168865204, |
|
"rewards/margins": 0.8177152276039124, |
|
"rewards/rejected": -1.0964434146881104, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.613722016414943e-08, |
|
"logits/chosen": 0.10900969803333282, |
|
"logits/rejected": 1.1554454565048218, |
|
"logps/chosen": -302.16650390625, |
|
"logps/rejected": -325.09332275390625, |
|
"loss": 0.6676, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.15435893833637238, |
|
"rewards/margins": 0.6768224835395813, |
|
"rewards/rejected": -0.8311813473701477, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.4531322174210973e-08, |
|
"logits/chosen": 0.2667168378829956, |
|
"logits/rejected": 0.06283846497535706, |
|
"logps/chosen": -270.20196533203125, |
|
"logps/rejected": -340.9342041015625, |
|
"loss": 0.6368, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.2986586093902588, |
|
"rewards/margins": 0.41698285937309265, |
|
"rewards/rejected": -0.7156413793563843, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.297378833957761e-08, |
|
"logits/chosen": 0.11617850512266159, |
|
"logits/rejected": 1.1254775524139404, |
|
"logps/chosen": -359.0841064453125, |
|
"logps/rejected": -370.67474365234375, |
|
"loss": 0.7271, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.12873072922229767, |
|
"rewards/margins": 0.7082916498184204, |
|
"rewards/rejected": -0.8370224237442017, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.1464952759020856e-08, |
|
"logits/chosen": 0.8191357851028442, |
|
"logits/rejected": 0.4740172028541565, |
|
"logps/chosen": -314.5917053222656, |
|
"logps/rejected": -338.27239990234375, |
|
"loss": 0.6364, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.10390536487102509, |
|
"rewards/margins": 0.45885345339775085, |
|
"rewards/rejected": -0.5627588033676147, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.0005139085293942e-08, |
|
"logits/chosen": -0.050498366355895996, |
|
"logits/rejected": 1.2883732318878174, |
|
"logps/chosen": -322.22113037109375, |
|
"logps/rejected": -332.8543701171875, |
|
"loss": 0.6629, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.1322225034236908, |
|
"rewards/margins": 0.6378248333930969, |
|
"rewards/rejected": -0.7700474262237549, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.8594660455706763e-08, |
|
"logits/chosen": 0.42545390129089355, |
|
"logits/rejected": 0.9098062515258789, |
|
"logps/chosen": -289.212646484375, |
|
"logps/rejected": -291.0245666503906, |
|
"loss": 0.6363, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.17995241284370422, |
|
"rewards/margins": 0.5931330323219299, |
|
"rewards/rejected": -0.7730855345726013, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.7233819424956247e-08, |
|
"logits/chosen": 0.331084281206131, |
|
"logits/rejected": 0.931097686290741, |
|
"logps/chosen": -289.7149353027344, |
|
"logps/rejected": -335.7164306640625, |
|
"loss": 0.5931, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -0.14608880877494812, |
|
"rewards/margins": 0.6044370532035828, |
|
"rewards/rejected": -0.7505258321762085, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.5922907900227017e-08, |
|
"logits/chosen": -0.002596009522676468, |
|
"logits/rejected": 1.004823088645935, |
|
"logps/chosen": -328.34918212890625, |
|
"logps/rejected": -338.5020751953125, |
|
"loss": 0.6564, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.28392377495765686, |
|
"rewards/margins": 0.8925819993019104, |
|
"rewards/rejected": -1.1765056848526, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.4662207078575684e-08, |
|
"logits/chosen": 0.22775022685527802, |
|
"logits/rejected": 0.9491074681282043, |
|
"logps/chosen": -307.7417907714844, |
|
"logps/rejected": -377.443115234375, |
|
"loss": 0.6774, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -0.16252580285072327, |
|
"rewards/margins": 0.724989116191864, |
|
"rewards/rejected": -0.8875149488449097, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.345198738661285e-08, |
|
"logits/chosen": 0.4222959280014038, |
|
"logits/rejected": 1.0754032135009766, |
|
"logps/chosen": -275.5315856933594, |
|
"logps/rejected": -338.4194030761719, |
|
"loss": 0.607, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.19549164175987244, |
|
"rewards/margins": 0.6255277991294861, |
|
"rewards/rejected": -0.8210194110870361, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.2292508422495157e-08, |
|
"logits/chosen": 0.35431140661239624, |
|
"logits/rejected": 0.6342911124229431, |
|
"logps/chosen": -245.69967651367188, |
|
"logps/rejected": -307.9548034667969, |
|
"loss": 0.5931, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.08900564908981323, |
|
"rewards/margins": 0.7763758897781372, |
|
"rewards/rejected": -0.8653814792633057, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.118401890024001e-08, |
|
"logits/chosen": -0.6884260773658752, |
|
"logits/rejected": 0.5942091941833496, |
|
"logps/chosen": -309.41900634765625, |
|
"logps/rejected": -392.107421875, |
|
"loss": 0.8082, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.12636291980743408, |
|
"rewards/margins": 0.7045049667358398, |
|
"rewards/rejected": -0.8308678865432739, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.0126756596375685e-08, |
|
"logits/chosen": 0.28693687915802, |
|
"logits/rejected": 0.19982758164405823, |
|
"logps/chosen": -284.8650207519531, |
|
"logps/rejected": -322.2666015625, |
|
"loss": 0.7014, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.29686933755874634, |
|
"rewards/margins": 0.5921163558959961, |
|
"rewards/rejected": -0.8889857530593872, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.12094829893642e-09, |
|
"logits/chosen": -0.22270016372203827, |
|
"logits/rejected": 1.3697696924209595, |
|
"logps/chosen": -282.0764465332031, |
|
"logps/rejected": -308.25970458984375, |
|
"loss": 0.7014, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -0.24073350429534912, |
|
"rewards/margins": 0.5527843236923218, |
|
"rewards/rejected": -0.7935178279876709, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 8.166809758815895e-09, |
|
"logits/chosen": -0.42256081104278564, |
|
"logits/rejected": 0.24574501812458038, |
|
"logps/chosen": -287.09100341796875, |
|
"logps/rejected": -344.0694580078125, |
|
"loss": 0.6457, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.2197842299938202, |
|
"rewards/margins": 0.44125646352767944, |
|
"rewards/rejected": -0.661040723323822, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.2645456434869965e-09, |
|
"logits/chosen": -0.25060874223709106, |
|
"logits/rejected": 0.9384374618530273, |
|
"logps/chosen": -278.2327880859375, |
|
"logps/rejected": -328.0905456542969, |
|
"loss": 0.6304, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.1905202716588974, |
|
"rewards/margins": 0.6406797766685486, |
|
"rewards/rejected": -0.8312000036239624, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 6.414349493100129e-09, |
|
"logits/chosen": 0.5399929881095886, |
|
"logits/rejected": 1.2095367908477783, |
|
"logps/chosen": -292.74786376953125, |
|
"logps/rejected": -348.2053527832031, |
|
"loss": 0.6079, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.0946485847234726, |
|
"rewards/margins": 0.6198921799659729, |
|
"rewards/rejected": -0.7145407795906067, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.616403678967624e-09, |
|
"logits/chosen": -0.07524357736110687, |
|
"logits/rejected": 1.0452533960342407, |
|
"logps/chosen": -356.8501281738281, |
|
"logps/rejected": -411.29931640625, |
|
"loss": 0.707, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -0.12035229802131653, |
|
"rewards/margins": 0.5983991622924805, |
|
"rewards/rejected": -0.7187514901161194, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.8708793644441086e-09, |
|
"logits/chosen": -0.13995477557182312, |
|
"logits/rejected": 0.6635825634002686, |
|
"logps/chosen": -278.5004577636719, |
|
"logps/rejected": -310.28857421875, |
|
"loss": 0.6305, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.18752513825893402, |
|
"rewards/margins": 0.5632314085960388, |
|
"rewards/rejected": -0.750756561756134, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.1779364682113794e-09, |
|
"logits/chosen": 0.5096856951713562, |
|
"logits/rejected": 1.3543939590454102, |
|
"logps/chosen": -241.704833984375, |
|
"logps/rejected": -314.39495849609375, |
|
"loss": 0.6309, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.21836420893669128, |
|
"rewards/margins": 0.6935679912567139, |
|
"rewards/rejected": -0.911932110786438, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.5377236299748147e-09, |
|
"logits/chosen": 0.5073993802070618, |
|
"logits/rejected": 0.9775093197822571, |
|
"logps/chosen": -270.1071472167969, |
|
"logps/rejected": -317.7763977050781, |
|
"loss": 0.6822, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.27697494626045227, |
|
"rewards/margins": 0.5462560653686523, |
|
"rewards/rejected": -0.8232309818267822, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.9503781785795713e-09, |
|
"logits/chosen": -0.3081734776496887, |
|
"logits/rejected": 0.7983647584915161, |
|
"logps/chosen": -324.9632568359375, |
|
"logps/rejected": -354.402099609375, |
|
"loss": 0.5723, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.0758206844329834, |
|
"rewards/margins": 0.8021882772445679, |
|
"rewards/rejected": -0.878009021282196, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.416026102552732e-09, |
|
"logits/chosen": 0.3920813202857971, |
|
"logits/rejected": 1.4421265125274658, |
|
"logps/chosen": -302.9536437988281, |
|
"logps/rejected": -352.2631530761719, |
|
"loss": 0.6248, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.22983548045158386, |
|
"rewards/margins": 0.8996194005012512, |
|
"rewards/rejected": -1.1294549703598022, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.9347820230782295e-09, |
|
"logits/chosen": 0.027735590934753418, |
|
"logits/rejected": 0.36365261673927307, |
|
"logps/chosen": -318.6025390625, |
|
"logps/rejected": -359.25799560546875, |
|
"loss": 0.6865, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.2018052041530609, |
|
"rewards/margins": 0.5466940402984619, |
|
"rewards/rejected": -0.7484991550445557, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.5067491694100153e-09, |
|
"logits/chosen": -0.1223829835653305, |
|
"logits/rejected": 0.2881450355052948, |
|
"logps/chosen": -289.8230285644531, |
|
"logps/rejected": -360.09906005859375, |
|
"loss": 0.6747, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.1440747082233429, |
|
"rewards/margins": 0.8571825623512268, |
|
"rewards/rejected": -1.0012574195861816, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.1320193567288527e-09, |
|
"logits/chosen": 0.2805168032646179, |
|
"logits/rejected": 0.7300337553024292, |
|
"logps/chosen": -256.3475341796875, |
|
"logps/rejected": -297.0839538574219, |
|
"loss": 0.564, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.2623736560344696, |
|
"rewards/margins": 0.49820947647094727, |
|
"rewards/rejected": -0.7605831027030945, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 8.106729664475176e-10, |
|
"logits/chosen": -0.15276291966438293, |
|
"logits/rejected": 0.6538082957267761, |
|
"logps/chosen": -352.9091796875, |
|
"logps/rejected": -393.73797607421875, |
|
"loss": 0.6624, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.07146578282117844, |
|
"rewards/margins": 0.7627599239349365, |
|
"rewards/rejected": -0.8342257738113403, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5.427789289685347e-10, |
|
"logits/chosen": 0.2313707172870636, |
|
"logits/rejected": 1.0570056438446045, |
|
"logps/chosen": -367.5287170410156, |
|
"logps/rejected": -360.8330078125, |
|
"loss": 0.6087, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.035427868366241455, |
|
"rewards/margins": 0.8285888433456421, |
|
"rewards/rejected": -0.8640167713165283, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.2839470889836627e-10, |
|
"logits/chosen": -0.49842333793640137, |
|
"logits/rejected": 0.5836061239242554, |
|
"logps/chosen": -309.240234375, |
|
"logps/rejected": -367.9534912109375, |
|
"loss": 0.5799, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.18272840976715088, |
|
"rewards/margins": 0.7705845236778259, |
|
"rewards/rejected": -0.9533129930496216, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.6756629272085544e-10, |
|
"logits/chosen": -0.1911655068397522, |
|
"logits/rejected": 0.9592544436454773, |
|
"logps/chosen": -297.9132385253906, |
|
"logps/rejected": -368.43060302734375, |
|
"loss": 0.6029, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.12253910303115845, |
|
"rewards/margins": 0.7276279926300049, |
|
"rewards/rejected": -0.8501670956611633, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 6.032817893297793e-11, |
|
"logits/chosen": -0.25915825366973877, |
|
"logits/rejected": 1.385116696357727, |
|
"logps/chosen": -289.7322998046875, |
|
"logps/rejected": -346.7291564941406, |
|
"loss": 0.685, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.21489854156970978, |
|
"rewards/margins": 0.8257070779800415, |
|
"rewards/rejected": -1.0406055450439453, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 6.7033706447061635e-12, |
|
"logits/chosen": 0.4867172837257385, |
|
"logits/rejected": 1.1571818590164185, |
|
"logps/chosen": -337.714599609375, |
|
"logps/rejected": -358.3683166503906, |
|
"loss": 0.6851, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.18006440997123718, |
|
"rewards/margins": 0.681747317314148, |
|
"rewards/rejected": -0.8618118166923523, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 477, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6847315603082285, |
|
"train_runtime": 17806.8146, |
|
"train_samples_per_second": 3.433, |
|
"train_steps_per_second": 0.027 |
|
} |
|
], |
|
"logging_steps": 2, |
|
"max_steps": 477, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|