|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"eval_steps": 100, |
|
"global_step": 2776, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0007204610951008645, |
|
"grad_norm": 21.825801849365234, |
|
"learning_rate": 1.7985611510791367e-10, |
|
"logits/chosen": -1.539827823638916, |
|
"logits/rejected": -1.5469944477081299, |
|
"logps/chosen": -40.41275405883789, |
|
"logps/rejected": -44.19762420654297, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.007204610951008645, |
|
"grad_norm": 23.4193058013916, |
|
"learning_rate": 1.7985611510791365e-09, |
|
"logits/chosen": -1.6871981620788574, |
|
"logits/rejected": -1.6693940162658691, |
|
"logps/chosen": -48.1815299987793, |
|
"logps/rejected": -51.31031799316406, |
|
"loss": 0.6939, |
|
"rewards/accuracies": 0.3819444477558136, |
|
"rewards/chosen": -0.0012067599454894662, |
|
"rewards/margins": -0.001382419839501381, |
|
"rewards/rejected": 0.00017565980670042336, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01440922190201729, |
|
"grad_norm": 26.470735549926758, |
|
"learning_rate": 3.597122302158273e-09, |
|
"logits/chosen": -1.7020677328109741, |
|
"logits/rejected": -1.6882435083389282, |
|
"logps/chosen": -50.412376403808594, |
|
"logps/rejected": -53.254310607910156, |
|
"loss": 0.6941, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": 0.00035694619873538613, |
|
"rewards/margins": -0.0017174886306747794, |
|
"rewards/rejected": 0.00207443512044847, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.021613832853025938, |
|
"grad_norm": 31.81087303161621, |
|
"learning_rate": 5.3956834532374095e-09, |
|
"logits/chosen": -1.6516170501708984, |
|
"logits/rejected": -1.6449276208877563, |
|
"logps/chosen": -54.51926803588867, |
|
"logps/rejected": -57.81389617919922, |
|
"loss": 0.694, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": -0.0005903076380491257, |
|
"rewards/margins": -0.0014708719681948423, |
|
"rewards/rejected": 0.0008805643883533776, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02881844380403458, |
|
"grad_norm": 24.485918045043945, |
|
"learning_rate": 7.194244604316546e-09, |
|
"logits/chosen": -1.7025425434112549, |
|
"logits/rejected": -1.6985228061676025, |
|
"logps/chosen": -49.09718704223633, |
|
"logps/rejected": -52.62793731689453, |
|
"loss": 0.6947, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": -0.0021219542250037193, |
|
"rewards/margins": -0.002908582566305995, |
|
"rewards/rejected": 0.0007866283995099366, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03602305475504323, |
|
"grad_norm": 25.75558090209961, |
|
"learning_rate": 8.992805755395683e-09, |
|
"logits/chosen": -1.6198612451553345, |
|
"logits/rejected": -1.6186037063598633, |
|
"logps/chosen": -49.2431526184082, |
|
"logps/rejected": -51.3972282409668, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": 8.340943895746022e-05, |
|
"rewards/margins": 9.575006697559729e-05, |
|
"rewards/rejected": -1.234045521414373e-05, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.043227665706051875, |
|
"grad_norm": 35.717689514160156, |
|
"learning_rate": 1.0791366906474819e-08, |
|
"logits/chosen": -1.702419638633728, |
|
"logits/rejected": -1.6958341598510742, |
|
"logps/chosen": -57.57291793823242, |
|
"logps/rejected": -59.58625030517578, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": 0.0025610830634832382, |
|
"rewards/margins": 0.0014890721067786217, |
|
"rewards/rejected": 0.0010720104910433292, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.05043227665706052, |
|
"grad_norm": 20.242334365844727, |
|
"learning_rate": 1.2589928057553956e-08, |
|
"logits/chosen": -1.6722795963287354, |
|
"logits/rejected": -1.6661018133163452, |
|
"logps/chosen": -57.32216262817383, |
|
"logps/rejected": -60.748252868652344, |
|
"loss": 0.6935, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": 0.0012209347914904356, |
|
"rewards/margins": -0.0005330587737262249, |
|
"rewards/rejected": 0.0017539940308779478, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.05763688760806916, |
|
"grad_norm": 29.4974308013916, |
|
"learning_rate": 1.4388489208633092e-08, |
|
"logits/chosen": -1.7139599323272705, |
|
"logits/rejected": -1.7067283391952515, |
|
"logps/chosen": -58.90734100341797, |
|
"logps/rejected": -61.24401092529297, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.00023442548990715295, |
|
"rewards/margins": 0.001039124559611082, |
|
"rewards/rejected": -0.0008046992006711662, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.06484149855907781, |
|
"grad_norm": 30.03173065185547, |
|
"learning_rate": 1.618705035971223e-08, |
|
"logits/chosen": -1.6469614505767822, |
|
"logits/rejected": -1.643303632736206, |
|
"logps/chosen": -56.198219299316406, |
|
"logps/rejected": -58.478515625, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": 0.0021789357997477055, |
|
"rewards/margins": 0.0014319642214104533, |
|
"rewards/rejected": 0.0007469715783372521, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.07204610951008646, |
|
"grad_norm": 28.857566833496094, |
|
"learning_rate": 1.7985611510791365e-08, |
|
"logits/chosen": -1.7217485904693604, |
|
"logits/rejected": -1.7120834589004517, |
|
"logps/chosen": -51.54811477661133, |
|
"logps/rejected": -55.7042236328125, |
|
"loss": 0.6918, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -4.318228457123041e-05, |
|
"rewards/margins": 0.002846790011972189, |
|
"rewards/rejected": -0.002889972412958741, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0792507204610951, |
|
"grad_norm": 21.317718505859375, |
|
"learning_rate": 1.9784172661870502e-08, |
|
"logits/chosen": -1.5993636846542358, |
|
"logits/rejected": -1.5787856578826904, |
|
"logps/chosen": -55.75529098510742, |
|
"logps/rejected": -58.517738342285156, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": 0.0009529569069854915, |
|
"rewards/margins": 0.00015153829008340836, |
|
"rewards/rejected": 0.0008014187915250659, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.08645533141210375, |
|
"grad_norm": 22.020675659179688, |
|
"learning_rate": 2.1582733812949638e-08, |
|
"logits/chosen": -1.6349830627441406, |
|
"logits/rejected": -1.6326115131378174, |
|
"logps/chosen": -50.696022033691406, |
|
"logps/rejected": -53.475746154785156, |
|
"loss": 0.6944, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -6.050623414921574e-05, |
|
"rewards/margins": -0.002361572813242674, |
|
"rewards/rejected": 0.0023010666482150555, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.0936599423631124, |
|
"grad_norm": 27.489294052124023, |
|
"learning_rate": 2.3381294964028775e-08, |
|
"logits/chosen": -1.7193313837051392, |
|
"logits/rejected": -1.7148230075836182, |
|
"logps/chosen": -56.9023323059082, |
|
"logps/rejected": -59.1072998046875, |
|
"loss": 0.6941, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.0012045868206769228, |
|
"rewards/margins": -0.0017159022390842438, |
|
"rewards/rejected": 0.0005113151855766773, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.10086455331412104, |
|
"grad_norm": 26.305728912353516, |
|
"learning_rate": 2.517985611510791e-08, |
|
"logits/chosen": -1.606856346130371, |
|
"logits/rejected": -1.595958948135376, |
|
"logps/chosen": -52.90543746948242, |
|
"logps/rejected": -57.972129821777344, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": 0.002076763194054365, |
|
"rewards/margins": -0.00030962502933107316, |
|
"rewards/rejected": 0.0023863886017352343, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.10806916426512968, |
|
"grad_norm": 31.3708553314209, |
|
"learning_rate": 2.6978417266187048e-08, |
|
"logits/chosen": -1.6171735525131226, |
|
"logits/rejected": -1.6119439601898193, |
|
"logps/chosen": -54.98027801513672, |
|
"logps/rejected": -59.0755615234375, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 0.0010308735072612762, |
|
"rewards/margins": 0.0012253187596797943, |
|
"rewards/rejected": -0.00019444509234745055, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.11527377521613832, |
|
"grad_norm": 24.7769832611084, |
|
"learning_rate": 2.8776978417266184e-08, |
|
"logits/chosen": -1.6504337787628174, |
|
"logits/rejected": -1.631773591041565, |
|
"logps/chosen": -47.71480941772461, |
|
"logps/rejected": -51.67340850830078, |
|
"loss": 0.6935, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.00014348707918543369, |
|
"rewards/margins": -0.0006808551261201501, |
|
"rewards/rejected": 0.0008243421907536685, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.12247838616714697, |
|
"grad_norm": 28.69447898864746, |
|
"learning_rate": 3.057553956834532e-08, |
|
"logits/chosen": -1.6642261743545532, |
|
"logits/rejected": -1.6457111835479736, |
|
"logps/chosen": -51.293724060058594, |
|
"logps/rejected": -54.08746337890625, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 0.0019911406561732292, |
|
"rewards/margins": 0.0023246805649250746, |
|
"rewards/rejected": -0.0003335399378556758, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.12968299711815562, |
|
"grad_norm": 22.81742286682129, |
|
"learning_rate": 3.237410071942446e-08, |
|
"logits/chosen": -1.723880410194397, |
|
"logits/rejected": -1.7132070064544678, |
|
"logps/chosen": -51.556312561035156, |
|
"logps/rejected": -54.2805290222168, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.0016957769403234124, |
|
"rewards/margins": 0.00254463916644454, |
|
"rewards/rejected": -0.0008488625171594322, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.13688760806916425, |
|
"grad_norm": 24.014127731323242, |
|
"learning_rate": 3.4172661870503594e-08, |
|
"logits/chosen": -1.7289400100708008, |
|
"logits/rejected": -1.7246164083480835, |
|
"logps/chosen": -48.183475494384766, |
|
"logps/rejected": -52.82011032104492, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 0.0009596001473255455, |
|
"rewards/margins": 0.0030194323044270277, |
|
"rewards/rejected": -0.002059832215309143, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.1440922190201729, |
|
"grad_norm": 33.81916046142578, |
|
"learning_rate": 3.597122302158273e-08, |
|
"logits/chosen": -1.68062424659729, |
|
"logits/rejected": -1.6727863550186157, |
|
"logps/chosen": -49.81970977783203, |
|
"logps/rejected": -54.09613800048828, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": 2.1038798877270892e-05, |
|
"rewards/margins": 0.0013697268441319466, |
|
"rewards/rejected": -0.001348688150756061, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.15129682997118155, |
|
"grad_norm": 27.09559440612793, |
|
"learning_rate": 3.776978417266187e-08, |
|
"logits/chosen": -1.7069154977798462, |
|
"logits/rejected": -1.6970031261444092, |
|
"logps/chosen": -49.16070556640625, |
|
"logps/rejected": -50.410491943359375, |
|
"loss": 0.6918, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.002084630075842142, |
|
"rewards/margins": 0.0027677714824676514, |
|
"rewards/rejected": -0.0006831414066255093, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.1585014409221902, |
|
"grad_norm": 26.769506454467773, |
|
"learning_rate": 3.9568345323741003e-08, |
|
"logits/chosen": -1.62959885597229, |
|
"logits/rejected": -1.6223684549331665, |
|
"logps/chosen": -53.7839241027832, |
|
"logps/rejected": -60.50789260864258, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.004310721065849066, |
|
"rewards/margins": 0.003480097744613886, |
|
"rewards/rejected": 0.0008306234958581626, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.16570605187319884, |
|
"grad_norm": 24.570886611938477, |
|
"learning_rate": 4.136690647482014e-08, |
|
"logits/chosen": -1.6772973537445068, |
|
"logits/rejected": -1.675528883934021, |
|
"logps/chosen": -51.2908935546875, |
|
"logps/rejected": -55.5107536315918, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.0018524869810789824, |
|
"rewards/margins": 0.0003210431314073503, |
|
"rewards/rejected": 0.0015314440242946148, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.1729106628242075, |
|
"grad_norm": 26.605432510375977, |
|
"learning_rate": 4.3165467625899276e-08, |
|
"logits/chosen": -1.7227964401245117, |
|
"logits/rejected": -1.7107378244400024, |
|
"logps/chosen": -55.66338348388672, |
|
"logps/rejected": -58.241539001464844, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.002910902723670006, |
|
"rewards/margins": -0.0001255483366549015, |
|
"rewards/rejected": 0.003036451293155551, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.18011527377521613, |
|
"grad_norm": 35.790584564208984, |
|
"learning_rate": 4.496402877697841e-08, |
|
"logits/chosen": -1.6011412143707275, |
|
"logits/rejected": -1.5909541845321655, |
|
"logps/chosen": -57.142967224121094, |
|
"logps/rejected": -59.007530212402344, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.0032608923502266407, |
|
"rewards/margins": 0.0020222733728587627, |
|
"rewards/rejected": 0.0012386186281219125, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.1873198847262248, |
|
"grad_norm": 29.72098731994629, |
|
"learning_rate": 4.676258992805755e-08, |
|
"logits/chosen": -1.6467279195785522, |
|
"logits/rejected": -1.6427417993545532, |
|
"logps/chosen": -56.70636749267578, |
|
"logps/rejected": -59.045448303222656, |
|
"loss": 0.6941, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.0010839566821232438, |
|
"rewards/margins": -0.0017195299733430147, |
|
"rewards/rejected": 0.0028034865390509367, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.19452449567723343, |
|
"grad_norm": 30.71733856201172, |
|
"learning_rate": 4.8561151079136686e-08, |
|
"logits/chosen": -1.7268844842910767, |
|
"logits/rejected": -1.713024377822876, |
|
"logps/chosen": -53.37934494018555, |
|
"logps/rejected": -56.01610565185547, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": 0.002369955414906144, |
|
"rewards/margins": 0.00020498293451964855, |
|
"rewards/rejected": 0.0021649724803864956, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.2017291066282421, |
|
"grad_norm": 27.323898315429688, |
|
"learning_rate": 4.999992091672379e-08, |
|
"logits/chosen": -1.6942745447158813, |
|
"logits/rejected": -1.679835557937622, |
|
"logps/chosen": -48.37071228027344, |
|
"logps/rejected": -50.23174285888672, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.002187022939324379, |
|
"rewards/margins": 0.0021849684417247772, |
|
"rewards/rejected": 2.054649030469591e-06, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.20893371757925072, |
|
"grad_norm": 27.54994773864746, |
|
"learning_rate": 4.999715305459108e-08, |
|
"logits/chosen": -1.7232547998428345, |
|
"logits/rejected": -1.7125844955444336, |
|
"logps/chosen": -51.24810028076172, |
|
"logps/rejected": -52.99908447265625, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": 0.0002954238443635404, |
|
"rewards/margins": -6.616571045015007e-05, |
|
"rewards/rejected": 0.00036158948205411434, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.21613832853025935, |
|
"grad_norm": 28.071247100830078, |
|
"learning_rate": 4.9990431528966836e-08, |
|
"logits/chosen": -1.7029025554656982, |
|
"logits/rejected": -1.6867132186889648, |
|
"logps/chosen": -55.0485954284668, |
|
"logps/rejected": -58.52321243286133, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": 0.0034954734146595, |
|
"rewards/margins": 0.0020077379886060953, |
|
"rewards/rejected": 0.0014877354260534048, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.22334293948126802, |
|
"grad_norm": 30.036535263061523, |
|
"learning_rate": 4.997975740295813e-08, |
|
"logits/chosen": -1.5882554054260254, |
|
"logits/rejected": -1.5789012908935547, |
|
"logps/chosen": -54.333534240722656, |
|
"logps/rejected": -57.2091064453125, |
|
"loss": 0.6904, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.004001539200544357, |
|
"rewards/margins": 0.0055999672040343285, |
|
"rewards/rejected": -0.0015984283527359366, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.23054755043227665, |
|
"grad_norm": 23.738216400146484, |
|
"learning_rate": 4.996513236483331e-08, |
|
"logits/chosen": -1.6974598169326782, |
|
"logits/rejected": -1.6820309162139893, |
|
"logps/chosen": -54.326332092285156, |
|
"logps/rejected": -59.19081497192383, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": 0.002228650962933898, |
|
"rewards/margins": 0.0006980501930229366, |
|
"rewards/rejected": 0.0015306004788726568, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.2377521613832853, |
|
"grad_norm": 21.979890823364258, |
|
"learning_rate": 4.9946558727754974e-08, |
|
"logits/chosen": -1.6695890426635742, |
|
"logits/rejected": -1.6626873016357422, |
|
"logps/chosen": -57.21343231201172, |
|
"logps/rejected": -56.08220672607422, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.0033706065732985735, |
|
"rewards/margins": -0.0003914666303899139, |
|
"rewards/rejected": 0.003762073116376996, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.24495677233429394, |
|
"grad_norm": 25.381389617919922, |
|
"learning_rate": 4.9924039429414086e-08, |
|
"logits/chosen": -1.723496437072754, |
|
"logits/rejected": -1.7127540111541748, |
|
"logps/chosen": -58.3027458190918, |
|
"logps/rejected": -60.19641876220703, |
|
"loss": 0.6911, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.0054273479618132114, |
|
"rewards/margins": 0.004153185058385134, |
|
"rewards/rejected": 0.0012741630198433995, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.2521613832853026, |
|
"grad_norm": 35.49002456665039, |
|
"learning_rate": 4.989757803156537e-08, |
|
"logits/chosen": -1.655975580215454, |
|
"logits/rejected": -1.6502435207366943, |
|
"logps/chosen": -52.090789794921875, |
|
"logps/rejected": -55.48548126220703, |
|
"loss": 0.69, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.006078408099710941, |
|
"rewards/margins": 0.0064167543314397335, |
|
"rewards/rejected": -0.00033834631904028356, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.25936599423631124, |
|
"grad_norm": 30.3967227935791, |
|
"learning_rate": 4.986717871946393e-08, |
|
"logits/chosen": -1.6415973901748657, |
|
"logits/rejected": -1.6323236227035522, |
|
"logps/chosen": -62.180633544921875, |
|
"logps/rejected": -65.96495819091797, |
|
"loss": 0.6911, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.005377051420509815, |
|
"rewards/margins": 0.004161593038588762, |
|
"rewards/rejected": 0.0012154586147516966, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.2665706051873199, |
|
"grad_norm": 35.132511138916016, |
|
"learning_rate": 4.983284630120331e-08, |
|
"logits/chosen": -1.6300022602081299, |
|
"logits/rejected": -1.6275346279144287, |
|
"logps/chosen": -55.072723388671875, |
|
"logps/rejected": -60.9841423034668, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 0.0034929229877889156, |
|
"rewards/margins": 0.0007132775499485433, |
|
"rewards/rejected": 0.002779645612463355, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.2737752161383285, |
|
"grad_norm": 24.042118072509766, |
|
"learning_rate": 4.979458620695505e-08, |
|
"logits/chosen": -1.7153857946395874, |
|
"logits/rejected": -1.7088816165924072, |
|
"logps/chosen": -53.499717712402344, |
|
"logps/rejected": -55.354148864746094, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.005800359416753054, |
|
"rewards/margins": 0.002978697419166565, |
|
"rewards/rejected": 0.002821662463247776, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.28097982708933716, |
|
"grad_norm": 33.635623931884766, |
|
"learning_rate": 4.975240448810977e-08, |
|
"logits/chosen": -1.6882060766220093, |
|
"logits/rejected": -1.678504228591919, |
|
"logps/chosen": -55.21924591064453, |
|
"logps/rejected": -59.813438415527344, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.0060536982491612434, |
|
"rewards/margins": 0.003763732733204961, |
|
"rewards/rejected": 0.0022899655159562826, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.2881844380403458, |
|
"grad_norm": 30.458513259887695, |
|
"learning_rate": 4.970630781632009e-08, |
|
"logits/chosen": -1.70065176486969, |
|
"logits/rejected": -1.6941430568695068, |
|
"logps/chosen": -51.30016326904297, |
|
"logps/rejected": -53.36370849609375, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.008028768934309483, |
|
"rewards/margins": 0.0026502818800508976, |
|
"rewards/rejected": 0.005378487519919872, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.2953890489913545, |
|
"grad_norm": 33.6281852722168, |
|
"learning_rate": 4.965630348244542e-08, |
|
"logits/chosen": -1.6540043354034424, |
|
"logits/rejected": -1.6505072116851807, |
|
"logps/chosen": -54.006988525390625, |
|
"logps/rejected": -56.83721923828125, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.00576495798304677, |
|
"rewards/margins": 0.0007124156691133976, |
|
"rewards/rejected": 0.0050525423139333725, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.3025936599423631, |
|
"grad_norm": 23.50993537902832, |
|
"learning_rate": 4.9602399395398786e-08, |
|
"logits/chosen": -1.619267463684082, |
|
"logits/rejected": -1.627747893333435, |
|
"logps/chosen": -52.449256896972656, |
|
"logps/rejected": -56.338417053222656, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.003929648548364639, |
|
"rewards/margins": -0.00018039104179479182, |
|
"rewards/rejected": 0.0041100396774709225, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.30979827089337175, |
|
"grad_norm": 21.24608612060547, |
|
"learning_rate": 4.95446040808959e-08, |
|
"logits/chosen": -1.645350694656372, |
|
"logits/rejected": -1.6449620723724365, |
|
"logps/chosen": -52.73632049560547, |
|
"logps/rejected": -53.45061111450195, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.006634493824094534, |
|
"rewards/margins": 0.004952923860400915, |
|
"rewards/rejected": 0.0016815703129395843, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.3170028818443804, |
|
"grad_norm": 22.69368553161621, |
|
"learning_rate": 4.948292668010676e-08, |
|
"logits/chosen": -1.6465650796890259, |
|
"logits/rejected": -1.637599229812622, |
|
"logps/chosen": -52.00286102294922, |
|
"logps/rejected": -56.63525390625, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 0.0058380914852023125, |
|
"rewards/margins": 0.0039370255544781685, |
|
"rewards/rejected": 0.0019010662799701095, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.3242074927953891, |
|
"grad_norm": 28.970970153808594, |
|
"learning_rate": 4.941737694820975e-08, |
|
"logits/chosen": -1.6554502248764038, |
|
"logits/rejected": -1.63752019405365, |
|
"logps/chosen": -60.775146484375, |
|
"logps/rejected": -59.3543701171875, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.008422751910984516, |
|
"rewards/margins": 0.003841835306957364, |
|
"rewards/rejected": 0.004580915905535221, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.3314121037463977, |
|
"grad_norm": 37.377037048339844, |
|
"learning_rate": 4.93479652528488e-08, |
|
"logits/chosen": -1.6547534465789795, |
|
"logits/rejected": -1.6382789611816406, |
|
"logps/chosen": -60.82624053955078, |
|
"logps/rejected": -63.30852127075195, |
|
"loss": 0.6906, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.008778962306678295, |
|
"rewards/margins": 0.005369237158447504, |
|
"rewards/rejected": 0.003409724682569504, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.33861671469740634, |
|
"grad_norm": 32.41299819946289, |
|
"learning_rate": 4.9274702572493555e-08, |
|
"logits/chosen": -1.7175623178482056, |
|
"logits/rejected": -1.6901031732559204, |
|
"logps/chosen": -60.22943878173828, |
|
"logps/rejected": -61.229896545410156, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.004740011878311634, |
|
"rewards/margins": 0.002111945766955614, |
|
"rewards/rejected": 0.002628065412864089, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.345821325648415, |
|
"grad_norm": 31.445178985595703, |
|
"learning_rate": 4.9197600494702955e-08, |
|
"logits/chosen": -1.784799337387085, |
|
"logits/rejected": -1.7709108591079712, |
|
"logps/chosen": -48.04889678955078, |
|
"logps/rejected": -51.65523147583008, |
|
"loss": 0.6899, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.009241985157132149, |
|
"rewards/margins": 0.006657962687313557, |
|
"rewards/rejected": 0.0025840220041573048, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.3530259365994236, |
|
"grad_norm": 37.82373809814453, |
|
"learning_rate": 4.9116671214292526e-08, |
|
"logits/chosen": -1.6648657321929932, |
|
"logits/rejected": -1.654442548751831, |
|
"logps/chosen": -53.06227493286133, |
|
"logps/rejected": -56.86822509765625, |
|
"loss": 0.6899, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.010359242558479309, |
|
"rewards/margins": 0.0067110308445990086, |
|
"rewards/rejected": 0.003648211481049657, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.36023054755043227, |
|
"grad_norm": 26.230350494384766, |
|
"learning_rate": 4.903192753140557e-08, |
|
"logits/chosen": -1.6695973873138428, |
|
"logits/rejected": -1.6475780010223389, |
|
"logps/chosen": -48.755794525146484, |
|
"logps/rejected": -53.21979904174805, |
|
"loss": 0.6892, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.010777877643704414, |
|
"rewards/margins": 0.00803940650075674, |
|
"rewards/rejected": 0.002738471608608961, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.36743515850144093, |
|
"grad_norm": 25.887540817260742, |
|
"learning_rate": 4.894338284948866e-08, |
|
"logits/chosen": -1.775498628616333, |
|
"logits/rejected": -1.7581126689910889, |
|
"logps/chosen": -52.77173614501953, |
|
"logps/rejected": -55.65742111206055, |
|
"loss": 0.6892, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.011902733705937862, |
|
"rewards/margins": 0.008151276968419552, |
|
"rewards/rejected": 0.0037514567375183105, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.3746397694524496, |
|
"grad_norm": 32.86073684692383, |
|
"learning_rate": 4.8851051173171656e-08, |
|
"logits/chosen": -1.670640230178833, |
|
"logits/rejected": -1.666121482849121, |
|
"logps/chosen": -61.16508865356445, |
|
"logps/rejected": -64.01704406738281, |
|
"loss": 0.6911, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.008481292985379696, |
|
"rewards/margins": 0.004413911607116461, |
|
"rewards/rejected": 0.00406738230958581, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.3818443804034582, |
|
"grad_norm": 30.485824584960938, |
|
"learning_rate": 4.8754947106052696e-08, |
|
"logits/chosen": -1.6040318012237549, |
|
"logits/rejected": -1.5821549892425537, |
|
"logps/chosen": -53.919700622558594, |
|
"logps/rejected": -55.5211067199707, |
|
"loss": 0.6894, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.01268856506794691, |
|
"rewards/margins": 0.007814417593181133, |
|
"rewards/rejected": 0.004874147940427065, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.38904899135446686, |
|
"grad_norm": 29.459304809570312, |
|
"learning_rate": 4.865508584838841e-08, |
|
"logits/chosen": -1.6308727264404297, |
|
"logits/rejected": -1.6111412048339844, |
|
"logps/chosen": -52.30418014526367, |
|
"logps/rejected": -55.19745635986328, |
|
"loss": 0.6886, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.014297975227236748, |
|
"rewards/margins": 0.00937692541629076, |
|
"rewards/rejected": 0.004921050742268562, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.3962536023054755, |
|
"grad_norm": 27.596515655517578, |
|
"learning_rate": 4.855148319468979e-08, |
|
"logits/chosen": -1.5790401697158813, |
|
"logits/rejected": -1.5705499649047852, |
|
"logps/chosen": -54.7948112487793, |
|
"logps/rejected": -55.06526565551758, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": 0.011280642822384834, |
|
"rewards/margins": 0.0034813634119927883, |
|
"rewards/rejected": 0.007799278944730759, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.4034582132564842, |
|
"grad_norm": 28.585477828979492, |
|
"learning_rate": 4.8444155531224065e-08, |
|
"logits/chosen": -1.7170881032943726, |
|
"logits/rejected": -1.7071220874786377, |
|
"logps/chosen": -54.61487579345703, |
|
"logps/rejected": -56.94572067260742, |
|
"loss": 0.6905, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.011090746149420738, |
|
"rewards/margins": 0.005567173473536968, |
|
"rewards/rejected": 0.005523574538528919, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.4106628242074928, |
|
"grad_norm": 28.21603775024414, |
|
"learning_rate": 4.833311983342292e-08, |
|
"logits/chosen": -1.6930019855499268, |
|
"logits/rejected": -1.6664783954620361, |
|
"logps/chosen": -60.13446807861328, |
|
"logps/rejected": -62.42277145385742, |
|
"loss": 0.6891, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.013689065352082253, |
|
"rewards/margins": 0.008529866114258766, |
|
"rewards/rejected": 0.005159198306500912, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.41786743515850144, |
|
"grad_norm": 30.411657333374023, |
|
"learning_rate": 4.821839366319768e-08, |
|
"logits/chosen": -1.718141794204712, |
|
"logits/rejected": -1.7054805755615234, |
|
"logps/chosen": -49.07467269897461, |
|
"logps/rejected": -51.04106521606445, |
|
"loss": 0.6881, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.014034624211490154, |
|
"rewards/margins": 0.010366623289883137, |
|
"rewards/rejected": 0.0036680016200989485, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.4250720461095101, |
|
"grad_norm": 24.9964599609375, |
|
"learning_rate": 4.8099995166161536e-08, |
|
"logits/chosen": -1.6703847646713257, |
|
"logits/rejected": -1.6679855585098267, |
|
"logps/chosen": -54.932334899902344, |
|
"logps/rejected": -61.33048629760742, |
|
"loss": 0.6893, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 0.016779553145170212, |
|
"rewards/margins": 0.007991639897227287, |
|
"rewards/rejected": 0.00878791231662035, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.4322766570605187, |
|
"grad_norm": 28.37261390686035, |
|
"learning_rate": 4.797794306875963e-08, |
|
"logits/chosen": -1.7611091136932373, |
|
"logits/rejected": -1.751899003982544, |
|
"logps/chosen": -52.07770919799805, |
|
"logps/rejected": -57.04071044921875, |
|
"loss": 0.6886, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.013305628672242165, |
|
"rewards/margins": 0.009486498311161995, |
|
"rewards/rejected": 0.0038191310595721006, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.43948126801152737, |
|
"grad_norm": 33.9644660949707, |
|
"learning_rate": 4.785225667530716e-08, |
|
"logits/chosen": -1.6755338907241821, |
|
"logits/rejected": -1.6584323644638062, |
|
"logps/chosen": -57.341094970703125, |
|
"logps/rejected": -58.7607307434082, |
|
"loss": 0.69, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.012329719960689545, |
|
"rewards/margins": 0.006740064360201359, |
|
"rewards/rejected": 0.0055896565318107605, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.44668587896253603, |
|
"grad_norm": 24.328838348388672, |
|
"learning_rate": 4.772295586493613e-08, |
|
"logits/chosen": -1.7163664102554321, |
|
"logits/rejected": -1.7021703720092773, |
|
"logps/chosen": -49.007179260253906, |
|
"logps/rejected": -51.45275115966797, |
|
"loss": 0.6898, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.013197936117649078, |
|
"rewards/margins": 0.007094507105648518, |
|
"rewards/rejected": 0.0061034285463392735, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.4538904899135447, |
|
"grad_norm": 21.8724308013916, |
|
"learning_rate": 4.759006108845116e-08, |
|
"logits/chosen": -1.7142736911773682, |
|
"logits/rejected": -1.7078378200531006, |
|
"logps/chosen": -49.206016540527344, |
|
"logps/rejected": -54.04901123046875, |
|
"loss": 0.6856, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.017311111092567444, |
|
"rewards/margins": 0.015471754595637321, |
|
"rewards/rejected": 0.0018393562640994787, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.4610951008645533, |
|
"grad_norm": 27.477933883666992, |
|
"learning_rate": 4.7453593365094926e-08, |
|
"logits/chosen": -1.5769332647323608, |
|
"logits/rejected": -1.571119785308838, |
|
"logps/chosen": -51.74431610107422, |
|
"logps/rejected": -55.1117057800293, |
|
"loss": 0.6874, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.01672833226621151, |
|
"rewards/margins": 0.01204992551356554, |
|
"rewards/rejected": 0.004678409546613693, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.46829971181556196, |
|
"grad_norm": 30.72771644592285, |
|
"learning_rate": 4.731357427922361e-08, |
|
"logits/chosen": -1.7531722784042358, |
|
"logits/rejected": -1.7257356643676758, |
|
"logps/chosen": -51.89509201049805, |
|
"logps/rejected": -52.538978576660156, |
|
"loss": 0.6872, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.019248802214860916, |
|
"rewards/margins": 0.012345701456069946, |
|
"rewards/rejected": 0.00690310075879097, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.4755043227665706, |
|
"grad_norm": 34.05529022216797, |
|
"learning_rate": 4.71700259768931e-08, |
|
"logits/chosen": -1.6869693994522095, |
|
"logits/rejected": -1.685805082321167, |
|
"logps/chosen": -53.994873046875, |
|
"logps/rejected": -57.93560028076172, |
|
"loss": 0.6852, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.018321344628930092, |
|
"rewards/margins": 0.016431275755167007, |
|
"rewards/rejected": 0.0018900686409324408, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.4827089337175792, |
|
"grad_norm": 31.389686584472656, |
|
"learning_rate": 4.7022971162356176e-08, |
|
"logits/chosen": -1.6463180780410767, |
|
"logits/rejected": -1.6191755533218384, |
|
"logps/chosen": -56.87351608276367, |
|
"logps/rejected": -57.94671630859375, |
|
"loss": 0.6863, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.01981157623231411, |
|
"rewards/margins": 0.014039942994713783, |
|
"rewards/rejected": 0.005771632306277752, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.4899135446685879, |
|
"grad_norm": 36.83378601074219, |
|
"learning_rate": 4.6872433094471577e-08, |
|
"logits/chosen": -1.5911400318145752, |
|
"logits/rejected": -1.579105257987976, |
|
"logps/chosen": -56.64697265625, |
|
"logps/rejected": -57.130035400390625, |
|
"loss": 0.6874, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.019423145800828934, |
|
"rewards/margins": 0.012030874378979206, |
|
"rewards/rejected": 0.007392272353172302, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.49711815561959655, |
|
"grad_norm": 32.14356994628906, |
|
"learning_rate": 4.671843558302522e-08, |
|
"logits/chosen": -1.6711755990982056, |
|
"logits/rejected": -1.6611177921295166, |
|
"logps/chosen": -54.51006317138672, |
|
"logps/rejected": -58.520973205566406, |
|
"loss": 0.6889, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.01895291917026043, |
|
"rewards/margins": 0.00888957642018795, |
|
"rewards/rejected": 0.01006334088742733, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.5043227665706052, |
|
"grad_norm": 27.02754020690918, |
|
"learning_rate": 4.656100298496439e-08, |
|
"logits/chosen": -1.6875331401824951, |
|
"logits/rejected": -1.6737067699432373, |
|
"logps/chosen": -53.4271240234375, |
|
"logps/rejected": -57.33203887939453, |
|
"loss": 0.6863, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.020056720823049545, |
|
"rewards/margins": 0.014298888854682446, |
|
"rewards/rejected": 0.005757831037044525, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.5115273775216138, |
|
"grad_norm": 26.375259399414062, |
|
"learning_rate": 4.640016020054527e-08, |
|
"logits/chosen": -1.671545386314392, |
|
"logits/rejected": -1.657135009765625, |
|
"logps/chosen": -47.39130783081055, |
|
"logps/rejected": -50.1197395324707, |
|
"loss": 0.6862, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.020558740943670273, |
|
"rewards/margins": 0.014566670171916485, |
|
"rewards/rejected": 0.005992068909108639, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.5187319884726225, |
|
"grad_norm": 33.271087646484375, |
|
"learning_rate": 4.6235932669394676e-08, |
|
"logits/chosen": -1.6350809335708618, |
|
"logits/rejected": -1.620134711265564, |
|
"logps/chosen": -56.80983352661133, |
|
"logps/rejected": -60.5606803894043, |
|
"loss": 0.687, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.020696226507425308, |
|
"rewards/margins": 0.012868310324847698, |
|
"rewards/rejected": 0.00782791618257761, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.5259365994236311, |
|
"grad_norm": 25.984638214111328, |
|
"learning_rate": 4.6068346366486325e-08, |
|
"logits/chosen": -1.6507568359375, |
|
"logits/rejected": -1.6315109729766846, |
|
"logps/chosen": -55.800132751464844, |
|
"logps/rejected": -58.299346923828125, |
|
"loss": 0.6857, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.020775150507688522, |
|
"rewards/margins": 0.015492427162826061, |
|
"rewards/rejected": 0.005282718688249588, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.5331412103746398, |
|
"grad_norm": 27.39038848876953, |
|
"learning_rate": 4.589742779803259e-08, |
|
"logits/chosen": -1.6461410522460938, |
|
"logits/rejected": -1.6439073085784912, |
|
"logps/chosen": -53.59571075439453, |
|
"logps/rejected": -52.81595993041992, |
|
"loss": 0.6906, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.01788356341421604, |
|
"rewards/margins": 0.005540410988032818, |
|
"rewards/rejected": 0.0123431496322155, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.5403458213256485, |
|
"grad_norm": 26.366432189941406, |
|
"learning_rate": 4.5723203997292146e-08, |
|
"logits/chosen": -1.6698424816131592, |
|
"logits/rejected": -1.6543042659759521, |
|
"logps/chosen": -55.21739959716797, |
|
"logps/rejected": -57.220542907714844, |
|
"loss": 0.6836, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.02430053800344467, |
|
"rewards/margins": 0.0198974572122097, |
|
"rewards/rejected": 0.004403082188218832, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.547550432276657, |
|
"grad_norm": 33.0073356628418, |
|
"learning_rate": 4.554570252029421e-08, |
|
"logits/chosen": -1.6031173467636108, |
|
"logits/rejected": -1.6015863418579102, |
|
"logps/chosen": -52.24799728393555, |
|
"logps/rejected": -55.23774337768555, |
|
"loss": 0.6879, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.019535819068551064, |
|
"rewards/margins": 0.01108732633292675, |
|
"rewards/rejected": 0.008448492735624313, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.5547550432276657, |
|
"grad_norm": 30.090187072753906, |
|
"learning_rate": 4.536495144148021e-08, |
|
"logits/chosen": -1.6216052770614624, |
|
"logits/rejected": -1.6167309284210205, |
|
"logps/chosen": -49.6798095703125, |
|
"logps/rejected": -53.358123779296875, |
|
"loss": 0.6834, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.02593979239463806, |
|
"rewards/margins": 0.02041347324848175, |
|
"rewards/rejected": 0.005526319611817598, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.5619596541786743, |
|
"grad_norm": 23.466976165771484, |
|
"learning_rate": 4.518097934926339e-08, |
|
"logits/chosen": -1.6407028436660767, |
|
"logits/rejected": -1.6307079792022705, |
|
"logps/chosen": -55.18608856201172, |
|
"logps/rejected": -57.64618682861328, |
|
"loss": 0.6848, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.02296794392168522, |
|
"rewards/margins": 0.017504002898931503, |
|
"rewards/rejected": 0.0054639410227537155, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.569164265129683, |
|
"grad_norm": 27.666854858398438, |
|
"learning_rate": 4.499381534150714e-08, |
|
"logits/chosen": -1.685529112815857, |
|
"logits/rejected": -1.6761360168457031, |
|
"logps/chosen": -56.853492736816406, |
|
"logps/rejected": -61.52440643310547, |
|
"loss": 0.6854, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.022552262991666794, |
|
"rewards/margins": 0.016366995871067047, |
|
"rewards/rejected": 0.006185270380228758, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.5763688760806917, |
|
"grad_norm": 30.78841781616211, |
|
"learning_rate": 4.48034890209227e-08, |
|
"logits/chosen": -1.670078992843628, |
|
"logits/rejected": -1.6596672534942627, |
|
"logps/chosen": -53.268516540527344, |
|
"logps/rejected": -55.402740478515625, |
|
"loss": 0.6854, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.024996964260935783, |
|
"rewards/margins": 0.016293780878186226, |
|
"rewards/rejected": 0.008703185245394707, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.5835734870317003, |
|
"grad_norm": 23.246274948120117, |
|
"learning_rate": 4.4610030490387154e-08, |
|
"logits/chosen": -1.6598783731460571, |
|
"logits/rejected": -1.6620323657989502, |
|
"logps/chosen": -51.27886199951172, |
|
"logps/rejected": -55.02397537231445, |
|
"loss": 0.6864, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.020924841985106468, |
|
"rewards/margins": 0.014275836758315563, |
|
"rewards/rejected": 0.006649008486419916, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.590778097982709, |
|
"grad_norm": 25.784561157226562, |
|
"learning_rate": 4.4413470348182124e-08, |
|
"logits/chosen": -1.693752646446228, |
|
"logits/rejected": -1.6728289127349854, |
|
"logps/chosen": -53.787078857421875, |
|
"logps/rejected": -54.61577224731445, |
|
"loss": 0.6851, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.02386101894080639, |
|
"rewards/margins": 0.01699705794453621, |
|
"rewards/rejected": 0.006863957736641169, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.5979827089337176, |
|
"grad_norm": 26.125385284423828, |
|
"learning_rate": 4.421383968315427e-08, |
|
"logits/chosen": -1.6544780731201172, |
|
"logits/rejected": -1.6471678018569946, |
|
"logps/chosen": -48.14168930053711, |
|
"logps/rejected": -52.247154235839844, |
|
"loss": 0.6797, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.03294381499290466, |
|
"rewards/margins": 0.02802552655339241, |
|
"rewards/rejected": 0.00491828890517354, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.6051873198847262, |
|
"grad_norm": 21.14900779724121, |
|
"learning_rate": 4.4011170069798126e-08, |
|
"logits/chosen": -1.6627849340438843, |
|
"logits/rejected": -1.6545110940933228, |
|
"logps/chosen": -53.29193115234375, |
|
"logps/rejected": -54.93217849731445, |
|
"loss": 0.6832, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.0249174851924181, |
|
"rewards/margins": 0.02101990208029747, |
|
"rewards/rejected": 0.003897582646459341, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.6123919308357348, |
|
"grad_norm": 38.04325866699219, |
|
"learning_rate": 4.380549356326208e-08, |
|
"logits/chosen": -1.6758836507797241, |
|
"logits/rejected": -1.663313627243042, |
|
"logps/chosen": -54.3109016418457, |
|
"logps/rejected": -58.55974197387695, |
|
"loss": 0.6822, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.03060699999332428, |
|
"rewards/margins": 0.023099634796380997, |
|
"rewards/rejected": 0.007507366128265858, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.6195965417867435, |
|
"grad_norm": 25.8773193359375, |
|
"learning_rate": 4.359684269427848e-08, |
|
"logits/chosen": -1.6748501062393188, |
|
"logits/rejected": -1.6620460748672485, |
|
"logps/chosen": -65.36933898925781, |
|
"logps/rejected": -65.69126892089844, |
|
"loss": 0.6842, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.025393366813659668, |
|
"rewards/margins": 0.019018063321709633, |
|
"rewards/rejected": 0.006375306751579046, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.6268011527377522, |
|
"grad_norm": 28.597139358520508, |
|
"learning_rate": 4.3385250464018355e-08, |
|
"logits/chosen": -1.7144954204559326, |
|
"logits/rejected": -1.7035188674926758, |
|
"logps/chosen": -54.55393600463867, |
|
"logps/rejected": -58.47465896606445, |
|
"loss": 0.6822, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.02745116874575615, |
|
"rewards/margins": 0.02292410284280777, |
|
"rewards/rejected": 0.004527065437287092, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.6340057636887608, |
|
"grad_norm": 28.048931121826172, |
|
"learning_rate": 4.3170750338871806e-08, |
|
"logits/chosen": -1.7362436056137085, |
|
"logits/rejected": -1.7299144268035889, |
|
"logps/chosen": -53.04833221435547, |
|
"logps/rejected": -54.4532585144043, |
|
"loss": 0.6873, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.020626841112971306, |
|
"rewards/margins": 0.012741965241730213, |
|
"rewards/rejected": 0.007884878665208817, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.6412103746397695, |
|
"grad_norm": 26.831119537353516, |
|
"learning_rate": 4.295337624515485e-08, |
|
"logits/chosen": -1.7312161922454834, |
|
"logits/rejected": -1.7202436923980713, |
|
"logps/chosen": -50.97220993041992, |
|
"logps/rejected": -52.995948791503906, |
|
"loss": 0.6824, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.02905122935771942, |
|
"rewards/margins": 0.022564779967069626, |
|
"rewards/rejected": 0.0064864493906497955, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.6484149855907781, |
|
"grad_norm": 31.435150146484375, |
|
"learning_rate": 4.273316256374342e-08, |
|
"logits/chosen": -1.5930171012878418, |
|
"logits/rejected": -1.5941686630249023, |
|
"logps/chosen": -60.389488220214844, |
|
"logps/rejected": -64.2243881225586, |
|
"loss": 0.6879, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.020148711279034615, |
|
"rewards/margins": 0.011882050894200802, |
|
"rewards/rejected": 0.008266657590866089, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.6556195965417867, |
|
"grad_norm": 27.812658309936523, |
|
"learning_rate": 4.2510144124635605e-08, |
|
"logits/chosen": -1.639870285987854, |
|
"logits/rejected": -1.6335127353668213, |
|
"logps/chosen": -58.445831298828125, |
|
"logps/rejected": -59.66516876220703, |
|
"loss": 0.6866, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.024780476465821266, |
|
"rewards/margins": 0.01432741153985262, |
|
"rewards/rejected": 0.010453062132000923, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.6628242074927954, |
|
"grad_norm": 23.902587890625, |
|
"learning_rate": 4.22843562014427e-08, |
|
"logits/chosen": -1.6627442836761475, |
|
"logits/rejected": -1.651299238204956, |
|
"logps/chosen": -47.656944274902344, |
|
"logps/rejected": -50.61472702026367, |
|
"loss": 0.6789, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.03436293825507164, |
|
"rewards/margins": 0.029948776587843895, |
|
"rewards/rejected": 0.004414163064211607, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.670028818443804, |
|
"grad_norm": 29.023405075073242, |
|
"learning_rate": 4.205583450581023e-08, |
|
"logits/chosen": -1.727513074874878, |
|
"logits/rejected": -1.7153295278549194, |
|
"logps/chosen": -53.51002883911133, |
|
"logps/rejected": -56.443153381347656, |
|
"loss": 0.682, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.032039422541856766, |
|
"rewards/margins": 0.02314738929271698, |
|
"rewards/rejected": 0.00889203418046236, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.6772334293948127, |
|
"grad_norm": 22.48113250732422, |
|
"learning_rate": 4.1824615181769577e-08, |
|
"logits/chosen": -1.6178547143936157, |
|
"logits/rejected": -1.6143802404403687, |
|
"logps/chosen": -62.34340286254883, |
|
"logps/rejected": -63.569923400878906, |
|
"loss": 0.6839, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.027915989980101585, |
|
"rewards/margins": 0.01995277777314186, |
|
"rewards/rejected": 0.007963214069604874, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.6844380403458213, |
|
"grad_norm": 27.37798309326172, |
|
"learning_rate": 4.1590734800021354e-08, |
|
"logits/chosen": -1.5735194683074951, |
|
"logits/rejected": -1.5851722955703735, |
|
"logps/chosen": -52.67449188232422, |
|
"logps/rejected": -58.57447052001953, |
|
"loss": 0.6832, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.033303067088127136, |
|
"rewards/margins": 0.0210419949144125, |
|
"rewards/rejected": 0.01226106844842434, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.69164265129683, |
|
"grad_norm": 27.289091110229492, |
|
"learning_rate": 4.1354230352151143e-08, |
|
"logits/chosen": -1.708268165588379, |
|
"logits/rejected": -1.700402855873108, |
|
"logps/chosen": -55.896453857421875, |
|
"logps/rejected": -58.6062126159668, |
|
"loss": 0.6844, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": 0.02778809331357479, |
|
"rewards/margins": 0.01915113627910614, |
|
"rewards/rejected": 0.00863695703446865, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.6988472622478387, |
|
"grad_norm": 34.176612854003906, |
|
"learning_rate": 4.111513924477878e-08, |
|
"logits/chosen": -1.7223440408706665, |
|
"logits/rejected": -1.7140798568725586, |
|
"logps/chosen": -51.11802291870117, |
|
"logps/rejected": -55.39356231689453, |
|
"loss": 0.6805, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.038919635117053986, |
|
"rewards/margins": 0.02688221074640751, |
|
"rewards/rejected": 0.012037424370646477, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.7060518731988472, |
|
"grad_norm": 24.33797264099121, |
|
"learning_rate": 4.087349929364192e-08, |
|
"logits/chosen": -1.5715091228485107, |
|
"logits/rejected": -1.5727804899215698, |
|
"logps/chosen": -58.17668914794922, |
|
"logps/rejected": -63.71800994873047, |
|
"loss": 0.6856, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.02855418249964714, |
|
"rewards/margins": 0.016662323847413063, |
|
"rewards/rejected": 0.011891861446201801, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.7132564841498559, |
|
"grad_norm": 30.351694107055664, |
|
"learning_rate": 4.062934871761497e-08, |
|
"logits/chosen": -1.6828606128692627, |
|
"logits/rejected": -1.6766620874404907, |
|
"logps/chosen": -57.43531036376953, |
|
"logps/rejected": -59.62762451171875, |
|
"loss": 0.6837, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.030923152342438698, |
|
"rewards/margins": 0.020262621343135834, |
|
"rewards/rejected": 0.010660530999302864, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.7204610951008645, |
|
"grad_norm": 27.251075744628906, |
|
"learning_rate": 4.038272613266419e-08, |
|
"logits/chosen": -1.6652915477752686, |
|
"logits/rejected": -1.6540310382843018, |
|
"logps/chosen": -53.43489456176758, |
|
"logps/rejected": -56.68244171142578, |
|
"loss": 0.6821, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.03513840585947037, |
|
"rewards/margins": 0.024065181612968445, |
|
"rewards/rejected": 0.011073225177824497, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.7276657060518732, |
|
"grad_norm": 32.4438362121582, |
|
"learning_rate": 4.0133670545740014e-08, |
|
"logits/chosen": -1.671142578125, |
|
"logits/rejected": -1.6590646505355835, |
|
"logps/chosen": -48.778076171875, |
|
"logps/rejected": -51.36602020263672, |
|
"loss": 0.6815, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.03226137161254883, |
|
"rewards/margins": 0.0248870812356472, |
|
"rewards/rejected": 0.007374290376901627, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.7348703170028819, |
|
"grad_norm": 26.294254302978516, |
|
"learning_rate": 3.988222134860755e-08, |
|
"logits/chosen": -1.6925846338272095, |
|
"logits/rejected": -1.6816070079803467, |
|
"logps/chosen": -52.785797119140625, |
|
"logps/rejected": -55.186126708984375, |
|
"loss": 0.6878, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 0.02925296127796173, |
|
"rewards/margins": 0.011979686096310616, |
|
"rewards/rejected": 0.017273275181651115, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.7420749279538905, |
|
"grad_norm": 24.478652954101562, |
|
"learning_rate": 3.962841831161617e-08, |
|
"logits/chosen": -1.619998574256897, |
|
"logits/rejected": -1.6116468906402588, |
|
"logps/chosen": -49.49003982543945, |
|
"logps/rejected": -54.25371170043945, |
|
"loss": 0.678, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.04156359285116196, |
|
"rewards/margins": 0.032577551901340485, |
|
"rewards/rejected": 0.008986040018498898, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.7492795389048992, |
|
"grad_norm": 23.191585540771484, |
|
"learning_rate": 3.937230157740931e-08, |
|
"logits/chosen": -1.6642029285430908, |
|
"logits/rejected": -1.6495530605316162, |
|
"logps/chosen": -52.95183563232422, |
|
"logps/rejected": -55.2051887512207, |
|
"loss": 0.6818, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.033531554043293, |
|
"rewards/margins": 0.024701178073883057, |
|
"rewards/rejected": 0.008830374106764793, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.7564841498559077, |
|
"grad_norm": 22.424583435058594, |
|
"learning_rate": 3.9113911654575246e-08, |
|
"logits/chosen": -1.5803316831588745, |
|
"logits/rejected": -1.5679031610488892, |
|
"logps/chosen": -47.259952545166016, |
|
"logps/rejected": -51.2758903503418, |
|
"loss": 0.678, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.03728173300623894, |
|
"rewards/margins": 0.03241972625255585, |
|
"rewards/rejected": 0.004862007685005665, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.7636887608069164, |
|
"grad_norm": 27.110639572143555, |
|
"learning_rate": 3.885328941124014e-08, |
|
"logits/chosen": -1.6533464193344116, |
|
"logits/rejected": -1.6282085180282593, |
|
"logps/chosen": -54.238807678222656, |
|
"logps/rejected": -55.80359649658203, |
|
"loss": 0.6777, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.037798795849084854, |
|
"rewards/margins": 0.03302844241261482, |
|
"rewards/rejected": 0.00477034505456686, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.770893371757925, |
|
"grad_norm": 34.7132453918457, |
|
"learning_rate": 3.8590476068604106e-08, |
|
"logits/chosen": -1.6385762691497803, |
|
"logits/rejected": -1.6310360431671143, |
|
"logps/chosen": -61.6613883972168, |
|
"logps/rejected": -66.19737243652344, |
|
"loss": 0.6774, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.04412104934453964, |
|
"rewards/margins": 0.03382264822721481, |
|
"rewards/rejected": 0.010298402979969978, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.7780979827089337, |
|
"grad_norm": 27.947389602661133, |
|
"learning_rate": 3.832551319442151e-08, |
|
"logits/chosen": -1.6364872455596924, |
|
"logits/rejected": -1.6301469802856445, |
|
"logps/chosen": -55.05015182495117, |
|
"logps/rejected": -59.003196716308594, |
|
"loss": 0.6751, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.04528949409723282, |
|
"rewards/margins": 0.038304783403873444, |
|
"rewards/rejected": 0.006984707899391651, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.7853025936599424, |
|
"grad_norm": 32.10716247558594, |
|
"learning_rate": 3.8058442696426404e-08, |
|
"logits/chosen": -1.6696398258209229, |
|
"logits/rejected": -1.6630815267562866, |
|
"logps/chosen": -60.15520095825195, |
|
"logps/rejected": -64.48773193359375, |
|
"loss": 0.6787, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.02938222512602806, |
|
"rewards/margins": 0.031164387241005898, |
|
"rewards/rejected": -0.001782161882147193, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.792507204610951, |
|
"grad_norm": 30.23760986328125, |
|
"learning_rate": 3.7789306815704216e-08, |
|
"logits/chosen": -1.6817725896835327, |
|
"logits/rejected": -1.6605682373046875, |
|
"logps/chosen": -53.92103958129883, |
|
"logps/rejected": -56.1260986328125, |
|
"loss": 0.6779, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.041012417525053024, |
|
"rewards/margins": 0.03267299756407738, |
|
"rewards/rejected": 0.00833942275494337, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.7997118155619597, |
|
"grad_norm": 20.537240982055664, |
|
"learning_rate": 3.7518148120010705e-08, |
|
"logits/chosen": -1.6919755935668945, |
|
"logits/rejected": -1.6787545680999756, |
|
"logps/chosen": -53.570655822753906, |
|
"logps/rejected": -56.57023239135742, |
|
"loss": 0.6792, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.03128691390156746, |
|
"rewards/margins": 0.03024480864405632, |
|
"rewards/rejected": 0.0010421050246804953, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.8069164265129684, |
|
"grad_norm": 23.85075569152832, |
|
"learning_rate": 3.7245009497039244e-08, |
|
"logits/chosen": -1.6372750997543335, |
|
"logits/rejected": -1.6247594356536865, |
|
"logps/chosen": -52.35149383544922, |
|
"logps/rejected": -53.97734832763672, |
|
"loss": 0.6766, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.04169910401105881, |
|
"rewards/margins": 0.036101967096328735, |
|
"rewards/rejected": 0.005597130861133337, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.8141210374639769, |
|
"grad_norm": 27.246475219726562, |
|
"learning_rate": 3.696993414763753e-08, |
|
"logits/chosen": -1.6642570495605469, |
|
"logits/rejected": -1.658142328262329, |
|
"logps/chosen": -49.33148193359375, |
|
"logps/rejected": -51.482025146484375, |
|
"loss": 0.6839, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.03087095357477665, |
|
"rewards/margins": 0.020789533853530884, |
|
"rewards/rejected": 0.010081417858600616, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.8213256484149856, |
|
"grad_norm": 26.61668586730957, |
|
"learning_rate": 3.66929655789747e-08, |
|
"logits/chosen": -1.706260323524475, |
|
"logits/rejected": -1.6993458271026611, |
|
"logps/chosen": -53.623802185058594, |
|
"logps/rejected": -56.636138916015625, |
|
"loss": 0.6747, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.04618995636701584, |
|
"rewards/margins": 0.03998780995607376, |
|
"rewards/rejected": 0.006202142685651779, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.8285302593659942, |
|
"grad_norm": 25.16993522644043, |
|
"learning_rate": 3.64141475976601e-08, |
|
"logits/chosen": -1.7065346240997314, |
|
"logits/rejected": -1.6957979202270508, |
|
"logps/chosen": -55.83368682861328, |
|
"logps/rejected": -58.4466667175293, |
|
"loss": 0.68, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.034841228276491165, |
|
"rewards/margins": 0.028890833258628845, |
|
"rewards/rejected": 0.005950393620878458, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.8357348703170029, |
|
"grad_norm": 28.79724884033203, |
|
"learning_rate": 3.61335243028146e-08, |
|
"logits/chosen": -1.6835393905639648, |
|
"logits/rejected": -1.6707643270492554, |
|
"logps/chosen": -58.164833068847656, |
|
"logps/rejected": -58.84022903442383, |
|
"loss": 0.6803, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.03287480026483536, |
|
"rewards/margins": 0.028582941740751266, |
|
"rewards/rejected": 0.004291852004826069, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.8429394812680115, |
|
"grad_norm": 31.446813583374023, |
|
"learning_rate": 3.585114007909562e-08, |
|
"logits/chosen": -1.6065078973770142, |
|
"logits/rejected": -1.5808089971542358, |
|
"logps/chosen": -52.96314239501953, |
|
"logps/rejected": -53.44304275512695, |
|
"loss": 0.681, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.03624821454286575, |
|
"rewards/margins": 0.0269475020468235, |
|
"rewards/rejected": 0.009300706908106804, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.8501440922190202, |
|
"grad_norm": 40.8033332824707, |
|
"learning_rate": 3.556703958967716e-08, |
|
"logits/chosen": -1.5730987787246704, |
|
"logits/rejected": -1.560154676437378, |
|
"logps/chosen": -55.611358642578125, |
|
"logps/rejected": -57.49493408203125, |
|
"loss": 0.6802, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.03398990258574486, |
|
"rewards/margins": 0.02881578728556633, |
|
"rewards/rejected": 0.005174115765839815, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.8573487031700289, |
|
"grad_norm": 26.1713809967041, |
|
"learning_rate": 3.528126776918559e-08, |
|
"logits/chosen": -1.732617974281311, |
|
"logits/rejected": -1.7108558416366577, |
|
"logps/chosen": -56.743370056152344, |
|
"logps/rejected": -58.24882888793945, |
|
"loss": 0.6791, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.032749153673648834, |
|
"rewards/margins": 0.03046230599284172, |
|
"rewards/rejected": 0.0022868472151458263, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.8645533141210374, |
|
"grad_norm": 30.190982818603516, |
|
"learning_rate": 3.499386981659262e-08, |
|
"logits/chosen": -1.6135402917861938, |
|
"logits/rejected": -1.5936330556869507, |
|
"logps/chosen": -59.51149368286133, |
|
"logps/rejected": -61.74272918701172, |
|
"loss": 0.678, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.03140435367822647, |
|
"rewards/margins": 0.03346829488873482, |
|
"rewards/rejected": -0.002063935622572899, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.8717579250720461, |
|
"grad_norm": 24.446578979492188, |
|
"learning_rate": 3.47048911880664e-08, |
|
"logits/chosen": -1.594386339187622, |
|
"logits/rejected": -1.592362642288208, |
|
"logps/chosen": -49.790889739990234, |
|
"logps/rejected": -52.867347717285156, |
|
"loss": 0.6794, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.03862444683909416, |
|
"rewards/margins": 0.030268633738160133, |
|
"rewards/rejected": 0.008355814963579178, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.8789625360230547, |
|
"grad_norm": 29.226476669311523, |
|
"learning_rate": 3.4414377589782e-08, |
|
"logits/chosen": -1.642496109008789, |
|
"logits/rejected": -1.6357898712158203, |
|
"logps/chosen": -53.193016052246094, |
|
"logps/rejected": -56.894073486328125, |
|
"loss": 0.6791, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.03899763897061348, |
|
"rewards/margins": 0.03164661303162575, |
|
"rewards/rejected": 0.007351027335971594, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.8861671469740634, |
|
"grad_norm": 26.702550888061523, |
|
"learning_rate": 3.412237497069226e-08, |
|
"logits/chosen": -1.5900542736053467, |
|
"logits/rejected": -1.5684837102890015, |
|
"logps/chosen": -55.75627517700195, |
|
"logps/rejected": -58.2167854309082, |
|
"loss": 0.6743, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.044974491000175476, |
|
"rewards/margins": 0.0419379398226738, |
|
"rewards/rejected": 0.003036542795598507, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.8933717579250721, |
|
"grad_norm": 33.24451446533203, |
|
"learning_rate": 3.382892951526036e-08, |
|
"logits/chosen": -1.647112250328064, |
|
"logits/rejected": -1.6350224018096924, |
|
"logps/chosen": -48.98326873779297, |
|
"logps/rejected": -52.603431701660156, |
|
"loss": 0.6709, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.04122794419527054, |
|
"rewards/margins": 0.04834694787859917, |
|
"rewards/rejected": -0.007119007408618927, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.9005763688760807, |
|
"grad_norm": 24.48476219177246, |
|
"learning_rate": 3.353408763615502e-08, |
|
"logits/chosen": -1.694043755531311, |
|
"logits/rejected": -1.6888548135757446, |
|
"logps/chosen": -58.39896774291992, |
|
"logps/rejected": -62.35167694091797, |
|
"loss": 0.6814, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.035146381705999374, |
|
"rewards/margins": 0.0263100303709507, |
|
"rewards/rejected": 0.008836353197693825, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.9077809798270894, |
|
"grad_norm": 28.14899444580078, |
|
"learning_rate": 3.323789596690971e-08, |
|
"logits/chosen": -1.6525447368621826, |
|
"logits/rejected": -1.665614128112793, |
|
"logps/chosen": -53.552452087402344, |
|
"logps/rejected": -61.17711639404297, |
|
"loss": 0.6799, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.029197026044130325, |
|
"rewards/margins": 0.029619354754686356, |
|
"rewards/rejected": -0.0004223290889058262, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.9149855907780979, |
|
"grad_norm": 28.011920928955078, |
|
"learning_rate": 3.294040135454681e-08, |
|
"logits/chosen": -1.5988832712173462, |
|
"logits/rejected": -1.5843169689178467, |
|
"logps/chosen": -52.511505126953125, |
|
"logps/rejected": -55.977783203125, |
|
"loss": 0.6722, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.04705361649394035, |
|
"rewards/margins": 0.04608291760087013, |
|
"rewards/rejected": 0.0009706999990157783, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.9221902017291066, |
|
"grad_norm": 33.35093307495117, |
|
"learning_rate": 3.264165085216817e-08, |
|
"logits/chosen": -1.7459869384765625, |
|
"logits/rejected": -1.7356443405151367, |
|
"logps/chosen": -53.0772590637207, |
|
"logps/rejected": -57.63134002685547, |
|
"loss": 0.6681, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.05287221819162369, |
|
"rewards/margins": 0.05477180331945419, |
|
"rewards/rejected": -0.0018995910650119185, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.9293948126801153, |
|
"grad_norm": 24.03230857849121, |
|
"learning_rate": 3.2341691711512854e-08, |
|
"logits/chosen": -1.712523102760315, |
|
"logits/rejected": -1.708817720413208, |
|
"logps/chosen": -52.34474563598633, |
|
"logps/rejected": -56.7424430847168, |
|
"loss": 0.6729, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.046087004244327545, |
|
"rewards/margins": 0.04450781270861626, |
|
"rewards/rejected": 0.0015791950281709433, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.9365994236311239, |
|
"grad_norm": 27.065187454223633, |
|
"learning_rate": 3.204057137548371e-08, |
|
"logits/chosen": -1.7500317096710205, |
|
"logits/rejected": -1.7493479251861572, |
|
"logps/chosen": -54.37158203125, |
|
"logps/rejected": -56.19264602661133, |
|
"loss": 0.6884, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": 0.0221403818577528, |
|
"rewards/margins": 0.011624941602349281, |
|
"rewards/rejected": 0.01051543839275837, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.9438040345821326, |
|
"grad_norm": 26.071910858154297, |
|
"learning_rate": 3.173833747064351e-08, |
|
"logits/chosen": -1.709240198135376, |
|
"logits/rejected": -1.7107799053192139, |
|
"logps/chosen": -47.57482147216797, |
|
"logps/rejected": -51.987648010253906, |
|
"loss": 0.6751, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.043011344969272614, |
|
"rewards/margins": 0.03987384960055351, |
|
"rewards/rejected": 0.00313749467022717, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.9510086455331412, |
|
"grad_norm": 28.993371963500977, |
|
"learning_rate": 3.143503779968213e-08, |
|
"logits/chosen": -1.6517963409423828, |
|
"logits/rejected": -1.636230230331421, |
|
"logps/chosen": -53.55908966064453, |
|
"logps/rejected": -56.618064880371094, |
|
"loss": 0.6714, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.047532517462968826, |
|
"rewards/margins": 0.04704531282186508, |
|
"rewards/rejected": 0.0004871990531682968, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.9582132564841499, |
|
"grad_norm": 27.038301467895508, |
|
"learning_rate": 3.113072033385589e-08, |
|
"logits/chosen": -1.6915805339813232, |
|
"logits/rejected": -1.6699497699737549, |
|
"logps/chosen": -58.74944305419922, |
|
"logps/rejected": -60.51899337768555, |
|
"loss": 0.6719, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.04666688293218613, |
|
"rewards/margins": 0.04686695709824562, |
|
"rewards/rejected": -0.00020007490820717067, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.9654178674351584, |
|
"grad_norm": 28.07818031311035, |
|
"learning_rate": 3.082543320540015e-08, |
|
"logits/chosen": -1.6783336400985718, |
|
"logits/rejected": -1.675157904624939, |
|
"logps/chosen": -52.443756103515625, |
|
"logps/rejected": -56.30717849731445, |
|
"loss": 0.6767, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.04153715819120407, |
|
"rewards/margins": 0.03781905025243759, |
|
"rewards/rejected": 0.0037181121297180653, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.9726224783861671, |
|
"grad_norm": 25.59157943725586, |
|
"learning_rate": 3.051922469991655e-08, |
|
"logits/chosen": -1.5502612590789795, |
|
"logits/rejected": -1.5441707372665405, |
|
"logps/chosen": -59.5379524230957, |
|
"logps/rejected": -61.09001541137695, |
|
"loss": 0.6723, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.05017656087875366, |
|
"rewards/margins": 0.04551985114812851, |
|
"rewards/rejected": 0.004656708799302578, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.9798270893371758, |
|
"grad_norm": 32.95548629760742, |
|
"learning_rate": 3.0212143248735886e-08, |
|
"logits/chosen": -1.6339962482452393, |
|
"logits/rejected": -1.6290324926376343, |
|
"logps/chosen": -50.37676239013672, |
|
"logps/rejected": -53.350425720214844, |
|
"loss": 0.6692, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.052200425416231155, |
|
"rewards/margins": 0.051705501973629, |
|
"rewards/rejected": 0.0004949237918481231, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.9870317002881844, |
|
"grad_norm": 23.29275131225586, |
|
"learning_rate": 2.9904237421258046e-08, |
|
"logits/chosen": -1.6470777988433838, |
|
"logits/rejected": -1.6439279317855835, |
|
"logps/chosen": -50.2126579284668, |
|
"logps/rejected": -54.6882438659668, |
|
"loss": 0.6747, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.042076610028743744, |
|
"rewards/margins": 0.04092058166861534, |
|
"rewards/rejected": 0.0011560240527614951, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.9942363112391931, |
|
"grad_norm": 29.630380630493164, |
|
"learning_rate": 2.9595555917269997e-08, |
|
"logits/chosen": -1.5971949100494385, |
|
"logits/rejected": -1.5871821641921997, |
|
"logps/chosen": -53.988670349121094, |
|
"logps/rejected": -55.97953414916992, |
|
"loss": 0.6795, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.04265677556395531, |
|
"rewards/margins": 0.030483752489089966, |
|
"rewards/rejected": 0.012173019349575043, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.0014409221902016, |
|
"grad_norm": 29.612489700317383, |
|
"learning_rate": 2.928614755924327e-08, |
|
"logits/chosen": -1.7032960653305054, |
|
"logits/rejected": -1.698068618774414, |
|
"logps/chosen": -50.45367431640625, |
|
"logps/rejected": -53.488685607910156, |
|
"loss": 0.6765, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.0573929063975811, |
|
"rewards/margins": 0.039073482155799866, |
|
"rewards/rejected": 0.018319427967071533, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.0086455331412103, |
|
"grad_norm": 20.9265193939209, |
|
"learning_rate": 2.8976061284611908e-08, |
|
"logits/chosen": -1.5756412744522095, |
|
"logits/rejected": -1.564943552017212, |
|
"logps/chosen": -54.020263671875, |
|
"logps/rejected": -56.86461639404297, |
|
"loss": 0.6636, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.05064217373728752, |
|
"rewards/margins": 0.06473135948181152, |
|
"rewards/rejected": -0.01408919133245945, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.015850144092219, |
|
"grad_norm": 24.08535385131836, |
|
"learning_rate": 2.8665346138032327e-08, |
|
"logits/chosen": -1.6361067295074463, |
|
"logits/rejected": -1.6422010660171509, |
|
"logps/chosen": -49.97898483276367, |
|
"logps/rejected": -54.96118927001953, |
|
"loss": 0.6666, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.05061709135770798, |
|
"rewards/margins": 0.05784117057919502, |
|
"rewards/rejected": -0.007224074099212885, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.0230547550432276, |
|
"grad_norm": 28.920146942138672, |
|
"learning_rate": 2.8354051263626227e-08, |
|
"logits/chosen": -1.6691162586212158, |
|
"logits/rejected": -1.6595113277435303, |
|
"logps/chosen": -56.175636291503906, |
|
"logps/rejected": -59.953834533691406, |
|
"loss": 0.6682, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.04565143585205078, |
|
"rewards/margins": 0.056333862245082855, |
|
"rewards/rejected": -0.010682420805096626, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.0302593659942363, |
|
"grad_norm": 28.18346405029297, |
|
"learning_rate": 2.8042225897207648e-08, |
|
"logits/chosen": -1.71315598487854, |
|
"logits/rejected": -1.7045456171035767, |
|
"logps/chosen": -44.08661651611328, |
|
"logps/rejected": -46.05047607421875, |
|
"loss": 0.6732, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.044674523174762726, |
|
"rewards/margins": 0.045459240674972534, |
|
"rewards/rejected": -0.0007847134256735444, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.037463976945245, |
|
"grad_norm": 29.775287628173828, |
|
"learning_rate": 2.7729919358495728e-08, |
|
"logits/chosen": -1.654279351234436, |
|
"logits/rejected": -1.640808343887329, |
|
"logps/chosen": -64.65403747558594, |
|
"logps/rejected": -64.36153411865234, |
|
"loss": 0.6752, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.039084166288375854, |
|
"rewards/margins": 0.040439385920763016, |
|
"rewards/rejected": -0.0013552254531532526, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.0446685878962536, |
|
"grad_norm": 23.282686233520508, |
|
"learning_rate": 2.741718104331393e-08, |
|
"logits/chosen": -1.750061273574829, |
|
"logits/rejected": -1.7619024515151978, |
|
"logps/chosen": -48.79555130004883, |
|
"logps/rejected": -56.19548416137695, |
|
"loss": 0.6678, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.03792846202850342, |
|
"rewards/margins": 0.057776786386966705, |
|
"rewards/rejected": -0.01984831690788269, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.0518731988472623, |
|
"grad_norm": 28.902856826782227, |
|
"learning_rate": 2.710406041577751e-08, |
|
"logits/chosen": -1.6216195821762085, |
|
"logits/rejected": -1.6019713878631592, |
|
"logps/chosen": -53.21555709838867, |
|
"logps/rejected": -58.60173797607422, |
|
"loss": 0.6597, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.057058185338974, |
|
"rewards/margins": 0.07414309680461884, |
|
"rewards/rejected": -0.017084909602999687, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.059077809798271, |
|
"grad_norm": 27.661142349243164, |
|
"learning_rate": 2.679060700046994e-08, |
|
"logits/chosen": -1.7108032703399658, |
|
"logits/rejected": -1.69040846824646, |
|
"logps/chosen": -48.76203536987305, |
|
"logps/rejected": -52.77034378051758, |
|
"loss": 0.6657, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.05403587222099304, |
|
"rewards/margins": 0.06079152971506119, |
|
"rewards/rejected": -0.006755647249519825, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.0662824207492796, |
|
"grad_norm": 24.555545806884766, |
|
"learning_rate": 2.647687037460996e-08, |
|
"logits/chosen": -1.650090217590332, |
|
"logits/rejected": -1.6332178115844727, |
|
"logps/chosen": -53.99782180786133, |
|
"logps/rejected": -56.94426727294922, |
|
"loss": 0.6725, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.05287463590502739, |
|
"rewards/margins": 0.04700274020433426, |
|
"rewards/rejected": 0.005871894769370556, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.0734870317002883, |
|
"grad_norm": 35.28300857543945, |
|
"learning_rate": 2.616290016021016e-08, |
|
"logits/chosen": -1.62253737449646, |
|
"logits/rejected": -1.610269546508789, |
|
"logps/chosen": -61.49285888671875, |
|
"logps/rejected": -63.3193244934082, |
|
"loss": 0.6735, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.045845337212085724, |
|
"rewards/margins": 0.04515889286994934, |
|
"rewards/rejected": 0.0006864480674266815, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 1.080691642651297, |
|
"grad_norm": 26.37901496887207, |
|
"learning_rate": 2.584874601622854e-08, |
|
"logits/chosen": -1.6861085891723633, |
|
"logits/rejected": -1.6631263494491577, |
|
"logps/chosen": -51.32836151123047, |
|
"logps/rejected": -54.0167236328125, |
|
"loss": 0.6717, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.05208291485905647, |
|
"rewards/margins": 0.04669463634490967, |
|
"rewards/rejected": 0.0053882719948887825, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.0878962536023056, |
|
"grad_norm": 29.798765182495117, |
|
"learning_rate": 2.5534457630714267e-08, |
|
"logits/chosen": -1.6969906091690063, |
|
"logits/rejected": -1.6932684183120728, |
|
"logps/chosen": -49.14203643798828, |
|
"logps/rejected": -53.022621154785156, |
|
"loss": 0.6649, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.05330803245306015, |
|
"rewards/margins": 0.06402654945850372, |
|
"rewards/rejected": -0.010718528181314468, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 1.0951008645533142, |
|
"grad_norm": 29.122047424316406, |
|
"learning_rate": 2.5220084712948764e-08, |
|
"logits/chosen": -1.6610676050186157, |
|
"logits/rejected": -1.6523653268814087, |
|
"logps/chosen": -60.46284103393555, |
|
"logps/rejected": -59.5699577331543, |
|
"loss": 0.6877, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.031362272799015045, |
|
"rewards/margins": 0.015023702755570412, |
|
"rewards/rejected": 0.016338571906089783, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.1023054755043227, |
|
"grad_norm": 29.082090377807617, |
|
"learning_rate": 2.490567698558343e-08, |
|
"logits/chosen": -1.706171989440918, |
|
"logits/rejected": -1.6938079595565796, |
|
"logps/chosen": -54.25682830810547, |
|
"logps/rejected": -58.964813232421875, |
|
"loss": 0.6691, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.05163877084851265, |
|
"rewards/margins": 0.053050000220537186, |
|
"rewards/rejected": -0.0014112277422100306, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.1095100864553313, |
|
"grad_norm": 28.59368324279785, |
|
"learning_rate": 2.4591284176775326e-08, |
|
"logits/chosen": -1.7147783041000366, |
|
"logits/rejected": -1.702530860900879, |
|
"logps/chosen": -52.58336639404297, |
|
"logps/rejected": -56.28779983520508, |
|
"loss": 0.6688, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.05651012808084488, |
|
"rewards/margins": 0.053883958607912064, |
|
"rewards/rejected": 0.002626165049150586, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.11671469740634, |
|
"grad_norm": 23.492542266845703, |
|
"learning_rate": 2.4276956012321926e-08, |
|
"logits/chosen": -1.6896263360977173, |
|
"logits/rejected": -1.6793534755706787, |
|
"logps/chosen": -53.504417419433594, |
|
"logps/rejected": -55.97063446044922, |
|
"loss": 0.6709, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.04645276069641113, |
|
"rewards/margins": 0.049985408782958984, |
|
"rewards/rejected": -0.0035326494835317135, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.1239193083573487, |
|
"grad_norm": 32.83745574951172, |
|
"learning_rate": 2.3962742207796268e-08, |
|
"logits/chosen": -1.727237343788147, |
|
"logits/rejected": -1.7086503505706787, |
|
"logps/chosen": -54.65483856201172, |
|
"logps/rejected": -58.435203552246094, |
|
"loss": 0.6647, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.05584675073623657, |
|
"rewards/margins": 0.06230972334742546, |
|
"rewards/rejected": -0.0064629726111888885, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.1311239193083573, |
|
"grad_norm": 21.926939010620117, |
|
"learning_rate": 2.364869246068368e-08, |
|
"logits/chosen": -1.7023918628692627, |
|
"logits/rejected": -1.6883018016815186, |
|
"logps/chosen": -56.33196258544922, |
|
"logps/rejected": -59.65509796142578, |
|
"loss": 0.6738, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.03886229544878006, |
|
"rewards/margins": 0.04508962482213974, |
|
"rewards/rejected": -0.006227326579391956, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.138328530259366, |
|
"grad_norm": 33.17292022705078, |
|
"learning_rate": 2.3334856442521435e-08, |
|
"logits/chosen": -1.6609952449798584, |
|
"logits/rejected": -1.6502721309661865, |
|
"logps/chosen": -56.61347198486328, |
|
"logps/rejected": -60.70231246948242, |
|
"loss": 0.6603, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.05509304255247116, |
|
"rewards/margins": 0.07247869670391083, |
|
"rewards/rejected": -0.017385641112923622, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.1455331412103746, |
|
"grad_norm": 34.729270935058594, |
|
"learning_rate": 2.3021283791042474e-08, |
|
"logits/chosen": -1.6414772272109985, |
|
"logits/rejected": -1.629290223121643, |
|
"logps/chosen": -52.38581085205078, |
|
"logps/rejected": -57.90489959716797, |
|
"loss": 0.6635, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.04822907969355583, |
|
"rewards/margins": 0.06632965803146362, |
|
"rewards/rejected": -0.018100585788488388, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.1527377521613833, |
|
"grad_norm": 29.043781280517578, |
|
"learning_rate": 2.2708024102324454e-08, |
|
"logits/chosen": -1.689874291419983, |
|
"logits/rejected": -1.6721159219741821, |
|
"logps/chosen": -56.054420471191406, |
|
"logps/rejected": -58.819847106933594, |
|
"loss": 0.6717, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.05031623691320419, |
|
"rewards/margins": 0.04818098247051239, |
|
"rewards/rejected": 0.002135257935151458, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.159942363112392, |
|
"grad_norm": 29.580005645751953, |
|
"learning_rate": 2.23951269229454e-08, |
|
"logits/chosen": -1.613242745399475, |
|
"logits/rejected": -1.6088173389434814, |
|
"logps/chosen": -53.486328125, |
|
"logps/rejected": -56.55217742919922, |
|
"loss": 0.6679, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.04551651328802109, |
|
"rewards/margins": 0.05793655663728714, |
|
"rewards/rejected": -0.012420037761330605, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.1671469740634006, |
|
"grad_norm": 31.668487548828125, |
|
"learning_rate": 2.2082641742147238e-08, |
|
"logits/chosen": -1.7278293371200562, |
|
"logits/rejected": -1.7244508266448975, |
|
"logps/chosen": -56.515350341796875, |
|
"logps/rejected": -61.60710906982422, |
|
"loss": 0.6682, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.0525764599442482, |
|
"rewards/margins": 0.054812002927064896, |
|
"rewards/rejected": -0.002235544379800558, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.1743515850144093, |
|
"grad_norm": 39.19826889038086, |
|
"learning_rate": 2.177061798400832e-08, |
|
"logits/chosen": -1.5428308248519897, |
|
"logits/rejected": -1.5289558172225952, |
|
"logps/chosen": -58.390045166015625, |
|
"logps/rejected": -58.40192794799805, |
|
"loss": 0.6704, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.04328244924545288, |
|
"rewards/margins": 0.050250399857759476, |
|
"rewards/rejected": -0.006967948284000158, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.181556195965418, |
|
"grad_norm": 26.75935935974121, |
|
"learning_rate": 2.145910499962628e-08, |
|
"logits/chosen": -1.6505706310272217, |
|
"logits/rejected": -1.6422561407089233, |
|
"logps/chosen": -52.12128448486328, |
|
"logps/rejected": -53.86430740356445, |
|
"loss": 0.666, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.057096611708402634, |
|
"rewards/margins": 0.06089169532060623, |
|
"rewards/rejected": -0.0037950840778648853, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.1887608069164266, |
|
"grad_norm": 26.792505264282227, |
|
"learning_rate": 2.1148152059312437e-08, |
|
"logits/chosen": -1.6734596490859985, |
|
"logits/rejected": -1.663163185119629, |
|
"logps/chosen": -46.515968322753906, |
|
"logps/rejected": -47.968421936035156, |
|
"loss": 0.6765, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.039860546588897705, |
|
"rewards/margins": 0.039603374898433685, |
|
"rewards/rejected": 0.000257173553109169, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.195965417867435, |
|
"grad_norm": 25.975000381469727, |
|
"learning_rate": 2.0837808344799028e-08, |
|
"logits/chosen": -1.5417954921722412, |
|
"logits/rejected": -1.5429438352584839, |
|
"logps/chosen": -52.71075439453125, |
|
"logps/rejected": -55.24102783203125, |
|
"loss": 0.668, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.04647735133767128, |
|
"rewards/margins": 0.057897698134183884, |
|
"rewards/rejected": -0.01142034586519003, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.2031700288184437, |
|
"grad_norm": 31.299114227294922, |
|
"learning_rate": 2.052812294146033e-08, |
|
"logits/chosen": -1.680318832397461, |
|
"logits/rejected": -1.673017144203186, |
|
"logps/chosen": -52.15236282348633, |
|
"logps/rejected": -56.82316970825195, |
|
"loss": 0.6659, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.05961848050355911, |
|
"rewards/margins": 0.062046296894550323, |
|
"rewards/rejected": -0.002427825704216957, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.2103746397694524, |
|
"grad_norm": 31.15201759338379, |
|
"learning_rate": 2.0219144830549163e-08, |
|
"logits/chosen": -1.5866892337799072, |
|
"logits/rejected": -1.5717523097991943, |
|
"logps/chosen": -52.0449333190918, |
|
"logps/rejected": -57.047821044921875, |
|
"loss": 0.6621, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.05372787266969681, |
|
"rewards/margins": 0.06935948133468628, |
|
"rewards/rejected": -0.015631603077054024, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.217579250720461, |
|
"grad_norm": 25.879758834838867, |
|
"learning_rate": 1.9910922881449716e-08, |
|
"logits/chosen": -1.65450918674469, |
|
"logits/rejected": -1.646868109703064, |
|
"logps/chosen": -57.28217697143555, |
|
"logps/rejected": -59.71793746948242, |
|
"loss": 0.6631, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.06054146960377693, |
|
"rewards/margins": 0.06607099622488022, |
|
"rewards/rejected": -0.005529527552425861, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 1.2247838616714697, |
|
"grad_norm": 27.07415771484375, |
|
"learning_rate": 1.9603505843948214e-08, |
|
"logits/chosen": -1.6178529262542725, |
|
"logits/rejected": -1.6128696203231812, |
|
"logps/chosen": -56.807891845703125, |
|
"logps/rejected": -60.698814392089844, |
|
"loss": 0.6603, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.06525342166423798, |
|
"rewards/margins": 0.0739850401878357, |
|
"rewards/rejected": -0.008731614798307419, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.2319884726224783, |
|
"grad_norm": 31.243019104003906, |
|
"learning_rate": 1.929694234052239e-08, |
|
"logits/chosen": -1.7153377532958984, |
|
"logits/rejected": -1.6968237161636353, |
|
"logps/chosen": -48.6357421875, |
|
"logps/rejected": -53.58130645751953, |
|
"loss": 0.6613, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.059562791138887405, |
|
"rewards/margins": 0.0704553872346878, |
|
"rewards/rejected": -0.010892586782574654, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.239193083573487, |
|
"grad_norm": 21.567594528198242, |
|
"learning_rate": 1.8991280858651157e-08, |
|
"logits/chosen": -1.6218206882476807, |
|
"logits/rejected": -1.6051326990127563, |
|
"logps/chosen": -50.012847900390625, |
|
"logps/rejected": -52.8299674987793, |
|
"loss": 0.6674, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.04892825335264206, |
|
"rewards/margins": 0.06088585779070854, |
|
"rewards/rejected": -0.011957600712776184, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.2463976945244957, |
|
"grad_norm": 32.33564758300781, |
|
"learning_rate": 1.868656974314557e-08, |
|
"logits/chosen": -1.6614080667495728, |
|
"logits/rejected": -1.6554081439971924, |
|
"logps/chosen": -52.624114990234375, |
|
"logps/rejected": -55.892662048339844, |
|
"loss": 0.6637, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.04592607915401459, |
|
"rewards/margins": 0.06650461256504059, |
|
"rewards/rejected": -0.020578527823090553, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.2536023054755043, |
|
"grad_norm": 23.595190048217773, |
|
"learning_rate": 1.8382857188502422e-08, |
|
"logits/chosen": -1.6442153453826904, |
|
"logits/rejected": -1.6373510360717773, |
|
"logps/chosen": -56.04620361328125, |
|
"logps/rejected": -59.06785202026367, |
|
"loss": 0.6666, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.04996069148182869, |
|
"rewards/margins": 0.06172338128089905, |
|
"rewards/rejected": -0.011762691661715508, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.260806916426513, |
|
"grad_norm": 30.709379196166992, |
|
"learning_rate": 1.8080191231281594e-08, |
|
"logits/chosen": -1.5838955640792847, |
|
"logits/rejected": -1.5548055171966553, |
|
"logps/chosen": -57.59186935424805, |
|
"logps/rejected": -56.7793083190918, |
|
"loss": 0.6561, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.06202070042490959, |
|
"rewards/margins": 0.08498839288949966, |
|
"rewards/rejected": -0.022967690601944923, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.2680115273775217, |
|
"grad_norm": 27.285667419433594, |
|
"learning_rate": 1.7778619742508345e-08, |
|
"logits/chosen": -1.6928040981292725, |
|
"logits/rejected": -1.6915152072906494, |
|
"logps/chosen": -58.45740509033203, |
|
"logps/rejected": -61.478302001953125, |
|
"loss": 0.6655, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.03818890452384949, |
|
"rewards/margins": 0.0651729553937912, |
|
"rewards/rejected": -0.026984045282006264, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.2752161383285303, |
|
"grad_norm": 23.404830932617188, |
|
"learning_rate": 1.7478190420101796e-08, |
|
"logits/chosen": -1.7029117345809937, |
|
"logits/rejected": -1.6898205280303955, |
|
"logps/chosen": -50.935115814208984, |
|
"logps/rejected": -55.13124465942383, |
|
"loss": 0.6696, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.0503745898604393, |
|
"rewards/margins": 0.054055869579315186, |
|
"rewards/rejected": -0.0036812766920775175, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.282420749279539, |
|
"grad_norm": 35.39152145385742, |
|
"learning_rate": 1.717895078133088e-08, |
|
"logits/chosen": -1.6763395071029663, |
|
"logits/rejected": -1.6591565608978271, |
|
"logps/chosen": -61.310096740722656, |
|
"logps/rejected": -61.61467742919922, |
|
"loss": 0.6758, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.044988058507442474, |
|
"rewards/margins": 0.03980833292007446, |
|
"rewards/rejected": 0.005179721862077713, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.2896253602305476, |
|
"grad_norm": 24.02739715576172, |
|
"learning_rate": 1.688094815529873e-08, |
|
"logits/chosen": -1.6351617574691772, |
|
"logits/rejected": -1.6387808322906494, |
|
"logps/chosen": -53.970558166503906, |
|
"logps/rejected": -56.1927375793457, |
|
"loss": 0.6825, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.047341324388980865, |
|
"rewards/margins": 0.029565196484327316, |
|
"rewards/rejected": 0.017776133492588997, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 1.2968299711815563, |
|
"grad_norm": 32.1756706237793, |
|
"learning_rate": 1.658422967545693e-08, |
|
"logits/chosen": -1.718798041343689, |
|
"logits/rejected": -1.7084662914276123, |
|
"logps/chosen": -52.566993713378906, |
|
"logps/rejected": -59.37955856323242, |
|
"loss": 0.6642, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.05607623979449272, |
|
"rewards/margins": 0.06564650684595108, |
|
"rewards/rejected": -0.009570261463522911, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.304034582132565, |
|
"grad_norm": 20.695886611938477, |
|
"learning_rate": 1.6288842272150614e-08, |
|
"logits/chosen": -1.611883521080017, |
|
"logits/rejected": -1.61124587059021, |
|
"logps/chosen": -52.52956008911133, |
|
"logps/rejected": -56.787391662597656, |
|
"loss": 0.6662, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.0512949600815773, |
|
"rewards/margins": 0.05934779718518257, |
|
"rewards/rejected": -0.008052836172282696, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 1.3112391930835736, |
|
"grad_norm": 31.054288864135742, |
|
"learning_rate": 1.5994832665195853e-08, |
|
"logits/chosen": -1.7016429901123047, |
|
"logits/rejected": -1.691511869430542, |
|
"logps/chosen": -46.15959930419922, |
|
"logps/rejected": -51.521705627441406, |
|
"loss": 0.6549, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.06147737428545952, |
|
"rewards/margins": 0.08640258014202118, |
|
"rewards/rejected": -0.024925213307142258, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.318443804034582, |
|
"grad_norm": 22.64872932434082, |
|
"learning_rate": 1.5702247356490134e-08, |
|
"logits/chosen": -1.6389744281768799, |
|
"logits/rejected": -1.639120101928711, |
|
"logps/chosen": -50.16230773925781, |
|
"logps/rejected": -53.1915283203125, |
|
"loss": 0.6641, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.04536745697259903, |
|
"rewards/margins": 0.06614157557487488, |
|
"rewards/rejected": -0.02077411487698555, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.3256484149855907, |
|
"grad_norm": 20.916744232177734, |
|
"learning_rate": 1.541113262265748e-08, |
|
"logits/chosen": -1.6537967920303345, |
|
"logits/rejected": -1.6307786703109741, |
|
"logps/chosen": -54.82042694091797, |
|
"logps/rejected": -56.67564010620117, |
|
"loss": 0.6661, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.04908507689833641, |
|
"rewards/margins": 0.0638277605175972, |
|
"rewards/rejected": -0.014742677100002766, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.3328530259365994, |
|
"grad_norm": 28.75771141052246, |
|
"learning_rate": 1.5121534507729073e-08, |
|
"logits/chosen": -1.7000787258148193, |
|
"logits/rejected": -1.6867382526397705, |
|
"logps/chosen": -50.214149475097656, |
|
"logps/rejected": -54.311065673828125, |
|
"loss": 0.6588, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.05089396983385086, |
|
"rewards/margins": 0.07929139584302902, |
|
"rewards/rejected": -0.02839742600917816, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.340057636887608, |
|
"grad_norm": 29.51345443725586, |
|
"learning_rate": 1.4833498815860756e-08, |
|
"logits/chosen": -1.6562092304229736, |
|
"logits/rejected": -1.6471668481826782, |
|
"logps/chosen": -56.003273010253906, |
|
"logps/rejected": -61.8942985534668, |
|
"loss": 0.6588, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.05405973643064499, |
|
"rewards/margins": 0.0779876634478569, |
|
"rewards/rejected": -0.023927928879857063, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.3472622478386167, |
|
"grad_norm": 24.563007354736328, |
|
"learning_rate": 1.4547071104088443e-08, |
|
"logits/chosen": -1.6281534433364868, |
|
"logits/rejected": -1.60134756565094, |
|
"logps/chosen": -48.80409622192383, |
|
"logps/rejected": -52.90191650390625, |
|
"loss": 0.6614, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.05820552632212639, |
|
"rewards/margins": 0.07069076597690582, |
|
"rewards/rejected": -0.01248523872345686, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.3544668587896254, |
|
"grad_norm": 28.101259231567383, |
|
"learning_rate": 1.4262296675122592e-08, |
|
"logits/chosen": -1.6599979400634766, |
|
"logits/rejected": -1.6438566446304321, |
|
"logps/chosen": -55.794822692871094, |
|
"logps/rejected": -57.25176239013672, |
|
"loss": 0.6673, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.0343967080116272, |
|
"rewards/margins": 0.060233019292354584, |
|
"rewards/rejected": -0.025836322456598282, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.361671469740634, |
|
"grad_norm": 28.072351455688477, |
|
"learning_rate": 1.3979220570182902e-08, |
|
"logits/chosen": -1.59923255443573, |
|
"logits/rejected": -1.5962598323822021, |
|
"logps/chosen": -52.601112365722656, |
|
"logps/rejected": -57.248451232910156, |
|
"loss": 0.6646, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.045020636171102524, |
|
"rewards/margins": 0.06425632536411285, |
|
"rewards/rejected": -0.019235694780945778, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.3688760806916427, |
|
"grad_norm": 20.26775360107422, |
|
"learning_rate": 1.369788756187445e-08, |
|
"logits/chosen": -1.6709423065185547, |
|
"logits/rejected": -1.6719890832901, |
|
"logps/chosen": -51.86114501953125, |
|
"logps/rejected": -55.500762939453125, |
|
"loss": 0.6668, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.05930880457162857, |
|
"rewards/margins": 0.060519617050886154, |
|
"rewards/rejected": -0.0012108208611607552, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.3760806916426513, |
|
"grad_norm": 26.241369247436523, |
|
"learning_rate": 1.3418342147106212e-08, |
|
"logits/chosen": -1.7067817449569702, |
|
"logits/rejected": -1.706256628036499, |
|
"logps/chosen": -52.70702362060547, |
|
"logps/rejected": -58.025123596191406, |
|
"loss": 0.6565, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.05462120845913887, |
|
"rewards/margins": 0.08217908442020416, |
|
"rewards/rejected": -0.027557870373129845, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.38328530259366, |
|
"grad_norm": 25.819034576416016, |
|
"learning_rate": 1.3140628540053218e-08, |
|
"logits/chosen": -1.726575255393982, |
|
"logits/rejected": -1.7202606201171875, |
|
"logps/chosen": -43.758033752441406, |
|
"logps/rejected": -49.98960494995117, |
|
"loss": 0.6633, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.05085862800478935, |
|
"rewards/margins": 0.06844624131917953, |
|
"rewards/rejected": -0.01758761703968048, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.3904899135446687, |
|
"grad_norm": 36.01348114013672, |
|
"learning_rate": 1.286479066516345e-08, |
|
"logits/chosen": -1.5930241346359253, |
|
"logits/rejected": -1.5837476253509521, |
|
"logps/chosen": -59.70249557495117, |
|
"logps/rejected": -61.39208984375, |
|
"loss": 0.6697, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.059021733701229095, |
|
"rewards/margins": 0.05476250499486923, |
|
"rewards/rejected": 0.004259222187101841, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 1.397694524495677, |
|
"grad_norm": 24.216604232788086, |
|
"learning_rate": 1.2590872150210574e-08, |
|
"logits/chosen": -1.7261098623275757, |
|
"logits/rejected": -1.724927544593811, |
|
"logps/chosen": -49.624183654785156, |
|
"logps/rejected": -52.33625030517578, |
|
"loss": 0.6671, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.05638096481561661, |
|
"rewards/margins": 0.06055796891450882, |
|
"rewards/rejected": -0.004177004564553499, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 1.4048991354466858, |
|
"grad_norm": 28.147171020507812, |
|
"learning_rate": 1.2318916319393555e-08, |
|
"logits/chosen": -1.6773601770401, |
|
"logits/rejected": -1.669012427330017, |
|
"logps/chosen": -50.91980743408203, |
|
"logps/rejected": -54.6119499206543, |
|
"loss": 0.6592, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.056795258074998856, |
|
"rewards/margins": 0.07905852049589157, |
|
"rewards/rejected": -0.022263258695602417, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.4121037463976944, |
|
"grad_norm": 30.398366928100586, |
|
"learning_rate": 1.2048966186484282e-08, |
|
"logits/chosen": -1.6361877918243408, |
|
"logits/rejected": -1.6289517879486084, |
|
"logps/chosen": -52.20341873168945, |
|
"logps/rejected": -57.09885787963867, |
|
"loss": 0.6561, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.060558564960956573, |
|
"rewards/margins": 0.08404217660427094, |
|
"rewards/rejected": -0.023483622819185257, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 1.419308357348703, |
|
"grad_norm": 32.82428741455078, |
|
"learning_rate": 1.1781064448024333e-08, |
|
"logits/chosen": -1.6930503845214844, |
|
"logits/rejected": -1.6847530603408813, |
|
"logps/chosen": -46.96501159667969, |
|
"logps/rejected": -51.72229766845703, |
|
"loss": 0.6515, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.060513317584991455, |
|
"rewards/margins": 0.09518440812826157, |
|
"rewards/rejected": -0.03467109426856041, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 1.4265129682997117, |
|
"grad_norm": 28.581178665161133, |
|
"learning_rate": 1.1515253476571923e-08, |
|
"logits/chosen": -1.656628966331482, |
|
"logits/rejected": -1.655122995376587, |
|
"logps/chosen": -51.16161346435547, |
|
"logps/rejected": -56.41352081298828, |
|
"loss": 0.6693, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.04029519110918045, |
|
"rewards/margins": 0.05936024710536003, |
|
"rewards/rejected": -0.019065069034695625, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 1.4337175792507204, |
|
"grad_norm": 32.20552062988281, |
|
"learning_rate": 1.1251575314000034e-08, |
|
"logits/chosen": -1.6694313287734985, |
|
"logits/rejected": -1.6587368249893188, |
|
"logps/chosen": -49.67891311645508, |
|
"logps/rejected": -52.66162872314453, |
|
"loss": 0.6647, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.060387950390577316, |
|
"rewards/margins": 0.06579816341400146, |
|
"rewards/rejected": -0.00541021628305316, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 1.440922190201729, |
|
"grad_norm": 19.01755142211914, |
|
"learning_rate": 1.0990071664846861e-08, |
|
"logits/chosen": -1.7050960063934326, |
|
"logits/rejected": -1.6924508810043335, |
|
"logps/chosen": -48.6504020690918, |
|
"logps/rejected": -55.10358810424805, |
|
"loss": 0.6532, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.05204867571592331, |
|
"rewards/margins": 0.09019048511981964, |
|
"rewards/rejected": -0.03814180940389633, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.4481268011527377, |
|
"grad_norm": 29.312150955200195, |
|
"learning_rate": 1.0730783889719711e-08, |
|
"logits/chosen": -1.6206512451171875, |
|
"logits/rejected": -1.6064426898956299, |
|
"logps/chosen": -50.724178314208984, |
|
"logps/rejected": -54.427764892578125, |
|
"loss": 0.6648, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.04072165489196777, |
|
"rewards/margins": 0.06882860511541367, |
|
"rewards/rejected": -0.02810695208609104, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 1.4553314121037464, |
|
"grad_norm": 36.57780456542969, |
|
"learning_rate": 1.0473752998753114e-08, |
|
"logits/chosen": -1.7069648504257202, |
|
"logits/rejected": -1.7023489475250244, |
|
"logps/chosen": -52.97856903076172, |
|
"logps/rejected": -55.6297607421875, |
|
"loss": 0.6662, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.0466485396027565, |
|
"rewards/margins": 0.06170258671045303, |
|
"rewards/rejected": -0.015054039657115936, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 1.462536023054755, |
|
"grad_norm": 31.10540008544922, |
|
"learning_rate": 1.0219019645122575e-08, |
|
"logits/chosen": -1.72427237033844, |
|
"logits/rejected": -1.7157777547836304, |
|
"logps/chosen": -52.942588806152344, |
|
"logps/rejected": -57.13978958129883, |
|
"loss": 0.6698, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.052157748490571976, |
|
"rewards/margins": 0.05456269904971123, |
|
"rewards/rejected": -0.0024049447383731604, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 1.4697406340057637, |
|
"grad_norm": 35.095802307128906, |
|
"learning_rate": 9.966624118614611e-09, |
|
"logits/chosen": -1.6719284057617188, |
|
"logits/rejected": -1.6706676483154297, |
|
"logps/chosen": -57.96167755126953, |
|
"logps/rejected": -62.491973876953125, |
|
"loss": 0.6654, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.051690973341464996, |
|
"rewards/margins": 0.0684889405965805, |
|
"rewards/rejected": -0.016797970980405807, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 1.4769452449567724, |
|
"grad_norm": 28.057044982910156, |
|
"learning_rate": 9.71660633925438e-09, |
|
"logits/chosen": -1.6711444854736328, |
|
"logits/rejected": -1.6477901935577393, |
|
"logps/chosen": -57.71684646606445, |
|
"logps/rejected": -60.13401412963867, |
|
"loss": 0.6615, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.04812953621149063, |
|
"rewards/margins": 0.07305117696523666, |
|
"rewards/rejected": -0.024921633303165436, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.484149855907781, |
|
"grad_norm": 25.635986328125, |
|
"learning_rate": 9.469005850991705e-09, |
|
"logits/chosen": -1.6958105564117432, |
|
"logits/rejected": -1.673832654953003, |
|
"logps/chosen": -50.827880859375, |
|
"logps/rejected": -51.07634735107422, |
|
"loss": 0.666, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.034776292741298676, |
|
"rewards/margins": 0.06257718801498413, |
|
"rewards/rejected": -0.027800898998975754, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 1.4913544668587897, |
|
"grad_norm": 36.9734992980957, |
|
"learning_rate": 9.223861815446682e-09, |
|
"logits/chosen": -1.7042324542999268, |
|
"logits/rejected": -1.6777251958847046, |
|
"logps/chosen": -58.7913703918457, |
|
"logps/rejected": -60.8325080871582, |
|
"loss": 0.6662, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.034731216728687286, |
|
"rewards/margins": 0.062271714210510254, |
|
"rewards/rejected": -0.027540501207113266, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 1.4985590778097984, |
|
"grad_norm": 26.555492401123047, |
|
"learning_rate": 8.981213005715627e-09, |
|
"logits/chosen": -1.5820460319519043, |
|
"logits/rejected": -1.5792698860168457, |
|
"logps/chosen": -55.81233596801758, |
|
"logps/rejected": -60.40593338012695, |
|
"loss": 0.6603, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.05028228089213371, |
|
"rewards/margins": 0.07836754620075226, |
|
"rewards/rejected": -0.02808527648448944, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 1.505763688760807, |
|
"grad_norm": 24.672306060791016, |
|
"learning_rate": 8.741097800238617e-09, |
|
"logits/chosen": -1.6848942041397095, |
|
"logits/rejected": -1.6710937023162842, |
|
"logps/chosen": -48.45435333251953, |
|
"logps/rejected": -52.852561950683594, |
|
"loss": 0.6598, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.04019053652882576, |
|
"rewards/margins": 0.07907537370920181, |
|
"rewards/rejected": -0.03888483718037605, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 1.5129682997118157, |
|
"grad_norm": 31.16644859313965, |
|
"learning_rate": 8.503554176729341e-09, |
|
"logits/chosen": -1.6204407215118408, |
|
"logits/rejected": -1.6291402578353882, |
|
"logps/chosen": -47.65703582763672, |
|
"logps/rejected": -51.62767791748047, |
|
"loss": 0.6664, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.05907278507947922, |
|
"rewards/margins": 0.06321103870868683, |
|
"rewards/rejected": -0.00413826247677207, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.5201729106628243, |
|
"grad_norm": 26.00917625427246, |
|
"learning_rate": 8.268619706168376e-09, |
|
"logits/chosen": -1.6322330236434937, |
|
"logits/rejected": -1.6150935888290405, |
|
"logps/chosen": -50.94877624511719, |
|
"logps/rejected": -54.5855598449707, |
|
"loss": 0.6603, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.05506844073534012, |
|
"rewards/margins": 0.07977042347192764, |
|
"rewards/rejected": -0.024701988324522972, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 1.527377521613833, |
|
"grad_norm": 30.194684982299805, |
|
"learning_rate": 8.036331546860777e-09, |
|
"logits/chosen": -1.4862804412841797, |
|
"logits/rejected": -1.4699585437774658, |
|
"logps/chosen": -58.9954948425293, |
|
"logps/rejected": -61.62345504760742, |
|
"loss": 0.6674, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.05533316731452942, |
|
"rewards/margins": 0.06020069867372513, |
|
"rewards/rejected": -0.004867529030889273, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 1.5345821325648417, |
|
"grad_norm": 29.14430809020996, |
|
"learning_rate": 7.806726438559003e-09, |
|
"logits/chosen": -1.6214964389801025, |
|
"logits/rejected": -1.6216814517974854, |
|
"logps/chosen": -57.3265495300293, |
|
"logps/rejected": -60.05931854248047, |
|
"loss": 0.6725, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.04320087283849716, |
|
"rewards/margins": 0.0479980893433094, |
|
"rewards/rejected": -0.004797212313860655, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 1.54178674351585, |
|
"grad_norm": 43.7021484375, |
|
"learning_rate": 7.579840696651938e-09, |
|
"logits/chosen": -1.6111637353897095, |
|
"logits/rejected": -1.598354697227478, |
|
"logps/chosen": -59.49543380737305, |
|
"logps/rejected": -62.404541015625, |
|
"loss": 0.6614, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.04770822077989578, |
|
"rewards/margins": 0.07293849438428879, |
|
"rewards/rejected": -0.025230273604393005, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 1.5489913544668588, |
|
"grad_norm": 31.50617218017578, |
|
"learning_rate": 7.355710206421098e-09, |
|
"logits/chosen": -1.5231688022613525, |
|
"logits/rejected": -1.517321228981018, |
|
"logps/chosen": -58.44475555419922, |
|
"logps/rejected": -63.36370849609375, |
|
"loss": 0.6602, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.05046730488538742, |
|
"rewards/margins": 0.07906536757946014, |
|
"rewards/rejected": -0.028598055243492126, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.5561959654178674, |
|
"grad_norm": 27.282373428344727, |
|
"learning_rate": 7.134370417364849e-09, |
|
"logits/chosen": -1.6577104330062866, |
|
"logits/rejected": -1.648329496383667, |
|
"logps/chosen": -59.193382263183594, |
|
"logps/rejected": -60.406333923339844, |
|
"loss": 0.6538, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.05481324344873428, |
|
"rewards/margins": 0.08997657150030136, |
|
"rewards/rejected": -0.035163331776857376, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 1.563400576368876, |
|
"grad_norm": 35.18039321899414, |
|
"learning_rate": 6.915856337591572e-09, |
|
"logits/chosen": -1.6204932928085327, |
|
"logits/rejected": -1.6136722564697266, |
|
"logps/chosen": -50.05255889892578, |
|
"logps/rejected": -54.053077697753906, |
|
"loss": 0.6627, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.07332804799079895, |
|
"rewards/margins": 0.07051368057727814, |
|
"rewards/rejected": 0.002814366715028882, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 1.5706051873198847, |
|
"grad_norm": 32.98238754272461, |
|
"learning_rate": 6.700202528282603e-09, |
|
"logits/chosen": -1.6378253698349, |
|
"logits/rejected": -1.6315876245498657, |
|
"logps/chosen": -60.23234176635742, |
|
"logps/rejected": -64.01595306396484, |
|
"loss": 0.6756, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.02953862026333809, |
|
"rewards/margins": 0.04212776944041252, |
|
"rewards/rejected": -0.012589153833687305, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 1.5778097982708934, |
|
"grad_norm": 35.59558868408203, |
|
"learning_rate": 6.487443098225892e-09, |
|
"logits/chosen": -1.699599027633667, |
|
"logits/rejected": -1.697575569152832, |
|
"logps/chosen": -55.896812438964844, |
|
"logps/rejected": -62.44630813598633, |
|
"loss": 0.6658, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.05821943283081055, |
|
"rewards/margins": 0.06381964683532715, |
|
"rewards/rejected": -0.005600206553936005, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 1.585014409221902, |
|
"grad_norm": 31.080228805541992, |
|
"learning_rate": 6.277611698421179e-09, |
|
"logits/chosen": -1.6873823404312134, |
|
"logits/rejected": -1.6729217767715454, |
|
"logps/chosen": -55.1784782409668, |
|
"logps/rejected": -58.860107421875, |
|
"loss": 0.6497, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.08172028511762619, |
|
"rewards/margins": 0.09651371091604233, |
|
"rewards/rejected": -0.014793431386351585, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.5922190201729105, |
|
"grad_norm": 28.780492782592773, |
|
"learning_rate": 6.070741516757608e-09, |
|
"logits/chosen": -1.6578247547149658, |
|
"logits/rejected": -1.6496769189834595, |
|
"logps/chosen": -55.101890563964844, |
|
"logps/rejected": -56.972412109375, |
|
"loss": 0.6647, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.05884435027837753, |
|
"rewards/margins": 0.06767454743385315, |
|
"rewards/rejected": -0.008830199018120766, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 1.5994236311239192, |
|
"grad_norm": 31.7725887298584, |
|
"learning_rate": 5.866865272764607e-09, |
|
"logits/chosen": -1.639620065689087, |
|
"logits/rejected": -1.632886290550232, |
|
"logps/chosen": -60.59607696533203, |
|
"logps/rejected": -65.77485656738281, |
|
"loss": 0.6552, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.06030546501278877, |
|
"rewards/margins": 0.088438019156456, |
|
"rewards/rejected": -0.028132546693086624, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 1.6066282420749278, |
|
"grad_norm": 26.901762008666992, |
|
"learning_rate": 5.666015212436795e-09, |
|
"logits/chosen": -1.6595134735107422, |
|
"logits/rejected": -1.6518280506134033, |
|
"logps/chosen": -55.67797088623047, |
|
"logps/rejected": -59.78864288330078, |
|
"loss": 0.6686, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.04280759021639824, |
|
"rewards/margins": 0.05854750797152519, |
|
"rewards/rejected": -0.015739915892481804, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 1.6138328530259365, |
|
"grad_norm": 28.554101943969727, |
|
"learning_rate": 5.46822310313379e-09, |
|
"logits/chosen": -1.6626936197280884, |
|
"logits/rejected": -1.6616346836090088, |
|
"logps/chosen": -46.335777282714844, |
|
"logps/rejected": -52.65189743041992, |
|
"loss": 0.6545, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.06703133881092072, |
|
"rewards/margins": 0.09231172502040863, |
|
"rewards/rejected": -0.025280386209487915, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.6210374639769451, |
|
"grad_norm": 29.99852752685547, |
|
"learning_rate": 5.273520228555767e-09, |
|
"logits/chosen": -1.712920904159546, |
|
"logits/rejected": -1.695810317993164, |
|
"logps/chosen": -56.5516242980957, |
|
"logps/rejected": -60.3693962097168, |
|
"loss": 0.6731, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.04221775382757187, |
|
"rewards/margins": 0.051514916121959686, |
|
"rewards/rejected": -0.009297164157032967, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.6282420749279538, |
|
"grad_norm": 27.436594009399414, |
|
"learning_rate": 5.081937383795484e-09, |
|
"logits/chosen": -1.606261968612671, |
|
"logits/rejected": -1.5864953994750977, |
|
"logps/chosen": -50.1980094909668, |
|
"logps/rejected": -53.731651306152344, |
|
"loss": 0.6628, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.061611782759428024, |
|
"rewards/margins": 0.06988058984279633, |
|
"rewards/rejected": -0.008268805220723152, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 1.6354466858789625, |
|
"grad_norm": 33.33866882324219, |
|
"learning_rate": 4.893504870467588e-09, |
|
"logits/chosen": -1.676597237586975, |
|
"logits/rejected": -1.6628268957138062, |
|
"logps/chosen": -52.58159637451172, |
|
"logps/rejected": -55.40885543823242, |
|
"loss": 0.6674, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.04663368687033653, |
|
"rewards/margins": 0.06050438806414604, |
|
"rewards/rejected": -0.013870703987777233, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 1.6426512968299711, |
|
"grad_norm": 28.910106658935547, |
|
"learning_rate": 4.708252491915951e-09, |
|
"logits/chosen": -1.6143203973770142, |
|
"logits/rejected": -1.6018993854522705, |
|
"logps/chosen": -59.179710388183594, |
|
"logps/rejected": -62.8307991027832, |
|
"loss": 0.6755, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.043866075575351715, |
|
"rewards/margins": 0.04365686699748039, |
|
"rewards/rejected": 0.00020921006216667593, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.6498559077809798, |
|
"grad_norm": 36.226402282714844, |
|
"learning_rate": 4.526209548499877e-09, |
|
"logits/chosen": -1.6284698247909546, |
|
"logits/rejected": -1.6161683797836304, |
|
"logps/chosen": -56.088829040527344, |
|
"logps/rejected": -55.797828674316406, |
|
"loss": 0.6607, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.04964672029018402, |
|
"rewards/margins": 0.07595410943031311, |
|
"rewards/rejected": -0.026307392865419388, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 1.6570605187319885, |
|
"grad_norm": 27.529035568237305, |
|
"learning_rate": 4.347404832959775e-09, |
|
"logits/chosen": -1.6585400104522705, |
|
"logits/rejected": -1.6393734216690063, |
|
"logps/chosen": -49.68330764770508, |
|
"logps/rejected": -51.410423278808594, |
|
"loss": 0.6552, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.062341026961803436, |
|
"rewards/margins": 0.09110082685947418, |
|
"rewards/rejected": -0.02875981293618679, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.6642651296829971, |
|
"grad_norm": 36.036582946777344, |
|
"learning_rate": 4.171866625863229e-09, |
|
"logits/chosen": -1.6054232120513916, |
|
"logits/rejected": -1.591740608215332, |
|
"logps/chosen": -62.186973571777344, |
|
"logps/rejected": -63.91868209838867, |
|
"loss": 0.6753, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 0.06225902959704399, |
|
"rewards/margins": 0.044443391263484955, |
|
"rewards/rejected": 0.01781563274562359, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 1.6714697406340058, |
|
"grad_norm": 24.637149810791016, |
|
"learning_rate": 3.9996226911319546e-09, |
|
"logits/chosen": -1.6677039861679077, |
|
"logits/rejected": -1.6617395877838135, |
|
"logps/chosen": -50.32910919189453, |
|
"logps/rejected": -52.85070037841797, |
|
"loss": 0.6627, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.048066943883895874, |
|
"rewards/margins": 0.07331161946058273, |
|
"rewards/rejected": -0.02524467371404171, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.6786743515850144, |
|
"grad_norm": 28.134784698486328, |
|
"learning_rate": 3.830700271650567e-09, |
|
"logits/chosen": -1.7151075601577759, |
|
"logits/rejected": -1.70746648311615, |
|
"logps/chosen": -49.42131042480469, |
|
"logps/rejected": -54.725257873535156, |
|
"loss": 0.6551, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.04775792732834816, |
|
"rewards/margins": 0.09152387082576752, |
|
"rewards/rejected": -0.04376594349741936, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 1.685878962536023, |
|
"grad_norm": 31.970741271972656, |
|
"learning_rate": 3.665126084957723e-09, |
|
"logits/chosen": -1.6030555963516235, |
|
"logits/rejected": -1.5942625999450684, |
|
"logps/chosen": -47.702667236328125, |
|
"logps/rejected": -51.36510467529297, |
|
"loss": 0.6517, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.06474478542804718, |
|
"rewards/margins": 0.09533517807722092, |
|
"rewards/rejected": -0.030590396374464035, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.6930835734870318, |
|
"grad_norm": 30.708681106567383, |
|
"learning_rate": 3.502926319020327e-09, |
|
"logits/chosen": -1.5764755010604858, |
|
"logits/rejected": -1.5539076328277588, |
|
"logps/chosen": -60.64631271362305, |
|
"logps/rejected": -61.80460739135742, |
|
"loss": 0.6634, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.047983959317207336, |
|
"rewards/margins": 0.07177029550075531, |
|
"rewards/rejected": -0.023786336183547974, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.7002881844380404, |
|
"grad_norm": 29.933921813964844, |
|
"learning_rate": 3.3441266280915427e-09, |
|
"logits/chosen": -1.6186996698379517, |
|
"logits/rejected": -1.5993722677230835, |
|
"logps/chosen": -49.01648712158203, |
|
"logps/rejected": -54.099952697753906, |
|
"loss": 0.6526, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.055793993175029755, |
|
"rewards/margins": 0.09487829357385635, |
|
"rewards/rejected": -0.0390842966735363, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.707492795389049, |
|
"grad_norm": 32.35788345336914, |
|
"learning_rate": 3.1887521286532023e-09, |
|
"logits/chosen": -1.7109838724136353, |
|
"logits/rejected": -1.6939365863800049, |
|
"logps/chosen": -57.83002853393555, |
|
"logps/rejected": -58.35017013549805, |
|
"loss": 0.6684, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.055140089243650436, |
|
"rewards/margins": 0.055782973766326904, |
|
"rewards/rejected": -0.0006428823107853532, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 1.7146974063400577, |
|
"grad_norm": 35.61783981323242, |
|
"learning_rate": 3.0368273954432698e-09, |
|
"logits/chosen": -1.7026067972183228, |
|
"logits/rejected": -1.6995939016342163, |
|
"logps/chosen": -57.07318878173828, |
|
"logps/rejected": -60.9394416809082, |
|
"loss": 0.6619, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.05400969833135605, |
|
"rewards/margins": 0.07402704656124115, |
|
"rewards/rejected": -0.0200173519551754, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.7219020172910664, |
|
"grad_norm": 25.527734756469727, |
|
"learning_rate": 2.888376457568964e-09, |
|
"logits/chosen": -1.7665998935699463, |
|
"logits/rejected": -1.752995491027832, |
|
"logps/chosen": -52.87370681762695, |
|
"logps/rejected": -56.10619354248047, |
|
"loss": 0.6711, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.0432610847055912, |
|
"rewards/margins": 0.053575366735458374, |
|
"rewards/rejected": -0.01031428575515747, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 1.729106628242075, |
|
"grad_norm": 25.41728973388672, |
|
"learning_rate": 2.7434227947062324e-09, |
|
"logits/chosen": -1.6827430725097656, |
|
"logits/rejected": -1.6787803173065186, |
|
"logps/chosen": -53.131996154785156, |
|
"logps/rejected": -58.491615295410156, |
|
"loss": 0.6501, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.06697873771190643, |
|
"rewards/margins": 0.0972275361418724, |
|
"rewards/rejected": -0.030248800292611122, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.7363112391930837, |
|
"grad_norm": 31.781221389770508, |
|
"learning_rate": 2.6019893333860954e-09, |
|
"logits/chosen": -1.6549618244171143, |
|
"logits/rejected": -1.6534698009490967, |
|
"logps/chosen": -55.04658126831055, |
|
"logps/rejected": -59.05900192260742, |
|
"loss": 0.6644, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.043295614421367645, |
|
"rewards/margins": 0.06643761694431305, |
|
"rewards/rejected": -0.02314199134707451, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 1.7435158501440924, |
|
"grad_norm": 25.171480178833008, |
|
"learning_rate": 2.4640984433684758e-09, |
|
"logits/chosen": -1.639723539352417, |
|
"logits/rejected": -1.6242148876190186, |
|
"logps/chosen": -50.12556457519531, |
|
"logps/rejected": -52.221473693847656, |
|
"loss": 0.6612, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.041132062673568726, |
|
"rewards/margins": 0.07752031087875366, |
|
"rewards/rejected": -0.03638824075460434, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.7507204610951008, |
|
"grad_norm": 29.74759292602539, |
|
"learning_rate": 2.3297719341040856e-09, |
|
"logits/chosen": -1.6303246021270752, |
|
"logits/rejected": -1.6217195987701416, |
|
"logps/chosen": -52.71974563598633, |
|
"logps/rejected": -58.83891677856445, |
|
"loss": 0.6638, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.039412498474121094, |
|
"rewards/margins": 0.07280907779932022, |
|
"rewards/rejected": -0.033396583050489426, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 1.7579250720461095, |
|
"grad_norm": 28.20793342590332, |
|
"learning_rate": 2.199031051284972e-09, |
|
"logits/chosen": -1.699127435684204, |
|
"logits/rejected": -1.6883512735366821, |
|
"logps/chosen": -51.349586486816406, |
|
"logps/rejected": -54.69416427612305, |
|
"loss": 0.6645, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.052664875984191895, |
|
"rewards/margins": 0.0667290985584259, |
|
"rewards/rejected": -0.014064219780266285, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.7651296829971181, |
|
"grad_norm": 23.49250602722168, |
|
"learning_rate": 2.0718964734841667e-09, |
|
"logits/chosen": -1.6610476970672607, |
|
"logits/rejected": -1.6484695672988892, |
|
"logps/chosen": -56.7994270324707, |
|
"logps/rejected": -58.2902717590332, |
|
"loss": 0.6643, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.050318509340286255, |
|
"rewards/margins": 0.06775031238794327, |
|
"rewards/rejected": -0.01743180863559246, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.7723342939481268, |
|
"grad_norm": 23.373260498046875, |
|
"learning_rate": 1.948388308885102e-09, |
|
"logits/chosen": -1.7280973196029663, |
|
"logits/rejected": -1.7087351083755493, |
|
"logps/chosen": -50.309112548828125, |
|
"logps/rejected": -52.12499237060547, |
|
"loss": 0.6544, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.05861986428499222, |
|
"rewards/margins": 0.09302407503128052, |
|
"rewards/rejected": -0.034404207020998, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.7795389048991355, |
|
"grad_norm": 37.758689880371094, |
|
"learning_rate": 1.8285260921011846e-09, |
|
"logits/chosen": -1.7070372104644775, |
|
"logits/rejected": -1.6979036331176758, |
|
"logps/chosen": -60.36076736450195, |
|
"logps/rejected": -62.269325256347656, |
|
"loss": 0.6707, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.048447128385305405, |
|
"rewards/margins": 0.05468007177114487, |
|
"rewards/rejected": -0.006232939660549164, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 1.7867435158501441, |
|
"grad_norm": 23.636079788208008, |
|
"learning_rate": 1.712328781086131e-09, |
|
"logits/chosen": -1.6355743408203125, |
|
"logits/rejected": -1.6160838603973389, |
|
"logps/chosen": -55.45972442626953, |
|
"logps/rejected": -57.82331466674805, |
|
"loss": 0.6592, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.05385429412126541, |
|
"rewards/margins": 0.07988177239894867, |
|
"rewards/rejected": -0.026027489453554153, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.7939481268011528, |
|
"grad_norm": 24.043498992919922, |
|
"learning_rate": 1.59981475413547e-09, |
|
"logits/chosen": -1.6116002798080444, |
|
"logits/rejected": -1.594987154006958, |
|
"logps/chosen": -49.88407516479492, |
|
"logps/rejected": -53.558563232421875, |
|
"loss": 0.6472, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.07869906723499298, |
|
"rewards/margins": 0.10210321098566055, |
|
"rewards/rejected": -0.023404140025377274, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 1.8011527377521612, |
|
"grad_norm": 27.632863998413086, |
|
"learning_rate": 1.491001806979772e-09, |
|
"logits/chosen": -1.5927178859710693, |
|
"logits/rejected": -1.5906251668930054, |
|
"logps/chosen": -51.23817825317383, |
|
"logps/rejected": -57.8389778137207, |
|
"loss": 0.6476, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.05806579068303108, |
|
"rewards/margins": 0.10276976972818375, |
|
"rewards/rejected": -0.04470398277044296, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.8083573487031699, |
|
"grad_norm": 30.61586570739746, |
|
"learning_rate": 1.3859071499699698e-09, |
|
"logits/chosen": -1.6454540491104126, |
|
"logits/rejected": -1.6320326328277588, |
|
"logps/chosen": -56.015342712402344, |
|
"logps/rejected": -58.20482635498047, |
|
"loss": 0.6633, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.04570765793323517, |
|
"rewards/margins": 0.07200601696968079, |
|
"rewards/rejected": -0.026298364624381065, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 1.8155619596541785, |
|
"grad_norm": 22.36056137084961, |
|
"learning_rate": 1.2845474053553156e-09, |
|
"logits/chosen": -1.6226694583892822, |
|
"logits/rejected": -1.6134302616119385, |
|
"logps/chosen": -55.82828903198242, |
|
"logps/rejected": -58.674468994140625, |
|
"loss": 0.6533, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.04953853040933609, |
|
"rewards/margins": 0.09255535155534744, |
|
"rewards/rejected": -0.043016817420721054, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.8227665706051872, |
|
"grad_norm": 26.279998779296875, |
|
"learning_rate": 1.1869386046543222e-09, |
|
"logits/chosen": -1.6014668941497803, |
|
"logits/rejected": -1.59114670753479, |
|
"logps/chosen": -50.55707931518555, |
|
"logps/rejected": -54.89263153076172, |
|
"loss": 0.6394, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.080271877348423, |
|
"rewards/margins": 0.12159235775470734, |
|
"rewards/rejected": -0.04132048413157463, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 1.8299711815561959, |
|
"grad_norm": 29.773160934448242, |
|
"learning_rate": 1.0930961861191302e-09, |
|
"logits/chosen": -1.6185270547866821, |
|
"logits/rejected": -1.6171890497207642, |
|
"logps/chosen": -45.08892059326172, |
|
"logps/rejected": -50.504432678222656, |
|
"loss": 0.6624, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.0479862280189991, |
|
"rewards/margins": 0.07539573311805725, |
|
"rewards/rejected": -0.02740950882434845, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.8371757925072045, |
|
"grad_norm": 25.530391693115234, |
|
"learning_rate": 1.003034992293733e-09, |
|
"logits/chosen": -1.6437381505966187, |
|
"logits/rejected": -1.6238796710968018, |
|
"logps/chosen": -46.61026382446289, |
|
"logps/rejected": -51.9169807434082, |
|
"loss": 0.6639, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.06347519159317017, |
|
"rewards/margins": 0.06826033443212509, |
|
"rewards/rejected": -0.004785154014825821, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.8443804034582132, |
|
"grad_norm": 24.680849075317383, |
|
"learning_rate": 9.16769267666434e-10, |
|
"logits/chosen": -1.6180617809295654, |
|
"logits/rejected": -1.5980396270751953, |
|
"logps/chosen": -50.46776580810547, |
|
"logps/rejected": -51.920799255371094, |
|
"loss": 0.6602, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.0773412436246872, |
|
"rewards/margins": 0.07882087677717209, |
|
"rewards/rejected": -0.0014796493342146277, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.8515850144092219, |
|
"grad_norm": 37.89069747924805, |
|
"learning_rate": 8.343126564168412e-10, |
|
"logits/chosen": -1.6480731964111328, |
|
"logits/rejected": -1.6331026554107666, |
|
"logps/chosen": -56.44977951049805, |
|
"logps/rejected": -60.410072326660156, |
|
"loss": 0.6589, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.05443434789776802, |
|
"rewards/margins": 0.07955367118120193, |
|
"rewards/rejected": -0.025119328871369362, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 1.8587896253602305, |
|
"grad_norm": 27.42110252380371, |
|
"learning_rate": 7.55678200257856e-10, |
|
"logits/chosen": -1.624707818031311, |
|
"logits/rejected": -1.605147361755371, |
|
"logps/chosen": -54.56407928466797, |
|
"logps/rejected": -56.504920959472656, |
|
"loss": 0.6623, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.05488407611846924, |
|
"rewards/margins": 0.07069625705480576, |
|
"rewards/rejected": -0.015812188386917114, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.8659942363112392, |
|
"grad_norm": 31.333648681640625, |
|
"learning_rate": 6.808783363729364e-10, |
|
"logits/chosen": -1.5887932777404785, |
|
"logits/rejected": -1.574791431427002, |
|
"logps/chosen": -48.72455596923828, |
|
"logps/rejected": -53.04241180419922, |
|
"loss": 0.6551, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.05235203355550766, |
|
"rewards/margins": 0.09002417325973511, |
|
"rewards/rejected": -0.03767213225364685, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 1.8731988472622478, |
|
"grad_norm": 31.04511260986328, |
|
"learning_rate": 6.099248954489794e-10, |
|
"logits/chosen": -1.6404969692230225, |
|
"logits/rejected": -1.6319074630737305, |
|
"logps/chosen": -50.55232620239258, |
|
"logps/rejected": -53.26167678833008, |
|
"loss": 0.6655, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.06544728577136993, |
|
"rewards/margins": 0.06491607427597046, |
|
"rewards/rejected": 0.0005312118446454406, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.8804034582132565, |
|
"grad_norm": 27.33155632019043, |
|
"learning_rate": 5.428290998051116e-10, |
|
"logits/chosen": -1.6392921209335327, |
|
"logits/rejected": -1.6253206729888916, |
|
"logps/chosen": -49.612022399902344, |
|
"logps/rejected": -52.06800079345703, |
|
"loss": 0.6629, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.0555211678147316, |
|
"rewards/margins": 0.07389000803232193, |
|
"rewards/rejected": -0.018368840217590332, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 1.8876080691642652, |
|
"grad_norm": 24.837400436401367, |
|
"learning_rate": 4.796015616177401e-10, |
|
"logits/chosen": -1.597421407699585, |
|
"logits/rejected": -1.5863425731658936, |
|
"logps/chosen": -50.992122650146484, |
|
"logps/rejected": -53.921241760253906, |
|
"loss": 0.6523, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.06237851455807686, |
|
"rewards/margins": 0.09439820796251297, |
|
"rewards/rejected": -0.03201969712972641, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 1.8948126801152738, |
|
"grad_norm": 22.60968017578125, |
|
"learning_rate": 4.2025228124205335e-10, |
|
"logits/chosen": -1.6710373163223267, |
|
"logits/rejected": -1.6665174961090088, |
|
"logps/chosen": -61.06689453125, |
|
"logps/rejected": -62.720733642578125, |
|
"loss": 0.6719, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.042091116309165955, |
|
"rewards/margins": 0.04989165440201759, |
|
"rewards/rejected": -0.007800539024174213, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 1.9020172910662825, |
|
"grad_norm": 25.86883544921875, |
|
"learning_rate": 3.64790645630339e-10, |
|
"logits/chosen": -1.6532398462295532, |
|
"logits/rejected": -1.6340618133544922, |
|
"logps/chosen": -55.06281661987305, |
|
"logps/rejected": -56.2059440612793, |
|
"loss": 0.6654, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.0360020287334919, |
|
"rewards/margins": 0.06869436055421829, |
|
"rewards/rejected": -0.0326923243701458, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.9092219020172911, |
|
"grad_norm": 36.909793853759766, |
|
"learning_rate": 3.1322542684729945e-10, |
|
"logits/chosen": -1.6118682622909546, |
|
"logits/rejected": -1.5950068235397339, |
|
"logps/chosen": -64.02647399902344, |
|
"logps/rejected": -68.84220123291016, |
|
"loss": 0.6553, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.046797286719083786, |
|
"rewards/margins": 0.08603934943675995, |
|
"rewards/rejected": -0.03924206644296646, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.9164265129682998, |
|
"grad_norm": 22.410232543945312, |
|
"learning_rate": 2.6556478068261447e-10, |
|
"logits/chosen": -1.6788618564605713, |
|
"logits/rejected": -1.6813141107559204, |
|
"logps/chosen": -54.3393669128418, |
|
"logps/rejected": -57.85358810424805, |
|
"loss": 0.6593, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.0549929141998291, |
|
"rewards/margins": 0.07665206491947174, |
|
"rewards/rejected": -0.02165914885699749, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.9236311239193085, |
|
"grad_norm": 28.00406265258789, |
|
"learning_rate": 2.2181624536098952e-10, |
|
"logits/chosen": -1.6921355724334717, |
|
"logits/rejected": -1.6832196712493896, |
|
"logps/chosen": -51.8244514465332, |
|
"logps/rejected": -56.756736755371094, |
|
"loss": 0.6567, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.05170941352844238, |
|
"rewards/margins": 0.08497369289398193, |
|
"rewards/rejected": -0.03326428681612015, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 1.9308357348703171, |
|
"grad_norm": 24.003829956054688, |
|
"learning_rate": 1.819867403498737e-10, |
|
"logits/chosen": -1.6733729839324951, |
|
"logits/rejected": -1.6761459112167358, |
|
"logps/chosen": -55.07045364379883, |
|
"logps/rejected": -60.26169967651367, |
|
"loss": 0.6731, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.05987237021327019, |
|
"rewards/margins": 0.04959547519683838, |
|
"rewards/rejected": 0.01027689315378666, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.9380403458213258, |
|
"grad_norm": 30.30132293701172, |
|
"learning_rate": 1.4608256526505157e-10, |
|
"logits/chosen": -1.5777008533477783, |
|
"logits/rejected": -1.569038987159729, |
|
"logps/chosen": -60.8995475769043, |
|
"logps/rejected": -62.53614044189453, |
|
"loss": 0.671, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": 0.04738503322005272, |
|
"rewards/margins": 0.05407433584332466, |
|
"rewards/rejected": -0.006689299829304218, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 1.9452449567723344, |
|
"grad_norm": 34.3319206237793, |
|
"learning_rate": 1.1410939887425141e-10, |
|
"logits/chosen": -1.708012342453003, |
|
"logits/rejected": -1.6876204013824463, |
|
"logps/chosen": -53.042701721191406, |
|
"logps/rejected": -55.08272171020508, |
|
"loss": 0.6628, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.038316644728183746, |
|
"rewards/margins": 0.07014557719230652, |
|
"rewards/rejected": -0.031828928738832474, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.952449567723343, |
|
"grad_norm": 27.495458602905273, |
|
"learning_rate": 8.607229819898865e-11, |
|
"logits/chosen": -1.6768684387207031, |
|
"logits/rejected": -1.6618626117706299, |
|
"logps/chosen": -54.03479766845703, |
|
"logps/rejected": -56.34186553955078, |
|
"loss": 0.6652, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.04240185394883156, |
|
"rewards/margins": 0.06518776714801788, |
|
"rewards/rejected": -0.022785909473896027, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 1.9596541786743515, |
|
"grad_norm": 31.330108642578125, |
|
"learning_rate": 6.19756977147029e-11, |
|
"logits/chosen": -1.660762071609497, |
|
"logits/rejected": -1.6496670246124268, |
|
"logps/chosen": -48.30510711669922, |
|
"logps/rejected": -53.958099365234375, |
|
"loss": 0.66, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.058389853686094284, |
|
"rewards/margins": 0.08087347447872162, |
|
"rewards/rejected": -0.02248362824320793, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.9668587896253602, |
|
"grad_norm": 30.01300811767578, |
|
"learning_rate": 4.1823408649391265e-11, |
|
"logits/chosen": -1.5987229347229004, |
|
"logits/rejected": -1.592441439628601, |
|
"logps/chosen": -52.67683792114258, |
|
"logps/rejected": -55.06285858154297, |
|
"loss": 0.6684, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.045865319669246674, |
|
"rewards/margins": 0.05944997817277908, |
|
"rewards/rejected": -0.013584655709564686, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 1.9740634005763689, |
|
"grad_norm": 27.41546058654785, |
|
"learning_rate": 2.5618618380812694e-11, |
|
"logits/chosen": -1.725508451461792, |
|
"logits/rejected": -1.7198139429092407, |
|
"logps/chosen": -48.2393913269043, |
|
"logps/rejected": -53.41130447387695, |
|
"loss": 0.6594, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.0503121018409729, |
|
"rewards/margins": 0.07980336248874664, |
|
"rewards/rejected": -0.029491260647773743, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.9812680115273775, |
|
"grad_norm": 24.554004669189453, |
|
"learning_rate": 1.3363889932338501e-11, |
|
"logits/chosen": -1.6208444833755493, |
|
"logits/rejected": -1.6235567331314087, |
|
"logps/chosen": -53.99614334106445, |
|
"logps/rejected": -59.610862731933594, |
|
"loss": 0.666, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.05232435464859009, |
|
"rewards/margins": 0.061102528125047684, |
|
"rewards/rejected": -0.008778175339102745, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.9884726224783862, |
|
"grad_norm": 25.978759765625, |
|
"learning_rate": 5.061161567596061e-12, |
|
"logits/chosen": -1.7159192562103271, |
|
"logits/rejected": -1.7112420797348022, |
|
"logps/chosen": -54.39562225341797, |
|
"logps/rejected": -58.398590087890625, |
|
"loss": 0.6618, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.05306984856724739, |
|
"rewards/margins": 0.07363148033618927, |
|
"rewards/rejected": -0.02056163363158703, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 1.9956772334293948, |
|
"grad_norm": 23.055919647216797, |
|
"learning_rate": 7.11746483889053e-13, |
|
"logits/chosen": -1.6436536312103271, |
|
"logits/rejected": -1.631366491317749, |
|
"logps/chosen": -55.227935791015625, |
|
"logps/rejected": -57.9941520690918, |
|
"loss": 0.663, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.051635853946208954, |
|
"rewards/margins": 0.07143048942089081, |
|
"rewards/rejected": -0.019794631749391556, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 2776, |
|
"total_flos": 0.0, |
|
"train_loss": 0.67492206965812, |
|
"train_runtime": 4341.9182, |
|
"train_samples_per_second": 10.227, |
|
"train_steps_per_second": 0.639 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2776, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|