|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 1724, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 0.43359375, |
|
"learning_rate": 2.890173410404624e-09, |
|
"logits/chosen": 0.1325806975364685, |
|
"logits/rejected": 0.3077998757362366, |
|
"logps/chosen": -239.35935974121094, |
|
"logps/rejected": -304.581298828125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/margins_max": 0.0, |
|
"rewards/margins_min": 0.0, |
|
"rewards/margins_std": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.416015625, |
|
"learning_rate": 2.890173410404624e-08, |
|
"logits/chosen": -0.010774746537208557, |
|
"logits/rejected": 0.23452165722846985, |
|
"logps/chosen": -243.3074493408203, |
|
"logps/rejected": -304.1199035644531, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.00028879166347905993, |
|
"rewards/margins": 0.0006378353573381901, |
|
"rewards/margins_max": 0.0028404404874891043, |
|
"rewards/margins_min": -0.0015647696563974023, |
|
"rewards/margins_std": 0.0031149541027843952, |
|
"rewards/rejected": -0.00034904375206679106, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.400390625, |
|
"learning_rate": 5.780346820809248e-08, |
|
"logits/chosen": -0.05719061568379402, |
|
"logits/rejected": 0.5148837566375732, |
|
"logps/chosen": -272.7169494628906, |
|
"logps/rejected": -216.58859252929688, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.0008704366046003997, |
|
"rewards/margins": 0.0001740378502290696, |
|
"rewards/margins_max": 0.0022189407609403133, |
|
"rewards/margins_min": -0.0018708650022745132, |
|
"rewards/margins_std": 0.002891929354518652, |
|
"rewards/rejected": -0.0010444745421409607, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.4921875, |
|
"learning_rate": 8.670520231213872e-08, |
|
"logits/chosen": 0.05507341027259827, |
|
"logits/rejected": 0.5646872520446777, |
|
"logps/chosen": -272.96728515625, |
|
"logps/rejected": -252.10733032226562, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.0014279346214607358, |
|
"rewards/margins": -0.001033178297802806, |
|
"rewards/margins_max": 0.002007028553634882, |
|
"rewards/margins_min": -0.004073385149240494, |
|
"rewards/margins_std": 0.00429950188845396, |
|
"rewards/rejected": -0.00039475635276176035, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.447265625, |
|
"learning_rate": 1.1560693641618496e-07, |
|
"logits/chosen": -0.08530770242214203, |
|
"logits/rejected": 0.37523841857910156, |
|
"logps/chosen": -256.03692626953125, |
|
"logps/rejected": -224.8648223876953, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.0013576907804235816, |
|
"rewards/margins": -0.0014004515251144767, |
|
"rewards/margins_max": 0.0015217246254906058, |
|
"rewards/margins_min": -0.004322628024965525, |
|
"rewards/margins_std": 0.0041325814090669155, |
|
"rewards/rejected": 4.276079198461957e-05, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.45703125, |
|
"learning_rate": 1.445086705202312e-07, |
|
"logits/chosen": 0.10976707935333252, |
|
"logits/rejected": 0.40187758207321167, |
|
"logps/chosen": -205.61318969726562, |
|
"logps/rejected": -214.9802703857422, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.0007841205224394798, |
|
"rewards/margins": 0.0018329259473830462, |
|
"rewards/margins_max": 0.004336017183959484, |
|
"rewards/margins_min": -0.0006701658712700009, |
|
"rewards/margins_std": 0.0035399063490331173, |
|
"rewards/rejected": -0.0010488051921129227, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.39453125, |
|
"learning_rate": 1.7341040462427744e-07, |
|
"logits/chosen": 0.2901094853878021, |
|
"logits/rejected": 0.4794164299964905, |
|
"logps/chosen": -207.44509887695312, |
|
"logps/rejected": -231.39382934570312, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -0.001270442851819098, |
|
"rewards/margins": -0.0007280521094799042, |
|
"rewards/margins_max": 0.0019893264397978783, |
|
"rewards/margins_min": -0.0034454308915883303, |
|
"rewards/margins_std": 0.0038429535925388336, |
|
"rewards/rejected": -0.0005423908005468547, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.435546875, |
|
"learning_rate": 2.023121387283237e-07, |
|
"logits/chosen": 0.035371266305446625, |
|
"logits/rejected": 0.4755796492099762, |
|
"logps/chosen": -259.833740234375, |
|
"logps/rejected": -226.2167205810547, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.0010710505302995443, |
|
"rewards/margins": 0.0011786860413849354, |
|
"rewards/margins_max": 0.004792899824678898, |
|
"rewards/margins_min": -0.002435527741909027, |
|
"rewards/margins_std": 0.005111270118504763, |
|
"rewards/rejected": -0.0022497368045151234, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.4609375, |
|
"learning_rate": 2.3121387283236991e-07, |
|
"logits/chosen": 0.27303510904312134, |
|
"logits/rejected": 0.7382463216781616, |
|
"logps/chosen": -217.78671264648438, |
|
"logps/rejected": -208.35910034179688, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.2639263988821767e-05, |
|
"rewards/margins": 0.0014770211419090629, |
|
"rewards/margins_max": 0.0042491876520216465, |
|
"rewards/margins_min": -0.0012951450189575553, |
|
"rewards/margins_std": 0.003920434974133968, |
|
"rewards/rejected": -0.0014996604295447469, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.6640625, |
|
"learning_rate": 2.601156069364162e-07, |
|
"logits/chosen": -0.20650863647460938, |
|
"logits/rejected": 0.17405006289482117, |
|
"logps/chosen": -226.12808227539062, |
|
"logps/rejected": -233.56381225585938, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.000633719377219677, |
|
"rewards/margins": 0.0017947215819731355, |
|
"rewards/margins_max": 0.004501459188759327, |
|
"rewards/margins_min": -0.0009120159666053951, |
|
"rewards/margins_std": 0.0038279048167169094, |
|
"rewards/rejected": -0.0011610020883381367, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.431640625, |
|
"learning_rate": 2.890173410404624e-07, |
|
"logits/chosen": -0.019260473549365997, |
|
"logits/rejected": 0.5504380464553833, |
|
"logps/chosen": -292.51995849609375, |
|
"logps/rejected": -235.86843872070312, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.001650218851864338, |
|
"rewards/margins": 0.002649242291226983, |
|
"rewards/margins_max": 0.005218566861003637, |
|
"rewards/margins_min": 7.99179106252268e-05, |
|
"rewards/margins_std": 0.0036335731856524944, |
|
"rewards/rejected": -0.0009990233229473233, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.53125, |
|
"learning_rate": 3.1791907514450865e-07, |
|
"logits/chosen": -0.06840448081493378, |
|
"logits/rejected": 0.6899427175521851, |
|
"logps/chosen": -252.0308380126953, |
|
"logps/rejected": -199.84799194335938, |
|
"loss": 0.6918, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.0018273231107741594, |
|
"rewards/margins": 0.00415054801851511, |
|
"rewards/margins_max": 0.0076604606583714485, |
|
"rewards/margins_min": 0.0006406344473361969, |
|
"rewards/margins_std": 0.004963767249137163, |
|
"rewards/rejected": -0.0023232249077409506, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.36328125, |
|
"learning_rate": 3.468208092485549e-07, |
|
"logits/chosen": 0.09203040599822998, |
|
"logits/rejected": 0.5125548243522644, |
|
"logps/chosen": -256.213623046875, |
|
"logps/rejected": -232.49942016601562, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.0007183876005001366, |
|
"rewards/margins": 0.004233072511851788, |
|
"rewards/margins_max": 0.007029411382973194, |
|
"rewards/margins_min": 0.0014367332914844155, |
|
"rewards/margins_std": 0.003954620566219091, |
|
"rewards/rejected": -0.0035146852023899555, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.462890625, |
|
"learning_rate": 3.757225433526011e-07, |
|
"logits/chosen": -0.027632858604192734, |
|
"logits/rejected": 0.39557844400405884, |
|
"logps/chosen": -266.2771911621094, |
|
"logps/rejected": -271.76116943359375, |
|
"loss": 0.6907, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.002352924318984151, |
|
"rewards/margins": 0.005208231043070555, |
|
"rewards/margins_max": 0.008825947530567646, |
|
"rewards/margins_min": 0.001590514904819429, |
|
"rewards/margins_std": 0.005116222891956568, |
|
"rewards/rejected": -0.00285530649125576, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.40625, |
|
"learning_rate": 4.046242774566474e-07, |
|
"logits/chosen": 0.06764040887355804, |
|
"logits/rejected": 0.3966519236564636, |
|
"logps/chosen": -178.83749389648438, |
|
"logps/rejected": -188.39877319335938, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.0029165446758270264, |
|
"rewards/margins": 0.006306161172688007, |
|
"rewards/margins_max": 0.009462257847189903, |
|
"rewards/margins_min": 0.0031500644981861115, |
|
"rewards/margins_std": 0.004463394172489643, |
|
"rewards/rejected": -0.0033896160311996937, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.447265625, |
|
"learning_rate": 4.3352601156069365e-07, |
|
"logits/chosen": 0.011811649426817894, |
|
"logits/rejected": 0.4984157979488373, |
|
"logps/chosen": -268.1231994628906, |
|
"logps/rejected": -223.78799438476562, |
|
"loss": 0.6899, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.002369340742006898, |
|
"rewards/margins": 0.006674068979918957, |
|
"rewards/margins_max": 0.013764929957687855, |
|
"rewards/margins_min": -0.0004167918232269585, |
|
"rewards/margins_std": 0.010027991607785225, |
|
"rewards/rejected": -0.0043047284707427025, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 4.6242774566473983e-07, |
|
"logits/chosen": -0.03828499838709831, |
|
"logits/rejected": 0.3794795870780945, |
|
"logps/chosen": -245.52865600585938, |
|
"logps/rejected": -234.1727752685547, |
|
"loss": 0.689, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.004552280530333519, |
|
"rewards/margins": 0.008487861603498459, |
|
"rewards/margins_max": 0.012918056920170784, |
|
"rewards/margins_min": 0.004057666752487421, |
|
"rewards/margins_std": 0.006265241652727127, |
|
"rewards/rejected": -0.003935581538826227, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.49609375, |
|
"learning_rate": 4.913294797687861e-07, |
|
"logits/chosen": -0.0168992280960083, |
|
"logits/rejected": 0.500325620174408, |
|
"logps/chosen": -296.49517822265625, |
|
"logps/rejected": -248.3328094482422, |
|
"loss": 0.6887, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.003083079354837537, |
|
"rewards/margins": 0.006065175868570805, |
|
"rewards/margins_max": 0.011483820155262947, |
|
"rewards/margins_min": 0.0006465300684794784, |
|
"rewards/margins_std": 0.0076631223782896996, |
|
"rewards/rejected": -0.002982096979394555, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.40625, |
|
"learning_rate": 4.999748710138438e-07, |
|
"logits/chosen": 0.14815935492515564, |
|
"logits/rejected": 0.5510139465332031, |
|
"logps/chosen": -233.9811553955078, |
|
"logps/rejected": -228.5449676513672, |
|
"loss": 0.688, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.003167560789734125, |
|
"rewards/margins": 0.007796141318976879, |
|
"rewards/margins_max": 0.012642833404242992, |
|
"rewards/margins_min": 0.002949449699372053, |
|
"rewards/margins_std": 0.006854257546365261, |
|
"rewards/rejected": -0.004628580994904041, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.416015625, |
|
"learning_rate": 4.998518024263461e-07, |
|
"logits/chosen": 0.19040322303771973, |
|
"logits/rejected": 0.6236617565155029, |
|
"logps/chosen": -230.96762084960938, |
|
"logps/rejected": -211.4745330810547, |
|
"loss": 0.6871, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.006373309530317783, |
|
"rewards/margins": 0.012960617430508137, |
|
"rewards/margins_max": 0.01996336504817009, |
|
"rewards/margins_min": 0.0059578740037977695, |
|
"rewards/margins_std": 0.0099033759906888, |
|
"rewards/rejected": -0.006587309297174215, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.416015625, |
|
"learning_rate": 4.996262291366814e-07, |
|
"logits/chosen": 0.054732900112867355, |
|
"logits/rejected": 0.22424785792827606, |
|
"logps/chosen": -210.0012664794922, |
|
"logps/rejected": -233.76388549804688, |
|
"loss": 0.6873, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.004412280861288309, |
|
"rewards/margins": 0.011961949989199638, |
|
"rewards/margins_max": 0.017657486721873283, |
|
"rewards/margins_min": 0.006266415119171143, |
|
"rewards/margins_std": 0.0080547034740448, |
|
"rewards/rejected": -0.007549669593572617, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.498046875, |
|
"learning_rate": 4.992982436890003e-07, |
|
"logits/chosen": 0.09016792476177216, |
|
"logits/rejected": 0.45956069231033325, |
|
"logps/chosen": -226.3985595703125, |
|
"logps/rejected": -221.092529296875, |
|
"loss": 0.6868, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.005489318631589413, |
|
"rewards/margins": 0.013238553889095783, |
|
"rewards/margins_max": 0.018587926402688026, |
|
"rewards/margins_min": 0.00788918323814869, |
|
"rewards/margins_std": 0.007565152831375599, |
|
"rewards/rejected": -0.007749234326183796, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.458984375, |
|
"learning_rate": 4.988679806432711e-07, |
|
"logits/chosen": -0.08951343595981598, |
|
"logits/rejected": 0.46994414925575256, |
|
"logps/chosen": -264.4379577636719, |
|
"logps/rejected": -236.77346801757812, |
|
"loss": 0.6853, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.007678179536014795, |
|
"rewards/margins": 0.01784335821866989, |
|
"rewards/margins_max": 0.025632936507463455, |
|
"rewards/margins_min": 0.010053779929876328, |
|
"rewards/margins_std": 0.011016124859452248, |
|
"rewards/rejected": -0.010165175423026085, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.474609375, |
|
"learning_rate": 4.983356165200751e-07, |
|
"logits/chosen": 0.07358375936746597, |
|
"logits/rejected": 0.617803692817688, |
|
"logps/chosen": -276.56536865234375, |
|
"logps/rejected": -237.3117218017578, |
|
"loss": 0.6848, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.0074386284686625, |
|
"rewards/margins": 0.01824963092803955, |
|
"rewards/margins_max": 0.026552444323897362, |
|
"rewards/margins_min": 0.00994681753218174, |
|
"rewards/margins_std": 0.01174195110797882, |
|
"rewards/rejected": -0.010811002925038338, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.4296875, |
|
"learning_rate": 4.977013697281864e-07, |
|
"logits/chosen": 0.23069170117378235, |
|
"logits/rejected": 0.546830952167511, |
|
"logps/chosen": -229.92764282226562, |
|
"logps/rejected": -231.63357543945312, |
|
"loss": 0.6848, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.005361995659768581, |
|
"rewards/margins": 0.015256190672516823, |
|
"rewards/margins_max": 0.022752556949853897, |
|
"rewards/margins_min": 0.007759819272905588, |
|
"rewards/margins_std": 0.010601467452943325, |
|
"rewards/rejected": -0.009894194081425667, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.412109375, |
|
"learning_rate": 4.969655004749673e-07, |
|
"logits/chosen": 0.05646086856722832, |
|
"logits/rejected": 0.3687281012535095, |
|
"logps/chosen": -203.8467559814453, |
|
"logps/rejected": -216.0234375, |
|
"loss": 0.6846, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.002810864243656397, |
|
"rewards/margins": 0.014029537327587605, |
|
"rewards/margins_max": 0.019475888460874557, |
|
"rewards/margins_min": 0.008583188988268375, |
|
"rewards/margins_std": 0.007702300790697336, |
|
"rewards/rejected": -0.011218673549592495, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.490234375, |
|
"learning_rate": 4.961283106596155e-07, |
|
"logits/chosen": 0.1512751430273056, |
|
"logits/rejected": 0.5323320627212524, |
|
"logps/chosen": -256.96673583984375, |
|
"logps/rejected": -265.65509033203125, |
|
"loss": 0.6829, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.011281570419669151, |
|
"rewards/margins": 0.0202823244035244, |
|
"rewards/margins_max": 0.02979358099400997, |
|
"rewards/margins_min": 0.010771063156425953, |
|
"rewards/margins_std": 0.013450953178107738, |
|
"rewards/rejected": -0.009000752121210098, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.447265625, |
|
"learning_rate": 4.951901437493054e-07, |
|
"logits/chosen": 0.08749596029520035, |
|
"logits/rejected": 0.47565847635269165, |
|
"logps/chosen": -252.97323608398438, |
|
"logps/rejected": -220.1329803466797, |
|
"loss": 0.6826, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.005718126427382231, |
|
"rewards/margins": 0.019988398998975754, |
|
"rewards/margins_max": 0.025959456339478493, |
|
"rewards/margins_min": 0.014017338864505291, |
|
"rewards/margins_std": 0.008444352075457573, |
|
"rewards/rejected": -0.014270270243287086, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.453125, |
|
"learning_rate": 4.941513846382779e-07, |
|
"logits/chosen": 0.31170374155044556, |
|
"logits/rejected": 0.6478020548820496, |
|
"logps/chosen": -207.89794921875, |
|
"logps/rejected": -225.51791381835938, |
|
"loss": 0.6828, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.010051739402115345, |
|
"rewards/margins": 0.019436318427324295, |
|
"rewards/margins_max": 0.025176430121064186, |
|
"rewards/margins_min": 0.013696206733584404, |
|
"rewards/margins_std": 0.008117742836475372, |
|
"rewards/rejected": -0.009384581819176674, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.431640625, |
|
"learning_rate": 4.930124594899313e-07, |
|
"logits/chosen": 0.14136287569999695, |
|
"logits/rejected": 0.5530031323432922, |
|
"logps/chosen": -244.9897918701172, |
|
"logps/rejected": -244.90457153320312, |
|
"loss": 0.6814, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.0166664756834507, |
|
"rewards/margins": 0.02829556167125702, |
|
"rewards/margins_max": 0.037106942385435104, |
|
"rewards/margins_min": 0.019484177231788635, |
|
"rewards/margins_std": 0.012461178004741669, |
|
"rewards/rejected": -0.011629085056483746, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.494140625, |
|
"learning_rate": 4.917738355619842e-07, |
|
"logits/chosen": 0.2040259838104248, |
|
"logits/rejected": 0.6138412356376648, |
|
"logps/chosen": -193.21507263183594, |
|
"logps/rejected": -194.8699188232422, |
|
"loss": 0.6796, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.012191513553261757, |
|
"rewards/margins": 0.026244569569826126, |
|
"rewards/margins_max": 0.036748819053173065, |
|
"rewards/margins_min": 0.015740320086479187, |
|
"rewards/margins_std": 0.014855247922241688, |
|
"rewards/rejected": -0.01405305415391922, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.453125, |
|
"learning_rate": 4.904360210147762e-07, |
|
"logits/chosen": 0.1507195234298706, |
|
"logits/rejected": 0.5720406174659729, |
|
"logps/chosen": -242.0141143798828, |
|
"logps/rejected": -216.76132202148438, |
|
"loss": 0.6791, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.010296806693077087, |
|
"rewards/margins": 0.02473880909383297, |
|
"rewards/margins_max": 0.036660365760326385, |
|
"rewards/margins_min": 0.012817250564694405, |
|
"rewards/margins_std": 0.0168596301227808, |
|
"rewards/rejected": -0.014442001469433308, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.41796875, |
|
"learning_rate": 4.8899956470279e-07, |
|
"logits/chosen": -0.03488525375723839, |
|
"logits/rejected": 0.40159520506858826, |
|
"logps/chosen": -218.23812866210938, |
|
"logps/rejected": -190.8876953125, |
|
"loss": 0.679, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.014135973528027534, |
|
"rewards/margins": 0.02363484725356102, |
|
"rewards/margins_max": 0.036806877702474594, |
|
"rewards/margins_min": 0.010462815873324871, |
|
"rewards/margins_std": 0.018628064543008804, |
|
"rewards/rejected": -0.00949887465685606, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.4375, |
|
"learning_rate": 4.874650559494765e-07, |
|
"logits/chosen": 0.10674601793289185, |
|
"logits/rejected": 0.5667238831520081, |
|
"logps/chosen": -242.5848388671875, |
|
"logps/rejected": -212.60922241210938, |
|
"loss": 0.6782, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.008991287089884281, |
|
"rewards/margins": 0.02689727023243904, |
|
"rewards/margins_max": 0.03854988515377045, |
|
"rewards/margins_min": 0.015244655311107635, |
|
"rewards/margins_std": 0.016479285433888435, |
|
"rewards/rejected": -0.017905984073877335, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 4.858331243054782e-07, |
|
"logits/chosen": 0.09378918260335922, |
|
"logits/rejected": 0.42793530225753784, |
|
"logps/chosen": -282.80413818359375, |
|
"logps/rejected": -245.1541748046875, |
|
"loss": 0.6796, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.004886592272669077, |
|
"rewards/margins": 0.021504424512386322, |
|
"rewards/margins_max": 0.03542860597372055, |
|
"rewards/margins_min": 0.007580241654068232, |
|
"rewards/margins_std": 0.019691769033670425, |
|
"rewards/rejected": -0.016617832705378532, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.486328125, |
|
"learning_rate": 4.841044392903481e-07, |
|
"logits/chosen": 0.1290682703256607, |
|
"logits/rejected": 0.6047347784042358, |
|
"logps/chosen": -232.40908813476562, |
|
"logps/rejected": -181.57228088378906, |
|
"loss": 0.6783, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.008800150826573372, |
|
"rewards/margins": 0.028118547052145004, |
|
"rewards/margins_max": 0.04057111591100693, |
|
"rewards/margins_min": 0.015665989369153976, |
|
"rewards/margins_std": 0.0176105834543705, |
|
"rewards/rejected": -0.01931839995086193, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.435546875, |
|
"learning_rate": 4.822797101178718e-07, |
|
"logits/chosen": -0.10504484176635742, |
|
"logits/rejected": 0.437595933675766, |
|
"logps/chosen": -256.3827209472656, |
|
"logps/rejected": -231.28836059570312, |
|
"loss": 0.6777, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.014989467337727547, |
|
"rewards/margins": 0.03444572165608406, |
|
"rewards/margins_max": 0.04873298108577728, |
|
"rewards/margins_min": 0.02015846036374569, |
|
"rewards/margins_std": 0.020205235108733177, |
|
"rewards/rejected": -0.019456254318356514, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.390625, |
|
"learning_rate": 4.803596854051038e-07, |
|
"logits/chosen": -0.0018104672199115157, |
|
"logits/rejected": 0.5270112752914429, |
|
"logps/chosen": -251.33740234375, |
|
"logps/rejected": -203.73886108398438, |
|
"loss": 0.6749, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.010898159816861153, |
|
"rewards/margins": 0.02897489070892334, |
|
"rewards/margins_max": 0.041702691465616226, |
|
"rewards/margins_min": 0.016247089952230453, |
|
"rewards/margins_std": 0.01799982599914074, |
|
"rewards/rejected": -0.018076732754707336, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.3671875, |
|
"learning_rate": 4.783451528652382e-07, |
|
"logits/chosen": 0.03281222656369209, |
|
"logits/rejected": 0.3939230740070343, |
|
"logps/chosen": -203.0167694091797, |
|
"logps/rejected": -197.302490234375, |
|
"loss": 0.6775, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.01019463874399662, |
|
"rewards/margins": 0.030594149604439735, |
|
"rewards/margins_max": 0.041967082768678665, |
|
"rewards/margins_min": 0.019221220165491104, |
|
"rewards/margins_std": 0.01608375459909439, |
|
"rewards/rejected": -0.020399510860443115, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.4140625, |
|
"learning_rate": 4.7623693898443963e-07, |
|
"logits/chosen": 0.06993720680475235, |
|
"logits/rejected": 0.44206172227859497, |
|
"logps/chosen": -185.37237548828125, |
|
"logps/rejected": -187.4385986328125, |
|
"loss": 0.6751, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.009011445567011833, |
|
"rewards/margins": 0.03231946378946304, |
|
"rewards/margins_max": 0.04668620228767395, |
|
"rewards/margins_min": 0.017952727153897285, |
|
"rewards/margins_std": 0.02031763456761837, |
|
"rewards/rejected": -0.02330802008509636, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.44140625, |
|
"learning_rate": 4.740359086827685e-07, |
|
"logits/chosen": -0.0161175187677145, |
|
"logits/rejected": 0.4163980484008789, |
|
"logps/chosen": -239.71432495117188, |
|
"logps/rejected": -241.2501678466797, |
|
"loss": 0.6737, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.018473349511623383, |
|
"rewards/margins": 0.04534245282411575, |
|
"rewards/margins_max": 0.06162145733833313, |
|
"rewards/margins_min": 0.02906343713402748, |
|
"rewards/margins_std": 0.0230219978839159, |
|
"rewards/rejected": -0.026869099587202072, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.359375, |
|
"learning_rate": 4.7174296495933593e-07, |
|
"logits/chosen": -0.04076371714472771, |
|
"logits/rejected": 0.20715077221393585, |
|
"logps/chosen": -188.3863525390625, |
|
"logps/rejected": -203.01266479492188, |
|
"loss": 0.6749, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.011351143009960651, |
|
"rewards/margins": 0.03776105120778084, |
|
"rewards/margins_max": 0.05341630056500435, |
|
"rewards/margins_min": 0.022105801850557327, |
|
"rewards/margins_std": 0.022139865905046463, |
|
"rewards/rejected": -0.026409905403852463, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.478515625, |
|
"learning_rate": 4.6935904852183805e-07, |
|
"logits/chosen": 0.29291218519210815, |
|
"logits/rejected": 0.5505505800247192, |
|
"logps/chosen": -203.9456024169922, |
|
"logps/rejected": -217.8910369873047, |
|
"loss": 0.6712, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.012085825204849243, |
|
"rewards/margins": 0.038635291159152985, |
|
"rewards/margins_max": 0.059398896992206573, |
|
"rewards/margins_min": 0.017871689051389694, |
|
"rewards/margins_std": 0.029364168643951416, |
|
"rewards/rejected": -0.02654946781694889, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.431640625, |
|
"learning_rate": 4.6688513740061965e-07, |
|
"logits/chosen": 0.12483358383178711, |
|
"logits/rejected": 0.46587473154067993, |
|
"logps/chosen": -264.0867004394531, |
|
"logps/rejected": -292.27685546875, |
|
"loss": 0.6731, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.019537176936864853, |
|
"rewards/margins": 0.040542975068092346, |
|
"rewards/margins_max": 0.05839340761303902, |
|
"rewards/margins_min": 0.022692536935210228, |
|
"rewards/margins_std": 0.02524433098733425, |
|
"rewards/rejected": -0.021005798131227493, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.4296875, |
|
"learning_rate": 4.6432224654742475e-07, |
|
"logits/chosen": -0.0027520388830453157, |
|
"logits/rejected": 0.48325324058532715, |
|
"logps/chosen": -231.2857208251953, |
|
"logps/rejected": -221.3975372314453, |
|
"loss": 0.6719, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.017787110060453415, |
|
"rewards/margins": 0.04569714143872261, |
|
"rewards/margins_max": 0.06507585942745209, |
|
"rewards/margins_min": 0.026318421587347984, |
|
"rewards/margins_std": 0.027405640110373497, |
|
"rewards/rejected": -0.027910029515624046, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.4375, |
|
"learning_rate": 4.616714274190011e-07, |
|
"logits/chosen": 0.3332589566707611, |
|
"logits/rejected": 0.5584608316421509, |
|
"logps/chosen": -211.74325561523438, |
|
"logps/rejected": -225.31689453125, |
|
"loss": 0.6705, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.010198825970292091, |
|
"rewards/margins": 0.04217001795768738, |
|
"rewards/margins_max": 0.0582113042473793, |
|
"rewards/margins_min": 0.026128727942705154, |
|
"rewards/margins_std": 0.022685810923576355, |
|
"rewards/rejected": -0.031971193850040436, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.435546875, |
|
"learning_rate": 4.589337675457273e-07, |
|
"logits/chosen": 0.10014849901199341, |
|
"logits/rejected": 0.564907431602478, |
|
"logps/chosen": -217.19985961914062, |
|
"logps/rejected": -214.29440307617188, |
|
"loss": 0.6713, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.018607165664434433, |
|
"rewards/margins": 0.05433148890733719, |
|
"rewards/margins_max": 0.07488565146923065, |
|
"rewards/margins_min": 0.033777330070734024, |
|
"rewards/margins_std": 0.02906796894967556, |
|
"rewards/rejected": -0.03572431951761246, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.4609375, |
|
"learning_rate": 4.5611039008544007e-07, |
|
"logits/chosen": 0.13153567910194397, |
|
"logits/rejected": 0.652635931968689, |
|
"logps/chosen": -261.8456726074219, |
|
"logps/rejected": -231.66531372070312, |
|
"loss": 0.671, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.013766567222774029, |
|
"rewards/margins": 0.04572372883558273, |
|
"rewards/margins_max": 0.06320376694202423, |
|
"rewards/margins_min": 0.028243690729141235, |
|
"rewards/margins_std": 0.024720508605241776, |
|
"rewards/rejected": -0.03195716068148613, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.419921875, |
|
"learning_rate": 4.532024533626457e-07, |
|
"logits/chosen": 0.0050893365405499935, |
|
"logits/rejected": 0.3075583577156067, |
|
"logps/chosen": -214.87033081054688, |
|
"logps/rejected": -231.591064453125, |
|
"loss": 0.6694, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.012458743527531624, |
|
"rewards/margins": 0.046287618577480316, |
|
"rewards/margins_max": 0.06574501842260361, |
|
"rewards/margins_min": 0.026830215007066727, |
|
"rewards/margins_std": 0.02751692570745945, |
|
"rewards/rejected": -0.03382887691259384, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.435546875, |
|
"learning_rate": 4.502111503933032e-07, |
|
"logits/chosen": 0.16573339700698853, |
|
"logits/rejected": 0.5059231519699097, |
|
"logps/chosen": -214.00900268554688, |
|
"logps/rejected": -226.75070190429688, |
|
"loss": 0.6705, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.011546745896339417, |
|
"rewards/margins": 0.03893359750509262, |
|
"rewards/margins_max": 0.0571872778236866, |
|
"rewards/margins_min": 0.020679913461208344, |
|
"rewards/margins_std": 0.0258146021515131, |
|
"rewards/rejected": -0.027386849746108055, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.42578125, |
|
"learning_rate": 4.471377083953753e-07, |
|
"logits/chosen": 0.19767063856124878, |
|
"logits/rejected": 0.6161295175552368, |
|
"logps/chosen": -211.5915985107422, |
|
"logps/rejected": -231.336669921875, |
|
"loss": 0.6672, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.021602794528007507, |
|
"rewards/margins": 0.05690021067857742, |
|
"rewards/margins_max": 0.08022460341453552, |
|
"rewards/margins_min": 0.03357581049203873, |
|
"rewards/margins_std": 0.032985687255859375, |
|
"rewards/rejected": -0.03529741242527962, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.4609375, |
|
"learning_rate": 4.4398338828534766e-07, |
|
"logits/chosen": 0.051334965974092484, |
|
"logits/rejected": 0.5114815831184387, |
|
"logps/chosen": -252.36349487304688, |
|
"logps/rejected": -253.6934051513672, |
|
"loss": 0.67, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.021400339901447296, |
|
"rewards/margins": 0.05237139016389847, |
|
"rewards/margins_max": 0.07569600641727448, |
|
"rewards/margins_min": 0.029046764597296715, |
|
"rewards/margins_std": 0.03298599272966385, |
|
"rewards/rejected": -0.030971046537160873, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.40234375, |
|
"learning_rate": 4.407494841609224e-07, |
|
"logits/chosen": 0.16097505390644073, |
|
"logits/rejected": 0.503351092338562, |
|
"logps/chosen": -187.7499542236328, |
|
"logps/rejected": -182.64669799804688, |
|
"loss": 0.6691, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.015485493466258049, |
|
"rewards/margins": 0.039487432688474655, |
|
"rewards/margins_max": 0.0597788468003273, |
|
"rewards/margins_min": 0.019196024164557457, |
|
"rewards/margins_std": 0.028696388006210327, |
|
"rewards/rejected": -0.024001937359571457, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.462890625, |
|
"learning_rate": 4.374373227700993e-07, |
|
"logits/chosen": 0.03560265153646469, |
|
"logits/rejected": 0.5799299478530884, |
|
"logps/chosen": -273.8843688964844, |
|
"logps/rejected": -234.033935546875, |
|
"loss": 0.6673, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.007162511348724365, |
|
"rewards/margins": 0.0483052022755146, |
|
"rewards/margins_max": 0.06804867088794708, |
|
"rewards/margins_min": 0.028561726212501526, |
|
"rewards/margins_std": 0.027921488508582115, |
|
"rewards/rejected": -0.04114269092679024, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.408203125, |
|
"learning_rate": 4.340482629668615e-07, |
|
"logits/chosen": 0.027306120842695236, |
|
"logits/rejected": 0.671806812286377, |
|
"logps/chosen": -259.85015869140625, |
|
"logps/rejected": -201.55807495117188, |
|
"loss": 0.6673, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.02854643389582634, |
|
"rewards/margins": 0.0538957342505455, |
|
"rewards/margins_max": 0.0864059180021286, |
|
"rewards/margins_min": 0.0213855542242527, |
|
"rewards/margins_std": 0.045976340770721436, |
|
"rewards/rejected": -0.025349300354719162, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.3515625, |
|
"learning_rate": 4.30583695153689e-07, |
|
"logits/chosen": 0.04380347207188606, |
|
"logits/rejected": 0.4509994089603424, |
|
"logps/chosen": -273.69775390625, |
|
"logps/rejected": -259.96966552734375, |
|
"loss": 0.6693, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.022089816629886627, |
|
"rewards/margins": 0.056071024388074875, |
|
"rewards/margins_max": 0.08100839704275131, |
|
"rewards/margins_min": 0.031133651733398438, |
|
"rewards/margins_std": 0.035266775637865067, |
|
"rewards/rejected": -0.033981211483478546, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.4140625, |
|
"learning_rate": 4.2704504071112986e-07, |
|
"logits/chosen": 0.10579466819763184, |
|
"logits/rejected": 0.5407041311264038, |
|
"logps/chosen": -240.98483276367188, |
|
"logps/rejected": -211.9040985107422, |
|
"loss": 0.6687, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.017832906916737556, |
|
"rewards/margins": 0.05916459485888481, |
|
"rewards/margins_max": 0.08200596272945404, |
|
"rewards/margins_min": 0.036323241889476776, |
|
"rewards/margins_std": 0.03230256214737892, |
|
"rewards/rejected": -0.041331697255373, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.376953125, |
|
"learning_rate": 4.234337514146612e-07, |
|
"logits/chosen": 0.11410923302173615, |
|
"logits/rejected": 0.6912606954574585, |
|
"logps/chosen": -251.16793823242188, |
|
"logps/rejected": -229.26553344726562, |
|
"loss": 0.6663, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.019808156415820122, |
|
"rewards/margins": 0.05665863677859306, |
|
"rewards/margins_max": 0.08191566169261932, |
|
"rewards/margins_min": 0.0314016118645668, |
|
"rewards/margins_std": 0.03571882098913193, |
|
"rewards/rejected": -0.036850474774837494, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 4.197513088390813e-07, |
|
"logits/chosen": -0.013543277978897095, |
|
"logits/rejected": 0.37492939829826355, |
|
"logps/chosen": -232.13333129882812, |
|
"logps/rejected": -223.6721954345703, |
|
"loss": 0.6657, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.014923980459570885, |
|
"rewards/margins": 0.05013802647590637, |
|
"rewards/margins_max": 0.07493571937084198, |
|
"rewards/margins_min": 0.025340333580970764, |
|
"rewards/margins_std": 0.03506923094391823, |
|
"rewards/rejected": -0.03521404415369034, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.51171875, |
|
"learning_rate": 4.1599922375067554e-07, |
|
"logits/chosen": -0.03167729452252388, |
|
"logits/rejected": 0.535004734992981, |
|
"logps/chosen": -325.4375915527344, |
|
"logps/rejected": -253.494873046875, |
|
"loss": 0.6668, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.01660420373082161, |
|
"rewards/margins": 0.059089016169309616, |
|
"rewards/margins_max": 0.08827444911003113, |
|
"rewards/margins_min": 0.029903585091233253, |
|
"rewards/margins_std": 0.041274432092905045, |
|
"rewards/rejected": -0.04248481243848801, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.380859375, |
|
"learning_rate": 4.121790354874065e-07, |
|
"logits/chosen": 0.05303360894322395, |
|
"logits/rejected": 0.40770038962364197, |
|
"logps/chosen": -202.06549072265625, |
|
"logps/rejected": -214.628173828125, |
|
"loss": 0.6649, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.005082354880869389, |
|
"rewards/margins": 0.05396551638841629, |
|
"rewards/margins_max": 0.07737747579813004, |
|
"rewards/margins_min": 0.03055354580283165, |
|
"rewards/margins_std": 0.03310951590538025, |
|
"rewards/rejected": -0.04888315126299858, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.369140625, |
|
"learning_rate": 4.082923113273822e-07, |
|
"logits/chosen": 0.11870566755533218, |
|
"logits/rejected": 0.464911550283432, |
|
"logps/chosen": -231.35336303710938, |
|
"logps/rejected": -234.9374237060547, |
|
"loss": 0.6666, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.01106190960854292, |
|
"rewards/margins": 0.0625653862953186, |
|
"rewards/margins_max": 0.08917935192584991, |
|
"rewards/margins_min": 0.03595142811536789, |
|
"rewards/margins_std": 0.037637822329998016, |
|
"rewards/rejected": -0.05150347948074341, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.443359375, |
|
"learning_rate": 4.043406458458609e-07, |
|
"logits/chosen": 0.09034819900989532, |
|
"logits/rejected": 0.5873952507972717, |
|
"logps/chosen": -265.25396728515625, |
|
"logps/rejected": -214.2862548828125, |
|
"loss": 0.6628, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.0020419310312718153, |
|
"rewards/margins": 0.06574475765228271, |
|
"rewards/margins_max": 0.08710642158985138, |
|
"rewards/margins_min": 0.04438310116529465, |
|
"rewards/margins_std": 0.030209947377443314, |
|
"rewards/rejected": -0.06370283663272858, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.4921875, |
|
"learning_rate": 4.0032566026105806e-07, |
|
"logits/chosen": 0.008516276255249977, |
|
"logits/rejected": 0.6535265445709229, |
|
"logps/chosen": -260.87298583984375, |
|
"logps/rejected": -267.5401916503906, |
|
"loss": 0.663, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.03661227226257324, |
|
"rewards/margins": 0.07144369184970856, |
|
"rewards/margins_max": 0.09834811091423035, |
|
"rewards/margins_min": 0.044539276510477066, |
|
"rewards/margins_std": 0.03804859146475792, |
|
"rewards/rejected": -0.03483142331242561, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.474609375, |
|
"learning_rate": 3.9624900176902184e-07, |
|
"logits/chosen": 0.013054514303803444, |
|
"logits/rejected": 0.3652392029762268, |
|
"logps/chosen": -235.1199493408203, |
|
"logps/rejected": -248.31411743164062, |
|
"loss": 0.6656, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.014549237675964832, |
|
"rewards/margins": 0.05561714246869087, |
|
"rewards/margins_max": 0.08446307480335236, |
|
"rewards/margins_min": 0.026771211996674538, |
|
"rewards/margins_std": 0.040794309228658676, |
|
"rewards/rejected": -0.041067905724048615, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.41015625, |
|
"learning_rate": 3.921123428678511e-07, |
|
"logits/chosen": 0.022506317123770714, |
|
"logits/rejected": 0.6284270882606506, |
|
"logps/chosen": -305.97674560546875, |
|
"logps/rejected": -239.0786590576172, |
|
"loss": 0.666, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.020474497228860855, |
|
"rewards/margins": 0.06788565218448639, |
|
"rewards/margins_max": 0.09115969389677048, |
|
"rewards/margins_min": 0.044611603021621704, |
|
"rewards/margins_std": 0.03291446715593338, |
|
"rewards/rejected": -0.047411151230335236, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.478515625, |
|
"learning_rate": 3.8791738067153314e-07, |
|
"logits/chosen": 0.07077694684267044, |
|
"logits/rejected": 0.5682755708694458, |
|
"logps/chosen": -231.22695922851562, |
|
"logps/rejected": -227.6490478515625, |
|
"loss": 0.6622, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.03146480768918991, |
|
"rewards/margins": 0.06544210761785507, |
|
"rewards/margins_max": 0.0967545360326767, |
|
"rewards/margins_min": 0.034129686653614044, |
|
"rewards/margins_std": 0.044282447546720505, |
|
"rewards/rejected": -0.03397729992866516, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.41796875, |
|
"learning_rate": 3.83665836213682e-07, |
|
"logits/chosen": 0.12142015993595123, |
|
"logits/rejected": 0.5390751957893372, |
|
"logps/chosen": -207.6114501953125, |
|
"logps/rejected": -215.29849243164062, |
|
"loss": 0.6636, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.011886438354849815, |
|
"rewards/margins": 0.05365458130836487, |
|
"rewards/margins_max": 0.07296213507652283, |
|
"rewards/margins_min": 0.03434702754020691, |
|
"rewards/margins_std": 0.027305006980895996, |
|
"rewards/rejected": -0.0417681448161602, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.46875, |
|
"learning_rate": 3.7935945374146417e-07, |
|
"logits/chosen": 0.007061509881168604, |
|
"logits/rejected": 0.3642507493495941, |
|
"logps/chosen": -236.29788208007812, |
|
"logps/rejected": -242.33544921875, |
|
"loss": 0.6631, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.02563950978219509, |
|
"rewards/margins": 0.05955478549003601, |
|
"rewards/margins_max": 0.08539506047964096, |
|
"rewards/margins_min": 0.03371449559926987, |
|
"rewards/margins_std": 0.036543674767017365, |
|
"rewards/rejected": -0.03391526639461517, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.5234375, |
|
"learning_rate": 3.75e-07, |
|
"logits/chosen": 0.08328167349100113, |
|
"logits/rejected": 0.5527598857879639, |
|
"logps/chosen": -239.66159057617188, |
|
"logps/rejected": -235.6712188720703, |
|
"loss": 0.6622, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.023291967809200287, |
|
"rewards/margins": 0.07459411025047302, |
|
"rewards/margins_max": 0.1087113469839096, |
|
"rewards/margins_min": 0.04047687351703644, |
|
"rewards/margins_std": 0.04824905842542648, |
|
"rewards/rejected": -0.051302142441272736, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.40625, |
|
"learning_rate": 3.7058926350753517e-07, |
|
"logits/chosen": 0.04602205008268356, |
|
"logits/rejected": 0.6276509165763855, |
|
"logps/chosen": -247.14205932617188, |
|
"logps/rejected": -208.6519775390625, |
|
"loss": 0.6614, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.022474488243460655, |
|
"rewards/margins": 0.07001164555549622, |
|
"rewards/margins_max": 0.09704446792602539, |
|
"rewards/margins_min": 0.04297882691025734, |
|
"rewards/margins_std": 0.038230184465646744, |
|
"rewards/rejected": -0.04753715917468071, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.4453125, |
|
"learning_rate": 3.661290538216798e-07, |
|
"logits/chosen": 0.291398823261261, |
|
"logits/rejected": 0.6808168292045593, |
|
"logps/chosen": -224.65090942382812, |
|
"logps/rejected": -205.6571807861328, |
|
"loss": 0.6632, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.0020084187854081392, |
|
"rewards/margins": 0.05480460077524185, |
|
"rewards/margins_max": 0.0770978108048439, |
|
"rewards/margins_min": 0.0325113907456398, |
|
"rewards/margins_std": 0.031527359038591385, |
|
"rewards/rejected": -0.05279617756605148, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.4375, |
|
"learning_rate": 3.616212007970159e-07, |
|
"logits/chosen": 0.05395558476448059, |
|
"logits/rejected": 0.29135066270828247, |
|
"logps/chosen": -189.52139282226562, |
|
"logps/rejected": -215.48080444335938, |
|
"loss": 0.6633, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.008078034035861492, |
|
"rewards/margins": 0.05178927257657051, |
|
"rewards/margins_max": 0.0689278393983841, |
|
"rewards/margins_min": 0.034650713205337524, |
|
"rewards/margins_std": 0.024237588047981262, |
|
"rewards/rejected": -0.043711237609386444, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.4609375, |
|
"learning_rate": 3.5706755383437703e-07, |
|
"logits/chosen": 0.09721295535564423, |
|
"logits/rejected": 0.5186147689819336, |
|
"logps/chosen": -302.69482421875, |
|
"logps/rejected": -258.5033874511719, |
|
"loss": 0.6646, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.020449183881282806, |
|
"rewards/margins": 0.052381712943315506, |
|
"rewards/margins_max": 0.07583948969841003, |
|
"rewards/margins_min": 0.02892393246293068, |
|
"rewards/margins_std": 0.0331743024289608, |
|
"rewards/rejected": -0.0319325253367424, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.443359375, |
|
"learning_rate": 3.5246998112210993e-07, |
|
"logits/chosen": 0.13969309628009796, |
|
"logits/rejected": 0.6499422192573547, |
|
"logps/chosen": -262.07000732421875, |
|
"logps/rejected": -253.33364868164062, |
|
"loss": 0.6583, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.020577292889356613, |
|
"rewards/margins": 0.08194496482610703, |
|
"rewards/margins_max": 0.10924677550792694, |
|
"rewards/margins_min": 0.05464313551783562, |
|
"rewards/margins_std": 0.038610607385635376, |
|
"rewards/rejected": -0.061367668211460114, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.39453125, |
|
"learning_rate": 3.4783036886962736e-07, |
|
"logits/chosen": 0.15751202404499054, |
|
"logits/rejected": 0.583830714225769, |
|
"logps/chosen": -232.4749298095703, |
|
"logps/rejected": -251.43881225585938, |
|
"loss": 0.6642, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.013448268175125122, |
|
"rewards/margins": 0.06021388620138168, |
|
"rewards/margins_max": 0.08211688697338104, |
|
"rewards/margins_min": 0.03831087797880173, |
|
"rewards/margins_std": 0.030975526198744774, |
|
"rewards/rejected": -0.04676561802625656, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.451171875, |
|
"learning_rate": 3.4315062053356847e-07, |
|
"logits/chosen": -0.02616945281624794, |
|
"logits/rejected": 0.5470731854438782, |
|
"logps/chosen": -247.7039031982422, |
|
"logps/rejected": -204.8767547607422, |
|
"loss": 0.6635, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.02075277827680111, |
|
"rewards/margins": 0.06478811800479889, |
|
"rewards/margins_max": 0.09738490730524063, |
|
"rewards/margins_min": 0.03219131752848625, |
|
"rewards/margins_std": 0.04609883576631546, |
|
"rewards/rejected": -0.04403533786535263, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.515625, |
|
"learning_rate": 3.384326560368826e-07, |
|
"logits/chosen": 0.040539853274822235, |
|
"logits/rejected": 0.5014762878417969, |
|
"logps/chosen": -249.2455596923828, |
|
"logps/rejected": -242.47781372070312, |
|
"loss": 0.662, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.02245604246854782, |
|
"rewards/margins": 0.05939044803380966, |
|
"rewards/margins_max": 0.08405659347772598, |
|
"rewards/margins_min": 0.03472430631518364, |
|
"rewards/margins_std": 0.03488319739699364, |
|
"rewards/rejected": -0.03693440556526184, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.5, |
|
"learning_rate": 3.3367841098115777e-07, |
|
"logits/chosen": 0.05805939435958862, |
|
"logits/rejected": 0.47922706604003906, |
|
"logps/chosen": -286.8292541503906, |
|
"logps/rejected": -230.5067138671875, |
|
"loss": 0.6653, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.004244116134941578, |
|
"rewards/margins": 0.0571456179022789, |
|
"rewards/margins_max": 0.08360336720943451, |
|
"rewards/margins_min": 0.030687877908349037, |
|
"rewards/margins_std": 0.03741690143942833, |
|
"rewards/rejected": -0.052901506423950195, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.40234375, |
|
"learning_rate": 3.2888983585251713e-07, |
|
"logits/chosen": 0.11492130905389786, |
|
"logits/rejected": 0.3956727087497711, |
|
"logps/chosen": -204.6266632080078, |
|
"logps/rejected": -208.7443084716797, |
|
"loss": 0.6606, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.011013984680175781, |
|
"rewards/margins": 0.057107020169496536, |
|
"rewards/margins_max": 0.07711775600910187, |
|
"rewards/margins_min": 0.037096280604600906, |
|
"rewards/margins_std": 0.02829946205019951, |
|
"rewards/rejected": -0.046093035489320755, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.466796875, |
|
"learning_rate": 3.240688952214085e-07, |
|
"logits/chosen": -0.019520867615938187, |
|
"logits/rejected": 0.34635210037231445, |
|
"logps/chosen": -278.4693298339844, |
|
"logps/rejected": -257.54986572265625, |
|
"loss": 0.6607, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.020895112305879593, |
|
"rewards/margins": 0.08000204712152481, |
|
"rewards/margins_max": 0.1040647029876709, |
|
"rewards/margins_min": 0.05593939870595932, |
|
"rewards/margins_std": 0.034029725939035416, |
|
"rewards/rejected": -0.05910693481564522, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.365234375, |
|
"learning_rate": 3.192175669366156e-07, |
|
"logits/chosen": 0.08061734586954117, |
|
"logits/rejected": 0.440199077129364, |
|
"logps/chosen": -216.41323852539062, |
|
"logps/rejected": -240.26333618164062, |
|
"loss": 0.6611, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.011639273725450039, |
|
"rewards/margins": 0.061767347157001495, |
|
"rewards/margins_max": 0.09113974124193192, |
|
"rewards/margins_min": 0.03239493444561958, |
|
"rewards/margins_std": 0.04153885692358017, |
|
"rewards/rejected": -0.050128065049648285, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.435546875, |
|
"learning_rate": 3.14337841313822e-07, |
|
"logits/chosen": 0.2162504643201828, |
|
"logits/rejected": 0.6251672506332397, |
|
"logps/chosen": -249.9015655517578, |
|
"logps/rejected": -198.54403686523438, |
|
"loss": 0.6629, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.008589675650000572, |
|
"rewards/margins": 0.05789928883314133, |
|
"rewards/margins_max": 0.07874341309070587, |
|
"rewards/margins_min": 0.03705517202615738, |
|
"rewards/margins_std": 0.029478034004569054, |
|
"rewards/rejected": -0.0493096187710762, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.443359375, |
|
"learning_rate": 3.094317203190603e-07, |
|
"logits/chosen": -0.0029448375571519136, |
|
"logits/rejected": 0.4555005431175232, |
|
"logps/chosen": -240.8060760498047, |
|
"logps/rejected": -222.56246948242188, |
|
"loss": 0.6561, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.022363275289535522, |
|
"rewards/margins": 0.08168495446443558, |
|
"rewards/margins_max": 0.11077789962291718, |
|
"rewards/margins_min": 0.052591998130083084, |
|
"rewards/margins_std": 0.04114364832639694, |
|
"rewards/rejected": -0.059321679174900055, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.38671875, |
|
"learning_rate": 3.045012167473814e-07, |
|
"logits/chosen": 0.1808149516582489, |
|
"logits/rejected": 0.5233570337295532, |
|
"logps/chosen": -263.43255615234375, |
|
"logps/rejected": -270.8913269042969, |
|
"loss": 0.6616, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.02442259155213833, |
|
"rewards/margins": 0.0733276903629303, |
|
"rewards/margins_max": 0.104800745844841, |
|
"rewards/margins_min": 0.041854631155729294, |
|
"rewards/margins_std": 0.04450962692499161, |
|
"rewards/rejected": -0.04890510439872742, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.4140625, |
|
"learning_rate": 2.995483533970809e-07, |
|
"logits/chosen": 0.2622363269329071, |
|
"logits/rejected": 0.7754552960395813, |
|
"logps/chosen": -228.362060546875, |
|
"logps/rejected": -187.44383239746094, |
|
"loss": 0.6618, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.011710538528859615, |
|
"rewards/margins": 0.06277038902044296, |
|
"rewards/margins_max": 0.08341649174690247, |
|
"rewards/margins_min": 0.04212428256869316, |
|
"rewards/margins_std": 0.029198000207543373, |
|
"rewards/rejected": -0.05105985328555107, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.453125, |
|
"learning_rate": 2.9457516223982235e-07, |
|
"logits/chosen": 0.11260411888360977, |
|
"logits/rejected": 0.47127556800842285, |
|
"logps/chosen": -251.4638214111328, |
|
"logps/rejected": -251.6316680908203, |
|
"loss": 0.6609, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.009782608598470688, |
|
"rewards/margins": 0.07295442372560501, |
|
"rewards/margins_max": 0.10423107445240021, |
|
"rewards/margins_min": 0.04167778044939041, |
|
"rewards/margins_std": 0.044231854379177094, |
|
"rewards/rejected": -0.06317181885242462, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.44921875, |
|
"learning_rate": 2.895836835869962e-07, |
|
"logits/chosen": 0.03560788184404373, |
|
"logits/rejected": 0.4069921374320984, |
|
"logps/chosen": -228.38876342773438, |
|
"logps/rejected": -221.29638671875, |
|
"loss": 0.662, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.009866083040833473, |
|
"rewards/margins": 0.06033489108085632, |
|
"rewards/margins_max": 0.09506522119045258, |
|
"rewards/margins_min": 0.02560456469655037, |
|
"rewards/margins_std": 0.0491160973906517, |
|
"rewards/rejected": -0.050468809902668, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.48046875, |
|
"learning_rate": 2.845759652526574e-07, |
|
"logits/chosen": 0.07124204933643341, |
|
"logits/rejected": 0.5192992687225342, |
|
"logps/chosen": -234.10836791992188, |
|
"logps/rejected": -189.55230712890625, |
|
"loss": 0.66, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.01570773683488369, |
|
"rewards/margins": 0.05234966799616814, |
|
"rewards/margins_max": 0.07433562725782394, |
|
"rewards/margins_min": 0.030363699421286583, |
|
"rewards/margins_std": 0.031092852354049683, |
|
"rewards/rejected": -0.036641925573349, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.427734375, |
|
"learning_rate": 2.795540617133853e-07, |
|
"logits/chosen": 0.24306873977184296, |
|
"logits/rejected": 0.4881308674812317, |
|
"logps/chosen": -233.5541534423828, |
|
"logps/rejected": -271.29119873046875, |
|
"loss": 0.6601, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.0062574222683906555, |
|
"rewards/margins": 0.06694331020116806, |
|
"rewards/margins_max": 0.0913429707288742, |
|
"rewards/margins_min": 0.04254365712404251, |
|
"rewards/margins_std": 0.03450632840394974, |
|
"rewards/rejected": -0.060685895383358, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.40234375, |
|
"learning_rate": 2.7452003326540995e-07, |
|
"logits/chosen": 0.1885126382112503, |
|
"logits/rejected": 0.6096329689025879, |
|
"logps/chosen": -223.55380249023438, |
|
"logps/rejected": -210.834716796875, |
|
"loss": 0.6613, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.01565275713801384, |
|
"rewards/margins": 0.0681251734495163, |
|
"rewards/margins_max": 0.0929432287812233, |
|
"rewards/margins_min": 0.043307114392519, |
|
"rewards/margins_std": 0.035098038613796234, |
|
"rewards/rejected": -0.05247241258621216, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.369140625, |
|
"learning_rate": 2.694759451793508e-07, |
|
"logits/chosen": 0.3056187033653259, |
|
"logits/rejected": 0.5238193273544312, |
|
"logps/chosen": -180.62220764160156, |
|
"logps/rejected": -202.76705932617188, |
|
"loss": 0.6628, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.005610722117125988, |
|
"rewards/margins": 0.053133320063352585, |
|
"rewards/margins_max": 0.0700041875243187, |
|
"rewards/margins_min": 0.03626246377825737, |
|
"rewards/margins_std": 0.023858997970819473, |
|
"rewards/rejected": -0.04752260446548462, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.48828125, |
|
"learning_rate": 2.644238668529146e-07, |
|
"logits/chosen": 0.21234102547168732, |
|
"logits/rejected": 0.48591142892837524, |
|
"logps/chosen": -223.54971313476562, |
|
"logps/rejected": -248.9346466064453, |
|
"loss": 0.6607, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.017756493762135506, |
|
"rewards/margins": 0.07771660387516022, |
|
"rewards/margins_max": 0.11433382332324982, |
|
"rewards/margins_min": 0.04109939560294151, |
|
"rewards/margins_std": 0.05178455635905266, |
|
"rewards/rejected": -0.05996011570096016, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.396484375, |
|
"learning_rate": 2.593658709619001e-07, |
|
"logits/chosen": 0.11299429088830948, |
|
"logits/rejected": 0.5906545519828796, |
|
"logps/chosen": -222.49609375, |
|
"logps/rejected": -204.37290954589844, |
|
"loss": 0.6601, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.02080368809401989, |
|
"rewards/margins": 0.07051359862089157, |
|
"rewards/margins_max": 0.10480418056249619, |
|
"rewards/margins_min": 0.03622300922870636, |
|
"rewards/margins_std": 0.048494212329387665, |
|
"rewards/rejected": -0.04970990866422653, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.423828125, |
|
"learning_rate": 2.5430403260985807e-07, |
|
"logits/chosen": 0.11868913471698761, |
|
"logits/rejected": 0.5508742332458496, |
|
"logps/chosen": -212.3166961669922, |
|
"logps/rejected": -219.1356658935547, |
|
"loss": 0.6583, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.021529385820031166, |
|
"rewards/margins": 0.06332559883594513, |
|
"rewards/margins_max": 0.0937047004699707, |
|
"rewards/margins_min": 0.03294649347662926, |
|
"rewards/margins_std": 0.042962536215782166, |
|
"rewards/rejected": -0.04179621487855911, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.470703125, |
|
"learning_rate": 2.4924042847675503e-07, |
|
"logits/chosen": 0.06126406043767929, |
|
"logits/rejected": 0.5420705080032349, |
|
"logps/chosen": -294.85845947265625, |
|
"logps/rejected": -215.2727813720703, |
|
"loss": 0.661, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.007373870350420475, |
|
"rewards/margins": 0.05419896915555, |
|
"rewards/margins_max": 0.08067617565393448, |
|
"rewards/margins_min": 0.02772175334393978, |
|
"rewards/margins_std": 0.03744443506002426, |
|
"rewards/rejected": -0.0468250997364521, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.47265625, |
|
"learning_rate": 2.441771359669902e-07, |
|
"logits/chosen": 0.13893456757068634, |
|
"logits/rejected": 0.4921324849128723, |
|
"logps/chosen": -235.5193634033203, |
|
"logps/rejected": -225.794189453125, |
|
"loss": 0.6607, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.012106789276003838, |
|
"rewards/margins": 0.06842382997274399, |
|
"rewards/margins_max": 0.100715771317482, |
|
"rewards/margins_min": 0.03613189607858658, |
|
"rewards/margins_std": 0.045667704194784164, |
|
"rewards/rejected": -0.056317038834095, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.443359375, |
|
"learning_rate": 2.391162323571161e-07, |
|
"logits/chosen": 0.07089251279830933, |
|
"logits/rejected": 0.48170119524002075, |
|
"logps/chosen": -230.9342498779297, |
|
"logps/rejected": -226.3340301513672, |
|
"loss": 0.6617, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.010878843255341053, |
|
"rewards/margins": 0.06217268109321594, |
|
"rewards/margins_max": 0.08883620798587799, |
|
"rewards/margins_min": 0.03550915792584419, |
|
"rewards/margins_std": 0.037707917392253876, |
|
"rewards/rejected": -0.051293838769197464, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.42578125, |
|
"learning_rate": 2.340597939436097e-07, |
|
"logits/chosen": 0.03681742399930954, |
|
"logits/rejected": 0.5955736041069031, |
|
"logps/chosen": -234.0045166015625, |
|
"logps/rejected": -216.2124786376953, |
|
"loss": 0.6614, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.0253006462007761, |
|
"rewards/margins": 0.06550078094005585, |
|
"rewards/margins_max": 0.0953935831785202, |
|
"rewards/margins_min": 0.035607993602752686, |
|
"rewards/margins_std": 0.04227479174733162, |
|
"rewards/rejected": -0.0402001328766346, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.42578125, |
|
"learning_rate": 2.2900989519104796e-07, |
|
"logits/chosen": 0.1664225161075592, |
|
"logits/rejected": 0.4196982979774475, |
|
"logps/chosen": -182.28829956054688, |
|
"logps/rejected": -211.08865356445312, |
|
"loss": 0.6625, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.0058049350045621395, |
|
"rewards/margins": 0.06564933061599731, |
|
"rewards/margins_max": 0.09529349207878113, |
|
"rewards/margins_min": 0.036005161702632904, |
|
"rewards/margins_std": 0.04192318022251129, |
|
"rewards/rejected": -0.05984439328312874, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.4375, |
|
"learning_rate": 2.2396860788103353e-07, |
|
"logits/chosen": -0.04069889336824417, |
|
"logits/rejected": 0.4455093741416931, |
|
"logps/chosen": -208.73477172851562, |
|
"logps/rejected": -199.85501098632812, |
|
"loss": 0.6608, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.015201890841126442, |
|
"rewards/margins": 0.08097913861274719, |
|
"rewards/margins_max": 0.11325138807296753, |
|
"rewards/margins_min": 0.04870688170194626, |
|
"rewards/margins_std": 0.04563985764980316, |
|
"rewards/rejected": -0.0657772421836853, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.451171875, |
|
"learning_rate": 2.1893800026222083e-07, |
|
"logits/chosen": 0.24370861053466797, |
|
"logits/rejected": 0.655241847038269, |
|
"logps/chosen": -239.9451446533203, |
|
"logps/rejected": -255.0171356201172, |
|
"loss": 0.6612, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.01818387396633625, |
|
"rewards/margins": 0.06645138561725616, |
|
"rewards/margins_max": 0.0944729745388031, |
|
"rewards/margins_min": 0.03842979669570923, |
|
"rewards/margins_std": 0.039628516882658005, |
|
"rewards/rejected": -0.048267509788274765, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.376953125, |
|
"learning_rate": 2.1392013620179336e-07, |
|
"logits/chosen": -0.15726599097251892, |
|
"logits/rejected": 0.27727076411247253, |
|
"logps/chosen": -208.62881469726562, |
|
"logps/rejected": -205.62429809570312, |
|
"loss": 0.6593, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.012712801806628704, |
|
"rewards/margins": 0.07130307704210281, |
|
"rewards/margins_max": 0.09740529954433441, |
|
"rewards/margins_min": 0.04520086199045181, |
|
"rewards/margins_std": 0.03691411018371582, |
|
"rewards/rejected": -0.05859028175473213, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.373046875, |
|
"learning_rate": 2.0891707433873623e-07, |
|
"logits/chosen": 0.2577076256275177, |
|
"logits/rejected": 0.5587279796600342, |
|
"logps/chosen": -232.6507568359375, |
|
"logps/rejected": -236.791015625, |
|
"loss": 0.6608, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.007417677901685238, |
|
"rewards/margins": 0.06323407590389252, |
|
"rewards/margins_max": 0.09169165790081024, |
|
"rewards/margins_min": 0.03477650135755539, |
|
"rewards/margins_std": 0.040245089679956436, |
|
"rewards/rejected": -0.055816400796175, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.4609375, |
|
"learning_rate": 2.039308672392556e-07, |
|
"logits/chosen": 0.09692186862230301, |
|
"logits/rejected": 0.5365327000617981, |
|
"logps/chosen": -220.7172393798828, |
|
"logps/rejected": -204.85055541992188, |
|
"loss": 0.6567, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.016125962138175964, |
|
"rewards/margins": 0.06824339926242828, |
|
"rewards/margins_max": 0.10508973896503448, |
|
"rewards/margins_min": 0.03139704838395119, |
|
"rewards/margins_std": 0.052108604460954666, |
|
"rewards/rejected": -0.05211742967367172, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.36328125, |
|
"learning_rate": 1.9896356055468845e-07, |
|
"logits/chosen": 0.24312233924865723, |
|
"logits/rejected": 0.5007752180099487, |
|
"logps/chosen": -217.9171600341797, |
|
"logps/rejected": -255.72866821289062, |
|
"loss": 0.6605, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.015429767780005932, |
|
"rewards/margins": 0.06471355259418488, |
|
"rewards/margins_max": 0.09141434729099274, |
|
"rewards/margins_min": 0.03801275044679642, |
|
"rewards/margins_std": 0.03776064142584801, |
|
"rewards/rejected": -0.04928378015756607, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.359375, |
|
"learning_rate": 1.940171921822496e-07, |
|
"logits/chosen": 0.007707296404987574, |
|
"logits/rejected": 0.3314017653465271, |
|
"logps/chosen": -218.86654663085938, |
|
"logps/rejected": -214.7074737548828, |
|
"loss": 0.6625, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.010595353320240974, |
|
"rewards/margins": 0.05604109913110733, |
|
"rewards/margins_max": 0.08353577554225922, |
|
"rewards/margins_min": 0.028546428307890892, |
|
"rewards/margins_std": 0.03888333961367607, |
|
"rewards/rejected": -0.045445747673511505, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.421875, |
|
"learning_rate": 1.8909379142895977e-07, |
|
"logits/chosen": 0.08975931257009506, |
|
"logits/rejected": 0.49662691354751587, |
|
"logps/chosen": -243.73941040039062, |
|
"logps/rejected": -218.0565643310547, |
|
"loss": 0.6628, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.017341626808047295, |
|
"rewards/margins": 0.06548301875591278, |
|
"rewards/margins_max": 0.10044316947460175, |
|
"rewards/margins_min": 0.030522847548127174, |
|
"rewards/margins_std": 0.0494411401450634, |
|
"rewards/rejected": -0.04814138263463974, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.419921875, |
|
"learning_rate": 1.841953781790983e-07, |
|
"logits/chosen": 0.14877240359783173, |
|
"logits/rejected": 0.32807669043540955, |
|
"logps/chosen": -201.35398864746094, |
|
"logps/rejected": -237.98403930664062, |
|
"loss": 0.6614, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.011331291869282722, |
|
"rewards/margins": 0.05169866234064102, |
|
"rewards/margins_max": 0.08101126551628113, |
|
"rewards/margins_min": 0.02238604798913002, |
|
"rewards/margins_std": 0.041454292833805084, |
|
"rewards/rejected": -0.04036737233400345, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.5234375, |
|
"learning_rate": 1.793239620655211e-07, |
|
"logits/chosen": 0.10640072822570801, |
|
"logits/rejected": 0.5526248812675476, |
|
"logps/chosen": -198.35403442382812, |
|
"logps/rejected": -196.8388671875, |
|
"loss": 0.6604, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.0263301283121109, |
|
"rewards/margins": 0.07441949844360352, |
|
"rewards/margins_max": 0.1034015566110611, |
|
"rewards/margins_min": 0.045437444001436234, |
|
"rewards/margins_std": 0.040986817330121994, |
|
"rewards/rejected": -0.04808937385678291, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.390625, |
|
"learning_rate": 1.744815416451847e-07, |
|
"logits/chosen": 0.1694943606853485, |
|
"logits/rejected": 0.6004883050918579, |
|
"logps/chosen": -255.3223114013672, |
|
"logps/rejected": -243.01541137695312, |
|
"loss": 0.6625, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.01719365268945694, |
|
"rewards/margins": 0.06180461123585701, |
|
"rewards/margins_max": 0.08655586838722229, |
|
"rewards/margins_min": 0.03705335780978203, |
|
"rewards/margins_std": 0.03500355780124664, |
|
"rewards/rejected": -0.04461096227169037, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.4453125, |
|
"learning_rate": 1.6967010357921446e-07, |
|
"logits/chosen": 0.11355743557214737, |
|
"logits/rejected": 0.4874862730503082, |
|
"logps/chosen": -210.58767700195312, |
|
"logps/rejected": -219.46701049804688, |
|
"loss": 0.6618, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.005143271759152412, |
|
"rewards/margins": 0.061519283801317215, |
|
"rewards/margins_max": 0.0864943265914917, |
|
"rewards/margins_min": 0.036544252187013626, |
|
"rewards/margins_std": 0.035320036113262177, |
|
"rewards/rejected": -0.05637601017951965, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.439453125, |
|
"learning_rate": 1.6489162181785255e-07, |
|
"logits/chosen": 0.15795719623565674, |
|
"logits/rejected": 0.5425394773483276, |
|
"logps/chosen": -245.29562377929688, |
|
"logps/rejected": -233.9000244140625, |
|
"loss": 0.6602, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.021811651065945625, |
|
"rewards/margins": 0.07487231492996216, |
|
"rewards/margins_max": 0.09871380031108856, |
|
"rewards/margins_min": 0.051030855625867844, |
|
"rewards/margins_std": 0.03371693566441536, |
|
"rewards/rejected": -0.05306067317724228, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.361328125, |
|
"learning_rate": 1.6014805679062183e-07, |
|
"logits/chosen": -0.04248831048607826, |
|
"logits/rejected": 0.36503881216049194, |
|
"logps/chosen": -204.58383178710938, |
|
"logps/rejected": -203.0003204345703, |
|
"loss": 0.6607, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.020199652761220932, |
|
"rewards/margins": 0.08475508540868759, |
|
"rewards/margins_max": 0.11757893860340118, |
|
"rewards/margins_min": 0.051931243389844894, |
|
"rewards/margins_std": 0.046419933438301086, |
|
"rewards/rejected": -0.06455543637275696, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.482421875, |
|
"learning_rate": 1.5544135460203527e-07, |
|
"logits/chosen": 0.250204861164093, |
|
"logits/rejected": 0.5448838472366333, |
|
"logps/chosen": -212.43508911132812, |
|
"logps/rejected": -247.50747680664062, |
|
"loss": 0.6601, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.013406927697360516, |
|
"rewards/margins": 0.07055126130580902, |
|
"rewards/margins_max": 0.09891954064369202, |
|
"rewards/margins_min": 0.04218297451734543, |
|
"rewards/margins_std": 0.04011881351470947, |
|
"rewards/rejected": -0.05714433267712593, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.408203125, |
|
"learning_rate": 1.5077344623318388e-07, |
|
"logits/chosen": 0.08146306127309799, |
|
"logits/rejected": 0.5028539896011353, |
|
"logps/chosen": -244.5470733642578, |
|
"logps/rejected": -203.9750213623047, |
|
"loss": 0.6622, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.00543981185182929, |
|
"rewards/margins": 0.0606420524418354, |
|
"rewards/margins_max": 0.09149619191884995, |
|
"rewards/margins_min": 0.029787922278046608, |
|
"rewards/margins_std": 0.043634332716464996, |
|
"rewards/rejected": -0.05520225316286087, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.4921875, |
|
"learning_rate": 1.461462467495284e-07, |
|
"logits/chosen": 0.09238779544830322, |
|
"logits/rejected": 0.5282326340675354, |
|
"logps/chosen": -239.08853149414062, |
|
"logps/rejected": -234.31228637695312, |
|
"loss": 0.6582, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.020727628841996193, |
|
"rewards/margins": 0.07139938324689865, |
|
"rewards/margins_max": 0.0972491055727005, |
|
"rewards/margins_min": 0.045549679547548294, |
|
"rewards/margins_std": 0.036557018756866455, |
|
"rewards/rejected": -0.0506717674434185, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.400390625, |
|
"learning_rate": 1.4156165451522028e-07, |
|
"logits/chosen": 0.08472833782434464, |
|
"logits/rejected": 0.5027869939804077, |
|
"logps/chosen": -205.4404754638672, |
|
"logps/rejected": -202.98440551757812, |
|
"loss": 0.663, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.011948509141802788, |
|
"rewards/margins": 0.06199117749929428, |
|
"rewards/margins_max": 0.08956360816955566, |
|
"rewards/margins_min": 0.03441876173019409, |
|
"rewards/margins_std": 0.038993291556835175, |
|
"rewards/rejected": -0.05004267022013664, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.470703125, |
|
"learning_rate": 1.3702155041427543e-07, |
|
"logits/chosen": 0.1654224544763565, |
|
"logits/rejected": 0.39103928208351135, |
|
"logps/chosen": -221.5464630126953, |
|
"logps/rejected": -246.1484832763672, |
|
"loss": 0.6611, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.008782127872109413, |
|
"rewards/margins": 0.05567712336778641, |
|
"rewards/margins_max": 0.07324758917093277, |
|
"rewards/margins_min": 0.038106657564640045, |
|
"rewards/margins_std": 0.024848390370607376, |
|
"rewards/rejected": -0.046894993633031845, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.4375, |
|
"learning_rate": 1.3252779707891902e-07, |
|
"logits/chosen": 0.009541223756968975, |
|
"logits/rejected": 0.48217493295669556, |
|
"logps/chosen": -272.9510192871094, |
|
"logps/rejected": -204.46435546875, |
|
"loss": 0.6611, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.009134182706475258, |
|
"rewards/margins": 0.05944829061627388, |
|
"rewards/margins_max": 0.08002766221761703, |
|
"rewards/margins_min": 0.03886892646551132, |
|
"rewards/margins_std": 0.02910362184047699, |
|
"rewards/rejected": -0.05031410977244377, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.462890625, |
|
"learning_rate": 1.2808223812541774e-07, |
|
"logits/chosen": 0.07254563271999359, |
|
"logits/rejected": 0.47662535309791565, |
|
"logps/chosen": -241.54336547851562, |
|
"logps/rejected": -211.88424682617188, |
|
"loss": 0.6606, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.0020990788470953703, |
|
"rewards/margins": 0.05149079114198685, |
|
"rewards/margins_max": 0.08034542202949524, |
|
"rewards/margins_min": 0.022636160254478455, |
|
"rewards/margins_std": 0.040806613862514496, |
|
"rewards/rejected": -0.04939170926809311, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.4375, |
|
"learning_rate": 1.2368669739771469e-07, |
|
"logits/chosen": 0.07886068522930145, |
|
"logits/rejected": 0.4947189390659332, |
|
"logps/chosen": -206.33993530273438, |
|
"logps/rejected": -212.7965850830078, |
|
"loss": 0.6578, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.009903495199978352, |
|
"rewards/margins": 0.0682389959692955, |
|
"rewards/margins_max": 0.09637950360774994, |
|
"rewards/margins_min": 0.04009848088026047, |
|
"rewards/margins_std": 0.03979669511318207, |
|
"rewards/rejected": -0.058335501700639725, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.439453125, |
|
"learning_rate": 1.1934297821917497e-07, |
|
"logits/chosen": -0.18527595698833466, |
|
"logits/rejected": 0.35417476296424866, |
|
"logps/chosen": -271.8248291015625, |
|
"logps/rejected": -208.87966918945312, |
|
"loss": 0.6619, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.014687316492199898, |
|
"rewards/margins": 0.05254317447543144, |
|
"rewards/margins_max": 0.0765123963356018, |
|
"rewards/margins_min": 0.028573954477906227, |
|
"rewards/margins_std": 0.03389759734272957, |
|
"rewards/rejected": -0.03785586357116699, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.40234375, |
|
"learning_rate": 1.1505286265275094e-07, |
|
"logits/chosen": 0.09351782500743866, |
|
"logits/rejected": 0.5304566621780396, |
|
"logps/chosen": -217.6367645263672, |
|
"logps/rejected": -209.18603515625, |
|
"loss": 0.666, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.01146542839705944, |
|
"rewards/margins": 0.07028119266033173, |
|
"rewards/margins_max": 0.10538403689861298, |
|
"rewards/margins_min": 0.03517835959792137, |
|
"rewards/margins_std": 0.0496429018676281, |
|
"rewards/rejected": -0.05881576985120773, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.390625, |
|
"learning_rate": 1.1081811076986963e-07, |
|
"logits/chosen": 0.026241421699523926, |
|
"logits/rejected": 0.6041153073310852, |
|
"logps/chosen": -228.3728790283203, |
|
"logps/rejected": -190.1019287109375, |
|
"loss": 0.6596, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.016418198123574257, |
|
"rewards/margins": 0.0706411749124527, |
|
"rewards/margins_max": 0.09941698610782623, |
|
"rewards/margins_min": 0.041865330189466476, |
|
"rewards/margins_std": 0.04069516435265541, |
|
"rewards/rejected": -0.054222963750362396, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.427734375, |
|
"learning_rate": 1.0664045992834184e-07, |
|
"logits/chosen": 0.19840288162231445, |
|
"logits/rejected": 0.5584182143211365, |
|
"logps/chosen": -254.10147094726562, |
|
"logps/rejected": -256.0483703613281, |
|
"loss": 0.6583, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.012557362206280231, |
|
"rewards/margins": 0.06964166462421417, |
|
"rewards/margins_max": 0.09085742384195328, |
|
"rewards/margins_min": 0.04842590540647507, |
|
"rewards/margins_std": 0.030003610998392105, |
|
"rewards/rejected": -0.057084303349256516, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.484375, |
|
"learning_rate": 1.0252162405959042e-07, |
|
"logits/chosen": -0.029180001467466354, |
|
"logits/rejected": 0.4648149609565735, |
|
"logps/chosen": -273.28375244140625, |
|
"logps/rejected": -244.730712890625, |
|
"loss": 0.6602, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.02007482200860977, |
|
"rewards/margins": 0.06700652837753296, |
|
"rewards/margins_max": 0.10410724580287933, |
|
"rewards/margins_min": 0.029905814677476883, |
|
"rewards/margins_std": 0.05246833711862564, |
|
"rewards/rejected": -0.04693170636892319, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.494140625, |
|
"learning_rate": 9.846329296548963e-08, |
|
"logits/chosen": -0.017562460154294968, |
|
"logits/rejected": 0.4763096868991852, |
|
"logps/chosen": -269.8515625, |
|
"logps/rejected": -263.83148193359375, |
|
"loss": 0.6598, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.010733803734183311, |
|
"rewards/margins": 0.07448114454746246, |
|
"rewards/margins_max": 0.10118886083364487, |
|
"rewards/margins_min": 0.04777342826128006, |
|
"rewards/margins_std": 0.03777041286230087, |
|
"rewards/rejected": -0.0637473464012146, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.486328125, |
|
"learning_rate": 9.446713162510341e-08, |
|
"logits/chosen": 0.22771111130714417, |
|
"logits/rejected": 0.7621752023696899, |
|
"logps/chosen": -266.06390380859375, |
|
"logps/rejected": -250.635498046875, |
|
"loss": 0.6584, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.030348753556609154, |
|
"rewards/margins": 0.07343067973852158, |
|
"rewards/margins_max": 0.10677297413349152, |
|
"rewards/margins_min": 0.040088407695293427, |
|
"rewards/margins_std": 0.04715309664607048, |
|
"rewards/rejected": -0.04308192804455757, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.515625, |
|
"learning_rate": 9.053477951160737e-08, |
|
"logits/chosen": 0.015399669297039509, |
|
"logits/rejected": 0.7483765482902527, |
|
"logps/chosen": -276.5067443847656, |
|
"logps/rejected": -227.33761596679688, |
|
"loss": 0.6579, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.026790842413902283, |
|
"rewards/margins": 0.08279003202915192, |
|
"rewards/margins_max": 0.11221597343683243, |
|
"rewards/margins_min": 0.05336407572031021, |
|
"rewards/margins_std": 0.04161457344889641, |
|
"rewards/rejected": -0.05599917098879814, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.396484375, |
|
"learning_rate": 8.666784991967596e-08, |
|
"logits/chosen": 0.010845961980521679, |
|
"logits/rejected": 0.42500224709510803, |
|
"logps/chosen": -213.1592254638672, |
|
"logps/rejected": -199.2817840576172, |
|
"loss": 0.6613, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.014592917636036873, |
|
"rewards/margins": 0.0668349340558052, |
|
"rewards/margins_max": 0.09872870147228241, |
|
"rewards/margins_min": 0.03494114801287651, |
|
"rewards/margins_std": 0.04510461539030075, |
|
"rewards/rejected": -0.05224201828241348, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.4921875, |
|
"learning_rate": 8.286792930360823e-08, |
|
"logits/chosen": 0.25165149569511414, |
|
"logits/rejected": 0.6992672681808472, |
|
"logps/chosen": -217.0974884033203, |
|
"logps/rejected": -202.47030639648438, |
|
"loss": 0.6599, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.011730032041668892, |
|
"rewards/margins": 0.0590001717209816, |
|
"rewards/margins_max": 0.07914995402097702, |
|
"rewards/margins_min": 0.03885037824511528, |
|
"rewards/margins_std": 0.02849610149860382, |
|
"rewards/rejected": -0.04727013781666756, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.52734375, |
|
"learning_rate": 7.91365766264665e-08, |
|
"logits/chosen": 0.20514824986457825, |
|
"logits/rejected": 0.5356392860412598, |
|
"logps/chosen": -248.6316680908203, |
|
"logps/rejected": -240.5338134765625, |
|
"loss": 0.6591, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.010535435751080513, |
|
"rewards/margins": 0.06282900273799896, |
|
"rewards/margins_max": 0.09407368302345276, |
|
"rewards/margins_min": 0.031584326177835464, |
|
"rewards/margins_std": 0.04418665170669556, |
|
"rewards/rejected": -0.052293576300144196, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.455078125, |
|
"learning_rate": 7.547532272049264e-08, |
|
"logits/chosen": 0.25605538487434387, |
|
"logits/rejected": 0.6374403238296509, |
|
"logps/chosen": -255.80410766601562, |
|
"logps/rejected": -255.73764038085938, |
|
"loss": 0.6619, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.013418711721897125, |
|
"rewards/margins": 0.06125851348042488, |
|
"rewards/margins_max": 0.08139893412590027, |
|
"rewards/margins_min": 0.04111810773611069, |
|
"rewards/margins_std": 0.028482843190431595, |
|
"rewards/rejected": -0.047839801758527756, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.4140625, |
|
"learning_rate": 7.188566965906584e-08, |
|
"logits/chosen": 0.10137088596820831, |
|
"logits/rejected": 0.5515474081039429, |
|
"logps/chosen": -271.2210693359375, |
|
"logps/rejected": -272.3622131347656, |
|
"loss": 0.6598, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.00015007219917606562, |
|
"rewards/margins": 0.06623668223619461, |
|
"rewards/margins_max": 0.10004226863384247, |
|
"rewards/margins_min": 0.03243108466267586, |
|
"rewards/margins_std": 0.04780833050608635, |
|
"rewards/rejected": -0.06638675183057785, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.412109375, |
|
"learning_rate": 6.836909014045924e-08, |
|
"logits/chosen": 0.005819192621856928, |
|
"logits/rejected": 0.38501212000846863, |
|
"logps/chosen": -247.23056030273438, |
|
"logps/rejected": -238.4652557373047, |
|
"loss": 0.6607, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.01672416180372238, |
|
"rewards/margins": 0.07304920256137848, |
|
"rewards/margins_max": 0.10092739760875702, |
|
"rewards/margins_min": 0.04517098516225815, |
|
"rewards/margins_std": 0.039425741881132126, |
|
"rewards/rejected": -0.0563250370323658, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.4609375, |
|
"learning_rate": 6.492702688364737e-08, |
|
"logits/chosen": -0.07613168656826019, |
|
"logits/rejected": 0.20295462012290955, |
|
"logps/chosen": -203.92233276367188, |
|
"logps/rejected": -247.69277954101562, |
|
"loss": 0.6604, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.014894701540470123, |
|
"rewards/margins": 0.06641440093517303, |
|
"rewards/margins_max": 0.09283626079559326, |
|
"rewards/margins_min": 0.039992526173591614, |
|
"rewards/margins_std": 0.037366170436143875, |
|
"rewards/rejected": -0.05151969939470291, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.4375, |
|
"learning_rate": 6.156089203641373e-08, |
|
"logits/chosen": -0.014948748052120209, |
|
"logits/rejected": 0.4398605227470398, |
|
"logps/chosen": -247.429931640625, |
|
"logps/rejected": -251.06826782226562, |
|
"loss": 0.6571, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.0333079919219017, |
|
"rewards/margins": 0.08266235888004303, |
|
"rewards/margins_max": 0.10667815059423447, |
|
"rewards/margins_min": 0.0586465522646904, |
|
"rewards/margins_std": 0.03396347165107727, |
|
"rewards/rejected": -0.04935435950756073, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.427734375, |
|
"learning_rate": 5.827206659599987e-08, |
|
"logits/chosen": 0.28106218576431274, |
|
"logits/rejected": 0.7749143242835999, |
|
"logps/chosen": -222.03665161132812, |
|
"logps/rejected": -200.11221313476562, |
|
"loss": 0.6576, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.017674388363957405, |
|
"rewards/margins": 0.07599468529224396, |
|
"rewards/margins_max": 0.11385379731655121, |
|
"rewards/margins_min": 0.038135576993227005, |
|
"rewards/margins_std": 0.05354086682200432, |
|
"rewards/rejected": -0.058320302516222, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.50390625, |
|
"learning_rate": 5.506189984253501e-08, |
|
"logits/chosen": 0.16949541866779327, |
|
"logits/rejected": 0.4548502564430237, |
|
"logps/chosen": -205.447265625, |
|
"logps/rejected": -221.4696044921875, |
|
"loss": 0.6611, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.003050294006243348, |
|
"rewards/margins": 0.06650832295417786, |
|
"rewards/margins_max": 0.09234586358070374, |
|
"rewards/margins_min": 0.040670786052942276, |
|
"rewards/margins_std": 0.036539800465106964, |
|
"rewards/rejected": -0.06345803290605545, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.482421875, |
|
"learning_rate": 5.1931708785477506e-08, |
|
"logits/chosen": 0.11355874687433243, |
|
"logits/rejected": 0.6481127738952637, |
|
"logps/chosen": -216.15432739257812, |
|
"logps/rejected": -187.30389404296875, |
|
"loss": 0.6592, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.015445582568645477, |
|
"rewards/margins": 0.05808136984705925, |
|
"rewards/margins_max": 0.08922155201435089, |
|
"rewards/margins_min": 0.026941198855638504, |
|
"rewards/margins_std": 0.04403885826468468, |
|
"rewards/rejected": -0.04263579100370407, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.380859375, |
|
"learning_rate": 4.888277762329582e-08, |
|
"logits/chosen": 0.11872565746307373, |
|
"logits/rejected": 0.5771151185035706, |
|
"logps/chosen": -215.25442504882812, |
|
"logps/rejected": -214.4876251220703, |
|
"loss": 0.6619, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.01657172292470932, |
|
"rewards/margins": 0.06676243245601654, |
|
"rewards/margins_max": 0.0983147844672203, |
|
"rewards/margins_min": 0.03521009162068367, |
|
"rewards/margins_std": 0.04462175816297531, |
|
"rewards/rejected": -0.05019070953130722, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.439453125, |
|
"learning_rate": 4.591635721661072e-08, |
|
"logits/chosen": 0.1136382669210434, |
|
"logits/rejected": 0.5482941269874573, |
|
"logps/chosen": -243.9540557861328, |
|
"logps/rejected": -231.51473999023438, |
|
"loss": 0.6606, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.01714186929166317, |
|
"rewards/margins": 0.07303180545568466, |
|
"rewards/margins_max": 0.10039409250020981, |
|
"rewards/margins_min": 0.045669522136449814, |
|
"rewards/margins_std": 0.03869611397385597, |
|
"rewards/rejected": -0.05588993430137634, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.431640625, |
|
"learning_rate": 4.3033664575015005e-08, |
|
"logits/chosen": 0.24127981066703796, |
|
"logits/rejected": 0.6273223161697388, |
|
"logps/chosen": -258.4788818359375, |
|
"logps/rejected": -255.1360321044922, |
|
"loss": 0.6591, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.0254741869866848, |
|
"rewards/margins": 0.0617264024913311, |
|
"rewards/margins_max": 0.08791927993297577, |
|
"rewards/margins_min": 0.035533517599105835, |
|
"rewards/margins_std": 0.03704233095049858, |
|
"rewards/rejected": -0.036252211779356, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.4453125, |
|
"learning_rate": 4.023588235778019e-08, |
|
"logits/chosen": 0.048088885843753815, |
|
"logits/rejected": 0.4085961878299713, |
|
"logps/chosen": -235.32763671875, |
|
"logps/rejected": -246.94937133789062, |
|
"loss": 0.6625, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.017656199634075165, |
|
"rewards/margins": 0.07100087404251099, |
|
"rewards/margins_max": 0.09923985600471497, |
|
"rewards/margins_min": 0.042761895805597305, |
|
"rewards/margins_std": 0.039935946464538574, |
|
"rewards/rejected": -0.05334467440843582, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.4609375, |
|
"learning_rate": 3.752415838865664e-08, |
|
"logits/chosen": -0.09887398779392242, |
|
"logits/rejected": 0.5310045480728149, |
|
"logps/chosen": -245.59951782226562, |
|
"logps/rejected": -266.8290100097656, |
|
"loss": 0.6586, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.018602244555950165, |
|
"rewards/margins": 0.08193326741456985, |
|
"rewards/margins_max": 0.11139090359210968, |
|
"rewards/margins_min": 0.05247562378644943, |
|
"rewards/margins_std": 0.041659384965896606, |
|
"rewards/rejected": -0.06333102285861969, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.439453125, |
|
"learning_rate": 3.4899605184965206e-08, |
|
"logits/chosen": 0.03019891306757927, |
|
"logits/rejected": 0.44324207305908203, |
|
"logps/chosen": -225.20443725585938, |
|
"logps/rejected": -183.06094360351562, |
|
"loss": 0.6609, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.0028962846845388412, |
|
"rewards/margins": 0.0560896173119545, |
|
"rewards/margins_max": 0.07679092139005661, |
|
"rewards/margins_min": 0.035388313233852386, |
|
"rewards/margins_std": 0.02927606739103794, |
|
"rewards/rejected": -0.05319333076477051, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.439453125, |
|
"learning_rate": 3.23632995011732e-08, |
|
"logits/chosen": -0.06648756563663483, |
|
"logits/rejected": 0.29680854082107544, |
|
"logps/chosen": -226.04983520507812, |
|
"logps/rejected": -258.3298034667969, |
|
"loss": 0.6587, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.03215508535504341, |
|
"rewards/margins": 0.08979654312133789, |
|
"rewards/margins_max": 0.12097585201263428, |
|
"rewards/margins_min": 0.058617234230041504, |
|
"rewards/margins_std": 0.044094208627939224, |
|
"rewards/rejected": -0.057641465216875076, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.455078125, |
|
"learning_rate": 2.991628188714351e-08, |
|
"logits/chosen": 0.00623916694894433, |
|
"logits/rejected": 0.48251962661743164, |
|
"logps/chosen": -313.39935302734375, |
|
"logps/rejected": -245.91720581054688, |
|
"loss": 0.6596, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.004381291568279266, |
|
"rewards/margins": 0.07124367356300354, |
|
"rewards/margins_max": 0.09969727694988251, |
|
"rewards/margins_min": 0.04279007390141487, |
|
"rewards/margins_std": 0.04023946821689606, |
|
"rewards/rejected": -0.06686238944530487, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.4375, |
|
"learning_rate": 2.755955626123596e-08, |
|
"logits/chosen": 0.12439896166324615, |
|
"logits/rejected": 0.6011586785316467, |
|
"logps/chosen": -250.7643585205078, |
|
"logps/rejected": -217.0757293701172, |
|
"loss": 0.6624, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.018308712169528008, |
|
"rewards/margins": 0.05787688493728638, |
|
"rewards/margins_max": 0.09185748547315598, |
|
"rewards/margins_min": 0.023896273225545883, |
|
"rewards/margins_std": 0.04805583506822586, |
|
"rewards/rejected": -0.03956816717982292, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.42578125, |
|
"learning_rate": 2.5294089498438225e-08, |
|
"logits/chosen": 0.024487819522619247, |
|
"logits/rejected": 0.5533932447433472, |
|
"logps/chosen": -245.57492065429688, |
|
"logps/rejected": -220.93258666992188, |
|
"loss": 0.6584, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.010946778580546379, |
|
"rewards/margins": 0.06493957340717316, |
|
"rewards/margins_max": 0.0981217697262764, |
|
"rewards/margins_min": 0.03175736218690872, |
|
"rewards/margins_std": 0.046926725655794144, |
|
"rewards/rejected": -0.05399278551340103, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.48046875, |
|
"learning_rate": 2.312081103369354e-08, |
|
"logits/chosen": 0.10629892349243164, |
|
"logits/rejected": 0.5729449987411499, |
|
"logps/chosen": -227.0969696044922, |
|
"logps/rejected": -209.62841796875, |
|
"loss": 0.659, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.013625606894493103, |
|
"rewards/margins": 0.05797373503446579, |
|
"rewards/margins_max": 0.0893624946475029, |
|
"rewards/margins_min": 0.02658497728407383, |
|
"rewards/margins_std": 0.04439040273427963, |
|
"rewards/rejected": -0.04434812813997269, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.48046875, |
|
"learning_rate": 2.104061248058872e-08, |
|
"logits/chosen": 0.10214777290821075, |
|
"logits/rejected": 0.4200982451438904, |
|
"logps/chosen": -213.7083740234375, |
|
"logps/rejected": -225.8516845703125, |
|
"loss": 0.6666, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.018484923988580704, |
|
"rewards/margins": 0.058260779827833176, |
|
"rewards/margins_max": 0.08636601269245148, |
|
"rewards/margins_min": 0.030155545100569725, |
|
"rewards/margins_std": 0.03974680230021477, |
|
"rewards/rejected": -0.03977585583925247, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.44921875, |
|
"learning_rate": 1.9054347265559213e-08, |
|
"logits/chosen": 0.1583404242992401, |
|
"logits/rejected": 0.6649370193481445, |
|
"logps/chosen": -259.9563903808594, |
|
"logps/rejected": -223.4931640625, |
|
"loss": 0.6565, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.014935478568077087, |
|
"rewards/margins": 0.07356850802898407, |
|
"rewards/margins_max": 0.10868100821971893, |
|
"rewards/margins_min": 0.0384560152888298, |
|
"rewards/margins_std": 0.049656566232442856, |
|
"rewards/rejected": -0.058633022010326385, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.498046875, |
|
"learning_rate": 1.716283027776061e-08, |
|
"logits/chosen": 0.2019151747226715, |
|
"logits/rejected": 0.8282853364944458, |
|
"logps/chosen": -291.37066650390625, |
|
"logps/rejected": -222.61831665039062, |
|
"loss": 0.6634, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.016527246683835983, |
|
"rewards/margins": 0.07255034148693085, |
|
"rewards/margins_max": 0.1086968407034874, |
|
"rewards/margins_min": 0.036403849720954895, |
|
"rewards/margins_std": 0.05111886188387871, |
|
"rewards/rejected": -0.05602309852838516, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.4296875, |
|
"learning_rate": 1.536683753475043e-08, |
|
"logits/chosen": 0.22870250046253204, |
|
"logits/rejected": 0.4174967408180237, |
|
"logps/chosen": -219.11306762695312, |
|
"logps/rejected": -241.36563110351562, |
|
"loss": 0.6615, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.0025456459261476994, |
|
"rewards/margins": 0.059264473617076874, |
|
"rewards/margins_max": 0.08250005543231964, |
|
"rewards/margins_min": 0.036028891801834106, |
|
"rewards/margins_std": 0.032860077917575836, |
|
"rewards/rejected": -0.061810124665498734, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.390625, |
|
"learning_rate": 1.3667105864117873e-08, |
|
"logits/chosen": 0.21612632274627686, |
|
"logits/rejected": 0.39824485778808594, |
|
"logps/chosen": -200.84498596191406, |
|
"logps/rejected": -228.2679901123047, |
|
"loss": 0.6605, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.008642548695206642, |
|
"rewards/margins": 0.0651601254940033, |
|
"rewards/margins_max": 0.10423406213521957, |
|
"rewards/margins_min": 0.026086175814270973, |
|
"rewards/margins_std": 0.05525890737771988, |
|
"rewards/rejected": -0.0565175786614418, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.41796875, |
|
"learning_rate": 1.2064332601191163e-08, |
|
"logits/chosen": -0.04893340915441513, |
|
"logits/rejected": 0.339263916015625, |
|
"logps/chosen": -222.4666748046875, |
|
"logps/rejected": -217.02999877929688, |
|
"loss": 0.6612, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.0008535057422704995, |
|
"rewards/margins": 0.05954117700457573, |
|
"rewards/margins_max": 0.0829622894525528, |
|
"rewards/margins_min": 0.03612007200717926, |
|
"rewards/margins_std": 0.03312245011329651, |
|
"rewards/rejected": -0.06039468199014664, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.39453125, |
|
"learning_rate": 1.0559175302947476e-08, |
|
"logits/chosen": 0.012552693486213684, |
|
"logits/rejected": 0.5173078775405884, |
|
"logps/chosen": -260.0834045410156, |
|
"logps/rejected": -247.43447875976562, |
|
"loss": 0.6595, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.011661765165627003, |
|
"rewards/margins": 0.06366874277591705, |
|
"rewards/margins_max": 0.09778660535812378, |
|
"rewards/margins_min": 0.029550885781645775, |
|
"rewards/margins_std": 0.04824993759393692, |
|
"rewards/rejected": -0.052006978541612625, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 9.152251478242417e-09, |
|
"logits/chosen": -0.02594194933772087, |
|
"logits/rejected": 0.4399421215057373, |
|
"logps/chosen": -212.4099578857422, |
|
"logps/rejected": -199.73458862304688, |
|
"loss": 0.6594, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.007081738207489252, |
|
"rewards/margins": 0.06215248256921768, |
|
"rewards/margins_max": 0.08854631334543228, |
|
"rewards/margins_min": 0.03575865179300308, |
|
"rewards/margins_std": 0.03732650727033615, |
|
"rewards/rejected": -0.055070746690034866, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.427734375, |
|
"learning_rate": 7.844138334469425e-09, |
|
"logits/chosen": 0.4558231234550476, |
|
"logits/rejected": 0.8965223431587219, |
|
"logps/chosen": -201.3118438720703, |
|
"logps/rejected": -192.5732421875, |
|
"loss": 0.6628, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.009340132586658001, |
|
"rewards/margins": 0.0616113655269146, |
|
"rewards/margins_max": 0.09181926399469376, |
|
"rewards/margins_min": 0.03140346333384514, |
|
"rewards/margins_std": 0.04272041842341423, |
|
"rewards/rejected": -0.05227123573422432, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.37109375, |
|
"learning_rate": 6.635372540753498e-09, |
|
"logits/chosen": 0.11258337646722794, |
|
"logits/rejected": 0.6999211311340332, |
|
"logps/chosen": -240.33975219726562, |
|
"logps/rejected": -214.0699920654297, |
|
"loss": 0.6577, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.027147358283400536, |
|
"rewards/margins": 0.0817473754286766, |
|
"rewards/margins_max": 0.12004182487726212, |
|
"rewards/margins_min": 0.0434529110789299, |
|
"rewards/margins_std": 0.05415653437376022, |
|
"rewards/rejected": -0.05460001155734062, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.4609375, |
|
"learning_rate": 5.526450007776435e-09, |
|
"logits/chosen": 0.1300087720155716, |
|
"logits/rejected": 0.5238357782363892, |
|
"logps/chosen": -292.7140197753906, |
|
"logps/rejected": -246.2644805908203, |
|
"loss": 0.6611, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.0037552430294454098, |
|
"rewards/margins": 0.05609096214175224, |
|
"rewards/margins_max": 0.07447664439678192, |
|
"rewards/margins_min": 0.03770528361201286, |
|
"rewards/margins_std": 0.026001274585723877, |
|
"rewards/rejected": -0.052335720509290695, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.431640625, |
|
"learning_rate": 4.517825684323323e-09, |
|
"logits/chosen": 0.18602465093135834, |
|
"logits/rejected": 0.5172281861305237, |
|
"logps/chosen": -223.3422088623047, |
|
"logps/rejected": -241.034912109375, |
|
"loss": 0.6596, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.00845097191631794, |
|
"rewards/margins": 0.06410791724920273, |
|
"rewards/margins_max": 0.09119440615177155, |
|
"rewards/margins_min": 0.037021439522504807, |
|
"rewards/margins_std": 0.03830606862902641, |
|
"rewards/rejected": -0.05565694719552994, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.474609375, |
|
"learning_rate": 3.6099133706344044e-09, |
|
"logits/chosen": 0.13008326292037964, |
|
"logits/rejected": 0.6074930429458618, |
|
"logps/chosen": -223.1219940185547, |
|
"logps/rejected": -207.696044921875, |
|
"loss": 0.6569, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.02304968610405922, |
|
"rewards/margins": 0.07380314916372299, |
|
"rewards/margins_max": 0.09590893238782883, |
|
"rewards/margins_min": 0.05169736221432686, |
|
"rewards/margins_std": 0.03126230835914612, |
|
"rewards/rejected": -0.05075346678495407, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.4921875, |
|
"learning_rate": 2.8030855486386174e-09, |
|
"logits/chosen": 0.28828924894332886, |
|
"logits/rejected": 0.6710017919540405, |
|
"logps/chosen": -256.94903564453125, |
|
"logps/rejected": -281.40411376953125, |
|
"loss": 0.6586, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.023295782506465912, |
|
"rewards/margins": 0.071876659989357, |
|
"rewards/margins_max": 0.09554243832826614, |
|
"rewards/margins_min": 0.048210885375738144, |
|
"rewards/margins_std": 0.03346845880150795, |
|
"rewards/rejected": -0.04858088120818138, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.515625, |
|
"learning_rate": 2.097673229138286e-09, |
|
"logits/chosen": 0.16988131403923035, |
|
"logits/rejected": 0.47897881269454956, |
|
"logps/chosen": -224.6415557861328, |
|
"logps/rejected": -232.2594451904297, |
|
"loss": 0.6587, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.012618700973689556, |
|
"rewards/margins": 0.07099349051713943, |
|
"rewards/margins_max": 0.10776303708553314, |
|
"rewards/margins_min": 0.03422392159700394, |
|
"rewards/margins_std": 0.0520000159740448, |
|
"rewards/rejected": -0.05837478116154671, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.44921875, |
|
"learning_rate": 1.493965816008136e-09, |
|
"logits/chosen": -0.009510600939393044, |
|
"logits/rejected": 0.3807966113090515, |
|
"logps/chosen": -211.14254760742188, |
|
"logps/rejected": -236.635498046875, |
|
"loss": 0.6601, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.00740268686786294, |
|
"rewards/margins": 0.07398059964179993, |
|
"rewards/margins_max": 0.10376466810703278, |
|
"rewards/margins_min": 0.04419652372598648, |
|
"rewards/margins_std": 0.0421210452914238, |
|
"rewards/rejected": -0.06657791137695312, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.447265625, |
|
"learning_rate": 9.922109874636875e-10, |
|
"logits/chosen": 0.19054090976715088, |
|
"logits/rejected": 0.557522177696228, |
|
"logps/chosen": -233.7532501220703, |
|
"logps/rejected": -239.6273651123047, |
|
"loss": 0.6579, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.015365364961326122, |
|
"rewards/margins": 0.08128596842288971, |
|
"rewards/margins_max": 0.11999186128377914, |
|
"rewards/margins_min": 0.04258008301258087, |
|
"rewards/margins_std": 0.05473839119076729, |
|
"rewards/rejected": -0.06592060625553131, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.416015625, |
|
"learning_rate": 5.926145944483984e-10, |
|
"logits/chosen": 0.04970569908618927, |
|
"logits/rejected": 0.41454869508743286, |
|
"logps/chosen": -197.70941162109375, |
|
"logps/rejected": -207.9854278564453, |
|
"loss": 0.6625, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.009294511750340462, |
|
"rewards/margins": 0.05480729788541794, |
|
"rewards/margins_max": 0.08153598010540009, |
|
"rewards/margins_min": 0.02807862125337124, |
|
"rewards/margins_std": 0.03780006244778633, |
|
"rewards/rejected": -0.04551279544830322, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.42578125, |
|
"learning_rate": 2.9534057618091356e-10, |
|
"logits/chosen": 0.1366875320672989, |
|
"logits/rejected": 0.4813140034675598, |
|
"logps/chosen": -195.55368041992188, |
|
"logps/rejected": -211.63711547851562, |
|
"loss": 0.6599, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.014302869327366352, |
|
"rewards/margins": 0.0652112141251564, |
|
"rewards/margins_max": 0.09685875475406647, |
|
"rewards/margins_min": 0.03356366977095604, |
|
"rewards/margins_std": 0.04475637897849083, |
|
"rewards/rejected": -0.05090833827853203, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.4453125, |
|
"learning_rate": 1.0051089289686565e-10, |
|
"logits/chosen": 0.20965194702148438, |
|
"logits/rejected": 0.5980690121650696, |
|
"logps/chosen": -218.3548583984375, |
|
"logps/rejected": -252.60159301757812, |
|
"loss": 0.6601, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.01929156482219696, |
|
"rewards/margins": 0.06570716202259064, |
|
"rewards/margins_max": 0.09711313247680664, |
|
"rewards/margins_min": 0.03430120274424553, |
|
"rewards/margins_std": 0.044414736330509186, |
|
"rewards/rejected": -0.04641559720039368, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.404296875, |
|
"learning_rate": 8.205475813372054e-12, |
|
"logits/chosen": 0.07036467641592026, |
|
"logits/rejected": 0.6885267496109009, |
|
"logps/chosen": -334.186279296875, |
|
"logps/rejected": -232.6072998046875, |
|
"loss": 0.6604, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.015851657837629318, |
|
"rewards/margins": 0.06690393388271332, |
|
"rewards/margins_max": 0.0959465354681015, |
|
"rewards/margins_min": 0.037861332297325134, |
|
"rewards/margins_std": 0.041072435677051544, |
|
"rewards/rejected": -0.0510522723197937, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_logits/chosen": 0.7297662496566772, |
|
"eval_logits/rejected": 0.8997808694839478, |
|
"eval_logps/chosen": -337.8507080078125, |
|
"eval_logps/rejected": -318.01556396484375, |
|
"eval_loss": 0.6928703784942627, |
|
"eval_rewards/accuracies": 0.5364999771118164, |
|
"eval_rewards/chosen": 0.002909434260800481, |
|
"eval_rewards/margins": 0.0005662557086907327, |
|
"eval_rewards/margins_max": 0.07228709012269974, |
|
"eval_rewards/margins_min": -0.08225506544113159, |
|
"eval_rewards/margins_std": 0.050406549125909805, |
|
"eval_rewards/rejected": 0.002343178726732731, |
|
"eval_runtime": 864.7602, |
|
"eval_samples_per_second": 9.251, |
|
"eval_steps_per_second": 0.289, |
|
"step": 1724 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1724, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6676546893927447, |
|
"train_runtime": 9120.8228, |
|
"train_samples_per_second": 3.024, |
|
"train_steps_per_second": 0.189 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1724, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|