|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.6920914137408983, |
|
"eval_steps": 100, |
|
"global_step": 600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0023069713791363275, |
|
"grad_norm": 70.983255945064, |
|
"learning_rate": 4e-09, |
|
"logits/chosen": -1.6907414197921753, |
|
"logits/rejected": -1.6978764533996582, |
|
"logps/chosen": -135.08778381347656, |
|
"logps/rejected": -140.00140380859375, |
|
"loss": 0.6978, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.010493194684386253, |
|
"rewards/margins": 0.006632559932768345, |
|
"rewards/rejected": 0.003860633820295334, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.004613942758272655, |
|
"grad_norm": 79.97063682582153, |
|
"learning_rate": 8e-09, |
|
"logits/chosen": -1.6330227851867676, |
|
"logits/rejected": -1.7231806516647339, |
|
"logps/chosen": -197.88365173339844, |
|
"logps/rejected": -218.62255859375, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": 0.008352389559149742, |
|
"rewards/margins": -0.00352578517049551, |
|
"rewards/rejected": 0.011878175660967827, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.006920914137408983, |
|
"grad_norm": 78.73277588414827, |
|
"learning_rate": 1.1999999999999998e-08, |
|
"logits/chosen": -1.7628690004348755, |
|
"logits/rejected": -1.6921380758285522, |
|
"logps/chosen": -181.12741088867188, |
|
"logps/rejected": -177.64956665039062, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": -0.005539673380553722, |
|
"rewards/margins": -0.012507464736700058, |
|
"rewards/rejected": 0.006967790424823761, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.00922788551654531, |
|
"grad_norm": 84.94475101410946, |
|
"learning_rate": 1.6e-08, |
|
"logits/chosen": -1.6862337589263916, |
|
"logits/rejected": -1.6957104206085205, |
|
"logps/chosen": -229.57574462890625, |
|
"logps/rejected": -308.63421630859375, |
|
"loss": 0.6949, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.019691964611411095, |
|
"rewards/margins": 0.022208284586668015, |
|
"rewards/rejected": -0.0025163227692246437, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.011534856895681638, |
|
"grad_norm": 78.53713189911603, |
|
"learning_rate": 2e-08, |
|
"logits/chosen": -1.72577965259552, |
|
"logits/rejected": -1.72530198097229, |
|
"logps/chosen": -182.21597290039062, |
|
"logps/rejected": -197.15383911132812, |
|
"loss": 0.6876, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.00015587342204526067, |
|
"rewards/margins": -0.00128166563808918, |
|
"rewards/rejected": 0.0011257934384047985, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.013841828274817966, |
|
"grad_norm": 60.639642502259356, |
|
"learning_rate": 2.3999999999999997e-08, |
|
"logits/chosen": -1.5887395143508911, |
|
"logits/rejected": -1.7574589252471924, |
|
"logps/chosen": -121.71543884277344, |
|
"logps/rejected": -164.58782958984375, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.004625019151717424, |
|
"rewards/margins": 0.00861622579395771, |
|
"rewards/rejected": -0.003991207107901573, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.016148799653954292, |
|
"grad_norm": 86.96054524483631, |
|
"learning_rate": 2.8000000000000003e-08, |
|
"logits/chosen": -1.5507514476776123, |
|
"logits/rejected": -1.5499210357666016, |
|
"logps/chosen": -147.94631958007812, |
|
"logps/rejected": -200.87417602539062, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.013226826675236225, |
|
"rewards/margins": 0.021001461893320084, |
|
"rewards/rejected": -0.00777463661506772, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.01845577103309062, |
|
"grad_norm": 77.14253037311525, |
|
"learning_rate": 3.2e-08, |
|
"logits/chosen": -1.6721559762954712, |
|
"logits/rejected": -1.7068090438842773, |
|
"logps/chosen": -157.89622497558594, |
|
"logps/rejected": -199.2628631591797, |
|
"loss": 0.6951, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.0034806535113602877, |
|
"rewards/margins": 0.006839222740381956, |
|
"rewards/rejected": -0.010319876484572887, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.020762742412226948, |
|
"grad_norm": 78.17299743340894, |
|
"learning_rate": 3.6e-08, |
|
"logits/chosen": -1.6556451320648193, |
|
"logits/rejected": -1.7276983261108398, |
|
"logps/chosen": -135.32223510742188, |
|
"logps/rejected": -158.1257781982422, |
|
"loss": 0.6884, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.01184301357716322, |
|
"rewards/margins": 0.023063620552420616, |
|
"rewards/rejected": -0.011220606043934822, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.023069713791363276, |
|
"grad_norm": 77.30701284634611, |
|
"learning_rate": 4e-08, |
|
"logits/chosen": -1.735813856124878, |
|
"logits/rejected": -1.789333701133728, |
|
"logps/chosen": -157.4953155517578, |
|
"logps/rejected": -186.05862426757812, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.004054094199091196, |
|
"rewards/margins": 0.010992627590894699, |
|
"rewards/rejected": -0.006938533391803503, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.025376685170499604, |
|
"grad_norm": 77.95778126726918, |
|
"learning_rate": 4.4e-08, |
|
"logits/chosen": -1.5402836799621582, |
|
"logits/rejected": -1.5884689092636108, |
|
"logps/chosen": -133.27554321289062, |
|
"logps/rejected": -170.48594665527344, |
|
"loss": 0.6944, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.009935155510902405, |
|
"rewards/margins": 0.019674377515912056, |
|
"rewards/rejected": -0.009739222005009651, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.02768365654963593, |
|
"grad_norm": 78.69027024475307, |
|
"learning_rate": 4.799999999999999e-08, |
|
"logits/chosen": -1.4641175270080566, |
|
"logits/rejected": -1.6491130590438843, |
|
"logps/chosen": -139.53375244140625, |
|
"logps/rejected": -193.21438598632812, |
|
"loss": 0.6945, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.010036014020442963, |
|
"rewards/margins": 0.005420446861535311, |
|
"rewards/rejected": -0.015456462278962135, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.02999062792877226, |
|
"grad_norm": 82.14181039722058, |
|
"learning_rate": 5.2e-08, |
|
"logits/chosen": -1.7350623607635498, |
|
"logits/rejected": -1.6639574766159058, |
|
"logps/chosen": -161.62164306640625, |
|
"logps/rejected": -160.58840942382812, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.0011872733011841774, |
|
"rewards/margins": -0.006157425232231617, |
|
"rewards/rejected": 0.0049701533280313015, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.032297599307908584, |
|
"grad_norm": 73.66413843287592, |
|
"learning_rate": 5.6000000000000005e-08, |
|
"logits/chosen": -1.4280009269714355, |
|
"logits/rejected": -1.6318210363388062, |
|
"logps/chosen": -131.8518524169922, |
|
"logps/rejected": -166.72335815429688, |
|
"loss": 0.6964, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.01029287837445736, |
|
"rewards/margins": -0.014272996224462986, |
|
"rewards/rejected": 0.003980117850005627, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.034604570687044915, |
|
"grad_norm": 86.40461346238433, |
|
"learning_rate": 6e-08, |
|
"logits/chosen": -1.6838908195495605, |
|
"logits/rejected": -1.7304034233093262, |
|
"logps/chosen": -124.49476623535156, |
|
"logps/rejected": -140.77841186523438, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.0041793473064899445, |
|
"rewards/margins": -0.0030185298528522253, |
|
"rewards/rejected": 0.007197877857834101, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.03691154206618124, |
|
"grad_norm": 79.23808836252253, |
|
"learning_rate": 6.4e-08, |
|
"logits/chosen": -1.5962588787078857, |
|
"logits/rejected": -1.549019455909729, |
|
"logps/chosen": -194.4256591796875, |
|
"logps/rejected": -237.36117553710938, |
|
"loss": 0.7003, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.0013054789742454886, |
|
"rewards/margins": -0.018302934244275093, |
|
"rewards/rejected": 0.016997454687952995, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.03921851344531757, |
|
"grad_norm": 74.59682312018288, |
|
"learning_rate": 6.8e-08, |
|
"logits/chosen": -1.740609884262085, |
|
"logits/rejected": -1.6540827751159668, |
|
"logps/chosen": -157.9348907470703, |
|
"logps/rejected": -158.6222686767578, |
|
"loss": 0.6889, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 0.01421279925853014, |
|
"rewards/margins": 0.03194922208786011, |
|
"rewards/rejected": -0.017736420035362244, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.041525484824453895, |
|
"grad_norm": 79.71071945309694, |
|
"learning_rate": 7.2e-08, |
|
"logits/chosen": -1.5709240436553955, |
|
"logits/rejected": -1.6920911073684692, |
|
"logps/chosen": -173.94007873535156, |
|
"logps/rejected": -219.2288818359375, |
|
"loss": 0.6962, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.011220686137676239, |
|
"rewards/margins": -0.008418139070272446, |
|
"rewards/rejected": -0.0028025463689118624, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.04383245620359023, |
|
"grad_norm": 86.21753265595521, |
|
"learning_rate": 7.599999999999999e-08, |
|
"logits/chosen": -1.7426936626434326, |
|
"logits/rejected": -1.5694864988327026, |
|
"logps/chosen": -159.00730895996094, |
|
"logps/rejected": -146.18772888183594, |
|
"loss": 0.6971, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.01584451086819172, |
|
"rewards/margins": -0.012521232478320599, |
|
"rewards/rejected": -0.003323277225717902, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.04613942758272655, |
|
"grad_norm": 76.87714834723751, |
|
"learning_rate": 8e-08, |
|
"logits/chosen": -1.7129234075546265, |
|
"logits/rejected": -1.6838488578796387, |
|
"logps/chosen": -191.7474822998047, |
|
"logps/rejected": -162.81466674804688, |
|
"loss": 0.6986, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.0020668436773121357, |
|
"rewards/margins": -0.008048251271247864, |
|
"rewards/rejected": 0.010115095414221287, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.04844639896186288, |
|
"grad_norm": 71.62490665795943, |
|
"learning_rate": 8.4e-08, |
|
"logits/chosen": -1.7202041149139404, |
|
"logits/rejected": -1.7000675201416016, |
|
"logps/chosen": -157.0576934814453, |
|
"logps/rejected": -186.76138305664062, |
|
"loss": 0.6889, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": 0.0023848186247050762, |
|
"rewards/margins": 0.002543981885537505, |
|
"rewards/rejected": -0.00015916326083242893, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.05075337034099921, |
|
"grad_norm": 81.75722048702342, |
|
"learning_rate": 8.8e-08, |
|
"logits/chosen": -1.5299909114837646, |
|
"logits/rejected": -1.5974533557891846, |
|
"logps/chosen": -158.53753662109375, |
|
"logps/rejected": -179.9112548828125, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.006203922443091869, |
|
"rewards/margins": 0.02108878269791603, |
|
"rewards/rejected": -0.014884857460856438, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.05306034172013553, |
|
"grad_norm": 78.9572151515279, |
|
"learning_rate": 9.2e-08, |
|
"logits/chosen": -1.6293164491653442, |
|
"logits/rejected": -1.6444960832595825, |
|
"logps/chosen": -152.41412353515625, |
|
"logps/rejected": -170.09869384765625, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.010927575640380383, |
|
"rewards/margins": 0.0014875519555062056, |
|
"rewards/rejected": 0.009440025314688683, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.05536731309927186, |
|
"grad_norm": 81.84986800244135, |
|
"learning_rate": 9.599999999999999e-08, |
|
"logits/chosen": -1.7789497375488281, |
|
"logits/rejected": -1.7547317743301392, |
|
"logps/chosen": -214.71987915039062, |
|
"logps/rejected": -240.7999725341797, |
|
"loss": 0.6889, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.0025361552834510803, |
|
"rewards/margins": 0.006769699975848198, |
|
"rewards/rejected": -0.004233543295413256, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.05767428447840819, |
|
"grad_norm": 76.76668544313137, |
|
"learning_rate": 1e-07, |
|
"logits/chosen": -1.491020917892456, |
|
"logits/rejected": -1.5447454452514648, |
|
"logps/chosen": -201.1317901611328, |
|
"logps/rejected": -267.04248046875, |
|
"loss": 0.6974, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.005572921596467495, |
|
"rewards/margins": 0.01456289179623127, |
|
"rewards/rejected": -0.02013581432402134, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.05998125585754452, |
|
"grad_norm": 84.23078533480681, |
|
"learning_rate": 1.04e-07, |
|
"logits/chosen": -1.6120061874389648, |
|
"logits/rejected": -1.7649461030960083, |
|
"logps/chosen": -183.1412353515625, |
|
"logps/rejected": -282.4610290527344, |
|
"loss": 0.695, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.02553856186568737, |
|
"rewards/margins": -0.038517288863658905, |
|
"rewards/rejected": 0.012978724204003811, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.06228822723668084, |
|
"grad_norm": 92.0173388072239, |
|
"learning_rate": 1.08e-07, |
|
"logits/chosen": -1.4607347249984741, |
|
"logits/rejected": -1.625195026397705, |
|
"logps/chosen": -152.8062744140625, |
|
"logps/rejected": -219.1183624267578, |
|
"loss": 0.6938, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.012677345424890518, |
|
"rewards/margins": -0.032261885702610016, |
|
"rewards/rejected": 0.01958453841507435, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.06459519861581717, |
|
"grad_norm": 79.32388836348366, |
|
"learning_rate": 1.1200000000000001e-07, |
|
"logits/chosen": -1.6375060081481934, |
|
"logits/rejected": -1.5966099500656128, |
|
"logps/chosen": -223.85108947753906, |
|
"logps/rejected": -256.30072021484375, |
|
"loss": 0.699, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": -0.023084305226802826, |
|
"rewards/margins": -0.02949170023202896, |
|
"rewards/rejected": 0.006407391745597124, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.0669021699949535, |
|
"grad_norm": 69.72321013611293, |
|
"learning_rate": 1.1599999999999999e-07, |
|
"logits/chosen": -1.5106103420257568, |
|
"logits/rejected": -1.596940517425537, |
|
"logps/chosen": -188.92218017578125, |
|
"logps/rejected": -275.0534973144531, |
|
"loss": 0.6941, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.017192328348755836, |
|
"rewards/margins": 0.03240702301263809, |
|
"rewards/rejected": -0.015214694663882256, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.06920914137408983, |
|
"grad_norm": 72.64787237644839, |
|
"learning_rate": 1.2e-07, |
|
"logits/chosen": -1.667373776435852, |
|
"logits/rejected": -1.6947064399719238, |
|
"logps/chosen": -103.05116271972656, |
|
"logps/rejected": -151.50807189941406, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.00641946354880929, |
|
"rewards/margins": 0.032484300434589386, |
|
"rewards/rejected": -0.026064833626151085, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07151611275322615, |
|
"grad_norm": 75.24112563737103, |
|
"learning_rate": 1.24e-07, |
|
"logits/chosen": -1.5305185317993164, |
|
"logits/rejected": -1.6095894575119019, |
|
"logps/chosen": -154.5998077392578, |
|
"logps/rejected": -176.33970642089844, |
|
"loss": 0.6965, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.00010838371235877275, |
|
"rewards/margins": 0.006697986274957657, |
|
"rewards/rejected": -0.006589602679014206, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.07382308413236248, |
|
"grad_norm": 70.73819169103417, |
|
"learning_rate": 1.28e-07, |
|
"logits/chosen": -1.6058859825134277, |
|
"logits/rejected": -1.6498842239379883, |
|
"logps/chosen": -151.62286376953125, |
|
"logps/rejected": -169.5069122314453, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": 0.003975578583776951, |
|
"rewards/margins": 0.006644865497946739, |
|
"rewards/rejected": -0.0026692876126617193, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.0761300555114988, |
|
"grad_norm": 79.80905942384086, |
|
"learning_rate": 1.32e-07, |
|
"logits/chosen": -1.566676139831543, |
|
"logits/rejected": -1.635036587715149, |
|
"logps/chosen": -213.95590209960938, |
|
"logps/rejected": -268.84747314453125, |
|
"loss": 0.6878, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.0030877357348799706, |
|
"rewards/margins": -0.004700601100921631, |
|
"rewards/rejected": 0.0016128652496263385, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.07843702689063514, |
|
"grad_norm": 68.290418726697, |
|
"learning_rate": 1.36e-07, |
|
"logits/chosen": -1.625745177268982, |
|
"logits/rejected": -1.7014200687408447, |
|
"logps/chosen": -191.14842224121094, |
|
"logps/rejected": -222.51779174804688, |
|
"loss": 0.6954, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.0007452260470017791, |
|
"rewards/margins": 0.00873138289898634, |
|
"rewards/rejected": -0.007986157201230526, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.08074399826977147, |
|
"grad_norm": 79.51788387191074, |
|
"learning_rate": 1.3999999999999998e-07, |
|
"logits/chosen": -1.5817363262176514, |
|
"logits/rejected": -1.6993348598480225, |
|
"logps/chosen": -131.7422637939453, |
|
"logps/rejected": -162.98304748535156, |
|
"loss": 0.6941, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.0074032689444720745, |
|
"rewards/margins": -0.010667804628610611, |
|
"rewards/rejected": 0.003264536615461111, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.08305096964890779, |
|
"grad_norm": 83.37639460390962, |
|
"learning_rate": 1.44e-07, |
|
"logits/chosen": -1.5910240411758423, |
|
"logits/rejected": -1.6661615371704102, |
|
"logps/chosen": -181.304931640625, |
|
"logps/rejected": -209.23526000976562, |
|
"loss": 0.6943, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.0042681945487856865, |
|
"rewards/margins": -0.02247396856546402, |
|
"rewards/rejected": 0.018205774948000908, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.08535794102804412, |
|
"grad_norm": 85.04605551266509, |
|
"learning_rate": 1.48e-07, |
|
"logits/chosen": -1.5876970291137695, |
|
"logits/rejected": -1.7304015159606934, |
|
"logps/chosen": -146.5454559326172, |
|
"logps/rejected": -182.179931640625, |
|
"loss": 0.6891, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.011930807493627071, |
|
"rewards/margins": -0.013042710721492767, |
|
"rewards/rejected": 0.0011119036935269833, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.08766491240718045, |
|
"grad_norm": 71.97159401776734, |
|
"learning_rate": 1.5199999999999998e-07, |
|
"logits/chosen": -1.6893718242645264, |
|
"logits/rejected": -1.6752575635910034, |
|
"logps/chosen": -163.57489013671875, |
|
"logps/rejected": -162.34803771972656, |
|
"loss": 0.6906, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.0007063052617013454, |
|
"rewards/margins": 0.002567308023571968, |
|
"rewards/rejected": -0.0018610022962093353, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.08997188378631678, |
|
"grad_norm": 80.09910611023234, |
|
"learning_rate": 1.56e-07, |
|
"logits/chosen": -1.612238883972168, |
|
"logits/rejected": -1.5296804904937744, |
|
"logps/chosen": -143.39723205566406, |
|
"logps/rejected": -165.65318298339844, |
|
"loss": 0.6871, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.009035947732627392, |
|
"rewards/margins": 0.025550464168190956, |
|
"rewards/rejected": -0.01651451550424099, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.0922788551654531, |
|
"grad_norm": 70.85997602518799, |
|
"learning_rate": 1.6e-07, |
|
"logits/chosen": -1.6167306900024414, |
|
"logits/rejected": -1.720908522605896, |
|
"logps/chosen": -137.2986602783203, |
|
"logps/rejected": -246.95404052734375, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.006022963672876358, |
|
"rewards/margins": 0.008436123840510845, |
|
"rewards/rejected": -0.0024131599348038435, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.09458582654458943, |
|
"grad_norm": 73.43468397188529, |
|
"learning_rate": 1.6399999999999999e-07, |
|
"logits/chosen": -1.7178070545196533, |
|
"logits/rejected": -1.7651526927947998, |
|
"logps/chosen": -154.39617919921875, |
|
"logps/rejected": -187.47491455078125, |
|
"loss": 0.6985, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.014386245980858803, |
|
"rewards/margins": -0.011840756982564926, |
|
"rewards/rejected": -0.002545490860939026, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.09689279792372577, |
|
"grad_norm": 82.20956953972079, |
|
"learning_rate": 1.68e-07, |
|
"logits/chosen": -1.5902974605560303, |
|
"logits/rejected": -1.604806900024414, |
|
"logps/chosen": -127.57711029052734, |
|
"logps/rejected": -146.8506317138672, |
|
"loss": 0.6887, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.006446457467973232, |
|
"rewards/margins": 0.008786877617239952, |
|
"rewards/rejected": -0.015233333222568035, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.09919976930286209, |
|
"grad_norm": 76.58342600254258, |
|
"learning_rate": 1.7199999999999998e-07, |
|
"logits/chosen": -1.6294573545455933, |
|
"logits/rejected": -1.6458450555801392, |
|
"logps/chosen": -248.47845458984375, |
|
"logps/rejected": -246.7737579345703, |
|
"loss": 0.6895, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.009375479072332382, |
|
"rewards/margins": 0.025924015790224075, |
|
"rewards/rejected": -0.03529949486255646, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.10150674068199841, |
|
"grad_norm": 73.05738349044326, |
|
"learning_rate": 1.76e-07, |
|
"logits/chosen": -1.84279465675354, |
|
"logits/rejected": -1.7476646900177002, |
|
"logps/chosen": -153.08554077148438, |
|
"logps/rejected": -154.7803497314453, |
|
"loss": 0.6909, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.002370176836848259, |
|
"rewards/margins": -0.0033320121001452208, |
|
"rewards/rejected": 0.0009618350304663181, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.10381371206113474, |
|
"grad_norm": 77.95992122604585, |
|
"learning_rate": 1.8e-07, |
|
"logits/chosen": -1.6355715990066528, |
|
"logits/rejected": -1.6984450817108154, |
|
"logps/chosen": -169.7340087890625, |
|
"logps/rejected": -185.59031677246094, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.01826353184878826, |
|
"rewards/margins": -0.008648518472909927, |
|
"rewards/rejected": -0.009615011513233185, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.10612068344027106, |
|
"grad_norm": 74.70608110655593, |
|
"learning_rate": 1.84e-07, |
|
"logits/chosen": -1.5153872966766357, |
|
"logits/rejected": -1.5389485359191895, |
|
"logps/chosen": -214.6402130126953, |
|
"logps/rejected": -224.4317626953125, |
|
"loss": 0.6877, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.00619399081915617, |
|
"rewards/margins": 0.011179441586136818, |
|
"rewards/rejected": -0.004985451698303223, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.1084276548194074, |
|
"grad_norm": 74.73028912491218, |
|
"learning_rate": 1.88e-07, |
|
"logits/chosen": -1.5655710697174072, |
|
"logits/rejected": -1.5568063259124756, |
|
"logps/chosen": -170.8540496826172, |
|
"logps/rejected": -195.4169158935547, |
|
"loss": 0.6887, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.016467105597257614, |
|
"rewards/margins": 0.011973596177995205, |
|
"rewards/rejected": -0.028440698981285095, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.11073462619854373, |
|
"grad_norm": 81.65007402302682, |
|
"learning_rate": 1.9199999999999997e-07, |
|
"logits/chosen": -1.7869484424591064, |
|
"logits/rejected": -1.7626042366027832, |
|
"logps/chosen": -208.46002197265625, |
|
"logps/rejected": -256.5970458984375, |
|
"loss": 0.6846, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.009895925410091877, |
|
"rewards/margins": 0.011812476441264153, |
|
"rewards/rejected": -0.021708402782678604, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.11304159757768005, |
|
"grad_norm": 77.09604224965807, |
|
"learning_rate": 1.9599999999999998e-07, |
|
"logits/chosen": -1.6421016454696655, |
|
"logits/rejected": -1.5873386859893799, |
|
"logps/chosen": -176.68853759765625, |
|
"logps/rejected": -188.64544677734375, |
|
"loss": 0.69, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.01306343637406826, |
|
"rewards/margins": 0.021781262010335922, |
|
"rewards/rejected": -0.03484470024704933, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.11534856895681637, |
|
"grad_norm": 77.37699461097638, |
|
"learning_rate": 2e-07, |
|
"logits/chosen": -1.5747566223144531, |
|
"logits/rejected": -1.5757074356079102, |
|
"logps/chosen": -146.1053009033203, |
|
"logps/rejected": -177.65733337402344, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.01261211559176445, |
|
"rewards/margins": 0.005542438477277756, |
|
"rewards/rejected": -0.018154552206397057, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.11534856895681637, |
|
"eval_logits/chosen": -1.592301368713379, |
|
"eval_logits/rejected": -1.4917248487472534, |
|
"eval_logps/chosen": -185.32534790039062, |
|
"eval_logps/rejected": -150.51693725585938, |
|
"eval_loss": 0.6938029527664185, |
|
"eval_rewards/accuracies": 0.4000000059604645, |
|
"eval_rewards/chosen": -0.014318165369331837, |
|
"eval_rewards/margins": -0.014164167456328869, |
|
"eval_rewards/rejected": -0.00015399709809571505, |
|
"eval_runtime": 22.8572, |
|
"eval_samples_per_second": 4.375, |
|
"eval_steps_per_second": 1.094, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.11765554033595271, |
|
"grad_norm": 83.18929756458088, |
|
"learning_rate": 1.9999925887938156e-07, |
|
"logits/chosen": -1.553455114364624, |
|
"logits/rejected": -1.6009831428527832, |
|
"logps/chosen": -171.79664611816406, |
|
"logps/rejected": -223.1472930908203, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.0011008224682882428, |
|
"rewards/margins": 0.010834511369466782, |
|
"rewards/rejected": -0.009733689948916435, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.11996251171508904, |
|
"grad_norm": 73.85193862190509, |
|
"learning_rate": 1.9999703552851146e-07, |
|
"logits/chosen": -1.7583006620407104, |
|
"logits/rejected": -1.714582920074463, |
|
"logps/chosen": -209.88302612304688, |
|
"logps/rejected": -255.11888122558594, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.030152078717947006, |
|
"rewards/margins": -0.011741320602595806, |
|
"rewards/rejected": -0.018410757184028625, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.12226948309422536, |
|
"grad_norm": 73.38727733625704, |
|
"learning_rate": 1.9999332998034512e-07, |
|
"logits/chosen": -1.6966747045516968, |
|
"logits/rejected": -1.6100220680236816, |
|
"logps/chosen": -160.12281799316406, |
|
"logps/rejected": -167.38145446777344, |
|
"loss": 0.686, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.008994976989924908, |
|
"rewards/margins": 0.011080076918005943, |
|
"rewards/rejected": -0.020075054839253426, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.12457645447336169, |
|
"grad_norm": 79.80728796393491, |
|
"learning_rate": 1.9998814228980768e-07, |
|
"logits/chosen": -1.6656932830810547, |
|
"logits/rejected": -1.7435060739517212, |
|
"logps/chosen": -156.0963897705078, |
|
"logps/rejected": -208.7286376953125, |
|
"loss": 0.6956, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.01158140879124403, |
|
"rewards/margins": -0.003068419173359871, |
|
"rewards/rejected": -0.008512990549206734, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.126883425852498, |
|
"grad_norm": 71.572270187751, |
|
"learning_rate": 1.9998147253379324e-07, |
|
"logits/chosen": -1.7250394821166992, |
|
"logits/rejected": -1.720632791519165, |
|
"logps/chosen": -143.606201171875, |
|
"logps/rejected": -164.64126586914062, |
|
"loss": 0.6911, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.00029725395143032074, |
|
"rewards/margins": 0.004560052417218685, |
|
"rewards/rejected": -0.004857306368649006, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.12919039723163434, |
|
"grad_norm": 76.7363281665151, |
|
"learning_rate": 1.999733208111637e-07, |
|
"logits/chosen": -1.680725336074829, |
|
"logits/rejected": -1.7269576787948608, |
|
"logps/chosen": -141.92416381835938, |
|
"logps/rejected": -163.63902282714844, |
|
"loss": 0.6878, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.013096440583467484, |
|
"rewards/margins": 0.013841900043189526, |
|
"rewards/rejected": -0.026938341557979584, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.13149736861077066, |
|
"grad_norm": 71.23095597503095, |
|
"learning_rate": 1.9996368724274726e-07, |
|
"logits/chosen": -1.7746036052703857, |
|
"logits/rejected": -1.651440978050232, |
|
"logps/chosen": -201.5678253173828, |
|
"logps/rejected": -208.96060180664062, |
|
"loss": 0.6814, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.020835982635617256, |
|
"rewards/margins": 0.0012882971204817295, |
|
"rewards/rejected": -0.02212427742779255, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.133804339989907, |
|
"grad_norm": 78.35820889872501, |
|
"learning_rate": 1.999525719713366e-07, |
|
"logits/chosen": -1.6184967756271362, |
|
"logits/rejected": -1.6276531219482422, |
|
"logps/chosen": -138.03579711914062, |
|
"logps/rejected": -156.24818420410156, |
|
"loss": 0.6893, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.024172522127628326, |
|
"rewards/margins": -0.007286247797310352, |
|
"rewards/rejected": -0.0168862733989954, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.13611131136904334, |
|
"grad_norm": 73.30378065799947, |
|
"learning_rate": 1.9993997516168685e-07, |
|
"logits/chosen": -1.5095572471618652, |
|
"logits/rejected": -1.4317773580551147, |
|
"logps/chosen": -168.31259155273438, |
|
"logps/rejected": -181.01010131835938, |
|
"loss": 0.6946, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.02184070646762848, |
|
"rewards/margins": -0.0038399603217840195, |
|
"rewards/rejected": -0.01800074614584446, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.13841828274817966, |
|
"grad_norm": 76.67682193401895, |
|
"learning_rate": 1.9992589700051315e-07, |
|
"logits/chosen": -1.6505416631698608, |
|
"logits/rejected": -1.6528055667877197, |
|
"logps/chosen": -163.4833221435547, |
|
"logps/rejected": -173.32627868652344, |
|
"loss": 0.691, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.037532731890678406, |
|
"rewards/margins": -0.001648992532864213, |
|
"rewards/rejected": -0.035883739590644836, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.14072525412731599, |
|
"grad_norm": 79.25219710625622, |
|
"learning_rate": 1.9991033769648782e-07, |
|
"logits/chosen": -1.6732072830200195, |
|
"logits/rejected": -1.6914747953414917, |
|
"logps/chosen": -192.20941162109375, |
|
"logps/rejected": -249.57064819335938, |
|
"loss": 0.6785, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.02337605319917202, |
|
"rewards/margins": 0.03376854211091995, |
|
"rewards/rejected": -0.05714459717273712, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.1430322255064523, |
|
"grad_norm": 74.1794316582206, |
|
"learning_rate": 1.9989329748023723e-07, |
|
"logits/chosen": -1.6055612564086914, |
|
"logits/rejected": -1.6374058723449707, |
|
"logps/chosen": -150.5140838623047, |
|
"logps/rejected": -178.90463256835938, |
|
"loss": 0.6838, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.03467298671603203, |
|
"rewards/margins": 0.017992481589317322, |
|
"rewards/rejected": -0.05266546458005905, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.14533919688558863, |
|
"grad_norm": 76.65585990685855, |
|
"learning_rate": 1.9987477660433854e-07, |
|
"logits/chosen": -1.6969408988952637, |
|
"logits/rejected": -1.7563108205795288, |
|
"logps/chosen": -142.4885711669922, |
|
"logps/rejected": -210.4172821044922, |
|
"loss": 0.689, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.024329736828804016, |
|
"rewards/margins": 0.004793995060026646, |
|
"rewards/rejected": -0.029123730957508087, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.14764616826472496, |
|
"grad_norm": 78.42733649878224, |
|
"learning_rate": 1.998547753433158e-07, |
|
"logits/chosen": -1.6231815814971924, |
|
"logits/rejected": -1.4993540048599243, |
|
"logps/chosen": -248.5255584716797, |
|
"logps/rejected": -283.0628662109375, |
|
"loss": 0.6867, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.022915348410606384, |
|
"rewards/margins": 0.04015136882662773, |
|
"rewards/rejected": -0.06306671351194382, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.14995313964386128, |
|
"grad_norm": 81.34489884736102, |
|
"learning_rate": 1.9983329399363594e-07, |
|
"logits/chosen": -1.696123719215393, |
|
"logits/rejected": -1.5894306898117065, |
|
"logps/chosen": -157.25205993652344, |
|
"logps/rejected": -169.28765869140625, |
|
"loss": 0.6888, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.02339826337993145, |
|
"rewards/margins": 0.029402071610093117, |
|
"rewards/rejected": -0.05280033499002457, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.1522601110229976, |
|
"grad_norm": 71.86675386697708, |
|
"learning_rate": 1.998103328737044e-07, |
|
"logits/chosen": -1.614111304283142, |
|
"logits/rejected": -1.668984055519104, |
|
"logps/chosen": -169.32870483398438, |
|
"logps/rejected": -184.28652954101562, |
|
"loss": 0.694, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.032653309404850006, |
|
"rewards/margins": -0.012304544448852539, |
|
"rewards/rejected": -0.020348764955997467, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.15456708240213396, |
|
"grad_norm": 81.85224601265395, |
|
"learning_rate": 1.9978589232386034e-07, |
|
"logits/chosen": -1.715609073638916, |
|
"logits/rejected": -1.7786422967910767, |
|
"logps/chosen": -167.58688354492188, |
|
"logps/rejected": -199.66270446777344, |
|
"loss": 0.6864, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.03811431676149368, |
|
"rewards/margins": 0.017543859779834747, |
|
"rewards/rejected": -0.05565817654132843, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.15687405378127028, |
|
"grad_norm": 73.7622516118343, |
|
"learning_rate": 1.9975997270637168e-07, |
|
"logits/chosen": -1.6321560144424438, |
|
"logits/rejected": -1.7015608549118042, |
|
"logps/chosen": -159.351318359375, |
|
"logps/rejected": -176.723876953125, |
|
"loss": 0.6859, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.02714327722787857, |
|
"rewards/margins": 0.021138466894626617, |
|
"rewards/rejected": -0.04828174412250519, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.1591810251604066, |
|
"grad_norm": 71.96760531715911, |
|
"learning_rate": 1.997325744054297e-07, |
|
"logits/chosen": -1.5530474185943604, |
|
"logits/rejected": -1.5373188257217407, |
|
"logps/chosen": -158.63812255859375, |
|
"logps/rejected": -204.0651092529297, |
|
"loss": 0.6845, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.01808898150920868, |
|
"rewards/margins": 0.017602307721972466, |
|
"rewards/rejected": -0.035691291093826294, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.16148799653954293, |
|
"grad_norm": 73.93273755038686, |
|
"learning_rate": 1.9970369782714328e-07, |
|
"logits/chosen": -1.522450566291809, |
|
"logits/rejected": -1.635149598121643, |
|
"logps/chosen": -142.74459838867188, |
|
"logps/rejected": -149.5770721435547, |
|
"loss": 0.6894, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.025994691997766495, |
|
"rewards/margins": -0.006413338705897331, |
|
"rewards/rejected": -0.019581351429224014, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.16379496791867926, |
|
"grad_norm": 79.55861212361464, |
|
"learning_rate": 1.99673343399533e-07, |
|
"logits/chosen": -1.525217056274414, |
|
"logits/rejected": -1.5952740907669067, |
|
"logps/chosen": -116.86170959472656, |
|
"logps/rejected": -175.00779724121094, |
|
"loss": 0.6832, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.020393915474414825, |
|
"rewards/margins": 0.032093390822410583, |
|
"rewards/rejected": -0.05248731002211571, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.16610193929781558, |
|
"grad_norm": 83.97535534931897, |
|
"learning_rate": 1.9964151157252466e-07, |
|
"logits/chosen": -1.6767423152923584, |
|
"logits/rejected": -1.6693997383117676, |
|
"logps/chosen": -207.50936889648438, |
|
"logps/rejected": -216.3134002685547, |
|
"loss": 0.6789, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.03636579588055611, |
|
"rewards/margins": 0.03607642278075218, |
|
"rewards/rejected": -0.07244221866130829, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.1684089106769519, |
|
"grad_norm": 73.85111113100436, |
|
"learning_rate": 1.996082028179428e-07, |
|
"logits/chosen": -1.4807971715927124, |
|
"logits/rejected": -1.4092496633529663, |
|
"logps/chosen": -168.455078125, |
|
"logps/rejected": -172.4587860107422, |
|
"loss": 0.689, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.06219344958662987, |
|
"rewards/margins": 0.011872416362166405, |
|
"rewards/rejected": -0.07406586408615112, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.17071588205608823, |
|
"grad_norm": 80.66954314511047, |
|
"learning_rate": 1.9957341762950344e-07, |
|
"logits/chosen": -1.5618644952774048, |
|
"logits/rejected": -1.67661452293396, |
|
"logps/chosen": -114.58411407470703, |
|
"logps/rejected": -158.1700439453125, |
|
"loss": 0.6807, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.04578070342540741, |
|
"rewards/margins": 0.029419898986816406, |
|
"rewards/rejected": -0.07520060241222382, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.17302285343522458, |
|
"grad_norm": 71.43623661516392, |
|
"learning_rate": 1.9953715652280706e-07, |
|
"logits/chosen": -1.6976016759872437, |
|
"logits/rejected": -1.6299835443496704, |
|
"logps/chosen": -228.1553497314453, |
|
"logps/rejected": -214.32037353515625, |
|
"loss": 0.6848, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.06526876986026764, |
|
"rewards/margins": -0.002617661375552416, |
|
"rewards/rejected": -0.06265110522508621, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.1753298248143609, |
|
"grad_norm": 85.31030453694517, |
|
"learning_rate": 1.9949942003533064e-07, |
|
"logits/chosen": -1.7211732864379883, |
|
"logits/rejected": -1.720245122909546, |
|
"logps/chosen": -138.57936096191406, |
|
"logps/rejected": -158.134521484375, |
|
"loss": 0.6829, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.042685676366090775, |
|
"rewards/margins": -0.006229763850569725, |
|
"rewards/rejected": -0.0364559069275856, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.17763679619349723, |
|
"grad_norm": 75.37656195588123, |
|
"learning_rate": 1.9946020872642006e-07, |
|
"logits/chosen": -1.602712631225586, |
|
"logits/rejected": -1.5105926990509033, |
|
"logps/chosen": -152.95616149902344, |
|
"logps/rejected": -252.92359924316406, |
|
"loss": 0.6848, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.05936397612094879, |
|
"rewards/margins": 0.023188650608062744, |
|
"rewards/rejected": -0.08255261927843094, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.17994376757263356, |
|
"grad_norm": 74.43256656918146, |
|
"learning_rate": 1.9941952317728147e-07, |
|
"logits/chosen": -1.6266837120056152, |
|
"logits/rejected": -1.5794254541397095, |
|
"logps/chosen": -154.70660400390625, |
|
"logps/rejected": -171.6692657470703, |
|
"loss": 0.688, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.0499938428401947, |
|
"rewards/margins": 0.022745870053768158, |
|
"rewards/rejected": -0.07273972034454346, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.18225073895176988, |
|
"grad_norm": 75.91057680239186, |
|
"learning_rate": 1.993773639909728e-07, |
|
"logits/chosen": -1.49541437625885, |
|
"logits/rejected": -1.6966127157211304, |
|
"logps/chosen": -165.41343688964844, |
|
"logps/rejected": -208.544189453125, |
|
"loss": 0.6768, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.02937229909002781, |
|
"rewards/margins": 0.061954449862241745, |
|
"rewards/rejected": -0.0913267433643341, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.1845577103309062, |
|
"grad_norm": 79.53079693796676, |
|
"learning_rate": 1.99333731792395e-07, |
|
"logits/chosen": -1.5714216232299805, |
|
"logits/rejected": -1.543687343597412, |
|
"logps/chosen": -153.09767150878906, |
|
"logps/rejected": -177.41847229003906, |
|
"loss": 0.684, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.0551866851747036, |
|
"rewards/margins": 0.03179415315389633, |
|
"rewards/rejected": -0.08698083460330963, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.18686468171004253, |
|
"grad_norm": 73.91686194821585, |
|
"learning_rate": 1.9928862722828242e-07, |
|
"logits/chosen": -1.7037162780761719, |
|
"logits/rejected": -1.675144076347351, |
|
"logps/chosen": -153.01358032226562, |
|
"logps/rejected": -175.93673706054688, |
|
"loss": 0.6781, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.023145336657762527, |
|
"rewards/margins": 0.06261962652206421, |
|
"rewards/rejected": -0.08576496690511703, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.18917165308917885, |
|
"grad_norm": 76.57698818122466, |
|
"learning_rate": 1.9924205096719357e-07, |
|
"logits/chosen": -1.5918736457824707, |
|
"logits/rejected": -1.4768625497817993, |
|
"logps/chosen": -196.1853485107422, |
|
"logps/rejected": -179.04530334472656, |
|
"loss": 0.6692, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.04375737905502319, |
|
"rewards/margins": 0.0569818913936615, |
|
"rewards/rejected": -0.10073927044868469, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.19147862446831518, |
|
"grad_norm": 77.81644602396882, |
|
"learning_rate": 1.9919400369950097e-07, |
|
"logits/chosen": -1.4722576141357422, |
|
"logits/rejected": -1.540255069732666, |
|
"logps/chosen": -205.6660614013672, |
|
"logps/rejected": -248.9489288330078, |
|
"loss": 0.6786, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.043852321803569794, |
|
"rewards/margins": 0.0406915545463562, |
|
"rewards/rejected": -0.084543876349926, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.19378559584745153, |
|
"grad_norm": 75.44269350923808, |
|
"learning_rate": 1.9914448613738103e-07, |
|
"logits/chosen": -1.529039740562439, |
|
"logits/rejected": -1.5173804759979248, |
|
"logps/chosen": -202.2668914794922, |
|
"logps/rejected": -226.52684020996094, |
|
"loss": 0.6763, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.07004385441541672, |
|
"rewards/margins": 0.021601226180791855, |
|
"rewards/rejected": -0.09164508432149887, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.19609256722658785, |
|
"grad_norm": 76.79086636181425, |
|
"learning_rate": 1.9909349901480347e-07, |
|
"logits/chosen": -1.610205888748169, |
|
"logits/rejected": -1.622270107269287, |
|
"logps/chosen": -152.17263793945312, |
|
"logps/rejected": -153.09571838378906, |
|
"loss": 0.6826, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.049044616520404816, |
|
"rewards/margins": 0.020372100174427032, |
|
"rewards/rejected": -0.06941672414541245, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.19839953860572418, |
|
"grad_norm": 65.6162753738054, |
|
"learning_rate": 1.990410430875205e-07, |
|
"logits/chosen": -1.6482963562011719, |
|
"logits/rejected": -1.6161506175994873, |
|
"logps/chosen": -131.635986328125, |
|
"logps/rejected": -142.7827606201172, |
|
"loss": 0.6691, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.028512008488178253, |
|
"rewards/margins": 0.06365156173706055, |
|
"rewards/rejected": -0.0921635702252388, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.2007065099848605, |
|
"grad_norm": 90.14268267387868, |
|
"learning_rate": 1.9898711913305547e-07, |
|
"logits/chosen": -1.5566825866699219, |
|
"logits/rejected": -1.6173129081726074, |
|
"logps/chosen": -174.24017333984375, |
|
"logps/rejected": -181.01629638671875, |
|
"loss": 0.6766, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.05642353743314743, |
|
"rewards/margins": 0.03944730758666992, |
|
"rewards/rejected": -0.09587083756923676, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.20301348136399683, |
|
"grad_norm": 76.41450508388954, |
|
"learning_rate": 1.9893172795069142e-07, |
|
"logits/chosen": -1.5998440980911255, |
|
"logits/rejected": -1.6545708179473877, |
|
"logps/chosen": -156.8663330078125, |
|
"logps/rejected": -159.1892547607422, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.05183999985456467, |
|
"rewards/margins": 0.0032500806264579296, |
|
"rewards/rejected": -0.055090077221393585, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.20532045274313315, |
|
"grad_norm": 87.56584383747318, |
|
"learning_rate": 1.988748703614594e-07, |
|
"logits/chosen": -1.6627997159957886, |
|
"logits/rejected": -1.6540586948394775, |
|
"logps/chosen": -155.84368896484375, |
|
"logps/rejected": -186.67495727539062, |
|
"loss": 0.6755, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.03810984268784523, |
|
"rewards/margins": 0.029699210077524185, |
|
"rewards/rejected": -0.06780905276536942, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.20762742412226948, |
|
"grad_norm": 70.4422512373765, |
|
"learning_rate": 1.9881654720812592e-07, |
|
"logits/chosen": -1.5361154079437256, |
|
"logits/rejected": -1.610466480255127, |
|
"logps/chosen": -115.56837463378906, |
|
"logps/rejected": -142.57766723632812, |
|
"loss": 0.6805, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.02763887494802475, |
|
"rewards/margins": 0.0444108322262764, |
|
"rewards/rejected": -0.07204970717430115, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.2099343955014058, |
|
"grad_norm": 71.9817063853092, |
|
"learning_rate": 1.9875675935518094e-07, |
|
"logits/chosen": -1.547518014907837, |
|
"logits/rejected": -1.5500645637512207, |
|
"logps/chosen": -226.55401611328125, |
|
"logps/rejected": -206.99913024902344, |
|
"loss": 0.6836, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.09524687379598618, |
|
"rewards/margins": 0.004950803238898516, |
|
"rewards/rejected": -0.1001976728439331, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.21224136688054213, |
|
"grad_norm": 84.4720159255109, |
|
"learning_rate": 1.9869550768882454e-07, |
|
"logits/chosen": -1.5599523782730103, |
|
"logits/rejected": -1.5133187770843506, |
|
"logps/chosen": -182.1778564453125, |
|
"logps/rejected": -241.0075225830078, |
|
"loss": 0.6671, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.057929787784814835, |
|
"rewards/margins": 0.0736684501171112, |
|
"rewards/rejected": -0.13159823417663574, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.21454833825967848, |
|
"grad_norm": 73.76638464490958, |
|
"learning_rate": 1.9863279311695428e-07, |
|
"logits/chosen": -1.4902362823486328, |
|
"logits/rejected": -1.55423903465271, |
|
"logps/chosen": -219.845703125, |
|
"logps/rejected": -273.73712158203125, |
|
"loss": 0.6773, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.07643218338489532, |
|
"rewards/margins": 0.07768993079662323, |
|
"rewards/rejected": -0.15412212908267975, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.2168553096388148, |
|
"grad_norm": 68.86254513199266, |
|
"learning_rate": 1.985686165691514e-07, |
|
"logits/chosen": -1.704699993133545, |
|
"logits/rejected": -1.6342915296554565, |
|
"logps/chosen": -120.14341735839844, |
|
"logps/rejected": -114.1607666015625, |
|
"loss": 0.6819, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.03834118694067001, |
|
"rewards/margins": -0.00312834233045578, |
|
"rewards/rejected": -0.03521284461021423, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.21916228101795113, |
|
"grad_norm": 77.39475970610044, |
|
"learning_rate": 1.9850297899666707e-07, |
|
"logits/chosen": -1.6166346073150635, |
|
"logits/rejected": -1.666224479675293, |
|
"logps/chosen": -138.47250366210938, |
|
"logps/rejected": -183.75860595703125, |
|
"loss": 0.6784, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.059065092355012894, |
|
"rewards/margins": 0.0348266139626503, |
|
"rewards/rejected": -0.09389171749353409, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.22146925239708745, |
|
"grad_norm": 79.62622809714712, |
|
"learning_rate": 1.9843588137240855e-07, |
|
"logits/chosen": -1.4786595106124878, |
|
"logits/rejected": -1.5819900035858154, |
|
"logps/chosen": -156.80630493164062, |
|
"logps/rejected": -225.65887451171875, |
|
"loss": 0.6727, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.06785481423139572, |
|
"rewards/margins": 0.046741731464862823, |
|
"rewards/rejected": -0.11459654569625854, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.22377622377622378, |
|
"grad_norm": 71.61908905732174, |
|
"learning_rate": 1.9836732469092446e-07, |
|
"logits/chosen": -1.7382750511169434, |
|
"logits/rejected": -1.7238702774047852, |
|
"logps/chosen": -135.97625732421875, |
|
"logps/rejected": -134.9537353515625, |
|
"loss": 0.6751, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.07399953901767731, |
|
"rewards/margins": -0.0150267593562603, |
|
"rewards/rejected": -0.05897277966141701, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.2260831951553601, |
|
"grad_norm": 77.39998086488785, |
|
"learning_rate": 1.982973099683902e-07, |
|
"logits/chosen": -1.6806734800338745, |
|
"logits/rejected": -1.7225916385650635, |
|
"logps/chosen": -139.36279296875, |
|
"logps/rejected": -160.1461639404297, |
|
"loss": 0.6592, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.05896836146712303, |
|
"rewards/margins": 0.05167616903781891, |
|
"rewards/rejected": -0.11064451932907104, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.22839016653449642, |
|
"grad_norm": 71.42176978474515, |
|
"learning_rate": 1.982258382425928e-07, |
|
"logits/chosen": -1.53923499584198, |
|
"logits/rejected": -1.5509108304977417, |
|
"logps/chosen": -145.9883575439453, |
|
"logps/rejected": -173.95388793945312, |
|
"loss": 0.6819, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.06814471632242203, |
|
"rewards/margins": 0.05520961806178093, |
|
"rewards/rejected": -0.12335430830717087, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.23069713791363275, |
|
"grad_norm": 65.61388633290626, |
|
"learning_rate": 1.9815291057291578e-07, |
|
"logits/chosen": -1.5758477449417114, |
|
"logits/rejected": -1.6140058040618896, |
|
"logps/chosen": -105.85581970214844, |
|
"logps/rejected": -122.97176361083984, |
|
"loss": 0.679, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.06785964965820312, |
|
"rewards/margins": 0.01952926628291607, |
|
"rewards/rejected": -0.08738891780376434, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.23069713791363275, |
|
"eval_logits/chosen": -1.5610222816467285, |
|
"eval_logits/rejected": -1.462950587272644, |
|
"eval_logps/chosen": -186.2912139892578, |
|
"eval_logps/rejected": -151.6300048828125, |
|
"eval_loss": 0.6922155618667603, |
|
"eval_rewards/accuracies": 0.5600000023841858, |
|
"eval_rewards/chosen": -0.11090204119682312, |
|
"eval_rewards/margins": 0.0005596327828243375, |
|
"eval_rewards/rejected": -0.11146167665719986, |
|
"eval_runtime": 21.7555, |
|
"eval_samples_per_second": 4.597, |
|
"eval_steps_per_second": 1.149, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.23300410929276907, |
|
"grad_norm": 69.78727065961301, |
|
"learning_rate": 1.9807852804032302e-07, |
|
"logits/chosen": -1.4734337329864502, |
|
"logits/rejected": -1.491389513015747, |
|
"logps/chosen": -154.9561767578125, |
|
"logps/rejected": -204.73797607421875, |
|
"loss": 0.6705, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.046838290989398956, |
|
"rewards/margins": 0.11147616803646088, |
|
"rewards/rejected": -0.15831446647644043, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.23531108067190543, |
|
"grad_norm": 82.59595988703826, |
|
"learning_rate": 1.980026917473432e-07, |
|
"logits/chosen": -1.5889283418655396, |
|
"logits/rejected": -1.7049566507339478, |
|
"logps/chosen": -174.74598693847656, |
|
"logps/rejected": -223.11842346191406, |
|
"loss": 0.6737, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.05181250721216202, |
|
"rewards/margins": 0.09021350741386414, |
|
"rewards/rejected": -0.14202602207660675, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.23761805205104175, |
|
"grad_norm": 67.38723201323596, |
|
"learning_rate": 1.9792540281805298e-07, |
|
"logits/chosen": -1.4892499446868896, |
|
"logits/rejected": -1.517817497253418, |
|
"logps/chosen": -140.6378631591797, |
|
"logps/rejected": -161.47348022460938, |
|
"loss": 0.6682, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.08649797737598419, |
|
"rewards/margins": 0.03692768141627312, |
|
"rewards/rejected": -0.12342565506696701, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.23992502343017807, |
|
"grad_norm": 73.06374890842753, |
|
"learning_rate": 1.9784666239806089e-07, |
|
"logits/chosen": -1.5101206302642822, |
|
"logits/rejected": -1.5768136978149414, |
|
"logps/chosen": -164.27035522460938, |
|
"logps/rejected": -203.25975036621094, |
|
"loss": 0.6718, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.06570874899625778, |
|
"rewards/margins": 0.09578941017389297, |
|
"rewards/rejected": -0.16149815917015076, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.2422319948093144, |
|
"grad_norm": 75.60130708995507, |
|
"learning_rate": 1.9776647165448983e-07, |
|
"logits/chosen": -1.5699687004089355, |
|
"logits/rejected": -1.520723581314087, |
|
"logps/chosen": -188.6508331298828, |
|
"logps/rejected": -217.18365478515625, |
|
"loss": 0.6708, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.08720759302377701, |
|
"rewards/margins": 0.02055392973124981, |
|
"rewards/rejected": -0.10776151716709137, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.24453896618845072, |
|
"grad_norm": 76.08847895651103, |
|
"learning_rate": 1.9768483177596006e-07, |
|
"logits/chosen": -1.5900119543075562, |
|
"logits/rejected": -1.6237448453903198, |
|
"logps/chosen": -143.37664794921875, |
|
"logps/rejected": -166.52413940429688, |
|
"loss": 0.6689, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.0496542751789093, |
|
"rewards/margins": 0.06798863410949707, |
|
"rewards/rejected": -0.11764290928840637, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.24684593756758705, |
|
"grad_norm": 85.55002048517719, |
|
"learning_rate": 1.9760174397257153e-07, |
|
"logits/chosen": -1.5799341201782227, |
|
"logits/rejected": -1.5739325284957886, |
|
"logps/chosen": -187.42578125, |
|
"logps/rejected": -227.91946411132812, |
|
"loss": 0.6871, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.11932831257581711, |
|
"rewards/margins": 0.00022871512919664383, |
|
"rewards/rejected": -0.11955701559782028, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.24915290894672337, |
|
"grad_norm": 79.3908422125236, |
|
"learning_rate": 1.97517209475886e-07, |
|
"logits/chosen": -1.578735589981079, |
|
"logits/rejected": -1.7003827095031738, |
|
"logps/chosen": -147.41778564453125, |
|
"logps/rejected": -185.31602478027344, |
|
"loss": 0.6678, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.07496561855077744, |
|
"rewards/margins": 0.0857120081782341, |
|
"rewards/rejected": -0.16067762672901154, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.2514598803258597, |
|
"grad_norm": 78.5386783375159, |
|
"learning_rate": 1.9743122953890854e-07, |
|
"logits/chosen": -1.5871162414550781, |
|
"logits/rejected": -1.5231672525405884, |
|
"logps/chosen": -174.3480682373047, |
|
"logps/rejected": -196.07998657226562, |
|
"loss": 0.6548, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.07399033010005951, |
|
"rewards/margins": 0.05520808696746826, |
|
"rewards/rejected": -0.12919840216636658, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.253766851704996, |
|
"grad_norm": 80.06682238716625, |
|
"learning_rate": 1.9734380543606927e-07, |
|
"logits/chosen": -1.643662452697754, |
|
"logits/rejected": -1.6430741548538208, |
|
"logps/chosen": -200.37826538085938, |
|
"logps/rejected": -207.41136169433594, |
|
"loss": 0.6815, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.07183612138032913, |
|
"rewards/margins": 0.07066242396831512, |
|
"rewards/rejected": -0.14249853789806366, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.25607382308413235, |
|
"grad_norm": 73.47957521005397, |
|
"learning_rate": 1.972549384632043e-07, |
|
"logits/chosen": -1.5852243900299072, |
|
"logits/rejected": -1.736151099205017, |
|
"logps/chosen": -167.52194213867188, |
|
"logps/rejected": -218.90122985839844, |
|
"loss": 0.6604, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.0951998233795166, |
|
"rewards/margins": 0.027591748163104057, |
|
"rewards/rejected": -0.12279157340526581, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.25838079446326867, |
|
"grad_norm": 79.05067580811021, |
|
"learning_rate": 1.9716462993753655e-07, |
|
"logits/chosen": -1.476207971572876, |
|
"logits/rejected": -1.5456207990646362, |
|
"logps/chosen": -288.57379150390625, |
|
"logps/rejected": -338.8498840332031, |
|
"loss": 0.6567, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.17371979355812073, |
|
"rewards/margins": 0.15009327232837677, |
|
"rewards/rejected": -0.3238130807876587, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.260687765842405, |
|
"grad_norm": 67.9414989656304, |
|
"learning_rate": 1.9707288119765622e-07, |
|
"logits/chosen": -1.5781480073928833, |
|
"logits/rejected": -1.569219708442688, |
|
"logps/chosen": -124.80656433105469, |
|
"logps/rejected": -141.52476501464844, |
|
"loss": 0.6732, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.12547817826271057, |
|
"rewards/margins": 0.03562304750084877, |
|
"rewards/rejected": -0.16110120713710785, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.2629947372215413, |
|
"grad_norm": 78.11670530443735, |
|
"learning_rate": 1.9697969360350095e-07, |
|
"logits/chosen": -1.6346409320831299, |
|
"logits/rejected": -1.565224051475525, |
|
"logps/chosen": -178.9912109375, |
|
"logps/rejected": -190.82681274414062, |
|
"loss": 0.6661, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.10180149972438812, |
|
"rewards/margins": 0.05022910237312317, |
|
"rewards/rejected": -0.1520306020975113, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.2653017086006777, |
|
"grad_norm": 68.3329236115507, |
|
"learning_rate": 1.968850685363357e-07, |
|
"logits/chosen": -1.7000384330749512, |
|
"logits/rejected": -1.7287462949752808, |
|
"logps/chosen": -199.75430297851562, |
|
"logps/rejected": -241.5220947265625, |
|
"loss": 0.6556, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.09640266001224518, |
|
"rewards/margins": 0.09287622570991516, |
|
"rewards/rejected": -0.18927887082099915, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.267608679979814, |
|
"grad_norm": 82.26094651176318, |
|
"learning_rate": 1.9678900739873226e-07, |
|
"logits/chosen": -1.677142858505249, |
|
"logits/rejected": -1.6745737791061401, |
|
"logps/chosen": -170.59425354003906, |
|
"logps/rejected": -181.05661010742188, |
|
"loss": 0.6694, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.1134408637881279, |
|
"rewards/margins": 0.036339692771434784, |
|
"rewards/rejected": -0.14978057146072388, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.26991565135895035, |
|
"grad_norm": 78.36181831181821, |
|
"learning_rate": 1.966915116145484e-07, |
|
"logits/chosen": -1.4915921688079834, |
|
"logits/rejected": -1.523095726966858, |
|
"logps/chosen": -155.88290405273438, |
|
"logps/rejected": -164.45022583007812, |
|
"loss": 0.6567, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.0988837480545044, |
|
"rewards/margins": 0.09765380620956421, |
|
"rewards/rejected": -0.196537584066391, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.2722226227380867, |
|
"grad_norm": 83.64468364737313, |
|
"learning_rate": 1.965925826289068e-07, |
|
"logits/chosen": -1.6482906341552734, |
|
"logits/rejected": -1.6469372510910034, |
|
"logps/chosen": -185.45001220703125, |
|
"logps/rejected": -208.0437469482422, |
|
"loss": 0.6708, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.0873761773109436, |
|
"rewards/margins": 0.06748253107070923, |
|
"rewards/rejected": -0.15485870838165283, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.274529594117223, |
|
"grad_norm": 74.16820675500993, |
|
"learning_rate": 1.964922219081738e-07, |
|
"logits/chosen": -1.764983057975769, |
|
"logits/rejected": -1.7145969867706299, |
|
"logps/chosen": -223.3017578125, |
|
"logps/rejected": -218.1916046142578, |
|
"loss": 0.6555, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.11543229222297668, |
|
"rewards/margins": 0.12305162101984024, |
|
"rewards/rejected": -0.23848390579223633, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.2768365654963593, |
|
"grad_norm": 75.00121800473413, |
|
"learning_rate": 1.9639043093993727e-07, |
|
"logits/chosen": -1.5264173746109009, |
|
"logits/rejected": -1.4717910289764404, |
|
"logps/chosen": -178.43338012695312, |
|
"logps/rejected": -188.60101318359375, |
|
"loss": 0.6481, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.09335748851299286, |
|
"rewards/margins": 0.026005972176790237, |
|
"rewards/rejected": -0.1193634569644928, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.27914353687549565, |
|
"grad_norm": 64.46972140040022, |
|
"learning_rate": 1.9628721123298492e-07, |
|
"logits/chosen": -1.6837042570114136, |
|
"logits/rejected": -1.6980068683624268, |
|
"logps/chosen": -161.4723663330078, |
|
"logps/rejected": -171.20248413085938, |
|
"loss": 0.6609, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.11237211525440216, |
|
"rewards/margins": 0.049555521458387375, |
|
"rewards/rejected": -0.16192764043807983, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.28145050825463197, |
|
"grad_norm": 66.85001786944659, |
|
"learning_rate": 1.961825643172819e-07, |
|
"logits/chosen": -1.5771496295928955, |
|
"logits/rejected": -1.5039366483688354, |
|
"logps/chosen": -158.33685302734375, |
|
"logps/rejected": -160.24057006835938, |
|
"loss": 0.6701, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.14522022008895874, |
|
"rewards/margins": 0.04340605437755585, |
|
"rewards/rejected": -0.1886262595653534, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.2837574796337683, |
|
"grad_norm": 76.39303352760503, |
|
"learning_rate": 1.9607649174394787e-07, |
|
"logits/chosen": -1.4101349115371704, |
|
"logits/rejected": -1.4513871669769287, |
|
"logps/chosen": -147.43826293945312, |
|
"logps/rejected": -182.31005859375, |
|
"loss": 0.6596, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.08130650967359543, |
|
"rewards/margins": 0.0953046903014183, |
|
"rewards/rejected": -0.17661119997501373, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.2860644510129046, |
|
"grad_norm": 84.03579410656208, |
|
"learning_rate": 1.959689950852343e-07, |
|
"logits/chosen": -1.6520403623580933, |
|
"logits/rejected": -1.6739228963851929, |
|
"logps/chosen": -172.19305419921875, |
|
"logps/rejected": -184.803466796875, |
|
"loss": 0.6669, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.16027307510375977, |
|
"rewards/margins": 0.02343956008553505, |
|
"rewards/rejected": -0.1837126612663269, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.28837142239204094, |
|
"grad_norm": 78.31280460476106, |
|
"learning_rate": 1.9586007593450095e-07, |
|
"logits/chosen": -1.568188190460205, |
|
"logits/rejected": -1.586582064628601, |
|
"logps/chosen": -169.95675659179688, |
|
"logps/rejected": -188.78858947753906, |
|
"loss": 0.6779, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.1373595893383026, |
|
"rewards/margins": 0.02867070771753788, |
|
"rewards/rejected": -0.16603030264377594, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.29067839377117727, |
|
"grad_norm": 77.82835801736759, |
|
"learning_rate": 1.957497359061924e-07, |
|
"logits/chosen": -1.5796047449111938, |
|
"logits/rejected": -1.5543608665466309, |
|
"logps/chosen": -191.53219604492188, |
|
"logps/rejected": -220.70361328125, |
|
"loss": 0.6393, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.15552642941474915, |
|
"rewards/margins": 0.09192191064357758, |
|
"rewards/rejected": -0.24744835495948792, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.2929853651503136, |
|
"grad_norm": 81.6835137198976, |
|
"learning_rate": 1.956379766358141e-07, |
|
"logits/chosen": -1.5779876708984375, |
|
"logits/rejected": -1.504298448562622, |
|
"logps/chosen": -218.59942626953125, |
|
"logps/rejected": -230.2102813720703, |
|
"loss": 0.6635, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.14433127641677856, |
|
"rewards/margins": 0.08938172459602356, |
|
"rewards/rejected": -0.23371298611164093, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.2952923365294499, |
|
"grad_norm": 74.03670184892391, |
|
"learning_rate": 1.9552479977990798e-07, |
|
"logits/chosen": -1.6765474081039429, |
|
"logits/rejected": -1.643741488456726, |
|
"logps/chosen": -185.69444274902344, |
|
"logps/rejected": -199.7008819580078, |
|
"loss": 0.676, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.14419934153556824, |
|
"rewards/margins": 0.016861233860254288, |
|
"rewards/rejected": -0.16106057167053223, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.29759930790858624, |
|
"grad_norm": 79.12914884219855, |
|
"learning_rate": 1.954102070160281e-07, |
|
"logits/chosen": -1.6632733345031738, |
|
"logits/rejected": -1.6073827743530273, |
|
"logps/chosen": -149.79641723632812, |
|
"logps/rejected": -174.7237091064453, |
|
"loss": 0.6638, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.10197722166776657, |
|
"rewards/margins": 0.07854845374822617, |
|
"rewards/rejected": -0.18052567541599274, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.29990627928772257, |
|
"grad_norm": 80.451175401901, |
|
"learning_rate": 1.9529420004271567e-07, |
|
"logits/chosen": -1.5313125848770142, |
|
"logits/rejected": -1.5560095310211182, |
|
"logps/chosen": -207.1497802734375, |
|
"logps/rejected": -222.1211395263672, |
|
"loss": 0.6407, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.16675114631652832, |
|
"rewards/margins": 0.1034877672791481, |
|
"rewards/rejected": -0.2702389061450958, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.3022132506668589, |
|
"grad_norm": 66.85531080853532, |
|
"learning_rate": 1.9517678057947382e-07, |
|
"logits/chosen": -1.6430004835128784, |
|
"logits/rejected": -1.597357153892517, |
|
"logps/chosen": -135.1138153076172, |
|
"logps/rejected": -132.63619995117188, |
|
"loss": 0.6618, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.11752544343471527, |
|
"rewards/margins": 0.03515633940696716, |
|
"rewards/rejected": -0.15268178284168243, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.3045202220459952, |
|
"grad_norm": 80.22448615221649, |
|
"learning_rate": 1.9505795036674232e-07, |
|
"logits/chosen": -1.6319184303283691, |
|
"logits/rejected": -1.4991899728775024, |
|
"logps/chosen": -217.16680908203125, |
|
"logps/rejected": -245.2107696533203, |
|
"loss": 0.6523, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.18451935052871704, |
|
"rewards/margins": 0.11840308457612991, |
|
"rewards/rejected": -0.30292242765426636, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.3068271934251316, |
|
"grad_norm": 69.95576974969472, |
|
"learning_rate": 1.9493771116587156e-07, |
|
"logits/chosen": -1.5522364377975464, |
|
"logits/rejected": -1.5948469638824463, |
|
"logps/chosen": -113.81831359863281, |
|
"logps/rejected": -155.99346923828125, |
|
"loss": 0.6551, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.08541279286146164, |
|
"rewards/margins": 0.14459526538848877, |
|
"rewards/rejected": -0.230008065700531, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.3091341648042679, |
|
"grad_norm": 75.25744246014891, |
|
"learning_rate": 1.9481606475909656e-07, |
|
"logits/chosen": -1.500025749206543, |
|
"logits/rejected": -1.5494239330291748, |
|
"logps/chosen": -125.84722900390625, |
|
"logps/rejected": -164.76669311523438, |
|
"loss": 0.6526, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.08980143815279007, |
|
"rewards/margins": 0.17133310437202454, |
|
"rewards/rejected": -0.2611345648765564, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.31144113618340424, |
|
"grad_norm": 77.53434606640313, |
|
"learning_rate": 1.9469301294951057e-07, |
|
"logits/chosen": -1.6267601251602173, |
|
"logits/rejected": -1.5587116479873657, |
|
"logps/chosen": -172.08139038085938, |
|
"logps/rejected": -181.32717895507812, |
|
"loss": 0.6596, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.1692037582397461, |
|
"rewards/margins": 0.051171936094760895, |
|
"rewards/rejected": -0.2203756868839264, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.31374810756254057, |
|
"grad_norm": 74.84897771580975, |
|
"learning_rate": 1.9456855756103816e-07, |
|
"logits/chosen": -1.5624661445617676, |
|
"logits/rejected": -1.6530312299728394, |
|
"logps/chosen": -147.84597778320312, |
|
"logps/rejected": -174.6589813232422, |
|
"loss": 0.6707, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.13743659853935242, |
|
"rewards/margins": 0.05220307409763336, |
|
"rewards/rejected": -0.18963965773582458, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.3160550789416769, |
|
"grad_norm": 71.99044362933952, |
|
"learning_rate": 1.9444270043840852e-07, |
|
"logits/chosen": -1.6625701189041138, |
|
"logits/rejected": -1.5914949178695679, |
|
"logps/chosen": -147.29147338867188, |
|
"logps/rejected": -129.6570587158203, |
|
"loss": 0.6831, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.21893730759620667, |
|
"rewards/margins": -0.0283275805413723, |
|
"rewards/rejected": -0.19060972332954407, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.3183620503208132, |
|
"grad_norm": 75.21160091684173, |
|
"learning_rate": 1.9431544344712772e-07, |
|
"logits/chosen": -1.4378788471221924, |
|
"logits/rejected": -1.3864963054656982, |
|
"logps/chosen": -147.2783660888672, |
|
"logps/rejected": -177.4646759033203, |
|
"loss": 0.6472, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.11688334494829178, |
|
"rewards/margins": 0.11695411056280136, |
|
"rewards/rejected": -0.23383745551109314, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.32066902169994954, |
|
"grad_norm": 72.07898599790276, |
|
"learning_rate": 1.9418678847345146e-07, |
|
"logits/chosen": -1.5210872888565063, |
|
"logits/rejected": -1.5768458843231201, |
|
"logps/chosen": -164.58419799804688, |
|
"logps/rejected": -213.6575469970703, |
|
"loss": 0.6664, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.12316928803920746, |
|
"rewards/margins": 0.07401876151561737, |
|
"rewards/rejected": -0.19718804955482483, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.32297599307908587, |
|
"grad_norm": 67.17404804695744, |
|
"learning_rate": 1.9405673742435676e-07, |
|
"logits/chosen": -1.5087511539459229, |
|
"logits/rejected": -1.5612874031066895, |
|
"logps/chosen": -142.5220947265625, |
|
"logps/rejected": -195.3551483154297, |
|
"loss": 0.6718, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.14393991231918335, |
|
"rewards/margins": 0.11825156211853027, |
|
"rewards/rejected": -0.2621914744377136, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.3252829644582222, |
|
"grad_norm": 81.75237089659649, |
|
"learning_rate": 1.939252922275139e-07, |
|
"logits/chosen": -1.6113684177398682, |
|
"logits/rejected": -1.520400047302246, |
|
"logps/chosen": -215.8910675048828, |
|
"logps/rejected": -227.26637268066406, |
|
"loss": 0.6556, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.2654890716075897, |
|
"rewards/margins": 0.09542025625705719, |
|
"rewards/rejected": -0.3609093129634857, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.3275899358373585, |
|
"grad_norm": 65.02297736065502, |
|
"learning_rate": 1.937924548312578e-07, |
|
"logits/chosen": -1.6812703609466553, |
|
"logits/rejected": -1.7281326055526733, |
|
"logps/chosen": -130.5011749267578, |
|
"logps/rejected": -195.49452209472656, |
|
"loss": 0.6431, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.12183240056037903, |
|
"rewards/margins": 0.12844915688037872, |
|
"rewards/rejected": -0.25028154253959656, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.32989690721649484, |
|
"grad_norm": 75.13263031113792, |
|
"learning_rate": 1.9365822720455912e-07, |
|
"logits/chosen": -1.4847445487976074, |
|
"logits/rejected": -1.4161133766174316, |
|
"logps/chosen": -154.5245361328125, |
|
"logps/rejected": -203.3861541748047, |
|
"loss": 0.6537, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.16228517889976501, |
|
"rewards/margins": 0.12002203613519669, |
|
"rewards/rejected": -0.2823072075843811, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.33220387859563116, |
|
"grad_norm": 78.41024724428831, |
|
"learning_rate": 1.935226113369951e-07, |
|
"logits/chosen": -1.686346173286438, |
|
"logits/rejected": -1.6542606353759766, |
|
"logps/chosen": -172.25059509277344, |
|
"logps/rejected": -199.93182373046875, |
|
"loss": 0.6469, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.12491661310195923, |
|
"rewards/margins": 0.12406705319881439, |
|
"rewards/rejected": -0.24898366630077362, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.3345108499747675, |
|
"grad_norm": 74.32689822052723, |
|
"learning_rate": 1.9338560923872006e-07, |
|
"logits/chosen": -1.5119750499725342, |
|
"logits/rejected": -1.524541974067688, |
|
"logps/chosen": -159.21376037597656, |
|
"logps/rejected": -237.09561157226562, |
|
"loss": 0.6455, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.1832338273525238, |
|
"rewards/margins": 0.2291288673877716, |
|
"rewards/rejected": -0.4123626947402954, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.3368178213539038, |
|
"grad_norm": 77.04623177811982, |
|
"learning_rate": 1.9324722294043556e-07, |
|
"logits/chosen": -1.6212831735610962, |
|
"logits/rejected": -1.5947524309158325, |
|
"logps/chosen": -187.361572265625, |
|
"logps/rejected": -187.34519958496094, |
|
"loss": 0.6585, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.24990221858024597, |
|
"rewards/margins": 0.07834864407777786, |
|
"rewards/rejected": -0.3282508850097656, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.33912479273304014, |
|
"grad_norm": 83.55231847560428, |
|
"learning_rate": 1.9310745449336044e-07, |
|
"logits/chosen": -1.58076012134552, |
|
"logits/rejected": -1.5445674657821655, |
|
"logps/chosen": -192.48617553710938, |
|
"logps/rejected": -215.64193725585938, |
|
"loss": 0.6418, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.1813565194606781, |
|
"rewards/margins": 0.1242499127984047, |
|
"rewards/rejected": -0.3056064546108246, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.34143176411217646, |
|
"grad_norm": 73.20320061572161, |
|
"learning_rate": 1.929663059692002e-07, |
|
"logits/chosen": -1.477115273475647, |
|
"logits/rejected": -1.5140092372894287, |
|
"logps/chosen": -154.4539794921875, |
|
"logps/rejected": -214.9960174560547, |
|
"loss": 0.6894, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.2464270293712616, |
|
"rewards/margins": 0.08608925342559814, |
|
"rewards/rejected": -0.33251628279685974, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.3437387354913128, |
|
"grad_norm": 82.85464536249332, |
|
"learning_rate": 1.928237794601165e-07, |
|
"logits/chosen": -1.5687949657440186, |
|
"logits/rejected": -1.6849851608276367, |
|
"logps/chosen": -140.14784240722656, |
|
"logps/rejected": -234.17706298828125, |
|
"loss": 0.6525, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.1264043152332306, |
|
"rewards/margins": 0.24397864937782288, |
|
"rewards/rejected": -0.3703829348087311, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.34604570687044917, |
|
"grad_norm": 65.65777237412837, |
|
"learning_rate": 1.9267987707869604e-07, |
|
"logits/chosen": -1.4391192197799683, |
|
"logits/rejected": -1.4724018573760986, |
|
"logps/chosen": -153.69284057617188, |
|
"logps/rejected": -173.3372039794922, |
|
"loss": 0.6486, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.16697266697883606, |
|
"rewards/margins": 0.13511566817760468, |
|
"rewards/rejected": -0.30208835005760193, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.34604570687044917, |
|
"eval_logits/chosen": -1.5305781364440918, |
|
"eval_logits/rejected": -1.4347938299179077, |
|
"eval_logps/chosen": -187.96263122558594, |
|
"eval_logps/rejected": -153.34820556640625, |
|
"eval_loss": 0.678679347038269, |
|
"eval_rewards/accuracies": 0.6399999856948853, |
|
"eval_rewards/chosen": -0.278046578168869, |
|
"eval_rewards/margins": 0.005234198644757271, |
|
"eval_rewards/rejected": -0.28328076004981995, |
|
"eval_runtime": 21.7114, |
|
"eval_samples_per_second": 4.606, |
|
"eval_steps_per_second": 1.151, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.3483526782495855, |
|
"grad_norm": 69.96196416042814, |
|
"learning_rate": 1.9253460095791922e-07, |
|
"logits/chosen": -1.5020473003387451, |
|
"logits/rejected": -1.4953689575195312, |
|
"logps/chosen": -106.53646087646484, |
|
"logps/rejected": -165.1669158935547, |
|
"loss": 0.6546, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.15904603898525238, |
|
"rewards/margins": 0.06554871797561646, |
|
"rewards/rejected": -0.22459478676319122, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.3506596496287218, |
|
"grad_norm": 74.69729400373957, |
|
"learning_rate": 1.9238795325112868e-07, |
|
"logits/chosen": -1.636529803276062, |
|
"logits/rejected": -1.6348826885223389, |
|
"logps/chosen": -140.86441040039062, |
|
"logps/rejected": -174.48370361328125, |
|
"loss": 0.6433, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.12615619599819183, |
|
"rewards/margins": 0.20733490586280823, |
|
"rewards/rejected": -0.3334910571575165, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.35296662100785814, |
|
"grad_norm": 84.17293540044481, |
|
"learning_rate": 1.9223993613199713e-07, |
|
"logits/chosen": -1.6913816928863525, |
|
"logits/rejected": -1.6646835803985596, |
|
"logps/chosen": -152.25997924804688, |
|
"logps/rejected": -171.05575561523438, |
|
"loss": 0.6514, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.11823489516973495, |
|
"rewards/margins": 0.18948128819465637, |
|
"rewards/rejected": -0.3077161908149719, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.35527359238699446, |
|
"grad_norm": 83.6870493511653, |
|
"learning_rate": 1.9209055179449537e-07, |
|
"logits/chosen": -1.517793893814087, |
|
"logits/rejected": -1.6404225826263428, |
|
"logps/chosen": -91.36832427978516, |
|
"logps/rejected": -134.06529235839844, |
|
"loss": 0.6551, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.10601670295000076, |
|
"rewards/margins": 0.14076808094978333, |
|
"rewards/rejected": -0.24678479135036469, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.3575805637661308, |
|
"grad_norm": 64.57674968550867, |
|
"learning_rate": 1.9193980245285966e-07, |
|
"logits/chosen": -1.4689788818359375, |
|
"logits/rejected": -1.3954423666000366, |
|
"logps/chosen": -143.7101287841797, |
|
"logps/rejected": -169.8336181640625, |
|
"loss": 0.6402, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.16834121942520142, |
|
"rewards/margins": 0.08874449878931046, |
|
"rewards/rejected": -0.25708574056625366, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.3598875351452671, |
|
"grad_norm": 81.4185321584637, |
|
"learning_rate": 1.9178769034155887e-07, |
|
"logits/chosen": -1.6560229063034058, |
|
"logits/rejected": -1.7177590131759644, |
|
"logps/chosen": -144.23033142089844, |
|
"logps/rejected": -166.01162719726562, |
|
"loss": 0.6303, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.19495287537574768, |
|
"rewards/margins": 0.08614547550678253, |
|
"rewards/rejected": -0.281098335981369, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.36219450652440344, |
|
"grad_norm": 70.47869326950462, |
|
"learning_rate": 1.9163421771526151e-07, |
|
"logits/chosen": -1.5131672620773315, |
|
"logits/rejected": -1.548357367515564, |
|
"logps/chosen": -146.3427734375, |
|
"logps/rejected": -159.85092163085938, |
|
"loss": 0.6536, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.1731819212436676, |
|
"rewards/margins": 0.1254611313343048, |
|
"rewards/rejected": -0.29864302277565, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.36450147790353976, |
|
"grad_norm": 79.69549984021036, |
|
"learning_rate": 1.914793868488021e-07, |
|
"logits/chosen": -1.512197732925415, |
|
"logits/rejected": -1.4396047592163086, |
|
"logps/chosen": -97.64339447021484, |
|
"logps/rejected": -117.3057632446289, |
|
"loss": 0.6579, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.1673259437084198, |
|
"rewards/margins": 0.045555103570222855, |
|
"rewards/rejected": -0.21288102865219116, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.3668084492826761, |
|
"grad_norm": 82.99383875929993, |
|
"learning_rate": 1.9132320003714754e-07, |
|
"logits/chosen": -1.5376619100570679, |
|
"logits/rejected": -1.5551142692565918, |
|
"logps/chosen": -207.0707244873047, |
|
"logps/rejected": -242.56712341308594, |
|
"loss": 0.6439, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.24572816491127014, |
|
"rewards/margins": 0.16944444179534912, |
|
"rewards/rejected": -0.41517263650894165, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.3691154206618124, |
|
"grad_norm": 78.2099765504223, |
|
"learning_rate": 1.9116565959536327e-07, |
|
"logits/chosen": -1.4779236316680908, |
|
"logits/rejected": -1.4861027002334595, |
|
"logps/chosen": -193.60748291015625, |
|
"logps/rejected": -232.04690551757812, |
|
"loss": 0.6534, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.16232052445411682, |
|
"rewards/margins": 0.13388732075691223, |
|
"rewards/rejected": -0.29620781540870667, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.37142239204094873, |
|
"grad_norm": 74.80406821040707, |
|
"learning_rate": 1.9100676785857857e-07, |
|
"logits/chosen": -1.6256941556930542, |
|
"logits/rejected": -1.5659886598587036, |
|
"logps/chosen": -170.6388702392578, |
|
"logps/rejected": -198.07733154296875, |
|
"loss": 0.6395, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.17732584476470947, |
|
"rewards/margins": 0.1462487280368805, |
|
"rewards/rejected": -0.32357457280158997, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.37372936342008506, |
|
"grad_norm": 81.93843569632895, |
|
"learning_rate": 1.9084652718195236e-07, |
|
"logits/chosen": -1.5257925987243652, |
|
"logits/rejected": -1.4617056846618652, |
|
"logps/chosen": -208.795166015625, |
|
"logps/rejected": -243.7969970703125, |
|
"loss": 0.6648, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.2373400181531906, |
|
"rewards/margins": 0.16046729683876038, |
|
"rewards/rejected": -0.3978073298931122, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.3760363347992214, |
|
"grad_norm": 68.63199696676665, |
|
"learning_rate": 1.9068493994063798e-07, |
|
"logits/chosen": -1.4899076223373413, |
|
"logits/rejected": -1.5616645812988281, |
|
"logps/chosen": -133.66110229492188, |
|
"logps/rejected": -236.15924072265625, |
|
"loss": 0.6245, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.15444569289684296, |
|
"rewards/margins": 0.2277567982673645, |
|
"rewards/rejected": -0.38220247626304626, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.3783433061783577, |
|
"grad_norm": 77.96696778978115, |
|
"learning_rate": 1.905220085297482e-07, |
|
"logits/chosen": -1.5441091060638428, |
|
"logits/rejected": -1.6405153274536133, |
|
"logps/chosen": -204.56991577148438, |
|
"logps/rejected": -610.9658203125, |
|
"loss": 0.6369, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.25125816464424133, |
|
"rewards/margins": 0.27758753299713135, |
|
"rewards/rejected": -0.5288456678390503, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.38065027755749403, |
|
"grad_norm": 70.94819657566394, |
|
"learning_rate": 1.9035773536431955e-07, |
|
"logits/chosen": -1.5916917324066162, |
|
"logits/rejected": -1.529220461845398, |
|
"logps/chosen": -137.5714111328125, |
|
"logps/rejected": -160.11544799804688, |
|
"loss": 0.628, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.20854628086090088, |
|
"rewards/margins": 0.11146115511655807, |
|
"rewards/rejected": -0.32000741362571716, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.38295724893663036, |
|
"grad_norm": 74.31467840644032, |
|
"learning_rate": 1.901921228792766e-07, |
|
"logits/chosen": -1.5668599605560303, |
|
"logits/rejected": -1.6017038822174072, |
|
"logps/chosen": -253.0677947998047, |
|
"logps/rejected": -266.9024658203125, |
|
"loss": 0.6419, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.2701232433319092, |
|
"rewards/margins": 0.1171327605843544, |
|
"rewards/rejected": -0.387255996465683, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.3852642203157667, |
|
"grad_norm": 80.19418315617096, |
|
"learning_rate": 1.9002517352939596e-07, |
|
"logits/chosen": -1.538657784461975, |
|
"logits/rejected": -1.4902359247207642, |
|
"logps/chosen": -151.844482421875, |
|
"logps/rejected": -182.43423461914062, |
|
"loss": 0.6542, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.20499791204929352, |
|
"rewards/margins": 0.14708584547042847, |
|
"rewards/rejected": -0.3520837724208832, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.38757119169490306, |
|
"grad_norm": 78.45881437768317, |
|
"learning_rate": 1.898568897892697e-07, |
|
"logits/chosen": -1.502273440361023, |
|
"logits/rejected": -1.567176342010498, |
|
"logps/chosen": -149.17568969726562, |
|
"logps/rejected": -218.93869018554688, |
|
"loss": 0.6324, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.21270516514778137, |
|
"rewards/margins": 0.24096481502056122, |
|
"rewards/rejected": -0.4536699950695038, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.3898781630740394, |
|
"grad_norm": 69.72871536048268, |
|
"learning_rate": 1.8968727415326882e-07, |
|
"logits/chosen": -1.595134973526001, |
|
"logits/rejected": -1.6751508712768555, |
|
"logps/chosen": -112.13485717773438, |
|
"logps/rejected": -138.27838134765625, |
|
"loss": 0.6302, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.11406655609607697, |
|
"rewards/margins": 0.13377144932746887, |
|
"rewards/rejected": -0.24783800542354584, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.3921851344531757, |
|
"grad_norm": 66.47735099680594, |
|
"learning_rate": 1.8951632913550623e-07, |
|
"logits/chosen": -1.6112767457962036, |
|
"logits/rejected": -1.5350615978240967, |
|
"logps/chosen": -212.4505615234375, |
|
"logps/rejected": -239.0753173828125, |
|
"loss": 0.621, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.12918683886528015, |
|
"rewards/margins": 0.254965603351593, |
|
"rewards/rejected": -0.3841524124145508, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.39449210583231203, |
|
"grad_norm": 81.17863346925296, |
|
"learning_rate": 1.8934405726979945e-07, |
|
"logits/chosen": -1.4070253372192383, |
|
"logits/rejected": -1.4879088401794434, |
|
"logps/chosen": -166.3784942626953, |
|
"logps/rejected": -204.57489013671875, |
|
"loss": 0.6395, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.31329959630966187, |
|
"rewards/margins": 0.13568538427352905, |
|
"rewards/rejected": -0.4489849805831909, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.39679907721144836, |
|
"grad_norm": 72.25844304700202, |
|
"learning_rate": 1.8917046110963314e-07, |
|
"logits/chosen": -1.6808464527130127, |
|
"logits/rejected": -1.6618741750717163, |
|
"logps/chosen": -184.7408905029297, |
|
"logps/rejected": -213.8212127685547, |
|
"loss": 0.6414, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.1948232203722, |
|
"rewards/margins": 0.18943095207214355, |
|
"rewards/rejected": -0.3842541575431824, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.3991060485905847, |
|
"grad_norm": 69.12287284056892, |
|
"learning_rate": 1.8899554322812116e-07, |
|
"logits/chosen": -1.677032470703125, |
|
"logits/rejected": -1.6319351196289062, |
|
"logps/chosen": -114.67143249511719, |
|
"logps/rejected": -125.2265625, |
|
"loss": 0.6256, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.18165619671344757, |
|
"rewards/margins": 0.17791113257408142, |
|
"rewards/rejected": -0.3595673143863678, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.401413019969721, |
|
"grad_norm": 68.82861341006546, |
|
"learning_rate": 1.8881930621796846e-07, |
|
"logits/chosen": -1.531043291091919, |
|
"logits/rejected": -1.4552069902420044, |
|
"logps/chosen": -172.90670776367188, |
|
"logps/rejected": -228.29833984375, |
|
"loss": 0.6321, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.21518906950950623, |
|
"rewards/margins": 0.16281384229660034, |
|
"rewards/rejected": -0.37800291180610657, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.40371999134885733, |
|
"grad_norm": 79.01675049183694, |
|
"learning_rate": 1.8864175269143273e-07, |
|
"logits/chosen": -1.628811001777649, |
|
"logits/rejected": -1.5073944330215454, |
|
"logps/chosen": -162.4159393310547, |
|
"logps/rejected": -173.65521240234375, |
|
"loss": 0.6361, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.17217856645584106, |
|
"rewards/margins": 0.20255069434642792, |
|
"rewards/rejected": -0.3747292459011078, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.40602696272799366, |
|
"grad_norm": 80.14358020089544, |
|
"learning_rate": 1.8846288528028552e-07, |
|
"logits/chosen": -1.2868863344192505, |
|
"logits/rejected": -1.4563894271850586, |
|
"logps/chosen": -176.4993438720703, |
|
"logps/rejected": -219.99745178222656, |
|
"loss": 0.6388, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.34355729818344116, |
|
"rewards/margins": 0.19085751473903656, |
|
"rewards/rejected": -0.5344148278236389, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.40833393410713, |
|
"grad_norm": 72.34750725400806, |
|
"learning_rate": 1.8828270663577336e-07, |
|
"logits/chosen": -1.5702780485153198, |
|
"logits/rejected": -1.6198755502700806, |
|
"logps/chosen": -135.76097106933594, |
|
"logps/rejected": -133.5688018798828, |
|
"loss": 0.6593, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.28700345754623413, |
|
"rewards/margins": 0.014538988471031189, |
|
"rewards/rejected": -0.3015424311161041, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.4106409054862663, |
|
"grad_norm": 71.70524840332104, |
|
"learning_rate": 1.8810121942857845e-07, |
|
"logits/chosen": -1.5310659408569336, |
|
"logits/rejected": -1.547040343284607, |
|
"logps/chosen": -137.63137817382812, |
|
"logps/rejected": -175.15028381347656, |
|
"loss": 0.6293, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.1476406753063202, |
|
"rewards/margins": 0.20084424316883087, |
|
"rewards/rejected": -0.34848493337631226, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.41294787686540263, |
|
"grad_norm": 77.60677795627835, |
|
"learning_rate": 1.8791842634877896e-07, |
|
"logits/chosen": -1.546626091003418, |
|
"logits/rejected": -1.6076010465621948, |
|
"logps/chosen": -136.61058044433594, |
|
"logps/rejected": -187.11056518554688, |
|
"loss": 0.6506, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.2092825025320053, |
|
"rewards/margins": 0.11802927404642105, |
|
"rewards/rejected": -0.32731181383132935, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.41525484824453895, |
|
"grad_norm": 76.22986147865214, |
|
"learning_rate": 1.8773433010580933e-07, |
|
"logits/chosen": -1.5016052722930908, |
|
"logits/rejected": -1.6018908023834229, |
|
"logps/chosen": -129.33348083496094, |
|
"logps/rejected": -151.12342834472656, |
|
"loss": 0.627, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.1853492707014084, |
|
"rewards/margins": 0.10909079760313034, |
|
"rewards/rejected": -0.2944400906562805, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.4175618196236753, |
|
"grad_norm": 71.86807271397895, |
|
"learning_rate": 1.8754893342842e-07, |
|
"logits/chosen": -1.5751183032989502, |
|
"logits/rejected": -1.4908232688903809, |
|
"logps/chosen": -187.5486602783203, |
|
"logps/rejected": -194.04296875, |
|
"loss": 0.6223, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.27427998185157776, |
|
"rewards/margins": 0.1835474967956543, |
|
"rewards/rejected": -0.45782750844955444, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.4198687910028116, |
|
"grad_norm": 70.36519300815779, |
|
"learning_rate": 1.8736223906463695e-07, |
|
"logits/chosen": -1.6419646739959717, |
|
"logits/rejected": -1.6212923526763916, |
|
"logps/chosen": -165.32421875, |
|
"logps/rejected": -171.27830505371094, |
|
"loss": 0.6154, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.21126417815685272, |
|
"rewards/margins": 0.188466876745224, |
|
"rewards/rejected": -0.3997310400009155, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.4221757623819479, |
|
"grad_norm": 70.09468918933095, |
|
"learning_rate": 1.8717424978172102e-07, |
|
"logits/chosen": -1.3921918869018555, |
|
"logits/rejected": -1.469792127609253, |
|
"logps/chosen": -167.81964111328125, |
|
"logps/rejected": -210.77825927734375, |
|
"loss": 0.6308, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.2520577609539032, |
|
"rewards/margins": 0.21120049059391022, |
|
"rewards/rejected": -0.463258296251297, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.42448273376108425, |
|
"grad_norm": 83.57733506311956, |
|
"learning_rate": 1.8698496836612691e-07, |
|
"logits/chosen": -1.494173288345337, |
|
"logits/rejected": -1.5522290468215942, |
|
"logps/chosen": -163.31491088867188, |
|
"logps/rejected": -189.11239624023438, |
|
"loss": 0.6605, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.2657621204853058, |
|
"rewards/margins": 0.16207075119018555, |
|
"rewards/rejected": -0.42783284187316895, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.4267897051402206, |
|
"grad_norm": 81.29498139829452, |
|
"learning_rate": 1.8679439762346184e-07, |
|
"logits/chosen": -1.5649724006652832, |
|
"logits/rejected": -1.6319153308868408, |
|
"logps/chosen": -208.2643585205078, |
|
"logps/rejected": -215.9363555908203, |
|
"loss": 0.6724, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.27036455273628235, |
|
"rewards/margins": 0.1651400327682495, |
|
"rewards/rejected": -0.43550461530685425, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.42909667651935696, |
|
"grad_norm": 76.18451864107462, |
|
"learning_rate": 1.8660254037844388e-07, |
|
"logits/chosen": -1.4427084922790527, |
|
"logits/rejected": -1.5188959836959839, |
|
"logps/chosen": -171.85968017578125, |
|
"logps/rejected": -233.1151580810547, |
|
"loss": 0.629, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.27071186900138855, |
|
"rewards/margins": 0.2559873163700104, |
|
"rewards/rejected": -0.5266991853713989, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.4314036478984933, |
|
"grad_norm": 82.63010621157098, |
|
"learning_rate": 1.8640939947486023e-07, |
|
"logits/chosen": -1.5887802839279175, |
|
"logits/rejected": -1.355837106704712, |
|
"logps/chosen": -242.5066375732422, |
|
"logps/rejected": -230.2034912109375, |
|
"loss": 0.6329, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.3870730698108673, |
|
"rewards/margins": 0.15506887435913086, |
|
"rewards/rejected": -0.5421419143676758, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.4337106192776296, |
|
"grad_norm": 59.14499379914714, |
|
"learning_rate": 1.8621497777552505e-07, |
|
"logits/chosen": -1.420657992362976, |
|
"logits/rejected": -1.4776450395584106, |
|
"logps/chosen": -127.46673583984375, |
|
"logps/rejected": -184.2600860595703, |
|
"loss": 0.5869, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.15772147476673126, |
|
"rewards/margins": 0.3883221745491028, |
|
"rewards/rejected": -0.5460436344146729, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.43601759065676593, |
|
"grad_norm": 76.51933767322383, |
|
"learning_rate": 1.8601927816223695e-07, |
|
"logits/chosen": -1.3575465679168701, |
|
"logits/rejected": -1.3156774044036865, |
|
"logps/chosen": -218.0836944580078, |
|
"logps/rejected": -228.03778076171875, |
|
"loss": 0.6557, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.4051874279975891, |
|
"rewards/margins": 0.143568217754364, |
|
"rewards/rejected": -0.5487555861473083, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.43832456203590225, |
|
"grad_norm": 61.424133205634206, |
|
"learning_rate": 1.8582230353573624e-07, |
|
"logits/chosen": -1.4618622064590454, |
|
"logits/rejected": -1.4945478439331055, |
|
"logps/chosen": -95.66145324707031, |
|
"logps/rejected": -135.7235870361328, |
|
"loss": 0.6206, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.1784054934978485, |
|
"rewards/margins": 0.23733605444431305, |
|
"rewards/rejected": -0.415741503238678, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.4406315334150386, |
|
"grad_norm": 64.92661329207279, |
|
"learning_rate": 1.8562405681566214e-07, |
|
"logits/chosen": -1.5636019706726074, |
|
"logits/rejected": -1.5756021738052368, |
|
"logps/chosen": -201.42442321777344, |
|
"logps/rejected": -188.35606384277344, |
|
"loss": 0.6289, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.3109050691127777, |
|
"rewards/margins": 0.10487519204616547, |
|
"rewards/rejected": -0.415780246257782, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.4429385047941749, |
|
"grad_norm": 83.39366061705226, |
|
"learning_rate": 1.854245409405092e-07, |
|
"logits/chosen": -1.6649830341339111, |
|
"logits/rejected": -1.5097665786743164, |
|
"logps/chosen": -217.35536193847656, |
|
"logps/rejected": -223.5187225341797, |
|
"loss": 0.6113, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.2543387711048126, |
|
"rewards/margins": 0.2463696151971817, |
|
"rewards/rejected": -0.5007083415985107, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.4452454761733112, |
|
"grad_norm": 74.49558456416251, |
|
"learning_rate": 1.852237588675841e-07, |
|
"logits/chosen": -1.582183599472046, |
|
"logits/rejected": -1.7068113088607788, |
|
"logps/chosen": -162.75521850585938, |
|
"logps/rejected": -220.6885986328125, |
|
"loss": 0.5992, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.21387754380702972, |
|
"rewards/margins": 0.31847310066223145, |
|
"rewards/rejected": -0.5323505997657776, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.44755244755244755, |
|
"grad_norm": 72.0795411450381, |
|
"learning_rate": 1.850217135729614e-07, |
|
"logits/chosen": -1.605985164642334, |
|
"logits/rejected": -1.5858122110366821, |
|
"logps/chosen": -196.78073120117188, |
|
"logps/rejected": -213.26580810546875, |
|
"loss": 0.6034, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.44325143098831177, |
|
"rewards/margins": 0.07666480541229248, |
|
"rewards/rejected": -0.5199161767959595, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.4498594189315839, |
|
"grad_norm": 72.48651390442274, |
|
"learning_rate": 1.8481840805143987e-07, |
|
"logits/chosen": -1.5632058382034302, |
|
"logits/rejected": -1.5244344472885132, |
|
"logps/chosen": -127.80747985839844, |
|
"logps/rejected": -152.81256103515625, |
|
"loss": 0.6163, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1298586130142212, |
|
"rewards/margins": 0.42240971326828003, |
|
"rewards/rejected": -0.5522683262825012, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.4521663903107202, |
|
"grad_norm": 74.34299341635638, |
|
"learning_rate": 1.8461384531649773e-07, |
|
"logits/chosen": -1.4820444583892822, |
|
"logits/rejected": -1.605046033859253, |
|
"logps/chosen": -105.68638610839844, |
|
"logps/rejected": -156.26785278320312, |
|
"loss": 0.6202, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.1893598437309265, |
|
"rewards/margins": 0.2589360773563385, |
|
"rewards/rejected": -0.4482958912849426, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.4544733616898565, |
|
"grad_norm": 76.36773452235572, |
|
"learning_rate": 1.844080284002482e-07, |
|
"logits/chosen": -1.5065568685531616, |
|
"logits/rejected": -1.5656404495239258, |
|
"logps/chosen": -158.7242889404297, |
|
"logps/rejected": -228.84844970703125, |
|
"loss": 0.6139, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.251006543636322, |
|
"rewards/margins": 0.21102304756641388, |
|
"rewards/rejected": -0.46202951669692993, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.45678033306899285, |
|
"grad_norm": 71.03674812873284, |
|
"learning_rate": 1.8420096035339452e-07, |
|
"logits/chosen": -1.5289005041122437, |
|
"logits/rejected": -1.527197003364563, |
|
"logps/chosen": -200.40029907226562, |
|
"logps/rejected": -212.3697967529297, |
|
"loss": 0.6187, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.2883009612560272, |
|
"rewards/margins": 0.30317747592926025, |
|
"rewards/rejected": -0.5914784073829651, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.4590873044481292, |
|
"grad_norm": 81.19707296013529, |
|
"learning_rate": 1.8399264424518465e-07, |
|
"logits/chosen": -1.494114875793457, |
|
"logits/rejected": -1.4553757905960083, |
|
"logps/chosen": -173.10043334960938, |
|
"logps/rejected": -222.2396240234375, |
|
"loss": 0.5955, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.3878926932811737, |
|
"rewards/margins": 0.3027462959289551, |
|
"rewards/rejected": -0.6906389594078064, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.4613942758272655, |
|
"grad_norm": 89.13135103863338, |
|
"learning_rate": 1.8378308316336582e-07, |
|
"logits/chosen": -1.618680715560913, |
|
"logits/rejected": -1.5578938722610474, |
|
"logps/chosen": -191.10128784179688, |
|
"logps/rejected": -280.5110778808594, |
|
"loss": 0.6411, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.4683380126953125, |
|
"rewards/margins": 0.19769813120365143, |
|
"rewards/rejected": -0.6660361289978027, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.4613942758272655, |
|
"eval_logits/chosen": -1.4853571653366089, |
|
"eval_logits/rejected": -1.3932629823684692, |
|
"eval_logps/chosen": -189.0384521484375, |
|
"eval_logps/rejected": -156.24160766601562, |
|
"eval_loss": 0.654194176197052, |
|
"eval_rewards/accuracies": 0.6800000071525574, |
|
"eval_rewards/chosen": -0.38562828302383423, |
|
"eval_rewards/margins": 0.18699264526367188, |
|
"eval_rewards/rejected": -0.5726209282875061, |
|
"eval_runtime": 26.5299, |
|
"eval_samples_per_second": 3.769, |
|
"eval_steps_per_second": 0.942, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.4637012472064018, |
|
"grad_norm": 69.21606890792003, |
|
"learning_rate": 1.8357228021413883e-07, |
|
"logits/chosen": -1.5431230068206787, |
|
"logits/rejected": -1.7365866899490356, |
|
"logps/chosen": -147.3966827392578, |
|
"logps/rejected": -170.9712371826172, |
|
"loss": 0.6581, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.30663323402404785, |
|
"rewards/margins": 0.11269617080688477, |
|
"rewards/rejected": -0.4193294048309326, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.46600821858553815, |
|
"grad_norm": 78.7990153253576, |
|
"learning_rate": 1.8336023852211194e-07, |
|
"logits/chosen": -1.5721492767333984, |
|
"logits/rejected": -1.4822769165039062, |
|
"logps/chosen": -148.9419403076172, |
|
"logps/rejected": -158.44668579101562, |
|
"loss": 0.609, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.27455994486808777, |
|
"rewards/margins": 0.3990754187107086, |
|
"rewards/rejected": -0.6736353039741516, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.4683151899646745, |
|
"grad_norm": 67.81492283153628, |
|
"learning_rate": 1.8314696123025453e-07, |
|
"logits/chosen": -1.6370363235473633, |
|
"logits/rejected": -1.5174671411514282, |
|
"logps/chosen": -145.17050170898438, |
|
"logps/rejected": -142.74551391601562, |
|
"loss": 0.6312, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.28109437227249146, |
|
"rewards/margins": 0.2069387137889862, |
|
"rewards/rejected": -0.48803308606147766, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.47062216134381085, |
|
"grad_norm": 78.2843593072173, |
|
"learning_rate": 1.8293245149985053e-07, |
|
"logits/chosen": -1.5488444566726685, |
|
"logits/rejected": -1.4798938035964966, |
|
"logps/chosen": -161.83570861816406, |
|
"logps/rejected": -162.7615509033203, |
|
"loss": 0.6484, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.2718978822231293, |
|
"rewards/margins": 0.15639187395572662, |
|
"rewards/rejected": -0.4282897710800171, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.4729291327229472, |
|
"grad_norm": 73.10449012391845, |
|
"learning_rate": 1.827167125104517e-07, |
|
"logits/chosen": -1.4978845119476318, |
|
"logits/rejected": -1.4839560985565186, |
|
"logps/chosen": -148.445556640625, |
|
"logps/rejected": -161.85986328125, |
|
"loss": 0.6481, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.27761712670326233, |
|
"rewards/margins": 0.09577606618404388, |
|
"rewards/rejected": -0.3733932077884674, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.4752361041020835, |
|
"grad_norm": 77.23312704566136, |
|
"learning_rate": 1.8249974745983021e-07, |
|
"logits/chosen": -1.4896149635314941, |
|
"logits/rejected": -1.4279950857162476, |
|
"logps/chosen": -136.3888397216797, |
|
"logps/rejected": -184.14625549316406, |
|
"loss": 0.6186, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.3546374440193176, |
|
"rewards/margins": 0.3140718638896942, |
|
"rewards/rejected": -0.6687093377113342, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.4775430754812198, |
|
"grad_norm": 65.58481770102698, |
|
"learning_rate": 1.822815595639316e-07, |
|
"logits/chosen": -1.4790016412734985, |
|
"logits/rejected": -1.525940179824829, |
|
"logps/chosen": -162.99288940429688, |
|
"logps/rejected": -190.2974853515625, |
|
"loss": 0.6112, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.36069726943969727, |
|
"rewards/margins": 0.20576652884483337, |
|
"rewards/rejected": -0.5664637684822083, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.47985004686035615, |
|
"grad_norm": 68.7972400850831, |
|
"learning_rate": 1.820621520568268e-07, |
|
"logits/chosen": -1.5574984550476074, |
|
"logits/rejected": -1.4820420742034912, |
|
"logps/chosen": -178.15878295898438, |
|
"logps/rejected": -191.66177368164062, |
|
"loss": 0.6, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.314214825630188, |
|
"rewards/margins": 0.32970941066741943, |
|
"rewards/rejected": -0.6439242362976074, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.4821570182394925, |
|
"grad_norm": 77.22458475405976, |
|
"learning_rate": 1.8184152819066434e-07, |
|
"logits/chosen": -1.5454033613204956, |
|
"logits/rejected": -1.5681257247924805, |
|
"logps/chosen": -206.4539031982422, |
|
"logps/rejected": -221.17599487304688, |
|
"loss": 0.6395, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.4876091778278351, |
|
"rewards/margins": 0.06031504273414612, |
|
"rewards/rejected": -0.5479242205619812, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.4844639896186288, |
|
"grad_norm": 69.59230881656185, |
|
"learning_rate": 1.8161969123562217e-07, |
|
"logits/chosen": -1.54752516746521, |
|
"logits/rejected": -1.5821384191513062, |
|
"logps/chosen": -182.0235137939453, |
|
"logps/rejected": -163.29364013671875, |
|
"loss": 0.6107, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.3480142056941986, |
|
"rewards/margins": 0.3120378255844116, |
|
"rewards/rejected": -0.6600520610809326, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.4867709609977651, |
|
"grad_norm": 68.29468448816121, |
|
"learning_rate": 1.813966444798591e-07, |
|
"logits/chosen": -1.513810634613037, |
|
"logits/rejected": -1.4666978120803833, |
|
"logps/chosen": -204.99462890625, |
|
"logps/rejected": -204.5595245361328, |
|
"loss": 0.6143, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.3375055491924286, |
|
"rewards/margins": 0.3794183135032654, |
|
"rewards/rejected": -0.7169238328933716, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.48907793237690145, |
|
"grad_norm": 73.69015362328696, |
|
"learning_rate": 1.8117239122946611e-07, |
|
"logits/chosen": -1.3477180004119873, |
|
"logits/rejected": -1.4509586095809937, |
|
"logps/chosen": -118.67777252197266, |
|
"logps/rejected": -176.48667907714844, |
|
"loss": 0.6192, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.3034321069717407, |
|
"rewards/margins": 0.12479298561811447, |
|
"rewards/rejected": -0.4282251298427582, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.49138490375603777, |
|
"grad_norm": 78.31541581493791, |
|
"learning_rate": 1.809469348084174e-07, |
|
"logits/chosen": -1.459653377532959, |
|
"logits/rejected": -1.5776402950286865, |
|
"logps/chosen": -159.45347595214844, |
|
"logps/rejected": -189.2720489501953, |
|
"loss": 0.6554, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.37468722462654114, |
|
"rewards/margins": 0.1383470892906189, |
|
"rewards/rejected": -0.5130342841148376, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.4936918751351741, |
|
"grad_norm": 130.5379676824635, |
|
"learning_rate": 1.8072027855852095e-07, |
|
"logits/chosen": -1.4528967142105103, |
|
"logits/rejected": -1.423844814300537, |
|
"logps/chosen": -172.85316467285156, |
|
"logps/rejected": -215.22189331054688, |
|
"loss": 0.6639, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.41784724593162537, |
|
"rewards/margins": 0.3192124366760254, |
|
"rewards/rejected": -0.7370596528053284, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.4959988465143104, |
|
"grad_norm": 63.21984381769687, |
|
"learning_rate": 1.8049242583936918e-07, |
|
"logits/chosen": -1.5084190368652344, |
|
"logits/rejected": -1.4574109315872192, |
|
"logps/chosen": -165.896484375, |
|
"logps/rejected": -227.423828125, |
|
"loss": 0.5893, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.25652381777763367, |
|
"rewards/margins": 0.47441697120666504, |
|
"rewards/rejected": -0.7309407591819763, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.49830581789344675, |
|
"grad_norm": 71.69590925642426, |
|
"learning_rate": 1.802633800282891e-07, |
|
"logits/chosen": -1.516315221786499, |
|
"logits/rejected": -1.6526371240615845, |
|
"logps/chosen": -229.77777099609375, |
|
"logps/rejected": -292.7660827636719, |
|
"loss": 0.5979, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.3780279755592346, |
|
"rewards/margins": 0.49888893961906433, |
|
"rewards/rejected": -0.8769169449806213, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.5006127892725831, |
|
"grad_norm": 72.54608833334152, |
|
"learning_rate": 1.8003314452029213e-07, |
|
"logits/chosen": -1.5792149305343628, |
|
"logits/rejected": -1.550574779510498, |
|
"logps/chosen": -226.616455078125, |
|
"logps/rejected": -228.4210205078125, |
|
"loss": 0.6046, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5066580176353455, |
|
"rewards/margins": 0.34013134241104126, |
|
"rewards/rejected": -0.8467893600463867, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.5029197606517194, |
|
"grad_norm": 73.04169645370872, |
|
"learning_rate": 1.7980172272802395e-07, |
|
"logits/chosen": -1.5109785795211792, |
|
"logits/rejected": -1.499125361442566, |
|
"logps/chosen": -154.92233276367188, |
|
"logps/rejected": -175.07643127441406, |
|
"loss": 0.5817, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.25602594017982483, |
|
"rewards/margins": 0.5013114809989929, |
|
"rewards/rejected": -0.7573373913764954, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.5052267320308558, |
|
"grad_norm": 69.05059334922119, |
|
"learning_rate": 1.7956911808171373e-07, |
|
"logits/chosen": -1.561600923538208, |
|
"logits/rejected": -1.5301151275634766, |
|
"logps/chosen": -217.26930236816406, |
|
"logps/rejected": -240.7093048095703, |
|
"loss": 0.6151, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.46973368525505066, |
|
"rewards/margins": 0.2093038558959961, |
|
"rewards/rejected": -0.6790375113487244, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.507533703409992, |
|
"grad_norm": 74.68873536524164, |
|
"learning_rate": 1.793353340291235e-07, |
|
"logits/chosen": -1.3198765516281128, |
|
"logits/rejected": -1.4805912971496582, |
|
"logps/chosen": -175.9479217529297, |
|
"logps/rejected": -226.83265686035156, |
|
"loss": 0.6134, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5549490451812744, |
|
"rewards/margins": 0.23202911019325256, |
|
"rewards/rejected": -0.7869781851768494, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.5098406747891284, |
|
"grad_norm": 73.37532376774183, |
|
"learning_rate": 1.7910037403549692e-07, |
|
"logits/chosen": -1.4717934131622314, |
|
"logits/rejected": -1.5461549758911133, |
|
"logps/chosen": -159.91883850097656, |
|
"logps/rejected": -204.87376403808594, |
|
"loss": 0.6459, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.4326345920562744, |
|
"rewards/margins": 0.22945694625377655, |
|
"rewards/rejected": -0.6620914936065674, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.5121476461682647, |
|
"grad_norm": 69.28741446430803, |
|
"learning_rate": 1.7886424158350782e-07, |
|
"logits/chosen": -1.5604138374328613, |
|
"logits/rejected": -1.663907766342163, |
|
"logps/chosen": -158.54408264160156, |
|
"logps/rejected": -192.7698516845703, |
|
"loss": 0.5921, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.3090921640396118, |
|
"rewards/margins": 0.3891502916812897, |
|
"rewards/rejected": -0.6982424855232239, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.5144546175474011, |
|
"grad_norm": 77.66154968693108, |
|
"learning_rate": 1.7862694017320886e-07, |
|
"logits/chosen": -1.3435657024383545, |
|
"logits/rejected": -1.3843066692352295, |
|
"logps/chosen": -174.62672424316406, |
|
"logps/rejected": -288.0128173828125, |
|
"loss": 0.6145, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.4857187271118164, |
|
"rewards/margins": 0.4818662703037262, |
|
"rewards/rejected": -0.9675850868225098, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.5167615889265373, |
|
"grad_norm": 86.0701716220196, |
|
"learning_rate": 1.7838847332197937e-07, |
|
"logits/chosen": -1.4369436502456665, |
|
"logits/rejected": -1.5111709833145142, |
|
"logps/chosen": -193.0187225341797, |
|
"logps/rejected": -258.660400390625, |
|
"loss": 0.6179, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.4460400640964508, |
|
"rewards/margins": 0.4027029871940613, |
|
"rewards/rejected": -0.8487430810928345, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.5190685603056737, |
|
"grad_norm": 84.40844346826594, |
|
"learning_rate": 1.7814884456447335e-07, |
|
"logits/chosen": -1.5306761264801025, |
|
"logits/rejected": -1.4944154024124146, |
|
"logps/chosen": -195.49612426757812, |
|
"logps/rejected": -222.01425170898438, |
|
"loss": 0.6006, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.2904947102069855, |
|
"rewards/margins": 0.5166550874710083, |
|
"rewards/rejected": -0.8071498870849609, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.52137553168481, |
|
"grad_norm": 86.3712126774886, |
|
"learning_rate": 1.7790805745256703e-07, |
|
"logits/chosen": -1.3275847434997559, |
|
"logits/rejected": -1.38175630569458, |
|
"logps/chosen": -136.90707397460938, |
|
"logps/rejected": -184.36331176757812, |
|
"loss": 0.6767, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.44699156284332275, |
|
"rewards/margins": 0.12617343664169312, |
|
"rewards/rejected": -0.5731649398803711, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.5236825030639464, |
|
"grad_norm": 66.61833278109548, |
|
"learning_rate": 1.7766611555530635e-07, |
|
"logits/chosen": -1.6141921281814575, |
|
"logits/rejected": -1.5151243209838867, |
|
"logps/chosen": -156.77407836914062, |
|
"logps/rejected": -154.7230682373047, |
|
"loss": 0.5733, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.3759933114051819, |
|
"rewards/margins": 0.17464786767959595, |
|
"rewards/rejected": -0.5506411790847778, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.5259894744430826, |
|
"grad_norm": 69.26758309677136, |
|
"learning_rate": 1.774230224588538e-07, |
|
"logits/chosen": -1.3204282522201538, |
|
"logits/rejected": -1.4286822080612183, |
|
"logps/chosen": -152.52542114257812, |
|
"logps/rejected": -232.16189575195312, |
|
"loss": 0.5494, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.4260653853416443, |
|
"rewards/margins": 0.5102941989898682, |
|
"rewards/rejected": -0.9363595247268677, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.528296445822219, |
|
"grad_norm": 81.07739462727531, |
|
"learning_rate": 1.771787817664356e-07, |
|
"logits/chosen": -1.508811116218567, |
|
"logits/rejected": -1.5395921468734741, |
|
"logps/chosen": -134.4735565185547, |
|
"logps/rejected": -166.41592407226562, |
|
"loss": 0.6351, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.49481019377708435, |
|
"rewards/margins": 0.1262877732515335, |
|
"rewards/rejected": -0.6210979223251343, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.5306034172013554, |
|
"grad_norm": 86.01343093557993, |
|
"learning_rate": 1.769333970982879e-07, |
|
"logits/chosen": -1.518664836883545, |
|
"logits/rejected": -1.3482635021209717, |
|
"logps/chosen": -173.78538513183594, |
|
"logps/rejected": -160.53573608398438, |
|
"loss": 0.5857, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.49463319778442383, |
|
"rewards/margins": 0.202806293964386, |
|
"rewards/rejected": -0.6974395513534546, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.5329103885804917, |
|
"grad_norm": 85.16027410016599, |
|
"learning_rate": 1.766868720916035e-07, |
|
"logits/chosen": -1.359481930732727, |
|
"logits/rejected": -1.3029265403747559, |
|
"logps/chosen": -134.05616760253906, |
|
"logps/rejected": -134.0654754638672, |
|
"loss": 0.6487, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.4239296019077301, |
|
"rewards/margins": 0.03123108297586441, |
|
"rewards/rejected": -0.4551607072353363, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.535217359959628, |
|
"grad_norm": 84.5629811685175, |
|
"learning_rate": 1.7643921040047766e-07, |
|
"logits/chosen": -1.6018937826156616, |
|
"logits/rejected": -1.6816954612731934, |
|
"logps/chosen": -237.3992919921875, |
|
"logps/rejected": -253.08688354492188, |
|
"loss": 0.597, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.6288573741912842, |
|
"rewards/margins": 0.15610165894031525, |
|
"rewards/rejected": -0.7849590182304382, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.5375243313387643, |
|
"grad_norm": 80.72916842158041, |
|
"learning_rate": 1.7619041569585418e-07, |
|
"logits/chosen": -1.4444328546524048, |
|
"logits/rejected": -1.4673030376434326, |
|
"logps/chosen": -170.2801971435547, |
|
"logps/rejected": -214.7718963623047, |
|
"loss": 0.6181, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.564181923866272, |
|
"rewards/margins": 0.2302751988172531, |
|
"rewards/rejected": -0.7944571375846863, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.5398313027179007, |
|
"grad_norm": 76.00828750498393, |
|
"learning_rate": 1.759404916654707e-07, |
|
"logits/chosen": -1.4668854475021362, |
|
"logits/rejected": -1.421462059020996, |
|
"logps/chosen": -360.7674560546875, |
|
"logps/rejected": -301.1515197753906, |
|
"loss": 0.6139, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6432144641876221, |
|
"rewards/margins": 0.3255874514579773, |
|
"rewards/rejected": -0.9688019156455994, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.542138274097037, |
|
"grad_norm": 75.00038820917719, |
|
"learning_rate": 1.756894420138043e-07, |
|
"logits/chosen": -1.5766559839248657, |
|
"logits/rejected": -1.656800627708435, |
|
"logps/chosen": -216.8627471923828, |
|
"logps/rejected": -270.90850830078125, |
|
"loss": 0.615, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.4949862062931061, |
|
"rewards/margins": 0.4039486050605774, |
|
"rewards/rejected": -0.8989347815513611, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.5444452454761733, |
|
"grad_norm": 86.17675092820859, |
|
"learning_rate": 1.754372704620164e-07, |
|
"logits/chosen": -1.4618090391159058, |
|
"logits/rejected": -1.5533053874969482, |
|
"logps/chosen": -202.59561157226562, |
|
"logps/rejected": -221.70413208007812, |
|
"loss": 0.6478, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.44822004437446594, |
|
"rewards/margins": 0.28794264793395996, |
|
"rewards/rejected": -0.7361626625061035, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.5467522168553096, |
|
"grad_norm": 72.36150215283246, |
|
"learning_rate": 1.7518398074789774e-07, |
|
"logits/chosen": -1.4804517030715942, |
|
"logits/rejected": -1.5212501287460327, |
|
"logps/chosen": -195.58935546875, |
|
"logps/rejected": -247.99276733398438, |
|
"loss": 0.553, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.44707149267196655, |
|
"rewards/margins": 0.6286894679069519, |
|
"rewards/rejected": -1.0757609605789185, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.549059188234446, |
|
"grad_norm": 73.94947964279808, |
|
"learning_rate": 1.7492957662581294e-07, |
|
"logits/chosen": -1.3577089309692383, |
|
"logits/rejected": -1.4486963748931885, |
|
"logps/chosen": -133.3319091796875, |
|
"logps/rejected": -188.2812957763672, |
|
"loss": 0.6001, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.34889039397239685, |
|
"rewards/margins": 0.3021068871021271, |
|
"rewards/rejected": -0.6509972214698792, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.5513661596135823, |
|
"grad_norm": 74.0047644626624, |
|
"learning_rate": 1.7467406186664473e-07, |
|
"logits/chosen": -1.5747010707855225, |
|
"logits/rejected": -1.5058567523956299, |
|
"logps/chosen": -216.6630401611328, |
|
"logps/rejected": -223.66598510742188, |
|
"loss": 0.6345, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5371094346046448, |
|
"rewards/margins": 0.3996596932411194, |
|
"rewards/rejected": -0.9367691874504089, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.5536731309927186, |
|
"grad_norm": 50.915761396824145, |
|
"learning_rate": 1.7441744025773834e-07, |
|
"logits/chosen": -1.4014126062393188, |
|
"logits/rejected": -1.569306492805481, |
|
"logps/chosen": -156.43629455566406, |
|
"logps/rejected": -228.84625244140625, |
|
"loss": 0.5975, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.399608850479126, |
|
"rewards/margins": 0.29513585567474365, |
|
"rewards/rejected": -0.6947447061538696, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.5559801023718549, |
|
"grad_norm": 80.40246802194461, |
|
"learning_rate": 1.74159715602845e-07, |
|
"logits/chosen": -1.49760103225708, |
|
"logits/rejected": -1.4302232265472412, |
|
"logps/chosen": -152.4906005859375, |
|
"logps/rejected": -165.43942260742188, |
|
"loss": 0.6511, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.4252295196056366, |
|
"rewards/margins": 0.12136977910995483, |
|
"rewards/rejected": -0.5465993285179138, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.5582870737509913, |
|
"grad_norm": 70.56990492477674, |
|
"learning_rate": 1.739008917220659e-07, |
|
"logits/chosen": -1.4919289350509644, |
|
"logits/rejected": -1.5267033576965332, |
|
"logps/chosen": -187.85191345214844, |
|
"logps/rejected": -220.8524169921875, |
|
"loss": 0.5689, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.5196070671081543, |
|
"rewards/margins": 0.3590528666973114, |
|
"rewards/rejected": -0.8786599636077881, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.5605940451301276, |
|
"grad_norm": 78.98020718967784, |
|
"learning_rate": 1.7364097245179527e-07, |
|
"logits/chosen": -1.599880337715149, |
|
"logits/rejected": -1.5224246978759766, |
|
"logps/chosen": -196.72555541992188, |
|
"logps/rejected": -213.14309692382812, |
|
"loss": 0.5892, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.5108906030654907, |
|
"rewards/margins": 0.1902090609073639, |
|
"rewards/rejected": -0.701099693775177, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.5629010165092639, |
|
"grad_norm": 75.35371757401214, |
|
"learning_rate": 1.733799616446637e-07, |
|
"logits/chosen": -1.4978597164154053, |
|
"logits/rejected": -1.5102261304855347, |
|
"logps/chosen": -186.15167236328125, |
|
"logps/rejected": -226.00375366210938, |
|
"loss": 0.6112, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.43081170320510864, |
|
"rewards/margins": 0.36774906516075134, |
|
"rewards/rejected": -0.7985607385635376, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.5652079878884002, |
|
"grad_norm": 75.43303696622675, |
|
"learning_rate": 1.7311786316948108e-07, |
|
"logits/chosen": -1.418121337890625, |
|
"logits/rejected": -1.4920923709869385, |
|
"logps/chosen": -179.17889404296875, |
|
"logps/rejected": -229.40098571777344, |
|
"loss": 0.5938, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6558996438980103, |
|
"rewards/margins": 0.27045130729675293, |
|
"rewards/rejected": -0.9263509511947632, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.5675149592675366, |
|
"grad_norm": 71.0686050492484, |
|
"learning_rate": 1.7285468091117904e-07, |
|
"logits/chosen": -1.4989047050476074, |
|
"logits/rejected": -1.4156945943832397, |
|
"logps/chosen": -153.10214233398438, |
|
"logps/rejected": -172.13262939453125, |
|
"loss": 0.5901, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.4824844300746918, |
|
"rewards/margins": 0.44079095125198364, |
|
"rewards/rejected": -0.9232754707336426, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.569821930646673, |
|
"grad_norm": 67.99918941849218, |
|
"learning_rate": 1.7259041877075352e-07, |
|
"logits/chosen": -1.430630087852478, |
|
"logits/rejected": -1.3989218473434448, |
|
"logps/chosen": -209.73452758789062, |
|
"logps/rejected": -254.0313720703125, |
|
"loss": 0.5729, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5363369584083557, |
|
"rewards/margins": 0.5890082120895386, |
|
"rewards/rejected": -1.125345230102539, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.5721289020258092, |
|
"grad_norm": 78.40754956054191, |
|
"learning_rate": 1.7232508066520698e-07, |
|
"logits/chosen": -1.5510261058807373, |
|
"logits/rejected": -1.5487847328186035, |
|
"logps/chosen": -211.16983032226562, |
|
"logps/rejected": -240.33824157714844, |
|
"loss": 0.5772, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.4531714916229248, |
|
"rewards/margins": 0.2688879370689392, |
|
"rewards/rejected": -0.7220594882965088, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.5744358734049456, |
|
"grad_norm": 61.990430466819326, |
|
"learning_rate": 1.7205867052749023e-07, |
|
"logits/chosen": -1.363396167755127, |
|
"logits/rejected": -1.3964465856552124, |
|
"logps/chosen": -147.12242126464844, |
|
"logps/rejected": -180.23667907714844, |
|
"loss": 0.6459, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.5750865340232849, |
|
"rewards/margins": 0.11157172918319702, |
|
"rewards/rejected": -0.6866582632064819, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.5767428447840819, |
|
"grad_norm": 76.0573953537264, |
|
"learning_rate": 1.717911923064442e-07, |
|
"logits/chosen": -1.5747530460357666, |
|
"logits/rejected": -1.4509817361831665, |
|
"logps/chosen": -181.61216735839844, |
|
"logps/rejected": -153.97573852539062, |
|
"loss": 0.6012, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.5273740887641907, |
|
"rewards/margins": 0.1454104781150818, |
|
"rewards/rejected": -0.6727845668792725, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.5767428447840819, |
|
"eval_logits/chosen": -1.440444827079773, |
|
"eval_logits/rejected": -1.3533989191055298, |
|
"eval_logps/chosen": -191.4648895263672, |
|
"eval_logps/rejected": -158.6099395751953, |
|
"eval_loss": 0.636239767074585, |
|
"eval_rewards/accuracies": 0.6800000071525574, |
|
"eval_rewards/chosen": -0.628268837928772, |
|
"eval_rewards/margins": 0.18118661642074585, |
|
"eval_rewards/rejected": -0.809455394744873, |
|
"eval_runtime": 37.9799, |
|
"eval_samples_per_second": 2.633, |
|
"eval_steps_per_second": 0.658, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.5790498161632183, |
|
"grad_norm": 77.78220283215643, |
|
"learning_rate": 1.7152264996674135e-07, |
|
"logits/chosen": -1.4428610801696777, |
|
"logits/rejected": -1.2872042655944824, |
|
"logps/chosen": -184.39501953125, |
|
"logps/rejected": -238.38723754882812, |
|
"loss": 0.5953, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6475786566734314, |
|
"rewards/margins": 0.2779845893383026, |
|
"rewards/rejected": -0.9255632758140564, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.5813567875423545, |
|
"grad_norm": 93.29916680291039, |
|
"learning_rate": 1.71253047488827e-07, |
|
"logits/chosen": -1.4898688793182373, |
|
"logits/rejected": -1.5620332956314087, |
|
"logps/chosen": -178.47802734375, |
|
"logps/rejected": -205.5224609375, |
|
"loss": 0.6703, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5680350065231323, |
|
"rewards/margins": 0.18766377866268158, |
|
"rewards/rejected": -0.7556988000869751, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.5836637589214909, |
|
"grad_norm": 77.19105499219319, |
|
"learning_rate": 1.7098238886886024e-07, |
|
"logits/chosen": -1.4835506677627563, |
|
"logits/rejected": -1.5302045345306396, |
|
"logps/chosen": -203.8736114501953, |
|
"logps/rejected": -228.69265747070312, |
|
"loss": 0.5951, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.47867119312286377, |
|
"rewards/margins": 0.22942683100700378, |
|
"rewards/rejected": -0.7080979943275452, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.5859707303006272, |
|
"grad_norm": 67.4261860354, |
|
"learning_rate": 1.7071067811865473e-07, |
|
"logits/chosen": -1.4649958610534668, |
|
"logits/rejected": -1.4145183563232422, |
|
"logps/chosen": -199.42066955566406, |
|
"logps/rejected": -235.40292358398438, |
|
"loss": 0.5368, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.4195340573787689, |
|
"rewards/margins": 0.551209032535553, |
|
"rewards/rejected": -0.9707430601119995, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.5882777016797636, |
|
"grad_norm": 87.85240065033273, |
|
"learning_rate": 1.7043791926561932e-07, |
|
"logits/chosen": -1.5964919328689575, |
|
"logits/rejected": -1.561856746673584, |
|
"logps/chosen": -201.67276000976562, |
|
"logps/rejected": -234.04359436035156, |
|
"loss": 0.651, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.6162290573120117, |
|
"rewards/margins": 0.4439167082309723, |
|
"rewards/rejected": -1.0601458549499512, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.5905846730588998, |
|
"grad_norm": 62.42968300457303, |
|
"learning_rate": 1.7016411635269815e-07, |
|
"logits/chosen": -1.4615092277526855, |
|
"logits/rejected": -1.4488492012023926, |
|
"logps/chosen": -151.2560577392578, |
|
"logps/rejected": -176.4474334716797, |
|
"loss": 0.609, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.33995571732521057, |
|
"rewards/margins": 0.2483442723751068, |
|
"rewards/rejected": -0.5882999897003174, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.5928916444380362, |
|
"grad_norm": 74.39629379240114, |
|
"learning_rate": 1.6988927343831091e-07, |
|
"logits/chosen": -1.5747379064559937, |
|
"logits/rejected": -1.4773468971252441, |
|
"logps/chosen": -198.891845703125, |
|
"logps/rejected": -210.0729522705078, |
|
"loss": 0.61, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.47531554102897644, |
|
"rewards/margins": 0.47791624069213867, |
|
"rewards/rejected": -0.9532317519187927, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.5951986158171725, |
|
"grad_norm": 70.19350216590036, |
|
"learning_rate": 1.6961339459629266e-07, |
|
"logits/chosen": -1.4481630325317383, |
|
"logits/rejected": -1.4714566469192505, |
|
"logps/chosen": -190.8370361328125, |
|
"logps/rejected": -242.71621704101562, |
|
"loss": 0.5872, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.5172877907752991, |
|
"rewards/margins": 0.48140281438827515, |
|
"rewards/rejected": -0.998690664768219, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.5975055871963089, |
|
"grad_norm": 73.75535823993799, |
|
"learning_rate": 1.6933648391583328e-07, |
|
"logits/chosen": -1.531792163848877, |
|
"logits/rejected": -1.4680547714233398, |
|
"logps/chosen": -144.9717559814453, |
|
"logps/rejected": -172.87686157226562, |
|
"loss": 0.6006, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.3757992386817932, |
|
"rewards/margins": 0.35130438208580017, |
|
"rewards/rejected": -0.7271036505699158, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.5998125585754451, |
|
"grad_norm": 69.85303523035323, |
|
"learning_rate": 1.6905854550141714e-07, |
|
"logits/chosen": -1.5805073976516724, |
|
"logits/rejected": -1.5384862422943115, |
|
"logps/chosen": -171.9115753173828, |
|
"logps/rejected": -169.82862854003906, |
|
"loss": 0.5875, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.5081273317337036, |
|
"rewards/margins": 0.2863667607307434, |
|
"rewards/rejected": -0.794494092464447, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.6021195299545815, |
|
"grad_norm": 69.03602758187714, |
|
"learning_rate": 1.6877958347276197e-07, |
|
"logits/chosen": -1.4844419956207275, |
|
"logits/rejected": -1.4906061887741089, |
|
"logps/chosen": -149.6005859375, |
|
"logps/rejected": -163.59097290039062, |
|
"loss": 0.6013, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.42841285467147827, |
|
"rewards/margins": 0.30834630131721497, |
|
"rewards/rejected": -0.7367592453956604, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.6044265013337178, |
|
"grad_norm": 80.75337933099041, |
|
"learning_rate": 1.6849960196475805e-07, |
|
"logits/chosen": -1.5245236158370972, |
|
"logits/rejected": -1.5345442295074463, |
|
"logps/chosen": -148.5638885498047, |
|
"logps/rejected": -178.37429809570312, |
|
"loss": 0.5909, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.3656730651855469, |
|
"rewards/margins": 0.3520704507827759, |
|
"rewards/rejected": -0.7177435159683228, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.6067334727128542, |
|
"grad_norm": 79.6488573037571, |
|
"learning_rate": 1.682186051274067e-07, |
|
"logits/chosen": -1.4462357759475708, |
|
"logits/rejected": -1.4616801738739014, |
|
"logps/chosen": -144.83853149414062, |
|
"logps/rejected": -191.320556640625, |
|
"loss": 0.5847, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6087457537651062, |
|
"rewards/margins": 0.3239368498325348, |
|
"rewards/rejected": -0.9326826930046082, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.6090404440919904, |
|
"grad_norm": 82.53815106903608, |
|
"learning_rate": 1.6793659712575895e-07, |
|
"logits/chosen": -1.5642480850219727, |
|
"logits/rejected": -1.4599685668945312, |
|
"logps/chosen": -215.29837036132812, |
|
"logps/rejected": -199.14767456054688, |
|
"loss": 0.5928, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.5695382356643677, |
|
"rewards/margins": 0.271673321723938, |
|
"rewards/rejected": -0.8412115573883057, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.6113474154711268, |
|
"grad_norm": 86.53571512694035, |
|
"learning_rate": 1.676535821398537e-07, |
|
"logits/chosen": -1.3208836317062378, |
|
"logits/rejected": -1.3146097660064697, |
|
"logps/chosen": -189.41128540039062, |
|
"logps/rejected": -232.5477294921875, |
|
"loss": 0.6013, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.654186487197876, |
|
"rewards/margins": 0.4602148234844208, |
|
"rewards/rejected": -1.1144013404846191, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.6136543868502632, |
|
"grad_norm": 70.64851504723866, |
|
"learning_rate": 1.6736956436465573e-07, |
|
"logits/chosen": -1.3590030670166016, |
|
"logits/rejected": -1.4608113765716553, |
|
"logps/chosen": -148.809326171875, |
|
"logps/rejected": -203.59759521484375, |
|
"loss": 0.5861, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.496415913105011, |
|
"rewards/margins": 0.31767329573631287, |
|
"rewards/rejected": -0.814089298248291, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.6159613582293995, |
|
"grad_norm": 73.57136513502368, |
|
"learning_rate": 1.6708454800999366e-07, |
|
"logits/chosen": -1.4504910707473755, |
|
"logits/rejected": -1.4983229637145996, |
|
"logps/chosen": -166.2091522216797, |
|
"logps/rejected": -206.8488311767578, |
|
"loss": 0.6153, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.49555644392967224, |
|
"rewards/margins": 0.3523869812488556, |
|
"rewards/rejected": -0.8479433655738831, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.6182683296085358, |
|
"grad_norm": 67.83021038753246, |
|
"learning_rate": 1.667985373004974e-07, |
|
"logits/chosen": -1.4747323989868164, |
|
"logits/rejected": -1.3922568559646606, |
|
"logps/chosen": -159.47254943847656, |
|
"logps/rejected": -177.21884155273438, |
|
"loss": 0.5691, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.2918567657470703, |
|
"rewards/margins": 0.5216075778007507, |
|
"rewards/rejected": -0.8134642839431763, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.6205753009876721, |
|
"grad_norm": 75.55693314924734, |
|
"learning_rate": 1.6651153647553567e-07, |
|
"logits/chosen": -1.6021491289138794, |
|
"logits/rejected": -1.6126930713653564, |
|
"logps/chosen": -165.55172729492188, |
|
"logps/rejected": -197.1583251953125, |
|
"loss": 0.5986, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.505136251449585, |
|
"rewards/margins": 0.2592867612838745, |
|
"rewards/rejected": -0.7644230127334595, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.6228822723668085, |
|
"grad_norm": 74.57237448077612, |
|
"learning_rate": 1.6622354978915304e-07, |
|
"logits/chosen": -1.3560292720794678, |
|
"logits/rejected": -1.4895740747451782, |
|
"logps/chosen": -152.60386657714844, |
|
"logps/rejected": -200.48497009277344, |
|
"loss": 0.5976, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.450514554977417, |
|
"rewards/margins": 0.42979568243026733, |
|
"rewards/rejected": -0.8803102374076843, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.6251892437459448, |
|
"grad_norm": 76.07758708375029, |
|
"learning_rate": 1.6593458151000687e-07, |
|
"logits/chosen": -1.418495535850525, |
|
"logits/rejected": -1.5285032987594604, |
|
"logps/chosen": -174.468017578125, |
|
"logps/rejected": -212.58534240722656, |
|
"loss": 0.6021, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.4992409944534302, |
|
"rewards/margins": 0.357663631439209, |
|
"rewards/rejected": -0.8569046854972839, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.6274962151250811, |
|
"grad_norm": 67.61668250943133, |
|
"learning_rate": 1.6564463592130426e-07, |
|
"logits/chosen": -1.6000475883483887, |
|
"logits/rejected": -1.5714551210403442, |
|
"logps/chosen": -129.46788024902344, |
|
"logps/rejected": -137.58729553222656, |
|
"loss": 0.6027, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.4155838191509247, |
|
"rewards/margins": 0.31966376304626465, |
|
"rewards/rejected": -0.7352475523948669, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.6298031865042174, |
|
"grad_norm": 67.37831547087359, |
|
"learning_rate": 1.6535371732073823e-07, |
|
"logits/chosen": -1.5627467632293701, |
|
"logits/rejected": -1.4833993911743164, |
|
"logps/chosen": -115.5599594116211, |
|
"logps/rejected": -121.90804290771484, |
|
"loss": 0.5859, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.286516010761261, |
|
"rewards/margins": 0.36314332485198975, |
|
"rewards/rejected": -0.6496593356132507, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.6321101578833538, |
|
"grad_norm": 79.67037148877638, |
|
"learning_rate": 1.650618300204242e-07, |
|
"logits/chosen": -1.4731521606445312, |
|
"logits/rejected": -1.5530614852905273, |
|
"logps/chosen": -218.06552124023438, |
|
"logps/rejected": -257.6269226074219, |
|
"loss": 0.6104, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7696484923362732, |
|
"rewards/margins": 0.28321802616119385, |
|
"rewards/rejected": -1.0528665781021118, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.63441712926249, |
|
"grad_norm": 67.9423797863854, |
|
"learning_rate": 1.6476897834683618e-07, |
|
"logits/chosen": -1.4056189060211182, |
|
"logits/rejected": -1.4078246355056763, |
|
"logps/chosen": -147.92111206054688, |
|
"logps/rejected": -188.60968017578125, |
|
"loss": 0.6018, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5256268978118896, |
|
"rewards/margins": 0.4678364396095276, |
|
"rewards/rejected": -0.9934633374214172, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.6367241006416264, |
|
"grad_norm": 68.15375283996126, |
|
"learning_rate": 1.644751666407424e-07, |
|
"logits/chosen": -1.2929272651672363, |
|
"logits/rejected": -1.3170608282089233, |
|
"logps/chosen": -207.3567352294922, |
|
"logps/rejected": -262.3974609375, |
|
"loss": 0.5823, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7716534733772278, |
|
"rewards/margins": 0.6446899771690369, |
|
"rewards/rejected": -1.4163434505462646, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.6390310720207627, |
|
"grad_norm": 71.41650018580867, |
|
"learning_rate": 1.6418039925714115e-07, |
|
"logits/chosen": -1.3858839273452759, |
|
"logits/rejected": -1.3953114748001099, |
|
"logps/chosen": -160.35096740722656, |
|
"logps/rejected": -186.47933959960938, |
|
"loss": 0.5559, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.5581396222114563, |
|
"rewards/margins": 0.3457927703857422, |
|
"rewards/rejected": -0.9039323329925537, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.6413380433998991, |
|
"grad_norm": 76.78836475295354, |
|
"learning_rate": 1.6388468056519612e-07, |
|
"logits/chosen": -1.4668548107147217, |
|
"logits/rejected": -1.4067307710647583, |
|
"logps/chosen": -212.10546875, |
|
"logps/rejected": -193.7842254638672, |
|
"loss": 0.5721, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.618504524230957, |
|
"rewards/margins": 0.36426225304603577, |
|
"rewards/rejected": -0.9827668070793152, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.6436450147790354, |
|
"grad_norm": 66.95864858123714, |
|
"learning_rate": 1.6358801494817172e-07, |
|
"logits/chosen": -1.4181556701660156, |
|
"logits/rejected": -1.409440279006958, |
|
"logps/chosen": -139.5923309326172, |
|
"logps/rejected": -183.9441375732422, |
|
"loss": 0.5663, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.42550671100616455, |
|
"rewards/margins": 0.626122236251831, |
|
"rewards/rejected": -1.0516289472579956, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.6459519861581717, |
|
"grad_norm": 88.18680458715171, |
|
"learning_rate": 1.6329040680336805e-07, |
|
"logits/chosen": -1.468677282333374, |
|
"logits/rejected": -1.5043675899505615, |
|
"logps/chosen": -161.72213745117188, |
|
"logps/rejected": -206.85214233398438, |
|
"loss": 0.572, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5167573690414429, |
|
"rewards/margins": 0.36671191453933716, |
|
"rewards/rejected": -0.8834693431854248, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.648258957537308, |
|
"grad_norm": 71.84112642036989, |
|
"learning_rate": 1.6299186054205575e-07, |
|
"logits/chosen": -1.5098912715911865, |
|
"logits/rejected": -1.4657700061798096, |
|
"logps/chosen": -177.00067138671875, |
|
"logps/rejected": -190.06985473632812, |
|
"loss": 0.5365, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.3948441743850708, |
|
"rewards/margins": 0.5432202816009521, |
|
"rewards/rejected": -0.9380643963813782, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.6505659289164444, |
|
"grad_norm": 77.21845596596229, |
|
"learning_rate": 1.6269238058941067e-07, |
|
"logits/chosen": -1.5354855060577393, |
|
"logits/rejected": -1.4872441291809082, |
|
"logps/chosen": -220.86279296875, |
|
"logps/rejected": -242.259765625, |
|
"loss": 0.6141, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.5020161867141724, |
|
"rewards/margins": 0.3912605345249176, |
|
"rewards/rejected": -0.8932766914367676, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.6528729002955808, |
|
"grad_norm": 77.14842839642075, |
|
"learning_rate": 1.6239197138444807e-07, |
|
"logits/chosen": -1.4313609600067139, |
|
"logits/rejected": -1.4305431842803955, |
|
"logps/chosen": -99.62786865234375, |
|
"logps/rejected": -128.8907928466797, |
|
"loss": 0.5895, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.1888483613729477, |
|
"rewards/margins": 0.4503237307071686, |
|
"rewards/rejected": -0.6391721367835999, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.655179871674717, |
|
"grad_norm": 62.79374975719681, |
|
"learning_rate": 1.6209063737995714e-07, |
|
"logits/chosen": -1.4637759923934937, |
|
"logits/rejected": -1.4549309015274048, |
|
"logps/chosen": -144.82948303222656, |
|
"logps/rejected": -185.9346466064453, |
|
"loss": 0.5515, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.44154876470565796, |
|
"rewards/margins": 0.37137869000434875, |
|
"rewards/rejected": -0.8129273653030396, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.6574868430538534, |
|
"grad_norm": 77.33084496555169, |
|
"learning_rate": 1.6178838304243472e-07, |
|
"logits/chosen": -1.491298794746399, |
|
"logits/rejected": -1.5582300424575806, |
|
"logps/chosen": -193.7870635986328, |
|
"logps/rejected": -242.5855712890625, |
|
"loss": 0.5723, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.5082133412361145, |
|
"rewards/margins": 0.6296249628067017, |
|
"rewards/rejected": -1.1378382444381714, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.6597938144329897, |
|
"grad_norm": 67.02472308421605, |
|
"learning_rate": 1.6148521285201927e-07, |
|
"logits/chosen": -1.4817756414413452, |
|
"logits/rejected": -1.402366042137146, |
|
"logps/chosen": -154.45765686035156, |
|
"logps/rejected": -178.16561889648438, |
|
"loss": 0.5564, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.3961385488510132, |
|
"rewards/margins": 0.5840703248977661, |
|
"rewards/rejected": -0.9802089333534241, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.6621007858121261, |
|
"grad_norm": 73.0106659319347, |
|
"learning_rate": 1.6118113130242432e-07, |
|
"logits/chosen": -1.4550271034240723, |
|
"logits/rejected": -1.4115763902664185, |
|
"logps/chosen": -221.6585235595703, |
|
"logps/rejected": -195.1796417236328, |
|
"loss": 0.5774, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.8219617009162903, |
|
"rewards/margins": 0.16280440986156464, |
|
"rewards/rejected": -0.9847662448883057, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.6644077571912623, |
|
"grad_norm": 77.31259598468839, |
|
"learning_rate": 1.6087614290087206e-07, |
|
"logits/chosen": -1.4929287433624268, |
|
"logits/rejected": -1.4764537811279297, |
|
"logps/chosen": -230.29653930664062, |
|
"logps/rejected": -284.22412109375, |
|
"loss": 0.5818, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.6301875114440918, |
|
"rewards/margins": 0.7476638555526733, |
|
"rewards/rejected": -1.3778512477874756, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.6667147285703987, |
|
"grad_norm": 69.04855850678052, |
|
"learning_rate": 1.605702521680263e-07, |
|
"logits/chosen": -1.3067015409469604, |
|
"logits/rejected": -1.338529348373413, |
|
"logps/chosen": -147.36080932617188, |
|
"logps/rejected": -193.80665588378906, |
|
"loss": 0.5757, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.6234080791473389, |
|
"rewards/margins": 0.39194294810295105, |
|
"rewards/rejected": -1.0153510570526123, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.669021699949535, |
|
"grad_norm": 81.45402825293101, |
|
"learning_rate": 1.6026346363792565e-07, |
|
"logits/chosen": -1.4524238109588623, |
|
"logits/rejected": -1.3550243377685547, |
|
"logps/chosen": -187.0885772705078, |
|
"logps/rejected": -177.09780883789062, |
|
"loss": 0.6058, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7711231708526611, |
|
"rewards/margins": 0.17797166109085083, |
|
"rewards/rejected": -0.9490947127342224, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.6713286713286714, |
|
"grad_norm": 65.47602685653504, |
|
"learning_rate": 1.5995578185791616e-07, |
|
"logits/chosen": -1.387951374053955, |
|
"logits/rejected": -1.3309695720672607, |
|
"logps/chosen": -158.39202880859375, |
|
"logps/rejected": -186.85105895996094, |
|
"loss": 0.5825, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.48583418130874634, |
|
"rewards/margins": 0.503716230392456, |
|
"rewards/rejected": -0.9895503520965576, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.6736356427078076, |
|
"grad_norm": 76.89288613284735, |
|
"learning_rate": 1.596472113885841e-07, |
|
"logits/chosen": -1.4493763446807861, |
|
"logits/rejected": -1.4876127243041992, |
|
"logps/chosen": -180.78541564941406, |
|
"logps/rejected": -220.08172607421875, |
|
"loss": 0.5822, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.5832819938659668, |
|
"rewards/margins": 0.494464248418808, |
|
"rewards/rejected": -1.0777461528778076, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.675942614086944, |
|
"grad_norm": 82.2690699212878, |
|
"learning_rate": 1.5933775680368822e-07, |
|
"logits/chosen": -1.4559937715530396, |
|
"logits/rejected": -1.5102128982543945, |
|
"logps/chosen": -169.15960693359375, |
|
"logps/rejected": -176.64280700683594, |
|
"loss": 0.6272, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.5040290355682373, |
|
"rewards/margins": 0.27444028854370117, |
|
"rewards/rejected": -0.7784693241119385, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.6782495854660803, |
|
"grad_norm": 76.21062906880101, |
|
"learning_rate": 1.5902742269009194e-07, |
|
"logits/chosen": -1.348806381225586, |
|
"logits/rejected": -1.293540358543396, |
|
"logps/chosen": -135.5105438232422, |
|
"logps/rejected": -156.5147705078125, |
|
"loss": 0.5875, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.5231513977050781, |
|
"rewards/margins": 0.4782097041606903, |
|
"rewards/rejected": -1.0013611316680908, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.6805565568452167, |
|
"grad_norm": 75.50192821178838, |
|
"learning_rate": 1.5871621364769553e-07, |
|
"logits/chosen": -1.5168403387069702, |
|
"logits/rejected": -1.4424357414245605, |
|
"logps/chosen": -183.81605529785156, |
|
"logps/rejected": -171.45872497558594, |
|
"loss": 0.6035, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.7719120979309082, |
|
"rewards/margins": 0.2601196765899658, |
|
"rewards/rejected": -1.0320318937301636, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.6828635282243529, |
|
"grad_norm": 84.93892075040027, |
|
"learning_rate": 1.5840413428936766e-07, |
|
"logits/chosen": -1.3720101118087769, |
|
"logits/rejected": -1.391021490097046, |
|
"logps/chosen": -171.98031616210938, |
|
"logps/rejected": -176.23892211914062, |
|
"loss": 0.599, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.7516859769821167, |
|
"rewards/margins": 0.21854539215564728, |
|
"rewards/rejected": -0.9702314138412476, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.6851704996034893, |
|
"grad_norm": 66.70595859312724, |
|
"learning_rate": 1.5809118924087733e-07, |
|
"logits/chosen": -1.4547669887542725, |
|
"logits/rejected": -1.430787205696106, |
|
"logps/chosen": -177.32481384277344, |
|
"logps/rejected": -208.61553955078125, |
|
"loss": 0.6102, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5358410477638245, |
|
"rewards/margins": 0.26219645142555237, |
|
"rewards/rejected": -0.7980375289916992, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.6874774709826256, |
|
"grad_norm": 82.62176636567787, |
|
"learning_rate": 1.5777738314082511e-07, |
|
"logits/chosen": -1.4137248992919922, |
|
"logits/rejected": -1.404469609260559, |
|
"logps/chosen": -164.01600646972656, |
|
"logps/rejected": -184.97645568847656, |
|
"loss": 0.6472, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5918564200401306, |
|
"rewards/margins": 0.21411672234535217, |
|
"rewards/rejected": -0.8059731721878052, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.689784442361762, |
|
"grad_norm": 72.16505210857706, |
|
"learning_rate": 1.5746272064057439e-07, |
|
"logits/chosen": -1.3921738862991333, |
|
"logits/rejected": -1.3382896184921265, |
|
"logps/chosen": -199.48634338378906, |
|
"logps/rejected": -226.77871704101562, |
|
"loss": 0.5858, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.5180130004882812, |
|
"rewards/margins": 0.4014572501182556, |
|
"rewards/rejected": -0.9194702506065369, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.6920914137408983, |
|
"grad_norm": 78.66776375616931, |
|
"learning_rate": 1.5714720640418247e-07, |
|
"logits/chosen": -1.511127233505249, |
|
"logits/rejected": -1.5256671905517578, |
|
"logps/chosen": -182.10826110839844, |
|
"logps/rejected": -198.63510131835938, |
|
"loss": 0.618, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.6393432021141052, |
|
"rewards/margins": 0.16456884145736694, |
|
"rewards/rejected": -0.8039120435714722, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.6920914137408983, |
|
"eval_logits/chosen": -1.4086966514587402, |
|
"eval_logits/rejected": -1.3254387378692627, |
|
"eval_logps/chosen": -191.96621704101562, |
|
"eval_logps/rejected": -160.9102325439453, |
|
"eval_loss": 0.6056262850761414, |
|
"eval_rewards/accuracies": 0.7200000286102295, |
|
"eval_rewards/chosen": -0.6784057021141052, |
|
"eval_rewards/margins": 0.3610783815383911, |
|
"eval_rewards/rejected": -1.0394840240478516, |
|
"eval_runtime": 37.022, |
|
"eval_samples_per_second": 2.701, |
|
"eval_steps_per_second": 0.675, |
|
"step": 600 |
|
} |
|
], |
|
"logging_steps": 2, |
|
"max_steps": 1732, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 300, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|