|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 684, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 1.972968290218204, |
|
"learning_rate": 7.246376811594203e-09, |
|
"logits/chosen": -2.8746490478515625, |
|
"logits/rejected": -2.840811252593994, |
|
"logps/chosen": -227.73272705078125, |
|
"logps/rejected": -174.71890258789062, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/margins_max": 0.0, |
|
"rewards/margins_min": 0.0, |
|
"rewards/margins_std": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 1.8679339461073734, |
|
"learning_rate": 7.246376811594203e-08, |
|
"logits/chosen": -2.769803047180176, |
|
"logits/rejected": -2.7261266708374023, |
|
"logps/chosen": -244.6114959716797, |
|
"logps/rejected": -240.41116333007812, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.3611111044883728, |
|
"rewards/chosen": 0.00021203850337769836, |
|
"rewards/margins": 0.00021335652854759246, |
|
"rewards/margins_max": 0.0021964015904814005, |
|
"rewards/margins_min": -0.0016354921972379088, |
|
"rewards/margins_std": 0.0016972733428701758, |
|
"rewards/rejected": -1.318060753874306e-06, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.1095097251535906, |
|
"learning_rate": 1.4492753623188405e-07, |
|
"logits/chosen": -2.8978943824768066, |
|
"logits/rejected": -2.859205961227417, |
|
"logps/chosen": -317.23260498046875, |
|
"logps/rejected": -239.8469696044922, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.00018418056424707174, |
|
"rewards/margins": 9.037666313815862e-05, |
|
"rewards/margins_max": 0.003943216986954212, |
|
"rewards/margins_min": -0.003490231465548277, |
|
"rewards/margins_std": 0.0033192276023328304, |
|
"rewards/rejected": 9.380385745316744e-05, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 2.08467778375872, |
|
"learning_rate": 2.1739130434782607e-07, |
|
"logits/chosen": -2.811413288116455, |
|
"logits/rejected": -2.7905545234680176, |
|
"logps/chosen": -264.98712158203125, |
|
"logps/rejected": -223.62734985351562, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.00012606059317477047, |
|
"rewards/margins": -7.177007319114637e-06, |
|
"rewards/margins_max": 0.0028896895237267017, |
|
"rewards/margins_min": -0.0032867384143173695, |
|
"rewards/margins_std": 0.0027837478555738926, |
|
"rewards/rejected": 0.000133237597765401, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 1.940398970016681, |
|
"learning_rate": 2.898550724637681e-07, |
|
"logits/chosen": -2.8652420043945312, |
|
"logits/rejected": -2.83544659614563, |
|
"logps/chosen": -305.147216796875, |
|
"logps/rejected": -267.1144714355469, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.0004788968653883785, |
|
"rewards/margins": 0.0008589730714447796, |
|
"rewards/margins_max": 0.0041023739613592625, |
|
"rewards/margins_min": -0.002046389738097787, |
|
"rewards/margins_std": 0.0028562676161527634, |
|
"rewards/rejected": -0.0003800761769525707, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 1.6929010917265717, |
|
"learning_rate": 3.6231884057971015e-07, |
|
"logits/chosen": -2.8695337772369385, |
|
"logits/rejected": -2.837745189666748, |
|
"logps/chosen": -260.9767150878906, |
|
"logps/rejected": -235.2560577392578, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.0011892963666468859, |
|
"rewards/margins": 0.0010751936351880431, |
|
"rewards/margins_max": 0.004216945730149746, |
|
"rewards/margins_min": -0.002206298988312483, |
|
"rewards/margins_std": 0.0029160729609429836, |
|
"rewards/rejected": 0.00011410261504352093, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 1.6771532679917263, |
|
"learning_rate": 4.3478260869565214e-07, |
|
"logits/chosen": -2.8912997245788574, |
|
"logits/rejected": -2.8439412117004395, |
|
"logps/chosen": -281.3103942871094, |
|
"logps/rejected": -248.8180389404297, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.0023077281657606363, |
|
"rewards/margins": 0.0024695510510355234, |
|
"rewards/margins_max": 0.006759033538401127, |
|
"rewards/margins_min": -0.0015218419721350074, |
|
"rewards/margins_std": 0.00368516705930233, |
|
"rewards/rejected": -0.0001618233509361744, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 1.9158671065853772, |
|
"learning_rate": 4.999967381905813e-07, |
|
"logits/chosen": -2.7889466285705566, |
|
"logits/rejected": -2.7540149688720703, |
|
"logps/chosen": -290.43804931640625, |
|
"logps/rejected": -212.5797882080078, |
|
"loss": 0.691, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.003653948428109288, |
|
"rewards/margins": 0.004532678984105587, |
|
"rewards/margins_max": 0.0123423608019948, |
|
"rewards/margins_min": -0.001225766958668828, |
|
"rewards/margins_std": 0.0061333803460001945, |
|
"rewards/rejected": -0.000878730439580977, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 1.71294323830613, |
|
"learning_rate": 4.996054240392509e-07, |
|
"logits/chosen": -2.777132749557495, |
|
"logits/rejected": -2.7576847076416016, |
|
"logps/chosen": -265.20281982421875, |
|
"logps/rejected": -243.9671173095703, |
|
"loss": 0.6907, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.003907615318894386, |
|
"rewards/margins": 0.003999439999461174, |
|
"rewards/margins_max": 0.012583956122398376, |
|
"rewards/margins_min": -0.0027496658731251955, |
|
"rewards/margins_std": 0.00690504303202033, |
|
"rewards/rejected": -9.18240548344329e-05, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 2.4508716900973826, |
|
"learning_rate": 4.985629178361649e-07, |
|
"logits/chosen": -2.889789581298828, |
|
"logits/rejected": -2.8571457862854004, |
|
"logps/chosen": -273.2809753417969, |
|
"logps/rejected": -246.13528442382812, |
|
"loss": 0.6894, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.0063544102013111115, |
|
"rewards/margins": 0.007208968047052622, |
|
"rewards/margins_max": 0.018225526437163353, |
|
"rewards/margins_min": -0.0013279046397656202, |
|
"rewards/margins_std": 0.008873926475644112, |
|
"rewards/rejected": -0.0008545577293261886, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 1.8386412495416258, |
|
"learning_rate": 4.968719393609756e-07, |
|
"logits/chosen": -2.8424429893493652, |
|
"logits/rejected": -2.7764132022857666, |
|
"logps/chosen": -337.29986572265625, |
|
"logps/rejected": -243.8036651611328, |
|
"loss": 0.6876, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.010191970504820347, |
|
"rewards/margins": 0.012033768929541111, |
|
"rewards/margins_max": 0.029753312468528748, |
|
"rewards/margins_min": -0.0020332676358520985, |
|
"rewards/margins_std": 0.014395820908248425, |
|
"rewards/rejected": -0.0018417991232126951, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_logits/chosen": -2.8052334785461426, |
|
"eval_logits/rejected": -2.7664124965667725, |
|
"eval_logps/chosen": -284.0943603515625, |
|
"eval_logps/rejected": -258.4696960449219, |
|
"eval_loss": 0.691686749458313, |
|
"eval_rewards/accuracies": 0.6079999804496765, |
|
"eval_rewards/chosen": 0.004990490153431892, |
|
"eval_rewards/margins": 0.0038980983663350344, |
|
"eval_rewards/margins_max": 0.023511478677392006, |
|
"eval_rewards/margins_min": -0.012509307824075222, |
|
"eval_rewards/margins_std": 0.011862216517329216, |
|
"eval_rewards/rejected": 0.0010923919035121799, |
|
"eval_runtime": 444.4734, |
|
"eval_samples_per_second": 4.5, |
|
"eval_steps_per_second": 0.281, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 1.9556267546868065, |
|
"learning_rate": 4.945369001834514e-07, |
|
"logits/chosen": -2.8761301040649414, |
|
"logits/rejected": -2.8216347694396973, |
|
"logps/chosen": -287.6670837402344, |
|
"logps/rejected": -223.6521453857422, |
|
"loss": 0.6881, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.009855161421000957, |
|
"rewards/margins": 0.010875609703361988, |
|
"rewards/margins_max": 0.027963850647211075, |
|
"rewards/margins_min": -0.00028645730344578624, |
|
"rewards/margins_std": 0.012777927331626415, |
|
"rewards/rejected": -0.0010204474674537778, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 1.9579674466888082, |
|
"learning_rate": 4.915638921541951e-07, |
|
"logits/chosen": -2.8273448944091797, |
|
"logits/rejected": -2.8115439414978027, |
|
"logps/chosen": -257.2424011230469, |
|
"logps/rejected": -237.67904663085938, |
|
"loss": 0.6866, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.00945218000560999, |
|
"rewards/margins": 0.012427609413862228, |
|
"rewards/margins_max": 0.033436521887779236, |
|
"rewards/margins_min": -0.0018920926377177238, |
|
"rewards/margins_std": 0.015944166108965874, |
|
"rewards/rejected": -0.0029754305724054575, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 1.5966946942385376, |
|
"learning_rate": 4.879606715117018e-07, |
|
"logits/chosen": -2.878997325897217, |
|
"logits/rejected": -2.8253443241119385, |
|
"logps/chosen": -275.1449890136719, |
|
"logps/rejected": -235.9576873779297, |
|
"loss": 0.6859, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.00955723412334919, |
|
"rewards/margins": 0.012582411989569664, |
|
"rewards/margins_max": 0.03178320452570915, |
|
"rewards/margins_min": -0.004044829867780209, |
|
"rewards/margins_std": 0.0158962644636631, |
|
"rewards/rejected": -0.003025178564712405, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 1.612892528929156, |
|
"learning_rate": 4.837366386472174e-07, |
|
"logits/chosen": -2.8901007175445557, |
|
"logits/rejected": -2.827146053314209, |
|
"logps/chosen": -297.2471618652344, |
|
"logps/rejected": -254.73318481445312, |
|
"loss": 0.686, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.015631090849637985, |
|
"rewards/margins": 0.017191508784890175, |
|
"rewards/margins_max": 0.04147082567214966, |
|
"rewards/margins_min": -0.0013556934427469969, |
|
"rewards/margins_std": 0.019727854058146477, |
|
"rewards/rejected": -0.0015604153741151094, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 1.7836254409467616, |
|
"learning_rate": 4.789028135801918e-07, |
|
"logits/chosen": -2.847933769226074, |
|
"logits/rejected": -2.8365511894226074, |
|
"logps/chosen": -297.95208740234375, |
|
"logps/rejected": -272.38201904296875, |
|
"loss": 0.684, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.016203617677092552, |
|
"rewards/margins": 0.018767360597848892, |
|
"rewards/margins_max": 0.05341927334666252, |
|
"rewards/margins_min": -0.011627629399299622, |
|
"rewards/margins_std": 0.029866989701986313, |
|
"rewards/rejected": -0.0025637417566031218, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 8.25250622221843, |
|
"learning_rate": 4.7347180720830627e-07, |
|
"logits/chosen": -2.867987632751465, |
|
"logits/rejected": -2.7911014556884766, |
|
"logps/chosen": -300.35626220703125, |
|
"logps/rejected": -261.74908447265625, |
|
"loss": 0.6847, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.016810361295938492, |
|
"rewards/margins": 0.01963501051068306, |
|
"rewards/margins_max": 0.05201994627714157, |
|
"rewards/margins_min": -0.0029959846287965775, |
|
"rewards/margins_std": 0.024577533826231956, |
|
"rewards/rejected": -0.002824649680405855, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 1.8565831100637469, |
|
"learning_rate": 4.6745778840708107e-07, |
|
"logits/chosen": -2.7958624362945557, |
|
"logits/rejected": -2.783989429473877, |
|
"logps/chosen": -295.93438720703125, |
|
"logps/rejected": -227.43661499023438, |
|
"loss": 0.6779, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.024501120671629906, |
|
"rewards/margins": 0.02991095557808876, |
|
"rewards/margins_max": 0.06729420274496078, |
|
"rewards/margins_min": -0.003415555926039815, |
|
"rewards/margins_std": 0.03072194755077362, |
|
"rewards/rejected": -0.005409830249845982, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 1.8914594463949175, |
|
"learning_rate": 4.6087644706489703e-07, |
|
"logits/chosen": -2.8270645141601562, |
|
"logits/rejected": -2.787436008453369, |
|
"logps/chosen": -294.5668640136719, |
|
"logps/rejected": -255.82962036132812, |
|
"loss": 0.6816, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.01939496397972107, |
|
"rewards/margins": 0.02427856996655464, |
|
"rewards/margins_max": 0.059351809322834015, |
|
"rewards/margins_min": -0.008079716935753822, |
|
"rewards/margins_std": 0.03051997348666191, |
|
"rewards/rejected": -0.004883607849478722, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 1.724680638811993, |
|
"learning_rate": 4.537449531498687e-07, |
|
"logits/chosen": -2.767610788345337, |
|
"logits/rejected": -2.719372272491455, |
|
"logps/chosen": -281.36297607421875, |
|
"logps/rejected": -229.79306030273438, |
|
"loss": 0.6809, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.02440481074154377, |
|
"rewards/margins": 0.026266787201166153, |
|
"rewards/margins_max": 0.06888638436794281, |
|
"rewards/margins_min": -0.009829925373196602, |
|
"rewards/margins_std": 0.034937743097543716, |
|
"rewards/rejected": -0.0018619761103764176, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 1.587562818110368, |
|
"learning_rate": 4.4608191191535736e-07, |
|
"logits/chosen": -2.869741201400757, |
|
"logits/rejected": -2.8258328437805176, |
|
"logps/chosen": -275.5715637207031, |
|
"logps/rejected": -244.32763671875, |
|
"loss": 0.6792, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.02022332139313221, |
|
"rewards/margins": 0.026959722861647606, |
|
"rewards/margins_max": 0.07024272531270981, |
|
"rewards/margins_min": -0.013307643122971058, |
|
"rewards/margins_std": 0.037044934928417206, |
|
"rewards/rejected": -0.006736403796821833, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_logits/chosen": -2.7954251766204834, |
|
"eval_logits/rejected": -2.7569642066955566, |
|
"eval_logps/chosen": -283.2934875488281, |
|
"eval_logps/rejected": -258.5861511230469, |
|
"eval_loss": 0.6882554888725281, |
|
"eval_rewards/accuracies": 0.6370000243186951, |
|
"eval_rewards/chosen": 0.012999121099710464, |
|
"eval_rewards/margins": 0.013071166351437569, |
|
"eval_rewards/margins_max": 0.07187327742576599, |
|
"eval_rewards/margins_min": -0.03631452098488808, |
|
"eval_rewards/margins_std": 0.035920411348342896, |
|
"eval_rewards/rejected": -7.204585563158616e-05, |
|
"eval_runtime": 453.7395, |
|
"eval_samples_per_second": 4.408, |
|
"eval_steps_per_second": 0.275, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 1.7443006213577599, |
|
"learning_rate": 4.379073153609896e-07, |
|
"logits/chosen": -2.8446342945098877, |
|
"logits/rejected": -2.8066139221191406, |
|
"logps/chosen": -298.4417724609375, |
|
"logps/rejected": -260.84881591796875, |
|
"loss": 0.6787, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.028581559658050537, |
|
"rewards/margins": 0.03288479894399643, |
|
"rewards/margins_max": 0.08662258088588715, |
|
"rewards/margins_min": -0.013487100601196289, |
|
"rewards/margins_std": 0.04499204084277153, |
|
"rewards/rejected": -0.0043032425455749035, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 1.7530104764635095, |
|
"learning_rate": 4.292424900758128e-07, |
|
"logits/chosen": -2.8034873008728027, |
|
"logits/rejected": -2.775111675262451, |
|
"logps/chosen": -279.15478515625, |
|
"logps/rejected": -223.8057403564453, |
|
"loss": 0.6778, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.028650784865021706, |
|
"rewards/margins": 0.03896017372608185, |
|
"rewards/margins_max": 0.08817549049854279, |
|
"rewards/margins_min": -0.0010216787923127413, |
|
"rewards/margins_std": 0.04009110480546951, |
|
"rewards/rejected": -0.010309383273124695, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 1.6934833415236739, |
|
"learning_rate": 4.201100415996597e-07, |
|
"logits/chosen": -2.7763991355895996, |
|
"logits/rejected": -2.7445781230926514, |
|
"logps/chosen": -263.20074462890625, |
|
"logps/rejected": -261.6719970703125, |
|
"loss": 0.6785, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.030025389045476913, |
|
"rewards/margins": 0.031550828367471695, |
|
"rewards/margins_max": 0.09253410995006561, |
|
"rewards/margins_min": -0.01012241281569004, |
|
"rewards/margins_std": 0.04607797786593437, |
|
"rewards/rejected": -0.001525437692180276, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 1.9254815601584274, |
|
"learning_rate": 4.1053379544787557e-07, |
|
"logits/chosen": -2.8438620567321777, |
|
"logits/rejected": -2.8013641834259033, |
|
"logps/chosen": -286.17901611328125, |
|
"logps/rejected": -249.131591796875, |
|
"loss": 0.6777, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.03425859659910202, |
|
"rewards/margins": 0.04053955897688866, |
|
"rewards/margins_max": 0.09026241302490234, |
|
"rewards/margins_min": -0.004998114425688982, |
|
"rewards/margins_std": 0.043195050209760666, |
|
"rewards/rejected": -0.006280961446464062, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 1.773759451391351, |
|
"learning_rate": 4.0053873495326964e-07, |
|
"logits/chosen": -2.802912950515747, |
|
"logits/rejected": -2.7872378826141357, |
|
"logps/chosen": -243.8607177734375, |
|
"logps/rejected": -229.35678100585938, |
|
"loss": 0.674, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.026193741708993912, |
|
"rewards/margins": 0.03452175855636597, |
|
"rewards/margins_max": 0.08425874263048172, |
|
"rewards/margins_min": -0.007185367401689291, |
|
"rewards/margins_std": 0.04130570963025093, |
|
"rewards/rejected": -0.00832801777869463, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 1.89562327580133, |
|
"learning_rate": 3.9015093608745143e-07, |
|
"logits/chosen": -2.8584208488464355, |
|
"logits/rejected": -2.7942659854888916, |
|
"logps/chosen": -283.54974365234375, |
|
"logps/rejected": -242.9059295654297, |
|
"loss": 0.6717, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.03558926284313202, |
|
"rewards/margins": 0.0444478802382946, |
|
"rewards/margins_max": 0.11205202341079712, |
|
"rewards/margins_min": -0.004177084192633629, |
|
"rewards/margins_std": 0.05269969254732132, |
|
"rewards/rejected": -0.008858618326485157, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 2.30599597059098, |
|
"learning_rate": 3.79397499431599e-07, |
|
"logits/chosen": -2.8217787742614746, |
|
"logits/rejected": -2.7673909664154053, |
|
"logps/chosen": -291.3817138671875, |
|
"logps/rejected": -263.5201721191406, |
|
"loss": 0.672, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.04040498286485672, |
|
"rewards/margins": 0.04550846666097641, |
|
"rewards/margins_max": 0.10735081136226654, |
|
"rewards/margins_min": -0.009063487872481346, |
|
"rewards/margins_std": 0.051175691187381744, |
|
"rewards/rejected": -0.00510348379611969, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1.6888761913438506, |
|
"learning_rate": 3.6830647947413694e-07, |
|
"logits/chosen": -2.88932728767395, |
|
"logits/rejected": -2.8418757915496826, |
|
"logps/chosen": -274.30218505859375, |
|
"logps/rejected": -248.6231231689453, |
|
"loss": 0.671, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.03000582382082939, |
|
"rewards/margins": 0.04024919122457504, |
|
"rewards/margins_max": 0.10416732728481293, |
|
"rewards/margins_min": -0.011732708662748337, |
|
"rewards/margins_std": 0.05244187265634537, |
|
"rewards/rejected": -0.0102433692663908, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 1.6649865105801362, |
|
"learning_rate": 3.5690681141977837e-07, |
|
"logits/chosen": -2.822051525115967, |
|
"logits/rejected": -2.7766671180725098, |
|
"logps/chosen": -253.5765380859375, |
|
"logps/rejected": -206.1942901611328, |
|
"loss": 0.6729, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.031104344874620438, |
|
"rewards/margins": 0.04246490076184273, |
|
"rewards/margins_max": 0.12114210426807404, |
|
"rewards/margins_min": -0.018173199146986008, |
|
"rewards/margins_std": 0.06279204040765762, |
|
"rewards/rejected": -0.011360556818544865, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 2.1922548116517397, |
|
"learning_rate": 3.4522823570088067e-07, |
|
"logits/chosen": -2.821171998977661, |
|
"logits/rejected": -2.808584451675415, |
|
"logps/chosen": -259.92913818359375, |
|
"logps/rejected": -254.540283203125, |
|
"loss": 0.6697, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.03274675831198692, |
|
"rewards/margins": 0.047333501279354095, |
|
"rewards/margins_max": 0.13525982201099396, |
|
"rewards/margins_min": -0.01479897927492857, |
|
"rewards/margins_std": 0.06737245619297028, |
|
"rewards/rejected": -0.01458674855530262, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_logits/chosen": -2.786345958709717, |
|
"eval_logits/rejected": -2.7488794326782227, |
|
"eval_logps/chosen": -282.5645751953125, |
|
"eval_logps/rejected": -258.747314453125, |
|
"eval_loss": 0.6848979592323303, |
|
"eval_rewards/accuracies": 0.6169999837875366, |
|
"eval_rewards/chosen": 0.020288635045289993, |
|
"eval_rewards/margins": 0.02197239361703396, |
|
"eval_rewards/margins_max": 0.1184224858880043, |
|
"eval_rewards/margins_min": -0.059722770005464554, |
|
"eval_rewards/margins_std": 0.05925743281841278, |
|
"eval_rewards/rejected": -0.0016837569419294596, |
|
"eval_runtime": 440.5364, |
|
"eval_samples_per_second": 4.54, |
|
"eval_steps_per_second": 0.284, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 1.884851957725623, |
|
"learning_rate": 3.3330122038805277e-07, |
|
"logits/chosen": -2.8432490825653076, |
|
"logits/rejected": -2.795261859893799, |
|
"logps/chosen": -272.4532470703125, |
|
"logps/rejected": -227.98348999023438, |
|
"loss": 0.6672, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.038232967257499695, |
|
"rewards/margins": 0.0511365607380867, |
|
"rewards/margins_max": 0.1235017329454422, |
|
"rewards/margins_min": -0.007081184536218643, |
|
"rewards/margins_std": 0.05993686243891716, |
|
"rewards/rejected": -0.012903591617941856, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.732519789963906, |
|
"learning_rate": 3.2115688170243734e-07, |
|
"logits/chosen": -2.831395387649536, |
|
"logits/rejected": -2.8028111457824707, |
|
"logps/chosen": -270.9764709472656, |
|
"logps/rejected": -252.1440887451172, |
|
"loss": 0.6681, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.037659503519535065, |
|
"rewards/margins": 0.05491337180137634, |
|
"rewards/margins_max": 0.132755845785141, |
|
"rewards/margins_min": -0.010344445705413818, |
|
"rewards/margins_std": 0.06564854830503464, |
|
"rewards/rejected": -0.01725386641919613, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.5242926911564552, |
|
"learning_rate": 3.088269028370435e-07, |
|
"logits/chosen": -2.8538591861724854, |
|
"logits/rejected": -2.8225364685058594, |
|
"logps/chosen": -295.54388427734375, |
|
"logps/rejected": -250.438232421875, |
|
"loss": 0.663, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.04337853938341141, |
|
"rewards/margins": 0.06583191454410553, |
|
"rewards/margins_max": 0.15647490322589874, |
|
"rewards/margins_min": -0.0003548143431544304, |
|
"rewards/margins_std": 0.07130275666713715, |
|
"rewards/rejected": -0.022453375160694122, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.6819074997159351, |
|
"learning_rate": 2.9634345129891294e-07, |
|
"logits/chosen": -2.80195689201355, |
|
"logits/rejected": -2.765192985534668, |
|
"logps/chosen": -288.1705627441406, |
|
"logps/rejected": -258.40667724609375, |
|
"loss": 0.6647, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.04105386883020401, |
|
"rewards/margins": 0.06338892132043839, |
|
"rewards/margins_max": 0.1566300094127655, |
|
"rewards/margins_min": -0.008608223870396614, |
|
"rewards/margins_std": 0.0741976946592331, |
|
"rewards/rejected": -0.022335056215524673, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.9298838156481428, |
|
"learning_rate": 2.8373909498776744e-07, |
|
"logits/chosen": -2.8505892753601074, |
|
"logits/rejected": -2.8334896564483643, |
|
"logps/chosen": -280.2142028808594, |
|
"logps/rejected": -279.97857666015625, |
|
"loss": 0.6686, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.03649730607867241, |
|
"rewards/margins": 0.05252464860677719, |
|
"rewards/margins_max": 0.12718316912651062, |
|
"rewards/margins_min": -0.00450880965217948, |
|
"rewards/margins_std": 0.061397988349199295, |
|
"rewards/rejected": -0.016027342528104782, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 1.9549755637356359, |
|
"learning_rate": 2.710467172300768e-07, |
|
"logits/chosen": -2.756687879562378, |
|
"logits/rejected": -2.71925687789917, |
|
"logps/chosen": -318.5474548339844, |
|
"logps/rejected": -252.8844757080078, |
|
"loss": 0.6597, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.05029547959566116, |
|
"rewards/margins": 0.06931338459253311, |
|
"rewards/margins_max": 0.15269415080547333, |
|
"rewards/margins_min": 0.0053079272620379925, |
|
"rewards/margins_std": 0.06585155427455902, |
|
"rewards/rejected": -0.01901790127158165, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 2.265857763501263, |
|
"learning_rate": 2.582994309902146e-07, |
|
"logits/chosen": -2.7980430126190186, |
|
"logits/rejected": -2.7501606941223145, |
|
"logps/chosen": -305.3988342285156, |
|
"logps/rejected": -258.4122314453125, |
|
"loss": 0.6649, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.04739305004477501, |
|
"rewards/margins": 0.06349317729473114, |
|
"rewards/margins_max": 0.17864832282066345, |
|
"rewards/margins_min": -0.03638879954814911, |
|
"rewards/margins_std": 0.09656090289354324, |
|
"rewards/rejected": -0.01610013097524643, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 2.2730703155677743, |
|
"learning_rate": 2.455304924825151e-07, |
|
"logits/chosen": -2.813772678375244, |
|
"logits/rejected": -2.8081743717193604, |
|
"logps/chosen": -276.4326477050781, |
|
"logps/rejected": -260.1085510253906, |
|
"loss": 0.6546, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.044434770941734314, |
|
"rewards/margins": 0.07333989441394806, |
|
"rewards/margins_max": 0.16991741955280304, |
|
"rewards/margins_min": 0.007076957728713751, |
|
"rewards/margins_std": 0.07423131167888641, |
|
"rewards/rejected": -0.028905129060149193, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 1.8322064736841432, |
|
"learning_rate": 2.3277321440960732e-07, |
|
"logits/chosen": -2.811725616455078, |
|
"logits/rejected": -2.7685177326202393, |
|
"logps/chosen": -268.05914306640625, |
|
"logps/rejected": -266.083740234375, |
|
"loss": 0.6614, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.04107924923300743, |
|
"rewards/margins": 0.06845887005329132, |
|
"rewards/margins_max": 0.18714329600334167, |
|
"rewards/margins_min": -0.010029973462224007, |
|
"rewards/margins_std": 0.08763924241065979, |
|
"rewards/rejected": -0.02737962268292904, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 2.128975347395003, |
|
"learning_rate": 2.2006087905337698e-07, |
|
"logits/chosen": -2.8421759605407715, |
|
"logits/rejected": -2.816889762878418, |
|
"logps/chosen": -239.36984252929688, |
|
"logps/rejected": -236.58364868164062, |
|
"loss": 0.6571, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.04006009176373482, |
|
"rewards/margins": 0.07172206044197083, |
|
"rewards/margins_max": 0.17035157978534698, |
|
"rewards/margins_min": -0.0009761411929503083, |
|
"rewards/margins_std": 0.07894248515367508, |
|
"rewards/rejected": -0.03166196495294571, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_logits/chosen": -2.7727160453796387, |
|
"eval_logits/rejected": -2.735231399536133, |
|
"eval_logps/chosen": -282.5451965332031, |
|
"eval_logps/rejected": -259.56536865234375, |
|
"eval_loss": 0.6818826198577881, |
|
"eval_rewards/accuracies": 0.6330000162124634, |
|
"eval_rewards/chosen": 0.02048237808048725, |
|
"eval_rewards/margins": 0.03034677356481552, |
|
"eval_rewards/margins_max": 0.15978190302848816, |
|
"eval_rewards/margins_min": -0.08066722005605698, |
|
"eval_rewards/margins_std": 0.08006121963262558, |
|
"eval_rewards/rejected": -0.00986439362168312, |
|
"eval_runtime": 428.8143, |
|
"eval_samples_per_second": 4.664, |
|
"eval_steps_per_second": 0.292, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.464979648644022, |
|
"learning_rate": 2.0742665144529372e-07, |
|
"logits/chosen": -2.8245086669921875, |
|
"logits/rejected": -2.7860279083251953, |
|
"logps/chosen": -308.59234619140625, |
|
"logps/rejected": -286.37725830078125, |
|
"loss": 0.6545, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.05577210336923599, |
|
"rewards/margins": 0.08082688599824905, |
|
"rewards/margins_max": 0.21429844200611115, |
|
"rewards/margins_min": -0.014085543341934681, |
|
"rewards/margins_std": 0.10368019342422485, |
|
"rewards/rejected": -0.02505478635430336, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 2.2565152886733024, |
|
"learning_rate": 1.9490349284263033e-07, |
|
"logits/chosen": -2.7606394290924072, |
|
"logits/rejected": -2.7328662872314453, |
|
"logps/chosen": -257.64727783203125, |
|
"logps/rejected": -247.62216186523438, |
|
"loss": 0.6651, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.034510333091020584, |
|
"rewards/margins": 0.05616886541247368, |
|
"rewards/margins_max": 0.16494083404541016, |
|
"rewards/margins_min": -0.04768489673733711, |
|
"rewards/margins_std": 0.09831468015909195, |
|
"rewards/rejected": -0.021658534184098244, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 2.3425120835618545, |
|
"learning_rate": 1.8252407473630605e-07, |
|
"logits/chosen": -2.8338799476623535, |
|
"logits/rejected": -2.780625820159912, |
|
"logps/chosen": -296.00457763671875, |
|
"logps/rejected": -234.919189453125, |
|
"loss": 0.6593, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.05103176832199097, |
|
"rewards/margins": 0.07940290123224258, |
|
"rewards/margins_max": 0.18009448051452637, |
|
"rewards/margins_min": -0.008073708042502403, |
|
"rewards/margins_std": 0.08305726200342178, |
|
"rewards/rejected": -0.028371136635541916, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.9987304949927014, |
|
"learning_rate": 1.7032069361469764e-07, |
|
"logits/chosen": -2.7400341033935547, |
|
"logits/rejected": -2.7252862453460693, |
|
"logps/chosen": -228.4139404296875, |
|
"logps/rejected": -274.11822509765625, |
|
"loss": 0.6614, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.04276793450117111, |
|
"rewards/margins": 0.0704236626625061, |
|
"rewards/margins_max": 0.18232765793800354, |
|
"rewards/margins_min": -0.0089184595271945, |
|
"rewards/margins_std": 0.08663706481456757, |
|
"rewards/rejected": -0.02765573561191559, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.5785697838984911, |
|
"learning_rate": 1.58325186705788e-07, |
|
"logits/chosen": -2.8510193824768066, |
|
"logits/rejected": -2.829686403274536, |
|
"logps/chosen": -269.9481506347656, |
|
"logps/rejected": -256.58563232421875, |
|
"loss": 0.6643, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.028526362031698227, |
|
"rewards/margins": 0.06000509113073349, |
|
"rewards/margins_max": 0.16429737210273743, |
|
"rewards/margins_min": -0.04647505283355713, |
|
"rewards/margins_std": 0.09461401402950287, |
|
"rewards/rejected": -0.03147872909903526, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 1.737884201823339, |
|
"learning_rate": 1.4656884891747395e-07, |
|
"logits/chosen": -2.7824554443359375, |
|
"logits/rejected": -2.7381398677825928, |
|
"logps/chosen": -283.9850158691406, |
|
"logps/rejected": -239.4892120361328, |
|
"loss": 0.6607, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.037972342222929, |
|
"rewards/margins": 0.07290490716695786, |
|
"rewards/margins_max": 0.1753791719675064, |
|
"rewards/margins_min": -0.014705635607242584, |
|
"rewards/margins_std": 0.0863322764635086, |
|
"rewards/rejected": -0.03493257611989975, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 1.8923794511223242, |
|
"learning_rate": 1.3508235119272466e-07, |
|
"logits/chosen": -2.8365092277526855, |
|
"logits/rejected": -2.7932674884796143, |
|
"logps/chosen": -324.05743408203125, |
|
"logps/rejected": -294.9861755371094, |
|
"loss": 0.6479, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.05230081081390381, |
|
"rewards/margins": 0.09341531991958618, |
|
"rewards/margins_max": 0.25887981057167053, |
|
"rewards/margins_min": -0.017766449600458145, |
|
"rewards/margins_std": 0.12934701144695282, |
|
"rewards/rejected": -0.04111451655626297, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 1.5554031948375129, |
|
"learning_rate": 1.2389566049259336e-07, |
|
"logits/chosen": -2.7813267707824707, |
|
"logits/rejected": -2.7599194049835205, |
|
"logps/chosen": -239.9017333984375, |
|
"logps/rejected": -236.3795928955078, |
|
"loss": 0.6554, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.04591267183423042, |
|
"rewards/margins": 0.08855243027210236, |
|
"rewards/margins_max": 0.20911240577697754, |
|
"rewards/margins_min": -0.007775710429996252, |
|
"rewards/margins_std": 0.09931908547878265, |
|
"rewards/rejected": -0.042639754712581635, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 2.494715637025419, |
|
"learning_rate": 1.1303796161583762e-07, |
|
"logits/chosen": -2.889634847640991, |
|
"logits/rejected": -2.8090715408325195, |
|
"logps/chosen": -317.6177062988281, |
|
"logps/rejected": -262.18414306640625, |
|
"loss": 0.6536, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.06907807290554047, |
|
"rewards/margins": 0.10660415887832642, |
|
"rewards/margins_max": 0.251437246799469, |
|
"rewards/margins_min": -0.01369224488735199, |
|
"rewards/margins_std": 0.11950767040252686, |
|
"rewards/rejected": -0.03752607852220535, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 1.9249901065324473, |
|
"learning_rate": 1.0253758105911167e-07, |
|
"logits/chosen": -2.8541712760925293, |
|
"logits/rejected": -2.8100945949554443, |
|
"logps/chosen": -331.4628601074219, |
|
"logps/rejected": -291.27984619140625, |
|
"loss": 0.6508, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.07663814723491669, |
|
"rewards/margins": 0.11299224942922592, |
|
"rewards/margins_max": 0.2441258728504181, |
|
"rewards/margins_min": 0.010978538542985916, |
|
"rewards/margins_std": 0.10471439361572266, |
|
"rewards/rejected": -0.036354102194309235, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_logits/chosen": -2.768547296524048, |
|
"eval_logits/rejected": -2.7314090728759766, |
|
"eval_logps/chosen": -283.3109436035156, |
|
"eval_logps/rejected": -260.8111267089844, |
|
"eval_loss": 0.6802051067352295, |
|
"eval_rewards/accuracies": 0.6269999742507935, |
|
"eval_rewards/chosen": 0.012824743054807186, |
|
"eval_rewards/margins": 0.035146910697221756, |
|
"eval_rewards/margins_max": 0.1843804121017456, |
|
"eval_rewards/margins_min": -0.09391897916793823, |
|
"eval_rewards/margins_std": 0.09255214780569077, |
|
"eval_rewards/rejected": -0.022322168573737144, |
|
"eval_runtime": 449.4803, |
|
"eval_samples_per_second": 4.45, |
|
"eval_steps_per_second": 0.278, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 2.267072438870165, |
|
"learning_rate": 9.242191311637049e-08, |
|
"logits/chosen": -2.799065113067627, |
|
"logits/rejected": -2.7682583332061768, |
|
"logps/chosen": -279.16363525390625, |
|
"logps/rejected": -241.4091796875, |
|
"loss": 0.6585, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.043598730117082596, |
|
"rewards/margins": 0.08321089297533035, |
|
"rewards/margins_max": 0.2153284102678299, |
|
"rewards/margins_min": -0.01286692637950182, |
|
"rewards/margins_std": 0.10618630796670914, |
|
"rewards/rejected": -0.03961215913295746, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 1.9414414953336778, |
|
"learning_rate": 8.271734841028552e-08, |
|
"logits/chosen": -2.8196640014648438, |
|
"logits/rejected": -2.747915029525757, |
|
"logps/chosen": -337.2265625, |
|
"logps/rejected": -270.56146240234375, |
|
"loss": 0.6494, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.054617054760456085, |
|
"rewards/margins": 0.09917386621236801, |
|
"rewards/margins_max": 0.24081799387931824, |
|
"rewards/margins_min": -0.0010756913106888533, |
|
"rewards/margins_std": 0.11212246119976044, |
|
"rewards/rejected": -0.04455682262778282, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 1.952363376892767, |
|
"learning_rate": 7.344920504212243e-08, |
|
"logits/chosen": -2.8049850463867188, |
|
"logits/rejected": -2.7458760738372803, |
|
"logps/chosen": -263.7733154296875, |
|
"logps/rejected": -222.8297882080078, |
|
"loss": 0.6563, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.02564847469329834, |
|
"rewards/margins": 0.06961268186569214, |
|
"rewards/margins_max": 0.18673083186149597, |
|
"rewards/margins_min": -0.030092215165495872, |
|
"rewards/margins_std": 0.09981914609670639, |
|
"rewards/rejected": -0.0439642071723938, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 1.7758645410300213, |
|
"learning_rate": 6.46416625397067e-08, |
|
"logits/chosen": -2.801713466644287, |
|
"logits/rejected": -2.745816230773926, |
|
"logps/chosen": -321.74554443359375, |
|
"logps/rejected": -301.52325439453125, |
|
"loss": 0.6518, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.055145345628261566, |
|
"rewards/margins": 0.0942390188574791, |
|
"rewards/margins_max": 0.2147323191165924, |
|
"rewards/margins_min": -0.01778433658182621, |
|
"rewards/margins_std": 0.10189126431941986, |
|
"rewards/rejected": -0.03909367322921753, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 1.6203543941779939, |
|
"learning_rate": 5.6317698775795344e-08, |
|
"logits/chosen": -2.8398869037628174, |
|
"logits/rejected": -2.8026833534240723, |
|
"logps/chosen": -280.53826904296875, |
|
"logps/rejected": -275.3709411621094, |
|
"loss": 0.6538, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.03587827831506729, |
|
"rewards/margins": 0.06856991350650787, |
|
"rewards/margins_max": 0.1956954300403595, |
|
"rewards/margins_min": -0.03170696645975113, |
|
"rewards/margins_std": 0.10253816843032837, |
|
"rewards/rejected": -0.03269163519144058, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 1.8985615756457714, |
|
"learning_rate": 4.849903002143113e-08, |
|
"logits/chosen": -2.875913143157959, |
|
"logits/rejected": -2.8281137943267822, |
|
"logps/chosen": -329.9501037597656, |
|
"logps/rejected": -278.819580078125, |
|
"loss": 0.6436, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.06364526599645615, |
|
"rewards/margins": 0.11146412044763565, |
|
"rewards/margins_max": 0.2525936961174011, |
|
"rewards/margins_min": -0.006486054509878159, |
|
"rewards/margins_std": 0.11787240207195282, |
|
"rewards/rejected": -0.047818850725889206, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 1.7787852048841497, |
|
"learning_rate": 4.1206054290670535e-08, |
|
"logits/chosen": -2.827500104904175, |
|
"logits/rejected": -2.7943129539489746, |
|
"logps/chosen": -267.7633056640625, |
|
"logps/rejected": -264.48822021484375, |
|
"loss": 0.6537, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.05329934507608414, |
|
"rewards/margins": 0.09477965533733368, |
|
"rewards/margins_max": 0.20895680785179138, |
|
"rewards/margins_min": 0.0013521288055926561, |
|
"rewards/margins_std": 0.0940418690443039, |
|
"rewards/rejected": -0.041480325162410736, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 2.0202098422067625, |
|
"learning_rate": 3.44577981244944e-08, |
|
"logits/chosen": -2.7897348403930664, |
|
"logits/rejected": -2.777766704559326, |
|
"logps/chosen": -260.3923645019531, |
|
"logps/rejected": -262.2550964355469, |
|
"loss": 0.6591, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.034461263567209244, |
|
"rewards/margins": 0.07005412131547928, |
|
"rewards/margins_max": 0.2330094575881958, |
|
"rewards/margins_min": -0.05284310132265091, |
|
"rewards/margins_std": 0.1283414661884308, |
|
"rewards/rejected": -0.03559286147356033, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 1.9903127787741515, |
|
"learning_rate": 2.8271866952734814e-08, |
|
"logits/chosen": -2.8326079845428467, |
|
"logits/rejected": -2.8090529441833496, |
|
"logps/chosen": -304.954345703125, |
|
"logps/rejected": -290.8445129394531, |
|
"loss": 0.6574, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.037466924637556076, |
|
"rewards/margins": 0.07924413681030273, |
|
"rewards/margins_max": 0.22350183129310608, |
|
"rewards/margins_min": -0.030017787590622902, |
|
"rewards/margins_std": 0.11654887348413467, |
|
"rewards/rejected": -0.04177721589803696, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 2.150802586702874, |
|
"learning_rate": 2.2664399163518782e-08, |
|
"logits/chosen": -2.7674005031585693, |
|
"logits/rejected": -2.7506704330444336, |
|
"logps/chosen": -290.6200256347656, |
|
"logps/rejected": -253.0946044921875, |
|
"loss": 0.6444, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.06507185846567154, |
|
"rewards/margins": 0.10532490164041519, |
|
"rewards/margins_max": 0.24639275670051575, |
|
"rewards/margins_min": -0.00248835701495409, |
|
"rewards/margins_std": 0.11410228908061981, |
|
"rewards/rejected": -0.040253035724163055, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_logits/chosen": -2.7678329944610596, |
|
"eval_logits/rejected": -2.7307965755462646, |
|
"eval_logps/chosen": -283.465576171875, |
|
"eval_logps/rejected": -261.1460266113281, |
|
"eval_loss": 0.6795856356620789, |
|
"eval_rewards/accuracies": 0.6230000257492065, |
|
"eval_rewards/chosen": 0.011278249323368073, |
|
"eval_rewards/margins": 0.036949291825294495, |
|
"eval_rewards/margins_max": 0.1936866044998169, |
|
"eval_rewards/margins_min": -0.09831613302230835, |
|
"eval_rewards/margins_std": 0.0972999855875969, |
|
"eval_rewards/rejected": -0.025671038776636124, |
|
"eval_runtime": 428.6504, |
|
"eval_samples_per_second": 4.666, |
|
"eval_steps_per_second": 0.292, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 1.4413450481375123, |
|
"learning_rate": 1.7650024000056414e-08, |
|
"logits/chosen": -2.750030517578125, |
|
"logits/rejected": -2.7483627796173096, |
|
"logps/chosen": -243.78244018554688, |
|
"logps/rejected": -249.83358764648438, |
|
"loss": 0.6525, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.04149278253316879, |
|
"rewards/margins": 0.08110538125038147, |
|
"rewards/margins_max": 0.20331616699695587, |
|
"rewards/margins_min": -0.0176301971077919, |
|
"rewards/margins_std": 0.09766165912151337, |
|
"rewards/rejected": -0.03961259126663208, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 1.9882715371125113, |
|
"learning_rate": 1.3241823394615437e-08, |
|
"logits/chosen": -2.872929811477661, |
|
"logits/rejected": -2.835972309112549, |
|
"logps/chosen": -306.00555419921875, |
|
"logps/rejected": -293.34014892578125, |
|
"loss": 0.6513, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.05589998513460159, |
|
"rewards/margins": 0.10602303594350815, |
|
"rewards/margins_max": 0.22401413321495056, |
|
"rewards/margins_min": -0.0072759948670864105, |
|
"rewards/margins_std": 0.10588717460632324, |
|
"rewards/rejected": -0.05012305825948715, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 2.1198299570085473, |
|
"learning_rate": 9.451297839253913e-09, |
|
"logits/chosen": -2.749361276626587, |
|
"logits/rejected": -2.7258260250091553, |
|
"logps/chosen": -277.8694763183594, |
|
"logps/rejected": -285.40814208984375, |
|
"loss": 0.6532, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.05345848202705383, |
|
"rewards/margins": 0.10288135707378387, |
|
"rewards/margins_max": 0.24511775374412537, |
|
"rewards/margins_min": -0.012477993965148926, |
|
"rewards/margins_std": 0.11347142606973648, |
|
"rewards/rejected": -0.04942287132143974, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 1.5752496491228836, |
|
"learning_rate": 6.288336382349463e-09, |
|
"logits/chosen": -2.767456293106079, |
|
"logits/rejected": -2.73591685295105, |
|
"logps/chosen": -247.0557098388672, |
|
"logps/rejected": -214.10733032226562, |
|
"loss": 0.6441, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.04774732142686844, |
|
"rewards/margins": 0.09429731965065002, |
|
"rewards/margins_max": 0.22121095657348633, |
|
"rewards/margins_min": 0.002260456560179591, |
|
"rewards/margins_std": 0.10257305949926376, |
|
"rewards/rejected": -0.046550001949071884, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 2.9091264169866813, |
|
"learning_rate": 3.7611908292010665e-09, |
|
"logits/chosen": -2.8238768577575684, |
|
"logits/rejected": -2.7892489433288574, |
|
"logps/chosen": -310.2026672363281, |
|
"logps/rejected": -265.02740478515625, |
|
"loss": 0.6485, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.030774693936109543, |
|
"rewards/margins": 0.08213461190462112, |
|
"rewards/margins_max": 0.2235296070575714, |
|
"rewards/margins_min": -0.03506668284535408, |
|
"rewards/margins_std": 0.11517021805047989, |
|
"rewards/rejected": -0.05135990306735039, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 2.099534541726007, |
|
"learning_rate": 1.8764542140112527e-09, |
|
"logits/chosen": -2.767094135284424, |
|
"logits/rejected": -2.772709369659424, |
|
"logps/chosen": -222.73178100585938, |
|
"logps/rejected": -231.007080078125, |
|
"loss": 0.6568, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.018592417240142822, |
|
"rewards/margins": 0.06605438143014908, |
|
"rewards/margins_max": 0.18254120647907257, |
|
"rewards/margins_min": -0.025726070627570152, |
|
"rewards/margins_std": 0.09429889917373657, |
|
"rewards/rejected": -0.047461964190006256, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 2.1873875180344093, |
|
"learning_rate": 6.390435994127752e-10, |
|
"logits/chosen": -2.793241024017334, |
|
"logits/rejected": -2.760741710662842, |
|
"logps/chosen": -281.3489074707031, |
|
"logps/rejected": -301.64892578125, |
|
"loss": 0.6538, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.04053753241896629, |
|
"rewards/margins": 0.08871294558048248, |
|
"rewards/margins_max": 0.2136339396238327, |
|
"rewards/margins_min": -0.008021386340260506, |
|
"rewards/margins_std": 0.1009400486946106, |
|
"rewards/rejected": -0.04817541316151619, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 2.9786376127683343, |
|
"learning_rate": 5.2187248413465555e-11, |
|
"logits/chosen": -2.7850821018218994, |
|
"logits/rejected": -2.7602639198303223, |
|
"logps/chosen": -298.0012512207031, |
|
"logps/rejected": -265.384521484375, |
|
"loss": 0.6519, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.05158674716949463, |
|
"rewards/margins": 0.10068739950656891, |
|
"rewards/margins_max": 0.258645623922348, |
|
"rewards/margins_min": -0.020790638402104378, |
|
"rewards/margins_std": 0.1269642412662506, |
|
"rewards/rejected": -0.04910064488649368, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 684, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6679948979651021, |
|
"train_runtime": 9131.6092, |
|
"train_samples_per_second": 1.198, |
|
"train_steps_per_second": 0.075 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 684, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|