|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 100, |
|
"global_step": 1065, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.1205010754043525, |
|
"learning_rate": 4.672897196261682e-08, |
|
"logits/chosen": -2.8477635383605957, |
|
"logits/rejected": -2.8469698429107666, |
|
"logps/chosen": -522.6112670898438, |
|
"logps/rejected": -359.48583984375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/margins_max": 0.0, |
|
"rewards/margins_min": 0.0, |
|
"rewards/margins_std": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 10.218544680897951, |
|
"learning_rate": 4.6728971962616824e-07, |
|
"logits/chosen": -2.9212379455566406, |
|
"logits/rejected": -2.7965469360351562, |
|
"logps/chosen": -313.4451904296875, |
|
"logps/rejected": -170.3771209716797, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.0002524534647818655, |
|
"rewards/margins": 0.0003799269034061581, |
|
"rewards/margins_max": 0.0016077507752925158, |
|
"rewards/margins_min": -0.0008478969684801996, |
|
"rewards/margins_std": 0.0017364051891490817, |
|
"rewards/rejected": -0.0001274734386242926, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 2.0408708876984667, |
|
"learning_rate": 9.345794392523365e-07, |
|
"logits/chosen": -2.7633142471313477, |
|
"logits/rejected": -2.7104804515838623, |
|
"logps/chosen": -380.93878173828125, |
|
"logps/rejected": -244.42214965820312, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.0017110242042690516, |
|
"rewards/margins": 0.002610816154628992, |
|
"rewards/margins_max": 0.004759171046316624, |
|
"rewards/margins_min": 0.0004624614375643432, |
|
"rewards/margins_std": 0.0030382319819182158, |
|
"rewards/rejected": -0.0008997917175292969, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 2.293731718484229, |
|
"learning_rate": 1.4018691588785047e-06, |
|
"logits/chosen": -2.8749966621398926, |
|
"logits/rejected": -2.8233141899108887, |
|
"logps/chosen": -375.4239196777344, |
|
"logps/rejected": -252.9129638671875, |
|
"loss": 0.687, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.0067976354621350765, |
|
"rewards/margins": 0.009298587217926979, |
|
"rewards/margins_max": 0.015676181763410568, |
|
"rewards/margins_min": 0.0029209901113063097, |
|
"rewards/margins_std": 0.009019283577799797, |
|
"rewards/rejected": -0.0025009517557919025, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 1.9265009094442067, |
|
"learning_rate": 1.869158878504673e-06, |
|
"logits/chosen": -2.7316184043884277, |
|
"logits/rejected": -2.7654078006744385, |
|
"logps/chosen": -305.0208740234375, |
|
"logps/rejected": -318.15576171875, |
|
"loss": 0.6783, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.01904786378145218, |
|
"rewards/margins": 0.02529343031346798, |
|
"rewards/margins_max": 0.03756815567612648, |
|
"rewards/margins_min": 0.013018706813454628, |
|
"rewards/margins_std": 0.017359081655740738, |
|
"rewards/rejected": -0.006245566997677088, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 2.2762718753507225, |
|
"learning_rate": 2.3364485981308413e-06, |
|
"logits/chosen": -2.7840142250061035, |
|
"logits/rejected": -2.695960521697998, |
|
"logps/chosen": -241.2890167236328, |
|
"logps/rejected": -175.4230194091797, |
|
"loss": 0.6612, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.039340294897556305, |
|
"rewards/margins": 0.05124547332525253, |
|
"rewards/margins_max": 0.07519420981407166, |
|
"rewards/margins_min": 0.027296727523207664, |
|
"rewards/margins_std": 0.03386863321065903, |
|
"rewards/rejected": -0.011905180290341377, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 2.278929693070735, |
|
"learning_rate": 2.8037383177570094e-06, |
|
"logits/chosen": -2.7337279319763184, |
|
"logits/rejected": -2.6699888706207275, |
|
"logps/chosen": -257.01812744140625, |
|
"logps/rejected": -237.2047119140625, |
|
"loss": 0.636, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.10417316108942032, |
|
"rewards/margins": 0.12125153839588165, |
|
"rewards/margins_max": 0.19414573907852173, |
|
"rewards/margins_min": 0.04835732653737068, |
|
"rewards/margins_std": 0.10308797657489777, |
|
"rewards/rejected": -0.01707836613059044, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 1.9261684067245632, |
|
"learning_rate": 3.2710280373831774e-06, |
|
"logits/chosen": -2.6452136039733887, |
|
"logits/rejected": -2.649742364883423, |
|
"logps/chosen": -320.9119567871094, |
|
"logps/rejected": -220.4650421142578, |
|
"loss": 0.6066, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1302875578403473, |
|
"rewards/margins": 0.186918243765831, |
|
"rewards/margins_max": 0.2680404782295227, |
|
"rewards/margins_min": 0.10579605400562286, |
|
"rewards/margins_std": 0.11472412198781967, |
|
"rewards/rejected": -0.0566307008266449, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 1.899604093562728, |
|
"learning_rate": 3.738317757009346e-06, |
|
"logits/chosen": -2.856180191040039, |
|
"logits/rejected": -2.781043291091919, |
|
"logps/chosen": -324.0494079589844, |
|
"logps/rejected": -299.65643310546875, |
|
"loss": 0.5744, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.12999968230724335, |
|
"rewards/margins": 0.25530779361724854, |
|
"rewards/margins_max": 0.37520045042037964, |
|
"rewards/margins_min": 0.13541515171527863, |
|
"rewards/margins_std": 0.16955383121967316, |
|
"rewards/rejected": -0.12530812621116638, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 2.438635537156189, |
|
"learning_rate": 4.205607476635514e-06, |
|
"logits/chosen": -2.6444644927978516, |
|
"logits/rejected": -2.6486284732818604, |
|
"logps/chosen": -272.92718505859375, |
|
"logps/rejected": -228.8600616455078, |
|
"loss": 0.523, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.04048062115907669, |
|
"rewards/margins": 0.29693564772605896, |
|
"rewards/margins_max": 0.4845455288887024, |
|
"rewards/margins_min": 0.1093258485198021, |
|
"rewards/margins_std": 0.265320360660553, |
|
"rewards/rejected": -0.25645506381988525, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 2.676590355830037, |
|
"learning_rate": 4.6728971962616825e-06, |
|
"logits/chosen": -2.7964138984680176, |
|
"logits/rejected": -2.735548973083496, |
|
"logps/chosen": -437.5833435058594, |
|
"logps/rejected": -379.58123779296875, |
|
"loss": 0.4777, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.20675165951251984, |
|
"rewards/margins": 0.581081748008728, |
|
"rewards/margins_max": 0.8298590779304504, |
|
"rewards/margins_min": 0.3323042690753937, |
|
"rewards/margins_std": 0.3518243730068207, |
|
"rewards/rejected": -0.3743300139904022, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_logits/chosen": -2.670954704284668, |
|
"eval_logits/rejected": -2.6312379837036133, |
|
"eval_logps/chosen": -321.22222900390625, |
|
"eval_logps/rejected": -301.6253967285156, |
|
"eval_loss": 0.6754581928253174, |
|
"eval_rewards/accuracies": 0.60317462682724, |
|
"eval_rewards/chosen": -0.3600099980831146, |
|
"eval_rewards/margins": 0.06441720575094223, |
|
"eval_rewards/margins_max": 0.35590171813964844, |
|
"eval_rewards/margins_min": -0.22098243236541748, |
|
"eval_rewards/margins_std": 0.25287726521492004, |
|
"eval_rewards/rejected": -0.42442721128463745, |
|
"eval_runtime": 283.3412, |
|
"eval_samples_per_second": 7.059, |
|
"eval_steps_per_second": 0.222, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 2.5201742608505686, |
|
"learning_rate": 4.999879018839288e-06, |
|
"logits/chosen": -2.637324810028076, |
|
"logits/rejected": -2.529784679412842, |
|
"logps/chosen": -315.1212158203125, |
|
"logps/rejected": -298.06903076171875, |
|
"loss": 0.4234, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.12577927112579346, |
|
"rewards/margins": 0.6422899961471558, |
|
"rewards/margins_max": 0.9393427968025208, |
|
"rewards/margins_min": 0.3452370762825012, |
|
"rewards/margins_std": 0.42009615898132324, |
|
"rewards/rejected": -0.5165106058120728, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 6.261552433653697, |
|
"learning_rate": 4.99772856836941e-06, |
|
"logits/chosen": -2.7266364097595215, |
|
"logits/rejected": -2.7145590782165527, |
|
"logps/chosen": -347.3783264160156, |
|
"logps/rejected": -389.63299560546875, |
|
"loss": 0.3956, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.24562442302703857, |
|
"rewards/margins": 0.8258479237556458, |
|
"rewards/margins_max": 1.141953468322754, |
|
"rewards/margins_min": 0.5097422003746033, |
|
"rewards/margins_std": 0.44704094529151917, |
|
"rewards/rejected": -0.5802234411239624, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 2.5117234961196413, |
|
"learning_rate": 4.992892309373227e-06, |
|
"logits/chosen": -2.5119540691375732, |
|
"logits/rejected": -2.4644391536712646, |
|
"logps/chosen": -370.6039733886719, |
|
"logps/rejected": -361.2594909667969, |
|
"loss": 0.3218, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20368309319019318, |
|
"rewards/margins": 1.2330464124679565, |
|
"rewards/margins_max": 1.4150781631469727, |
|
"rewards/margins_min": 1.0510146617889404, |
|
"rewards/margins_std": 0.25743168592453003, |
|
"rewards/rejected": -1.0293633937835693, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 5.066809244826759, |
|
"learning_rate": 4.985375442281969e-06, |
|
"logits/chosen": -2.325155019760132, |
|
"logits/rejected": -2.2663826942443848, |
|
"logps/chosen": -366.98211669921875, |
|
"logps/rejected": -403.01495361328125, |
|
"loss": 0.2761, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.1577085703611374, |
|
"rewards/margins": 1.5553103685379028, |
|
"rewards/margins_max": 2.037226676940918, |
|
"rewards/margins_min": 1.0733940601348877, |
|
"rewards/margins_std": 0.681532621383667, |
|
"rewards/rejected": -1.7130190134048462, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 7.190427764349362, |
|
"learning_rate": 4.9751860499858175e-06, |
|
"logits/chosen": -2.1403324604034424, |
|
"logits/rejected": -2.041670560836792, |
|
"logps/chosen": -324.15667724609375, |
|
"logps/rejected": -441.0560607910156, |
|
"loss": 0.2399, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.27334439754486084, |
|
"rewards/margins": 1.659519910812378, |
|
"rewards/margins_max": 2.2249293327331543, |
|
"rewards/margins_min": 1.0941104888916016, |
|
"rewards/margins_std": 0.7996099591255188, |
|
"rewards/rejected": -1.9328645467758179, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 7.116224539942571, |
|
"learning_rate": 4.962335089142376e-06, |
|
"logits/chosen": -1.9535696506500244, |
|
"logits/rejected": -1.7718425989151, |
|
"logps/chosen": -358.6165466308594, |
|
"logps/rejected": -501.46856689453125, |
|
"loss": 0.1556, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.26896899938583374, |
|
"rewards/margins": 2.3143906593322754, |
|
"rewards/margins_max": 2.8530867099761963, |
|
"rewards/margins_min": 1.7756941318511963, |
|
"rewards/margins_std": 0.7618317008018494, |
|
"rewards/rejected": -2.5833592414855957, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 12.210481387434758, |
|
"learning_rate": 4.946836378394967e-06, |
|
"logits/chosen": -1.838096022605896, |
|
"logits/rejected": -1.5799922943115234, |
|
"logps/chosen": -445.1002502441406, |
|
"logps/rejected": -597.6307373046875, |
|
"loss": 0.1406, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4461892545223236, |
|
"rewards/margins": 3.19466233253479, |
|
"rewards/margins_max": 4.110939979553223, |
|
"rewards/margins_min": 2.2783844470977783, |
|
"rewards/margins_std": 1.2958126068115234, |
|
"rewards/rejected": -3.6408514976501465, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 27.562973883397905, |
|
"learning_rate": 4.928706583513441e-06, |
|
"logits/chosen": -1.3463890552520752, |
|
"logits/rejected": -1.2715332508087158, |
|
"logps/chosen": -605.5383911132812, |
|
"logps/rejected": -967.7098388671875, |
|
"loss": 0.1672, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.81402325630188, |
|
"rewards/margins": 3.0660033226013184, |
|
"rewards/margins_max": 3.8246688842773438, |
|
"rewards/margins_min": 2.307338237762451, |
|
"rewards/margins_std": 1.072914719581604, |
|
"rewards/rejected": -5.880026817321777, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 3.9080684244028343, |
|
"learning_rate": 4.907965199473471e-06, |
|
"logits/chosen": -1.3362934589385986, |
|
"logits/rejected": -1.0377042293548584, |
|
"logps/chosen": -732.0992431640625, |
|
"logps/rejected": -907.0653076171875, |
|
"loss": 0.131, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.7598698139190674, |
|
"rewards/margins": 4.08551549911499, |
|
"rewards/margins_max": 4.806515693664551, |
|
"rewards/margins_min": 3.3645145893096924, |
|
"rewards/margins_std": 1.019648551940918, |
|
"rewards/rejected": -6.8453850746154785, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 42.83035382744783, |
|
"learning_rate": 4.884634529493591e-06, |
|
"logits/chosen": -1.4783378839492798, |
|
"logits/rejected": -1.2933928966522217, |
|
"logps/chosen": -735.5909423828125, |
|
"logps/rejected": -1023.0391845703125, |
|
"loss": 0.1416, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.047953128814697, |
|
"rewards/margins": 4.137004375457764, |
|
"rewards/margins_max": 5.257144927978516, |
|
"rewards/margins_min": 3.0168652534484863, |
|
"rewards/margins_std": 1.5841166973114014, |
|
"rewards/rejected": -8.184958457946777, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_logits/chosen": -1.4607926607131958, |
|
"eval_logits/rejected": -1.4055131673812866, |
|
"eval_logps/chosen": -955.6170043945312, |
|
"eval_logps/rejected": -980.7882080078125, |
|
"eval_loss": 0.9053447246551514, |
|
"eval_rewards/accuracies": 0.6269841194152832, |
|
"eval_rewards/chosen": -6.703957557678223, |
|
"eval_rewards/margins": 0.5120973587036133, |
|
"eval_rewards/margins_max": 2.7698452472686768, |
|
"eval_rewards/margins_min": -1.7983918190002441, |
|
"eval_rewards/margins_std": 2.0239174365997314, |
|
"eval_rewards/rejected": -7.216055393218994, |
|
"eval_runtime": 281.707, |
|
"eval_samples_per_second": 7.1, |
|
"eval_steps_per_second": 0.224, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 11.323675041923366, |
|
"learning_rate": 4.858739661052539e-06, |
|
"logits/chosen": -1.350990891456604, |
|
"logits/rejected": -1.2011955976486206, |
|
"logps/chosen": -738.5956420898438, |
|
"logps/rejected": -1072.1134033203125, |
|
"loss": 0.1359, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -3.817591905593872, |
|
"rewards/margins": 4.215450286865234, |
|
"rewards/margins_max": 6.099488735198975, |
|
"rewards/margins_min": 2.3314108848571777, |
|
"rewards/margins_std": 2.664433240890503, |
|
"rewards/rejected": -8.033041000366211, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 2.145861603880887, |
|
"learning_rate": 4.830308438912687e-06, |
|
"logits/chosen": -1.5942816734313965, |
|
"logits/rejected": -1.3603050708770752, |
|
"logps/chosen": -854.7412109375, |
|
"logps/rejected": -1243.659423828125, |
|
"loss": 0.0774, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.454717636108398, |
|
"rewards/margins": 5.1989240646362305, |
|
"rewards/margins_max": 6.37256383895874, |
|
"rewards/margins_min": 4.025284290313721, |
|
"rewards/margins_std": 1.6597778797149658, |
|
"rewards/rejected": -9.653641700744629, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 4.962012371252307, |
|
"learning_rate": 4.799371435178544e-06, |
|
"logits/chosen": -1.7452170848846436, |
|
"logits/rejected": -1.609167456626892, |
|
"logps/chosen": -769.598876953125, |
|
"logps/rejected": -1189.131103515625, |
|
"loss": 0.104, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.888404130935669, |
|
"rewards/margins": 4.6370439529418945, |
|
"rewards/margins_max": 5.980400085449219, |
|
"rewards/margins_min": 3.293687343597412, |
|
"rewards/margins_std": 1.8997926712036133, |
|
"rewards/rejected": -8.5254487991333, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 2.001005873458455, |
|
"learning_rate": 4.765961916422575e-06, |
|
"logits/chosen": -1.6597576141357422, |
|
"logits/rejected": -1.444551944732666, |
|
"logps/chosen": -838.1024169921875, |
|
"logps/rejected": -1238.279052734375, |
|
"loss": 0.0955, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -4.969546318054199, |
|
"rewards/margins": 4.475127696990967, |
|
"rewards/margins_max": 5.603785514831543, |
|
"rewards/margins_min": 3.346471071243286, |
|
"rewards/margins_std": 1.59616219997406, |
|
"rewards/rejected": -9.444674491882324, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 17.06427775193877, |
|
"learning_rate": 4.730115807913627e-06, |
|
"logits/chosen": -1.6722052097320557, |
|
"logits/rejected": -1.393259882926941, |
|
"logps/chosen": -916.7503662109375, |
|
"logps/rejected": -1274.2889404296875, |
|
"loss": 0.0866, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.015233039855957, |
|
"rewards/margins": 5.172359943389893, |
|
"rewards/margins_max": 6.111589431762695, |
|
"rewards/margins_min": 4.233129501342773, |
|
"rewards/margins_std": 1.328271508216858, |
|
"rewards/rejected": -10.187592506408691, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 1.9182916124757974, |
|
"learning_rate": 4.691871654986485e-06, |
|
"logits/chosen": -1.7107824087142944, |
|
"logits/rejected": -1.6128714084625244, |
|
"logps/chosen": -878.5494384765625, |
|
"logps/rejected": -1255.8555908203125, |
|
"loss": 0.079, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.5672712326049805, |
|
"rewards/margins": 4.748871803283691, |
|
"rewards/margins_max": 5.786838531494141, |
|
"rewards/margins_min": 3.7109055519104004, |
|
"rewards/margins_std": 1.4679062366485596, |
|
"rewards/rejected": -10.316143035888672, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 14.786553042508123, |
|
"learning_rate": 4.651270581594054e-06, |
|
"logits/chosen": -1.8650672435760498, |
|
"logits/rejected": -1.613443374633789, |
|
"logps/chosen": -834.0842895507812, |
|
"logps/rejected": -1138.3665771484375, |
|
"loss": 0.0875, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.050877571105957, |
|
"rewards/margins": 5.007403373718262, |
|
"rewards/margins_max": 5.84472131729126, |
|
"rewards/margins_min": 4.170086860656738, |
|
"rewards/margins_std": 1.184145212173462, |
|
"rewards/rejected": -9.058280944824219, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 5.30439894597876, |
|
"learning_rate": 4.6083562460867545e-06, |
|
"logits/chosen": -1.6716859340667725, |
|
"logits/rejected": -1.5429413318634033, |
|
"logps/chosen": -701.3162841796875, |
|
"logps/rejected": -1120.8736572265625, |
|
"loss": 0.0896, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -3.7223961353302, |
|
"rewards/margins": 4.8294267654418945, |
|
"rewards/margins_max": 6.9812211990356445, |
|
"rewards/margins_min": 2.6776328086853027, |
|
"rewards/margins_std": 3.0430965423583984, |
|
"rewards/rejected": -8.551824569702148, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 12.724182318476426, |
|
"learning_rate": 4.563174794266684e-06, |
|
"logits/chosen": -1.8460794687271118, |
|
"logits/rejected": -1.6377445459365845, |
|
"logps/chosen": -858.4215698242188, |
|
"logps/rejected": -1289.198974609375, |
|
"loss": 0.0576, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.965760231018066, |
|
"rewards/margins": 5.2121992111206055, |
|
"rewards/margins_max": 6.927371025085449, |
|
"rewards/margins_min": 3.49702525138855, |
|
"rewards/margins_std": 2.4256205558776855, |
|
"rewards/rejected": -10.177958488464355, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 5.778488241840074, |
|
"learning_rate": 4.5157748097670125e-06, |
|
"logits/chosen": -1.7077114582061768, |
|
"logits/rejected": -1.5558173656463623, |
|
"logps/chosen": -739.67333984375, |
|
"logps/rejected": -1423.210693359375, |
|
"loss": 0.0426, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.0192999839782715, |
|
"rewards/margins": 7.085653781890869, |
|
"rewards/margins_max": 7.969016075134277, |
|
"rewards/margins_min": 6.202291488647461, |
|
"rewards/margins_std": 1.2492637634277344, |
|
"rewards/rejected": -11.104954719543457, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_logits/chosen": -1.7101370096206665, |
|
"eval_logits/rejected": -1.6507517099380493, |
|
"eval_logps/chosen": -1041.5823974609375, |
|
"eval_logps/rejected": -1121.1776123046875, |
|
"eval_loss": 0.9213338494300842, |
|
"eval_rewards/accuracies": 0.6785714030265808, |
|
"eval_rewards/chosen": -7.563611030578613, |
|
"eval_rewards/margins": 1.0563386678695679, |
|
"eval_rewards/margins_max": 4.265172481536865, |
|
"eval_rewards/margins_min": -2.1614327430725098, |
|
"eval_rewards/margins_std": 2.8564813137054443, |
|
"eval_rewards/rejected": -8.619950294494629, |
|
"eval_runtime": 281.7456, |
|
"eval_samples_per_second": 7.099, |
|
"eval_steps_per_second": 0.224, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 12.853675144552225, |
|
"learning_rate": 4.466207261809989e-06, |
|
"logits/chosen": -1.9336496591567993, |
|
"logits/rejected": -1.6221659183502197, |
|
"logps/chosen": -901.4439697265625, |
|
"logps/rejected": -1262.938720703125, |
|
"loss": 0.0633, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.771965980529785, |
|
"rewards/margins": 5.247581958770752, |
|
"rewards/margins_max": 6.526535987854004, |
|
"rewards/margins_min": 3.968628406524658, |
|
"rewards/margins_std": 1.8087135553359985, |
|
"rewards/rejected": -10.019546508789062, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 12.332833632235157, |
|
"learning_rate": 4.414525450399713e-06, |
|
"logits/chosen": -1.6821091175079346, |
|
"logits/rejected": -1.511785626411438, |
|
"logps/chosen": -956.3181762695312, |
|
"logps/rejected": -1481.1754150390625, |
|
"loss": 0.0978, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -6.100653171539307, |
|
"rewards/margins": 6.3301496505737305, |
|
"rewards/margins_max": 8.061585426330566, |
|
"rewards/margins_min": 4.598714828491211, |
|
"rewards/margins_std": 2.4486188888549805, |
|
"rewards/rejected": -12.430803298950195, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 3.9044155848949162, |
|
"learning_rate": 4.360784949008615e-06, |
|
"logits/chosen": -1.768561601638794, |
|
"logits/rejected": -1.5437813997268677, |
|
"logps/chosen": -1006.9339599609375, |
|
"logps/rejected": -1522.902587890625, |
|
"loss": 0.1091, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -6.272473335266113, |
|
"rewards/margins": 6.482227325439453, |
|
"rewards/margins_max": 8.401371002197266, |
|
"rewards/margins_min": 4.563082695007324, |
|
"rewards/margins_std": 2.7140800952911377, |
|
"rewards/rejected": -12.754700660705566, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 4.01171637277802, |
|
"learning_rate": 4.30504354481929e-06, |
|
"logits/chosen": -1.7665777206420898, |
|
"logits/rejected": -1.5484760999679565, |
|
"logps/chosen": -942.85888671875, |
|
"logps/rejected": -1260.244384765625, |
|
"loss": 0.0741, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.743631839752197, |
|
"rewards/margins": 4.815784931182861, |
|
"rewards/margins_max": 6.530648708343506, |
|
"rewards/margins_min": 3.1009204387664795, |
|
"rewards/margins_std": 2.425184488296509, |
|
"rewards/rejected": -10.559415817260742, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 12.659683176327913, |
|
"learning_rate": 4.247361176585904e-06, |
|
"logits/chosen": -1.831321120262146, |
|
"logits/rejected": -1.6549314260482788, |
|
"logps/chosen": -909.5006713867188, |
|
"logps/rejected": -1532.635986328125, |
|
"loss": 0.0943, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.810971736907959, |
|
"rewards/margins": 7.531504154205322, |
|
"rewards/margins_max": 8.548044204711914, |
|
"rewards/margins_min": 6.514962673187256, |
|
"rewards/margins_std": 1.4376055002212524, |
|
"rewards/rejected": -12.342476844787598, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 3.001942641389469, |
|
"learning_rate": 4.187799870182038e-06, |
|
"logits/chosen": -1.7835716009140015, |
|
"logits/rejected": -1.5620241165161133, |
|
"logps/chosen": -896.9002075195312, |
|
"logps/rejected": -1392.6307373046875, |
|
"loss": 0.0555, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.5069427490234375, |
|
"rewards/margins": 6.391612529754639, |
|
"rewards/margins_max": 7.894322872161865, |
|
"rewards/margins_min": 4.888903617858887, |
|
"rewards/margins_std": 2.125152349472046, |
|
"rewards/rejected": -11.898555755615234, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 34.14422714120664, |
|
"learning_rate": 4.1264236719042365e-06, |
|
"logits/chosen": -1.5919651985168457, |
|
"logits/rejected": -1.5377094745635986, |
|
"logps/chosen": -915.7950439453125, |
|
"logps/rejected": -1490.6865234375, |
|
"loss": 0.0808, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.6413750648498535, |
|
"rewards/margins": 6.627654075622559, |
|
"rewards/margins_max": 8.43530559539795, |
|
"rewards/margins_min": 4.820002555847168, |
|
"rewards/margins_std": 2.5564048290252686, |
|
"rewards/rejected": -12.26902961730957, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 2.1290534012360847, |
|
"learning_rate": 4.063298579603001e-06, |
|
"logits/chosen": -1.8492443561553955, |
|
"logits/rejected": -1.5422757863998413, |
|
"logps/chosen": -937.0126953125, |
|
"logps/rejected": -1458.616455078125, |
|
"loss": 0.0231, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.460320949554443, |
|
"rewards/margins": 7.281059265136719, |
|
"rewards/margins_max": 8.49816608428955, |
|
"rewards/margins_min": 6.0639543533325195, |
|
"rewards/margins_std": 1.7212467193603516, |
|
"rewards/rejected": -12.74138069152832, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 5.584775064800199, |
|
"learning_rate": 3.998492471715272e-06, |
|
"logits/chosen": -1.8397998809814453, |
|
"logits/rejected": -1.6857073307037354, |
|
"logps/chosen": -913.9352416992188, |
|
"logps/rejected": -1781.8939208984375, |
|
"loss": 0.0278, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.164222717285156, |
|
"rewards/margins": 9.338297843933105, |
|
"rewards/margins_max": 11.463502883911133, |
|
"rewards/margins_min": 7.2130937576293945, |
|
"rewards/margins_std": 3.005493640899658, |
|
"rewards/rejected": -14.502520561218262, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"grad_norm": 0.9893449328848739, |
|
"learning_rate": 3.932075034274723e-06, |
|
"logits/chosen": -1.5922348499298096, |
|
"logits/rejected": -1.4688727855682373, |
|
"logps/chosen": -871.9650268554688, |
|
"logps/rejected": -1526.658935546875, |
|
"loss": 0.0537, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.7322564125061035, |
|
"rewards/margins": 7.261972904205322, |
|
"rewards/margins_max": 8.895970344543457, |
|
"rewards/margins_min": 5.627974510192871, |
|
"rewards/margins_std": 2.3108224868774414, |
|
"rewards/rejected": -12.994227409362793, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"eval_logits/chosen": -1.6575742959976196, |
|
"eval_logits/rejected": -1.5926053524017334, |
|
"eval_logps/chosen": -1505.182861328125, |
|
"eval_logps/rejected": -1577.3876953125, |
|
"eval_loss": 1.1419050693511963, |
|
"eval_rewards/accuracies": 0.64682537317276, |
|
"eval_rewards/chosen": -12.199617385864258, |
|
"eval_rewards/margins": 0.9824325442314148, |
|
"eval_rewards/margins_max": 5.48787260055542, |
|
"eval_rewards/margins_min": -3.0621237754821777, |
|
"eval_rewards/margins_std": 3.7889323234558105, |
|
"eval_rewards/rejected": -13.182049751281738, |
|
"eval_runtime": 282.4562, |
|
"eval_samples_per_second": 7.081, |
|
"eval_steps_per_second": 0.223, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 0.9794540017501292, |
|
"learning_rate": 3.864117685978339e-06, |
|
"logits/chosen": -1.6234560012817383, |
|
"logits/rejected": -1.4928052425384521, |
|
"logps/chosen": -1131.8265380859375, |
|
"logps/rejected": -1794.791015625, |
|
"loss": 0.0776, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -8.371360778808594, |
|
"rewards/margins": 7.494576454162598, |
|
"rewards/margins_max": 10.048029899597168, |
|
"rewards/margins_min": 4.941121578216553, |
|
"rewards/margins_std": 3.61112904548645, |
|
"rewards/rejected": -15.865939140319824, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"grad_norm": 5.020955613205059, |
|
"learning_rate": 3.794693501389861e-06, |
|
"logits/chosen": -1.7987747192382812, |
|
"logits/rejected": -1.6164734363555908, |
|
"logps/chosen": -1037.0328369140625, |
|
"logps/rejected": -1667.540283203125, |
|
"loss": 0.054, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -6.372786045074463, |
|
"rewards/margins": 7.646895408630371, |
|
"rewards/margins_max": 8.891626358032227, |
|
"rewards/margins_min": 6.402162075042725, |
|
"rewards/margins_std": 1.7603172063827515, |
|
"rewards/rejected": -14.019680976867676, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"grad_norm": 15.978168852619268, |
|
"learning_rate": 3.7238771323626822e-06, |
|
"logits/chosen": -1.6425611972808838, |
|
"logits/rejected": -1.4570006132125854, |
|
"logps/chosen": -1138.6572265625, |
|
"logps/rejected": -1780.6002197265625, |
|
"loss": 0.044, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.515681266784668, |
|
"rewards/margins": 7.655673027038574, |
|
"rewards/margins_max": 9.563043594360352, |
|
"rewards/margins_min": 5.748303413391113, |
|
"rewards/margins_std": 2.6974284648895264, |
|
"rewards/rejected": -15.171353340148926, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 1.4394479904186748, |
|
"learning_rate": 3.651744727766676e-06, |
|
"logits/chosen": -1.565843939781189, |
|
"logits/rejected": -1.3031253814697266, |
|
"logps/chosen": -1135.116943359375, |
|
"logps/rejected": -1897.188232421875, |
|
"loss": 0.0356, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.198633193969727, |
|
"rewards/margins": 8.82483196258545, |
|
"rewards/margins_max": 11.5381441116333, |
|
"rewards/margins_min": 6.1115217208862305, |
|
"rewards/margins_std": 3.8372015953063965, |
|
"rewards/rejected": -17.023466110229492, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"grad_norm": 2.5233082457705853, |
|
"learning_rate": 3.57837385160529e-06, |
|
"logits/chosen": -1.6333341598510742, |
|
"logits/rejected": -1.419213056564331, |
|
"logps/chosen": -991.2794799804688, |
|
"logps/rejected": -1686.808837890625, |
|
"loss": 0.0246, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -6.5310492515563965, |
|
"rewards/margins": 7.6606926918029785, |
|
"rewards/margins_max": 9.670614242553711, |
|
"rewards/margins_min": 5.650770664215088, |
|
"rewards/margins_std": 2.842459201812744, |
|
"rewards/rejected": -14.191740036010742, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 1.432241857413985, |
|
"learning_rate": 3.503843399610941e-06, |
|
"logits/chosen": -1.6662094593048096, |
|
"logits/rejected": -1.5159740447998047, |
|
"logps/chosen": -1023.26220703125, |
|
"logps/rejected": -1997.1787109375, |
|
"loss": 0.0208, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -6.279843330383301, |
|
"rewards/margins": 9.666014671325684, |
|
"rewards/margins_max": 11.908063888549805, |
|
"rewards/margins_min": 7.423966407775879, |
|
"rewards/margins_std": 3.1707358360290527, |
|
"rewards/rejected": -15.945857048034668, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"grad_norm": 1.3845844015706055, |
|
"learning_rate": 3.4282335144083985e-06, |
|
"logits/chosen": -1.5941836833953857, |
|
"logits/rejected": -1.34697425365448, |
|
"logps/chosen": -1180.2171630859375, |
|
"logps/rejected": -1964.836181640625, |
|
"loss": 0.0304, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.229662895202637, |
|
"rewards/margins": 9.211896896362305, |
|
"rewards/margins_max": 11.3733549118042, |
|
"rewards/margins_min": 7.050437927246094, |
|
"rewards/margins_std": 3.0567641258239746, |
|
"rewards/rejected": -17.441558837890625, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 0.25091350074864577, |
|
"learning_rate": 3.351625499337395e-06, |
|
"logits/chosen": -1.7405236959457397, |
|
"logits/rejected": -1.4616386890411377, |
|
"logps/chosen": -1157.209716796875, |
|
"logps/rejected": -1899.130126953125, |
|
"loss": 0.014, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.413580417633057, |
|
"rewards/margins": 8.81358528137207, |
|
"rewards/margins_max": 10.952999114990234, |
|
"rewards/margins_min": 6.674172401428223, |
|
"rewards/margins_std": 3.0255870819091797, |
|
"rewards/rejected": -16.227169036865234, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"grad_norm": 1.9987349085330508, |
|
"learning_rate": 3.2741017310271056e-06, |
|
"logits/chosen": -1.3325449228286743, |
|
"logits/rejected": -1.044908881187439, |
|
"logps/chosen": -1130.028076171875, |
|
"logps/rejected": -2392.521728515625, |
|
"loss": 0.0448, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -9.127466201782227, |
|
"rewards/margins": 12.631993293762207, |
|
"rewards/margins_max": 19.333314895629883, |
|
"rewards/margins_min": 5.930669784545898, |
|
"rewards/margins_std": 9.47710132598877, |
|
"rewards/rejected": -21.759456634521484, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"grad_norm": 1.7094204242814826, |
|
"learning_rate": 3.195745570816532e-06, |
|
"logits/chosen": -1.3385294675827026, |
|
"logits/rejected": -1.144627571105957, |
|
"logps/chosen": -1425.61474609375, |
|
"logps/rejected": -2558.358642578125, |
|
"loss": 0.0197, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -10.41409969329834, |
|
"rewards/margins": 12.790387153625488, |
|
"rewards/margins_max": 14.778757095336914, |
|
"rewards/margins_min": 10.802019119262695, |
|
"rewards/margins_std": 2.811978340148926, |
|
"rewards/rejected": -23.204486846923828, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"eval_logits/chosen": -1.5026105642318726, |
|
"eval_logits/rejected": -1.4330366849899292, |
|
"eval_logps/chosen": -2000.166259765625, |
|
"eval_logps/rejected": -2146.479736328125, |
|
"eval_loss": 1.684375524520874, |
|
"eval_rewards/accuracies": 0.6666666865348816, |
|
"eval_rewards/chosen": -17.149450302124023, |
|
"eval_rewards/margins": 1.7235194444656372, |
|
"eval_rewards/margins_max": 9.41946029663086, |
|
"eval_rewards/margins_min": -5.146158218383789, |
|
"eval_rewards/margins_std": 6.577420711517334, |
|
"eval_rewards/rejected": -18.872970581054688, |
|
"eval_runtime": 282.6761, |
|
"eval_samples_per_second": 7.075, |
|
"eval_steps_per_second": 0.223, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 19.195207569920772, |
|
"learning_rate": 3.116641275116018e-06, |
|
"logits/chosen": -1.2405312061309814, |
|
"logits/rejected": -0.9798258543014526, |
|
"logps/chosen": -1318.967041015625, |
|
"logps/rejected": -3077.10986328125, |
|
"loss": 0.0229, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -10.26286792755127, |
|
"rewards/margins": 17.355688095092773, |
|
"rewards/margins_max": 25.170244216918945, |
|
"rewards/margins_min": 9.541135787963867, |
|
"rewards/margins_std": 11.051448822021484, |
|
"rewards/rejected": -27.618555068969727, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"grad_norm": 18.23076880980296, |
|
"learning_rate": 3.0368739048062956e-06, |
|
"logits/chosen": -1.6826045513153076, |
|
"logits/rejected": -1.4554195404052734, |
|
"logps/chosen": -1159.925048828125, |
|
"logps/rejected": -2069.19580078125, |
|
"loss": 0.0355, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.183090209960938, |
|
"rewards/margins": 10.176679611206055, |
|
"rewards/margins_max": 13.777229309082031, |
|
"rewards/margins_min": 6.5761308670043945, |
|
"rewards/margins_std": 5.091946125030518, |
|
"rewards/rejected": -18.359769821166992, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"grad_norm": 7.345312333811953, |
|
"learning_rate": 2.956529233772492e-06, |
|
"logits/chosen": -1.6696984767913818, |
|
"logits/rejected": -1.566896915435791, |
|
"logps/chosen": -1206.398681640625, |
|
"logps/rejected": -2070.3857421875, |
|
"loss": 0.0184, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.273930549621582, |
|
"rewards/margins": 9.733041763305664, |
|
"rewards/margins_max": 12.174661636352539, |
|
"rewards/margins_min": 7.291422367095947, |
|
"rewards/margins_std": 3.4529712200164795, |
|
"rewards/rejected": -18.006973266601562, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"grad_norm": 21.78105244485373, |
|
"learning_rate": 2.8756936566714317e-06, |
|
"logits/chosen": -1.8572250604629517, |
|
"logits/rejected": -1.5829768180847168, |
|
"logps/chosen": -1132.333740234375, |
|
"logps/rejected": -1908.844970703125, |
|
"loss": 0.0256, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.327805519104004, |
|
"rewards/margins": 9.385960578918457, |
|
"rewards/margins_max": 10.629077911376953, |
|
"rewards/margins_min": 8.142843246459961, |
|
"rewards/margins_std": 1.7580335140228271, |
|
"rewards/rejected": -16.713764190673828, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 0.0011589092808777935, |
|
"learning_rate": 2.794454096031429e-06, |
|
"logits/chosen": -1.7256653308868408, |
|
"logits/rejected": -1.5292785167694092, |
|
"logps/chosen": -1160.131591796875, |
|
"logps/rejected": -2000.1337890625, |
|
"loss": 0.0223, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.491829872131348, |
|
"rewards/margins": 8.8389892578125, |
|
"rewards/margins_max": 10.393911361694336, |
|
"rewards/margins_min": 7.284067630767822, |
|
"rewards/margins_std": 2.1989917755126953, |
|
"rewards/rejected": -17.33081817626953, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"grad_norm": 1.1029358007262624, |
|
"learning_rate": 2.71289790878446e-06, |
|
"logits/chosen": -1.5588399171829224, |
|
"logits/rejected": -1.3718044757843018, |
|
"logps/chosen": -1313.054443359375, |
|
"logps/rejected": -2318.33544921875, |
|
"loss": 0.0303, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -9.963714599609375, |
|
"rewards/margins": 9.831637382507324, |
|
"rewards/margins_max": 12.691813468933105, |
|
"rewards/margins_min": 6.971460819244385, |
|
"rewards/margins_std": 4.044900894165039, |
|
"rewards/rejected": -19.795352935791016, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"grad_norm": 0.032589510422147, |
|
"learning_rate": 2.6311127923312156e-06, |
|
"logits/chosen": -1.7382599115371704, |
|
"logits/rejected": -1.5052683353424072, |
|
"logps/chosen": -1249.270263671875, |
|
"logps/rejected": -2084.659912109375, |
|
"loss": 0.0177, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.170693397521973, |
|
"rewards/margins": 9.51733684539795, |
|
"rewards/margins_max": 11.196283340454102, |
|
"rewards/margins_min": 7.8383917808532715, |
|
"rewards/margins_std": 2.374387741088867, |
|
"rewards/rejected": -17.68802833557129, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"grad_norm": 12.99158263963332, |
|
"learning_rate": 2.549186690240057e-06, |
|
"logits/chosen": -1.610082983970642, |
|
"logits/rejected": -1.3717553615570068, |
|
"logps/chosen": -1186.931884765625, |
|
"logps/rejected": -2215.44970703125, |
|
"loss": 0.0096, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.884663581848145, |
|
"rewards/margins": 11.055347442626953, |
|
"rewards/margins_max": 13.794784545898438, |
|
"rewards/margins_min": 8.315912246704102, |
|
"rewards/margins_std": 3.874147891998291, |
|
"rewards/rejected": -19.94001007080078, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"grad_norm": 0.09893386521593805, |
|
"learning_rate": 2.4672076976812548e-06, |
|
"logits/chosen": -1.504370927810669, |
|
"logits/rejected": -1.24093759059906, |
|
"logps/chosen": -1294.6529541015625, |
|
"logps/rejected": -2374.53271484375, |
|
"loss": 0.0182, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -9.253921508789062, |
|
"rewards/margins": 11.56922721862793, |
|
"rewards/margins_max": 16.103586196899414, |
|
"rewards/margins_min": 7.034867763519287, |
|
"rewards/margins_std": 6.412552833557129, |
|
"rewards/rejected": -20.823148727416992, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"grad_norm": 1.4677452546622722, |
|
"learning_rate": 2.3852639666982218e-06, |
|
"logits/chosen": -1.5387322902679443, |
|
"logits/rejected": -1.3424365520477295, |
|
"logps/chosen": -1172.688232421875, |
|
"logps/rejected": -2390.56689453125, |
|
"loss": 0.0029, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.940356254577637, |
|
"rewards/margins": 12.360175132751465, |
|
"rewards/margins_max": 14.774116516113281, |
|
"rewards/margins_min": 9.946235656738281, |
|
"rewards/margins_std": 3.4138267040252686, |
|
"rewards/rejected": -21.300533294677734, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"eval_logits/chosen": -1.5330660343170166, |
|
"eval_logits/rejected": -1.4547291994094849, |
|
"eval_logps/chosen": -1739.8331298828125, |
|
"eval_logps/rejected": -2005.7900390625, |
|
"eval_loss": 1.9743393659591675, |
|
"eval_rewards/accuracies": 0.6865079402923584, |
|
"eval_rewards/chosen": -14.546117782592773, |
|
"eval_rewards/margins": 2.9199535846710205, |
|
"eval_rewards/margins_max": 12.400845527648926, |
|
"eval_rewards/margins_min": -5.716708660125732, |
|
"eval_rewards/margins_std": 8.164259910583496, |
|
"eval_rewards/rejected": -17.46607208251953, |
|
"eval_runtime": 281.995, |
|
"eval_samples_per_second": 7.092, |
|
"eval_steps_per_second": 0.223, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"grad_norm": 16.662428863900104, |
|
"learning_rate": 2.303443611417584e-06, |
|
"logits/chosen": -1.2892029285430908, |
|
"logits/rejected": -1.0749212503433228, |
|
"logps/chosen": -1583.099609375, |
|
"logps/rejected": -2742.760498046875, |
|
"loss": 0.3581, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -12.176101684570312, |
|
"rewards/margins": 12.56828498840332, |
|
"rewards/margins_max": 17.369625091552734, |
|
"rewards/margins_min": 7.766943454742432, |
|
"rewards/margins_std": 6.790121555328369, |
|
"rewards/rejected": -24.744388580322266, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 0.1502185307527533, |
|
"learning_rate": 2.2218346133000264e-06, |
|
"logits/chosen": -1.1851621866226196, |
|
"logits/rejected": -0.8747516870498657, |
|
"logps/chosen": -1684.5989990234375, |
|
"logps/rejected": -2998.321044921875, |
|
"loss": 0.0851, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -13.733156204223633, |
|
"rewards/margins": 14.27801513671875, |
|
"rewards/margins_max": 20.737751007080078, |
|
"rewards/margins_min": 7.818281650543213, |
|
"rewards/margins_std": 9.135442733764648, |
|
"rewards/rejected": -28.011173248291016, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"grad_norm": 0.608737783564001, |
|
"learning_rate": 2.140524726533792e-06, |
|
"logits/chosen": -1.4635207653045654, |
|
"logits/rejected": -1.206559658050537, |
|
"logps/chosen": -1263.6993408203125, |
|
"logps/rejected": -2158.978759765625, |
|
"loss": 0.0474, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.689355850219727, |
|
"rewards/margins": 10.659037590026855, |
|
"rewards/margins_max": 13.989839553833008, |
|
"rewards/margins_min": 7.3282365798950195, |
|
"rewards/margins_std": 4.710465431213379, |
|
"rewards/rejected": -19.3483943939209, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 37.51094566818964, |
|
"learning_rate": 2.059601383672566e-06, |
|
"logits/chosen": -1.6980371475219727, |
|
"logits/rejected": -1.5178521871566772, |
|
"logps/chosen": -964.2796630859375, |
|
"logps/rejected": -1743.4036865234375, |
|
"loss": 0.0669, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -6.6180419921875, |
|
"rewards/margins": 8.817036628723145, |
|
"rewards/margins_max": 10.244000434875488, |
|
"rewards/margins_min": 7.390072822570801, |
|
"rewards/margins_std": 2.018031597137451, |
|
"rewards/rejected": -15.435079574584961, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"grad_norm": 0.824336798291059, |
|
"learning_rate": 1.9791516016192214e-06, |
|
"logits/chosen": -1.8461487293243408, |
|
"logits/rejected": -1.5655087232589722, |
|
"logps/chosen": -941.0548706054688, |
|
"logps/rejected": -1621.322265625, |
|
"loss": 0.0587, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -6.0665507316589355, |
|
"rewards/margins": 7.918545722961426, |
|
"rewards/margins_max": 10.15103530883789, |
|
"rewards/margins_min": 5.6860551834106445, |
|
"rewards/margins_std": 3.157217502593994, |
|
"rewards/rejected": -13.985095024108887, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"grad_norm": 0.2329366656877762, |
|
"learning_rate": 1.8992618880565039e-06, |
|
"logits/chosen": -1.4127376079559326, |
|
"logits/rejected": -1.204310655593872, |
|
"logps/chosen": -974.7972412109375, |
|
"logps/rejected": -1706.96484375, |
|
"loss": 0.0472, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -6.5077385902404785, |
|
"rewards/margins": 8.661420822143555, |
|
"rewards/margins_max": 11.35025691986084, |
|
"rewards/margins_min": 5.972585678100586, |
|
"rewards/margins_std": 3.8025870323181152, |
|
"rewards/rejected": -15.169160842895508, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"grad_norm": 0.2766932797893532, |
|
"learning_rate": 1.8200181484252888e-06, |
|
"logits/chosen": -1.6775104999542236, |
|
"logits/rejected": -1.5603760480880737, |
|
"logps/chosen": -1146.943603515625, |
|
"logps/rejected": -2180.825927734375, |
|
"loss": 0.0303, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.641868591308594, |
|
"rewards/margins": 10.99293327331543, |
|
"rewards/margins_max": 14.466341018676758, |
|
"rewards/margins_min": 7.519525051116943, |
|
"rewards/margins_std": 4.912140369415283, |
|
"rewards/rejected": -18.634801864624023, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 1.9894517252535326, |
|
"learning_rate": 1.7415055935504234e-06, |
|
"logits/chosen": -1.6779143810272217, |
|
"logits/rejected": -1.3088996410369873, |
|
"logps/chosen": -1250.79345703125, |
|
"logps/rejected": -2332.5302734375, |
|
"loss": 0.0268, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.574339866638184, |
|
"rewards/margins": 11.780553817749023, |
|
"rewards/margins_max": 17.217056274414062, |
|
"rewards/margins_min": 6.344052314758301, |
|
"rewards/margins_std": 7.688374996185303, |
|
"rewards/rejected": -20.35489273071289, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"grad_norm": 1.2264882447915335, |
|
"learning_rate": 1.6638086480134954e-06, |
|
"logits/chosen": -1.133843183517456, |
|
"logits/rejected": -0.9121431112289429, |
|
"logps/chosen": -1320.951171875, |
|
"logps/rejected": -2429.5537109375, |
|
"loss": 0.014, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -10.83985710144043, |
|
"rewards/margins": 12.160634994506836, |
|
"rewards/margins_max": 17.855926513671875, |
|
"rewards/margins_min": 6.465344429016113, |
|
"rewards/margins_std": 8.054357528686523, |
|
"rewards/rejected": -23.000492095947266, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"grad_norm": 4.223913353219136, |
|
"learning_rate": 1.5870108593710473e-06, |
|
"logits/chosen": -1.4314680099487305, |
|
"logits/rejected": -1.1393955945968628, |
|
"logps/chosen": -1421.0302734375, |
|
"logps/rejected": -2616.06005859375, |
|
"loss": 0.018, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -9.940652847290039, |
|
"rewards/margins": 14.069793701171875, |
|
"rewards/margins_max": 18.50979995727539, |
|
"rewards/margins_min": 9.62978744506836, |
|
"rewards/margins_std": 6.279117584228516, |
|
"rewards/rejected": -24.010446548461914, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_logits/chosen": -1.4977593421936035, |
|
"eval_logits/rejected": -1.4133175611495972, |
|
"eval_logps/chosen": -1938.2783203125, |
|
"eval_logps/rejected": -2177.001708984375, |
|
"eval_loss": 1.8029882907867432, |
|
"eval_rewards/accuracies": 0.6785714030265808, |
|
"eval_rewards/chosen": -16.53057098388672, |
|
"eval_rewards/margins": 2.6476187705993652, |
|
"eval_rewards/margins_max": 11.230785369873047, |
|
"eval_rewards/margins_min": -5.27154541015625, |
|
"eval_rewards/margins_std": 7.43382453918457, |
|
"eval_rewards/rejected": -19.178190231323242, |
|
"eval_runtime": 282.2867, |
|
"eval_samples_per_second": 7.085, |
|
"eval_steps_per_second": 0.223, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.027200756028801846, |
|
"learning_rate": 1.511194808315853e-06, |
|
"logits/chosen": -1.4225877523422241, |
|
"logits/rejected": -1.1490380764007568, |
|
"logps/chosen": -1361.941162109375, |
|
"logps/rejected": -2227.452880859375, |
|
"loss": 0.0423, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -10.556672096252441, |
|
"rewards/margins": 9.88037109375, |
|
"rewards/margins_max": 13.63640022277832, |
|
"rewards/margins_min": 6.124342441558838, |
|
"rewards/margins_std": 5.311827182769775, |
|
"rewards/rejected": -20.437042236328125, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"grad_norm": 0.318786591879142, |
|
"learning_rate": 1.4364420198778662e-06, |
|
"logits/chosen": -1.5894582271575928, |
|
"logits/rejected": -1.3686472177505493, |
|
"logps/chosen": -1422.156005859375, |
|
"logps/rejected": -2683.84814453125, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -10.745410919189453, |
|
"rewards/margins": 12.789144515991211, |
|
"rewards/margins_max": 16.427227020263672, |
|
"rewards/margins_min": 9.15106201171875, |
|
"rewards/margins_std": 5.14502477645874, |
|
"rewards/rejected": -23.53455352783203, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"grad_norm": 1.5807231251466567, |
|
"learning_rate": 1.3628328757603243e-06, |
|
"logits/chosen": -1.6512333154678345, |
|
"logits/rejected": -1.3885473012924194, |
|
"logps/chosen": -1368.7022705078125, |
|
"logps/rejected": -2550.4912109375, |
|
"loss": 0.0091, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -9.459519386291504, |
|
"rewards/margins": 13.517751693725586, |
|
"rewards/margins_max": 18.180484771728516, |
|
"rewards/margins_min": 8.855023384094238, |
|
"rewards/margins_std": 6.5940961837768555, |
|
"rewards/rejected": -22.97727394104004, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 0.1516893711186873, |
|
"learning_rate": 1.2904465279052725e-06, |
|
"logits/chosen": -1.6209065914154053, |
|
"logits/rejected": -1.351872444152832, |
|
"logps/chosen": -1231.8480224609375, |
|
"logps/rejected": -2237.622802734375, |
|
"loss": 0.0085, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.798944473266602, |
|
"rewards/margins": 11.324702262878418, |
|
"rewards/margins_max": 13.88591480255127, |
|
"rewards/margins_min": 8.763489723205566, |
|
"rewards/margins_std": 3.6221022605895996, |
|
"rewards/rejected": -20.123645782470703, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"grad_norm": 0.8035507691467565, |
|
"learning_rate": 1.219360813381446e-06, |
|
"logits/chosen": -1.247396469116211, |
|
"logits/rejected": -1.033151388168335, |
|
"logps/chosen": -1316.85546875, |
|
"logps/rejected": -2502.35400390625, |
|
"loss": 0.0042, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -11.282798767089844, |
|
"rewards/margins": 12.374329566955566, |
|
"rewards/margins_max": 16.396432876586914, |
|
"rewards/margins_min": 8.352226257324219, |
|
"rewards/margins_std": 5.688112258911133, |
|
"rewards/rejected": -23.657127380371094, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"grad_norm": 0.10201527009610997, |
|
"learning_rate": 1.1496521706860392e-06, |
|
"logits/chosen": -1.5233542919158936, |
|
"logits/rejected": -1.1838680505752563, |
|
"logps/chosen": -1417.0087890625, |
|
"logps/rejected": -2805.773681640625, |
|
"loss": 0.0051, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -10.839475631713867, |
|
"rewards/margins": 14.590258598327637, |
|
"rewards/margins_max": 17.661457061767578, |
|
"rewards/margins_min": 11.519063949584961, |
|
"rewards/margins_std": 4.343328475952148, |
|
"rewards/rejected": -25.429737091064453, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"grad_norm": 0.0015806759819360625, |
|
"learning_rate": 1.0813955575503588e-06, |
|
"logits/chosen": -1.355691909790039, |
|
"logits/rejected": -1.144424557685852, |
|
"logps/chosen": -1348.842041015625, |
|
"logps/rejected": -2898.0224609375, |
|
"loss": 0.0066, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -10.152058601379395, |
|
"rewards/margins": 16.251543045043945, |
|
"rewards/margins_max": 22.687950134277344, |
|
"rewards/margins_min": 9.815134048461914, |
|
"rewards/margins_std": 9.102456092834473, |
|
"rewards/rejected": -26.40359878540039, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"grad_norm": 0.408380187113466, |
|
"learning_rate": 1.0146643703377488e-06, |
|
"logits/chosen": -1.6056991815567017, |
|
"logits/rejected": -1.3266913890838623, |
|
"logps/chosen": -1298.9927978515625, |
|
"logps/rejected": -2409.390869140625, |
|
"loss": 0.0075, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -9.719507217407227, |
|
"rewards/margins": 12.09427261352539, |
|
"rewards/margins_max": 15.695422172546387, |
|
"rewards/margins_min": 8.493124008178711, |
|
"rewards/margins_std": 5.092793941497803, |
|
"rewards/rejected": -21.813779830932617, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"grad_norm": 0.001344347508367163, |
|
"learning_rate": 9.495303651204496e-07, |
|
"logits/chosen": -1.563906192779541, |
|
"logits/rejected": -1.3474560976028442, |
|
"logps/chosen": -1254.9219970703125, |
|
"logps/rejected": -2623.2822265625, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.889430046081543, |
|
"rewards/margins": 14.575796127319336, |
|
"rewards/margins_max": 18.69800567626953, |
|
"rewards/margins_min": 10.453584671020508, |
|
"rewards/margins_std": 5.829684734344482, |
|
"rewards/rejected": -23.465227127075195, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"grad_norm": 1.6920469977748351, |
|
"learning_rate": 8.860635805202616e-07, |
|
"logits/chosen": -1.551922082901001, |
|
"logits/rejected": -1.2580442428588867, |
|
"logps/chosen": -1456.9490966796875, |
|
"logps/rejected": -2604.62744140625, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -10.935505867004395, |
|
"rewards/margins": 12.657417297363281, |
|
"rewards/margins_max": 15.51282024383545, |
|
"rewards/margins_min": 9.802014350891113, |
|
"rewards/margins_std": 4.038149833679199, |
|
"rewards/rejected": -23.59292221069336, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"eval_logits/chosen": -1.5266377925872803, |
|
"eval_logits/rejected": -1.4433014392852783, |
|
"eval_logps/chosen": -1957.578857421875, |
|
"eval_logps/rejected": -2208.484375, |
|
"eval_loss": 1.8519227504730225, |
|
"eval_rewards/accuracies": 0.6746031641960144, |
|
"eval_rewards/chosen": -16.72357749938965, |
|
"eval_rewards/margins": 2.7694385051727295, |
|
"eval_rewards/margins_max": 11.662981033325195, |
|
"eval_rewards/margins_min": -5.304656982421875, |
|
"eval_rewards/margins_std": 7.62367582321167, |
|
"eval_rewards/rejected": -19.493017196655273, |
|
"eval_runtime": 282.5434, |
|
"eval_samples_per_second": 7.079, |
|
"eval_steps_per_second": 0.223, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"grad_norm": 3.2305387145726234, |
|
"learning_rate": 8.24332262395994e-07, |
|
"logits/chosen": -1.5742024183273315, |
|
"logits/rejected": -1.3343318700790405, |
|
"logps/chosen": -1459.0062255859375, |
|
"logps/rejected": -2835.21044921875, |
|
"loss": 0.0055, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -11.644388198852539, |
|
"rewards/margins": 14.268835067749023, |
|
"rewards/margins_max": 19.221527099609375, |
|
"rewards/margins_min": 9.316144943237305, |
|
"rewards/margins_std": 7.0041632652282715, |
|
"rewards/rejected": -25.913223266601562, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"grad_norm": 0.26542768442550385, |
|
"learning_rate": 7.644027904586587e-07, |
|
"logits/chosen": -1.50737726688385, |
|
"logits/rejected": -1.2445927858352661, |
|
"logps/chosen": -1452.3663330078125, |
|
"logps/rejected": -2697.02880859375, |
|
"loss": 0.0067, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -11.468404769897461, |
|
"rewards/margins": 13.425836563110352, |
|
"rewards/margins_max": 16.106616973876953, |
|
"rewards/margins_min": 10.745055198669434, |
|
"rewards/margins_std": 3.791196823120117, |
|
"rewards/rejected": -24.894241333007812, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"grad_norm": 0.8567763833713586, |
|
"learning_rate": 7.06339606893347e-07, |
|
"logits/chosen": -1.6803547143936157, |
|
"logits/rejected": -1.4048993587493896, |
|
"logps/chosen": -1588.3795166015625, |
|
"logps/rejected": -2856.94873046875, |
|
"loss": 0.0218, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -11.344830513000488, |
|
"rewards/margins": 14.68242073059082, |
|
"rewards/margins_max": 20.33969497680664, |
|
"rewards/margins_min": 9.025145530700684, |
|
"rewards/margins_std": 8.000594139099121, |
|
"rewards/rejected": -26.02724838256836, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"grad_norm": 0.19797390603665133, |
|
"learning_rate": 6.502051470645149e-07, |
|
"logits/chosen": -1.7654281854629517, |
|
"logits/rejected": -1.40230393409729, |
|
"logps/chosen": -1327.5189208984375, |
|
"logps/rejected": -2276.90771484375, |
|
"loss": 0.0218, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -9.360559463500977, |
|
"rewards/margins": 10.55632495880127, |
|
"rewards/margins_max": 12.99437141418457, |
|
"rewards/margins_min": 8.118279457092285, |
|
"rewards/margins_std": 3.4479167461395264, |
|
"rewards/rejected": -19.916885375976562, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"grad_norm": 0.0023467881665189677, |
|
"learning_rate": 5.960597723792194e-07, |
|
"logits/chosen": -1.5812981128692627, |
|
"logits/rejected": -1.1608024835586548, |
|
"logps/chosen": -1374.124267578125, |
|
"logps/rejected": -2819.462158203125, |
|
"loss": 0.0049, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -9.890588760375977, |
|
"rewards/margins": 15.723424911499023, |
|
"rewards/margins_max": 21.0240421295166, |
|
"rewards/margins_min": 10.422807693481445, |
|
"rewards/margins_std": 7.4962053298950195, |
|
"rewards/rejected": -25.614009857177734, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"grad_norm": 1.4084849928658003, |
|
"learning_rate": 5.43961705380465e-07, |
|
"logits/chosen": -1.646162986755371, |
|
"logits/rejected": -1.4091808795928955, |
|
"logps/chosen": -1218.2606201171875, |
|
"logps/rejected": -2409.643798828125, |
|
"loss": 0.0078, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -8.628401756286621, |
|
"rewards/margins": 12.78498649597168, |
|
"rewards/margins_max": 17.431535720825195, |
|
"rewards/margins_min": 8.138437271118164, |
|
"rewards/margins_std": 6.5712127685546875, |
|
"rewards/rejected": -21.413387298583984, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"grad_norm": 0.13595105985996128, |
|
"learning_rate": 4.939669671404871e-07, |
|
"logits/chosen": -1.5396533012390137, |
|
"logits/rejected": -1.2183513641357422, |
|
"logps/chosen": -1237.326904296875, |
|
"logps/rejected": -3156.015380859375, |
|
"loss": 0.0039, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.967730522155762, |
|
"rewards/margins": 19.433839797973633, |
|
"rewards/margins_max": 26.383316040039062, |
|
"rewards/margins_min": 12.484365463256836, |
|
"rewards/margins_std": 9.828042984008789, |
|
"rewards/rejected": -28.40157127380371, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"grad_norm": 0.012403182973777866, |
|
"learning_rate": 4.461293170212644e-07, |
|
"logits/chosen": -1.6268768310546875, |
|
"logits/rejected": -1.3297674655914307, |
|
"logps/chosen": -1231.2391357421875, |
|
"logps/rejected": -2482.310546875, |
|
"loss": 0.0125, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -9.140237808227539, |
|
"rewards/margins": 13.229069709777832, |
|
"rewards/margins_max": 16.058679580688477, |
|
"rewards/margins_min": 10.399457931518555, |
|
"rewards/margins_std": 4.001674175262451, |
|
"rewards/rejected": -22.369308471679688, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"grad_norm": 5.925107209728559, |
|
"learning_rate": 4.005001948670606e-07, |
|
"logits/chosen": -1.7953965663909912, |
|
"logits/rejected": -1.5808696746826172, |
|
"logps/chosen": -1377.26611328125, |
|
"logps/rejected": -2234.20849609375, |
|
"loss": 0.0043, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -9.003216743469238, |
|
"rewards/margins": 10.078218460083008, |
|
"rewards/margins_max": 11.774847030639648, |
|
"rewards/margins_min": 8.381589889526367, |
|
"rewards/margins_std": 2.39939546585083, |
|
"rewards/rejected": -19.08143424987793, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"grad_norm": 0.0018034560654693567, |
|
"learning_rate": 3.571286656911377e-07, |
|
"logits/chosen": -1.6509956121444702, |
|
"logits/rejected": -1.2617855072021484, |
|
"logps/chosen": -1374.924072265625, |
|
"logps/rejected": -2686.83154296875, |
|
"loss": 0.0034, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -9.74584674835205, |
|
"rewards/margins": 14.469047546386719, |
|
"rewards/margins_max": 20.866533279418945, |
|
"rewards/margins_min": 8.071561813354492, |
|
"rewards/margins_std": 9.04741096496582, |
|
"rewards/rejected": -24.214895248413086, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"eval_logits/chosen": -1.5324345827102661, |
|
"eval_logits/rejected": -1.4488511085510254, |
|
"eval_logps/chosen": -1899.9781494140625, |
|
"eval_logps/rejected": -2137.156982421875, |
|
"eval_loss": 1.6798701286315918, |
|
"eval_rewards/accuracies": 0.6865079402923584, |
|
"eval_rewards/chosen": -16.14756965637207, |
|
"eval_rewards/margins": 2.632173776626587, |
|
"eval_rewards/margins_max": 10.763092994689941, |
|
"eval_rewards/margins_min": -4.875840663909912, |
|
"eval_rewards/margins_std": 7.033862590789795, |
|
"eval_rewards/rejected": -18.77974510192871, |
|
"eval_runtime": 281.9065, |
|
"eval_samples_per_second": 7.095, |
|
"eval_steps_per_second": 0.223, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"grad_norm": 0.39851941407344293, |
|
"learning_rate": 3.1606136691612555e-07, |
|
"logits/chosen": -1.7041774988174438, |
|
"logits/rejected": -1.4187756776809692, |
|
"logps/chosen": -1301.1878662109375, |
|
"logps/rejected": -2172.826904296875, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.820059776306152, |
|
"rewards/margins": 10.524114608764648, |
|
"rewards/margins_max": 12.688272476196289, |
|
"rewards/margins_min": 8.359955787658691, |
|
"rewards/margins_std": 3.060582160949707, |
|
"rewards/rejected": -19.344173431396484, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"grad_norm": 0.0005374838985619683, |
|
"learning_rate": 2.773424582247844e-07, |
|
"logits/chosen": -1.5690796375274658, |
|
"logits/rejected": -1.2215526103973389, |
|
"logps/chosen": -1358.075927734375, |
|
"logps/rejected": -2381.899169921875, |
|
"loss": 0.0024, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -9.972057342529297, |
|
"rewards/margins": 11.921777725219727, |
|
"rewards/margins_max": 14.729642868041992, |
|
"rewards/margins_min": 9.113912582397461, |
|
"rewards/margins_std": 3.970921754837036, |
|
"rewards/rejected": -21.893835067749023, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"grad_norm": 0.8257494267996711, |
|
"learning_rate": 2.410135740750821e-07, |
|
"logits/chosen": -1.5338929891586304, |
|
"logits/rejected": -1.259865164756775, |
|
"logps/chosen": -1410.4990234375, |
|
"logps/rejected": -2998.914794921875, |
|
"loss": 0.0099, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -10.63892936706543, |
|
"rewards/margins": 16.653705596923828, |
|
"rewards/margins_max": 21.365177154541016, |
|
"rewards/margins_min": 11.942238807678223, |
|
"rewards/margins_std": 6.663023471832275, |
|
"rewards/rejected": -27.29263687133789, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"grad_norm": 0.06916221157748438, |
|
"learning_rate": 2.0711377893064182e-07, |
|
"logits/chosen": -1.5516988039016724, |
|
"logits/rejected": -1.2729582786560059, |
|
"logps/chosen": -1308.211669921875, |
|
"logps/rejected": -2490.35693359375, |
|
"loss": 0.0053, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -9.68997573852539, |
|
"rewards/margins": 13.111665725708008, |
|
"rewards/margins_max": 18.273632049560547, |
|
"rewards/margins_min": 7.9496965408325195, |
|
"rewards/margins_std": 7.300126075744629, |
|
"rewards/rejected": -22.801639556884766, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"grad_norm": 2.498417925921994, |
|
"learning_rate": 1.756795252547111e-07, |
|
"logits/chosen": -1.4785737991333008, |
|
"logits/rejected": -1.2068592309951782, |
|
"logps/chosen": -1470.0135498046875, |
|
"logps/rejected": -2859.243408203125, |
|
"loss": 0.0078, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -11.678686141967773, |
|
"rewards/margins": 14.885587692260742, |
|
"rewards/margins_max": 18.92436981201172, |
|
"rewards/margins_min": 10.846805572509766, |
|
"rewards/margins_std": 5.7117018699646, |
|
"rewards/rejected": -26.564273834228516, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"grad_norm": 0.30835027385045066, |
|
"learning_rate": 1.4674461431281013e-07, |
|
"logits/chosen": -1.6750847101211548, |
|
"logits/rejected": -1.3757655620574951, |
|
"logps/chosen": -1276.86669921875, |
|
"logps/rejected": -2703.418701171875, |
|
"loss": 0.0151, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -9.499726295471191, |
|
"rewards/margins": 15.09521198272705, |
|
"rewards/margins_max": 21.079849243164062, |
|
"rewards/margins_min": 9.11056900024414, |
|
"rewards/margins_std": 8.463561058044434, |
|
"rewards/rejected": -24.59493637084961, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"grad_norm": 0.23235990194938522, |
|
"learning_rate": 1.2034015982622243e-07, |
|
"logits/chosen": -1.5666346549987793, |
|
"logits/rejected": -1.2590982913970947, |
|
"logps/chosen": -1482.5379638671875, |
|
"logps/rejected": -2852.9375, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -11.074012756347656, |
|
"rewards/margins": 14.420585632324219, |
|
"rewards/margins_max": 18.83799934387207, |
|
"rewards/margins_min": 10.003174781799316, |
|
"rewards/margins_std": 6.24716329574585, |
|
"rewards/rejected": -25.494598388671875, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"grad_norm": 0.003130078676672441, |
|
"learning_rate": 9.649455451539419e-08, |
|
"logits/chosen": -1.2376658916473389, |
|
"logits/rejected": -0.9727104306221008, |
|
"logps/chosen": -1320.026123046875, |
|
"logps/rejected": -2890.248291015625, |
|
"loss": 0.0043, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -10.823871612548828, |
|
"rewards/margins": 16.33503532409668, |
|
"rewards/margins_max": 22.118406295776367, |
|
"rewards/margins_min": 10.551666259765625, |
|
"rewards/margins_std": 8.178921699523926, |
|
"rewards/rejected": -27.15890884399414, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"grad_norm": 0.01106748013868886, |
|
"learning_rate": 7.523343956923196e-08, |
|
"logits/chosen": -1.6014173030853271, |
|
"logits/rejected": -1.3725566864013672, |
|
"logps/chosen": -1455.7508544921875, |
|
"logps/rejected": -2784.856201171875, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -11.036726951599121, |
|
"rewards/margins": 13.958398818969727, |
|
"rewards/margins_max": 18.721614837646484, |
|
"rewards/margins_min": 9.19517993927002, |
|
"rewards/margins_std": 6.736205101013184, |
|
"rewards/rejected": -24.995126724243164, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"grad_norm": 0.21777107682252947, |
|
"learning_rate": 5.657967707312195e-08, |
|
"logits/chosen": -1.4147546291351318, |
|
"logits/rejected": -1.2533682584762573, |
|
"logps/chosen": -1340.80859375, |
|
"logps/rejected": -2710.937255859375, |
|
"loss": 0.0118, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -10.588825225830078, |
|
"rewards/margins": 13.658398628234863, |
|
"rewards/margins_max": 17.033788681030273, |
|
"rewards/margins_min": 10.28300666809082, |
|
"rewards/margins_std": 4.773523807525635, |
|
"rewards/rejected": -24.247220993041992, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"eval_logits/chosen": -1.51563560962677, |
|
"eval_logits/rejected": -1.4296027421951294, |
|
"eval_logps/chosen": -1952.324462890625, |
|
"eval_logps/rejected": -2219.474609375, |
|
"eval_loss": 1.8351484537124634, |
|
"eval_rewards/accuracies": 0.682539701461792, |
|
"eval_rewards/chosen": -16.671031951904297, |
|
"eval_rewards/margins": 2.931889057159424, |
|
"eval_rewards/margins_max": 11.962862014770508, |
|
"eval_rewards/margins_min": -5.289890766143799, |
|
"eval_rewards/margins_std": 7.766205787658691, |
|
"eval_rewards/rejected": -19.602922439575195, |
|
"eval_runtime": 281.5027, |
|
"eval_samples_per_second": 7.105, |
|
"eval_steps_per_second": 0.224, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"grad_norm": 0.4419550733032763, |
|
"learning_rate": 4.055332542531959e-08, |
|
"logits/chosen": -1.5433815717697144, |
|
"logits/rejected": -1.295972228050232, |
|
"logps/chosen": -1293.6630859375, |
|
"logps/rejected": -2648.736572265625, |
|
"loss": 0.0096, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -9.779963493347168, |
|
"rewards/margins": 14.112527847290039, |
|
"rewards/margins_max": 18.39639663696289, |
|
"rewards/margins_min": 9.828656196594238, |
|
"rewards/margins_std": 6.058306694030762, |
|
"rewards/rejected": -23.89249038696289, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"grad_norm": 0.14005943320430667, |
|
"learning_rate": 2.7171617768147472e-08, |
|
"logits/chosen": -1.398990273475647, |
|
"logits/rejected": -1.063157320022583, |
|
"logps/chosen": -1454.0186767578125, |
|
"logps/rejected": -2948.3251953125, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -11.425373077392578, |
|
"rewards/margins": 15.727473258972168, |
|
"rewards/margins_max": 20.60434341430664, |
|
"rewards/margins_min": 10.850606918334961, |
|
"rewards/margins_std": 6.896933078765869, |
|
"rewards/rejected": -27.152847290039062, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"grad_norm": 0.2626213621970617, |
|
"learning_rate": 1.6448943457189616e-08, |
|
"logits/chosen": -1.5582804679870605, |
|
"logits/rejected": -1.3218994140625, |
|
"logps/chosen": -1478.698974609375, |
|
"logps/rejected": -2884.353271484375, |
|
"loss": 0.0057, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -11.135309219360352, |
|
"rewards/margins": 14.943025588989258, |
|
"rewards/margins_max": 20.703128814697266, |
|
"rewards/margins_min": 9.1829195022583, |
|
"rewards/margins_std": 8.146018981933594, |
|
"rewards/rejected": -26.07833480834961, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"grad_norm": 2.8326701528782565, |
|
"learning_rate": 8.39683258841123e-09, |
|
"logits/chosen": -1.5044890642166138, |
|
"logits/rejected": -1.2109694480895996, |
|
"logps/chosen": -1402.8773193359375, |
|
"logps/rejected": -2849.219970703125, |
|
"loss": 0.0062, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -10.566572189331055, |
|
"rewards/margins": 15.638870239257812, |
|
"rewards/margins_max": 20.092174530029297, |
|
"rewards/margins_min": 11.185564041137695, |
|
"rewards/margins_std": 6.297926425933838, |
|
"rewards/rejected": -26.2054443359375, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"grad_norm": 0.3213477153635432, |
|
"learning_rate": 3.0239435998430376e-09, |
|
"logits/chosen": -1.4634066820144653, |
|
"logits/rejected": -1.1483074426651, |
|
"logps/chosen": -1369.406494140625, |
|
"logps/rejected": -2688.2548828125, |
|
"loss": 0.0034, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -10.378218650817871, |
|
"rewards/margins": 13.92640495300293, |
|
"rewards/margins_max": 18.696613311767578, |
|
"rewards/margins_min": 9.156195640563965, |
|
"rewards/margins_std": 6.746094703674316, |
|
"rewards/rejected": -24.304622650146484, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"grad_norm": 0.31694097428400714, |
|
"learning_rate": 3.3605396115826695e-10, |
|
"logits/chosen": -1.4050662517547607, |
|
"logits/rejected": -1.1527583599090576, |
|
"logps/chosen": -1549.754150390625, |
|
"logps/rejected": -2639.6474609375, |
|
"loss": 0.0027, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -12.238971710205078, |
|
"rewards/margins": 12.063154220581055, |
|
"rewards/margins_max": 15.284955978393555, |
|
"rewards/margins_min": 8.841352462768555, |
|
"rewards/margins_std": 4.5563154220581055, |
|
"rewards/rejected": -24.302127838134766, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 1065, |
|
"total_flos": 0.0, |
|
"train_loss": 0.1103198329137612, |
|
"train_runtime": 9245.0119, |
|
"train_samples_per_second": 1.843, |
|
"train_steps_per_second": 0.115 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1065, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|