{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.5156854318865493,
  "eval_steps": 50,
  "global_step": 600,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.008594757198109154,
      "grad_norm": 0.05934199318289757,
      "learning_rate": 4.999451708687114e-06,
      "logits/chosen": 14.762972831726074,
      "logits/rejected": 15.199728012084961,
      "logps/chosen": -0.3259914815425873,
      "logps/rejected": -0.34297481179237366,
      "loss": 0.9377,
      "rewards/accuracies": 0.4000000059604645,
      "rewards/chosen": -0.4889872074127197,
      "rewards/margins": 0.02547495998442173,
      "rewards/rejected": -0.5144621729850769,
      "step": 10
    },
    {
      "epoch": 0.017189514396218308,
      "grad_norm": 0.06342790275812149,
      "learning_rate": 4.997807075247147e-06,
      "logits/chosen": 14.351249694824219,
      "logits/rejected": 15.068448066711426,
      "logps/chosen": -0.2809392511844635,
      "logps/rejected": -0.3711296617984772,
      "loss": 0.9352,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": -0.42140883207321167,
      "rewards/margins": 0.1352856159210205,
      "rewards/rejected": -0.5566944479942322,
      "step": 20
    },
    {
      "epoch": 0.02578427159432746,
      "grad_norm": 0.053961098194122314,
      "learning_rate": 4.9950668210706795e-06,
      "logits/chosen": 14.636960983276367,
      "logits/rejected": 15.265243530273438,
      "logps/chosen": -0.2820780873298645,
      "logps/rejected": -0.34024301171302795,
      "loss": 0.9351,
      "rewards/accuracies": 0.4749999940395355,
      "rewards/chosen": -0.42311716079711914,
      "rewards/margins": 0.08724743127822876,
      "rewards/rejected": -0.5103646516799927,
      "step": 30
    },
    {
      "epoch": 0.034379028792436615,
      "grad_norm": 0.13506193459033966,
      "learning_rate": 4.9912321481237616e-06,
      "logits/chosen": 14.4556884765625,
      "logits/rejected": 15.048967361450195,
      "logps/chosen": -0.2897028625011444,
      "logps/rejected": -0.34129124879837036,
      "loss": 0.922,
      "rewards/accuracies": 0.44999998807907104,
      "rewards/chosen": -0.43455424904823303,
      "rewards/margins": 0.07738252729177475,
      "rewards/rejected": -0.5119368433952332,
      "step": 40
    },
    {
      "epoch": 0.042973785990545764,
      "grad_norm": 0.05230574309825897,
      "learning_rate": 4.986304738420684e-06,
      "logits/chosen": 14.628789901733398,
      "logits/rejected": 15.307828903198242,
      "logps/chosen": -0.28786614537239075,
      "logps/rejected": -0.3513876795768738,
      "loss": 0.9201,
      "rewards/accuracies": 0.5375000238418579,
      "rewards/chosen": -0.4317992329597473,
      "rewards/margins": 0.09528233855962753,
      "rewards/rejected": -0.5270815491676331,
      "step": 50
    },
    {
      "epoch": 0.042973785990545764,
      "eval_logits/chosen": 14.234943389892578,
      "eval_logits/rejected": 15.258601188659668,
      "eval_logps/chosen": -0.2844341993331909,
      "eval_logps/rejected": -0.3695394694805145,
      "eval_loss": 0.9226060509681702,
      "eval_rewards/accuracies": 0.5157894492149353,
      "eval_rewards/chosen": -0.42665132880210876,
      "eval_rewards/margins": 0.1276579648256302,
      "eval_rewards/rejected": -0.5543092489242554,
      "eval_runtime": 25.9356,
      "eval_samples_per_second": 29.033,
      "eval_steps_per_second": 3.663,
      "step": 50
    },
    {
      "epoch": 0.05156854318865492,
      "grad_norm": 0.09328428655862808,
      "learning_rate": 4.980286753286196e-06,
      "logits/chosen": 14.35963249206543,
      "logits/rejected": 15.055354118347168,
      "logps/chosen": -0.27534741163253784,
      "logps/rejected": -0.33098170161247253,
      "loss": 0.9356,
      "rewards/accuracies": 0.512499988079071,
      "rewards/chosen": -0.4130210876464844,
      "rewards/margins": 0.08345144242048264,
      "rewards/rejected": -0.4964725375175476,
      "step": 60
    },
    {
      "epoch": 0.060163300386764075,
      "grad_norm": 0.06518550217151642,
      "learning_rate": 4.973180832407471e-06,
      "logits/chosen": 14.599525451660156,
      "logits/rejected": 14.825297355651855,
      "logps/chosen": -0.2708163857460022,
      "logps/rejected": -0.3305850923061371,
      "loss": 0.9257,
      "rewards/accuracies": 0.550000011920929,
      "rewards/chosen": -0.4062245786190033,
      "rewards/margins": 0.08965305984020233,
      "rewards/rejected": -0.4958776533603668,
      "step": 70
    },
    {
      "epoch": 0.06875805758487323,
      "grad_norm": 0.07543154805898666,
      "learning_rate": 4.964990092676263e-06,
      "logits/chosen": 14.947430610656738,
      "logits/rejected": 15.093690872192383,
      "logps/chosen": -0.2602943778038025,
      "logps/rejected": -0.31820863485336304,
      "loss": 0.9168,
      "rewards/accuracies": 0.5,
      "rewards/chosen": -0.39044153690338135,
      "rewards/margins": 0.08687138557434082,
      "rewards/rejected": -0.47731298208236694,
      "step": 80
    },
    {
      "epoch": 0.07735281478298238,
      "grad_norm": 0.06628195196390152,
      "learning_rate": 4.9557181268217225e-06,
      "logits/chosen": 14.43529987335205,
      "logits/rejected": 14.750699043273926,
      "logps/chosen": -0.2884291708469391,
      "logps/rejected": -0.34193652868270874,
      "loss": 0.9273,
      "rewards/accuracies": 0.5249999761581421,
      "rewards/chosen": -0.43264374136924744,
      "rewards/margins": 0.08026103675365448,
      "rewards/rejected": -0.5129047632217407,
      "step": 90
    },
    {
      "epoch": 0.08594757198109153,
      "grad_norm": 0.08684897422790527,
      "learning_rate": 4.9453690018345144e-06,
      "logits/chosen": 13.573002815246582,
      "logits/rejected": 14.441877365112305,
      "logps/chosen": -0.2569890320301056,
      "logps/rejected": -0.37049269676208496,
      "loss": 0.9009,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": -0.3854835629463196,
      "rewards/margins": 0.17025551199913025,
      "rewards/rejected": -0.5557390451431274,
      "step": 100
    },
    {
      "epoch": 0.08594757198109153,
      "eval_logits/chosen": 14.026633262634277,
      "eval_logits/rejected": 15.08835220336914,
      "eval_logps/chosen": -0.2761566936969757,
      "eval_logps/rejected": -0.3717801570892334,
      "eval_loss": 0.9138591885566711,
      "eval_rewards/accuracies": 0.5368421077728271,
      "eval_rewards/chosen": -0.41423505544662476,
      "eval_rewards/margins": 0.1434352546930313,
      "eval_rewards/rejected": -0.5576702952384949,
      "eval_runtime": 25.3996,
      "eval_samples_per_second": 29.646,
      "eval_steps_per_second": 3.74,
      "step": 100
    },
    {
      "epoch": 0.09454232917920069,
      "grad_norm": 0.08046824485063553,
      "learning_rate": 4.933947257182901e-06,
      "logits/chosen": 14.500630378723145,
      "logits/rejected": 14.831761360168457,
      "logps/chosen": -0.30049553513526917,
      "logps/rejected": -0.3315966725349426,
      "loss": 0.916,
      "rewards/accuracies": 0.4625000059604645,
      "rewards/chosen": -0.45074325799942017,
      "rewards/margins": 0.04665176197886467,
      "rewards/rejected": -0.49739497900009155,
      "step": 110
    },
    {
      "epoch": 0.10313708637730984,
      "grad_norm": 0.12244562804698944,
      "learning_rate": 4.921457902821578e-06,
      "logits/chosen": 14.26713752746582,
      "logits/rejected": 14.495455741882324,
      "logps/chosen": -0.2670941650867462,
      "logps/rejected": -0.32481229305267334,
      "loss": 0.9167,
      "rewards/accuracies": 0.550000011920929,
      "rewards/chosen": -0.4006412625312805,
      "rewards/margins": 0.08657723665237427,
      "rewards/rejected": -0.4872184693813324,
      "step": 120
    },
    {
      "epoch": 0.11173184357541899,
      "grad_norm": 0.1828213334083557,
      "learning_rate": 4.907906416994146e-06,
      "logits/chosen": 14.009546279907227,
      "logits/rejected": 14.297094345092773,
      "logps/chosen": -0.27995598316192627,
      "logps/rejected": -0.3530685007572174,
      "loss": 0.9087,
      "rewards/accuracies": 0.5249999761581421,
      "rewards/chosen": -0.419933944940567,
      "rewards/margins": 0.10966875404119492,
      "rewards/rejected": -0.5296027660369873,
      "step": 130
    },
    {
      "epoch": 0.12032660077352815,
      "grad_norm": 0.10407563298940659,
      "learning_rate": 4.893298743830168e-06,
      "logits/chosen": 13.689155578613281,
      "logits/rejected": 14.1933012008667,
      "logps/chosen": -0.25955715775489807,
      "logps/rejected": -0.3815004229545593,
      "loss": 0.9053,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": -0.3893357217311859,
      "rewards/margins": 0.18291489779949188,
      "rewards/rejected": -0.5722506046295166,
      "step": 140
    },
    {
      "epoch": 0.1289213579716373,
      "grad_norm": 0.10028588026762009,
      "learning_rate": 4.8776412907378845e-06,
      "logits/chosen": 12.851397514343262,
      "logits/rejected": 13.509778022766113,
      "logps/chosen": -0.23652991652488708,
      "logps/rejected": -0.3720462918281555,
      "loss": 0.8999,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -0.3547949194908142,
      "rewards/margins": 0.2032744586467743,
      "rewards/rejected": -0.5580693483352661,
      "step": 150
    },
    {
      "epoch": 0.1289213579716373,
      "eval_logits/chosen": 12.384929656982422,
      "eval_logits/rejected": 13.672826766967773,
      "eval_logps/chosen": -0.27857670187950134,
      "eval_logps/rejected": -0.4014737904071808,
      "eval_loss": 0.8956203460693359,
      "eval_rewards/accuracies": 0.5684210658073425,
      "eval_rewards/chosen": -0.4178650677204132,
      "eval_rewards/margins": 0.18434564769268036,
      "eval_rewards/rejected": -0.6022107601165771,
      "eval_runtime": 25.4176,
      "eval_samples_per_second": 29.625,
      "eval_steps_per_second": 3.738,
      "step": 150
    },
    {
      "epoch": 0.13751611516974646,
      "grad_norm": 0.12453093379735947,
      "learning_rate": 4.860940925593703e-06,
      "logits/chosen": 12.110003471374512,
      "logits/rejected": 13.076980590820312,
      "logps/chosen": -0.27192068099975586,
      "logps/rejected": -0.3863692879676819,
      "loss": 0.8907,
      "rewards/accuracies": 0.5874999761581421,
      "rewards/chosen": -0.4078810214996338,
      "rewards/margins": 0.1716729700565338,
      "rewards/rejected": -0.5795539617538452,
      "step": 160
    },
    {
      "epoch": 0.1461108723678556,
      "grad_norm": 0.17137788236141205,
      "learning_rate": 4.84320497372973e-06,
      "logits/chosen": 11.92918586730957,
      "logits/rejected": 12.573629379272461,
      "logps/chosen": -0.27472984790802,
      "logps/rejected": -0.41249385476112366,
      "loss": 0.8831,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": -0.41209474205970764,
      "rewards/margins": 0.20664596557617188,
      "rewards/rejected": -0.6187406778335571,
      "step": 170
    },
    {
      "epoch": 0.15470562956596476,
      "grad_norm": 0.3904883861541748,
      "learning_rate": 4.824441214720629e-06,
      "logits/chosen": 11.182531356811523,
      "logits/rejected": 12.176573753356934,
      "logps/chosen": -0.2953718304634094,
      "logps/rejected": -0.4208717942237854,
      "loss": 0.8736,
      "rewards/accuracies": 0.5375000238418579,
      "rewards/chosen": -0.4430577754974365,
      "rewards/margins": 0.18824996054172516,
      "rewards/rejected": -0.6313077211380005,
      "step": 180
    },
    {
      "epoch": 0.1633003867640739,
      "grad_norm": 0.17574089765548706,
      "learning_rate": 4.804657878971252e-06,
      "logits/chosen": 10.119890213012695,
      "logits/rejected": 11.05900764465332,
      "logps/chosen": -0.29340866208076477,
      "logps/rejected": -0.4555762708187103,
      "loss": 0.884,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -0.44011297821998596,
      "rewards/margins": 0.24325144290924072,
      "rewards/rejected": -0.6833644509315491,
      "step": 190
    },
    {
      "epoch": 0.17189514396218306,
      "grad_norm": 0.2242884337902069,
      "learning_rate": 4.783863644106502e-06,
      "logits/chosen": 9.674784660339355,
      "logits/rejected": 10.418611526489258,
      "logps/chosen": -0.3504490852355957,
      "logps/rejected": -0.5431731939315796,
      "loss": 0.8419,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": -0.5256736278533936,
      "rewards/margins": 0.2890861928462982,
      "rewards/rejected": -0.8147598505020142,
      "step": 200
    },
    {
      "epoch": 0.17189514396218306,
      "eval_logits/chosen": 7.944870471954346,
      "eval_logits/rejected": 8.979729652404785,
      "eval_logps/chosen": -0.33341673016548157,
      "eval_logps/rejected": -0.5431775450706482,
      "eval_loss": 0.8462886810302734,
      "eval_rewards/accuracies": 0.6000000238418579,
      "eval_rewards/chosen": -0.5001251101493835,
      "eval_rewards/margins": 0.3146411180496216,
      "eval_rewards/rejected": -0.8147663474082947,
      "eval_runtime": 25.419,
      "eval_samples_per_second": 29.623,
      "eval_steps_per_second": 3.737,
      "step": 200
    },
    {
      "epoch": 0.18048990116029223,
      "grad_norm": 0.32119837403297424,
      "learning_rate": 4.762067631165049e-06,
      "logits/chosen": 7.16138219833374,
      "logits/rejected": 8.43680477142334,
      "logps/chosen": -0.36649250984191895,
      "logps/rejected": -0.5420924425125122,
      "loss": 0.8187,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": -0.5497387647628784,
      "rewards/margins": 0.2633998692035675,
      "rewards/rejected": -0.8131386041641235,
      "step": 210
    },
    {
      "epoch": 0.18908465835840138,
      "grad_norm": 0.48516562581062317,
      "learning_rate": 4.7392794005985324e-06,
      "logits/chosen": 4.770083427429199,
      "logits/rejected": 5.710458278656006,
      "logps/chosen": -0.34041497111320496,
      "logps/rejected": -0.6309320330619812,
      "loss": 0.8448,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": -0.510622501373291,
      "rewards/margins": 0.4357755780220032,
      "rewards/rejected": -0.9463980793952942,
      "step": 220
    },
    {
      "epoch": 0.19767941555651053,
      "grad_norm": 0.29154208302497864,
      "learning_rate": 4.715508948078037e-06,
      "logits/chosen": 5.168765068054199,
      "logits/rejected": 5.421420574188232,
      "logps/chosen": -0.3792352080345154,
      "logps/rejected": -0.65748131275177,
      "loss": 0.8066,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": -0.5688528418540955,
      "rewards/margins": 0.41736921668052673,
      "rewards/rejected": -0.986221969127655,
      "step": 230
    },
    {
      "epoch": 0.20627417275461968,
      "grad_norm": 0.42973750829696655,
      "learning_rate": 4.690766700109659e-06,
      "logits/chosen": 4.204717636108398,
      "logits/rejected": 3.706291913986206,
      "logps/chosen": -0.39414530992507935,
      "logps/rejected": -0.7194588780403137,
      "loss": 0.7787,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": -0.5912179350852966,
      "rewards/margins": 0.4879704415798187,
      "rewards/rejected": -1.079188346862793,
      "step": 240
    },
    {
      "epoch": 0.21486892995272883,
      "grad_norm": 0.5244571566581726,
      "learning_rate": 4.665063509461098e-06,
      "logits/chosen": 3.335484743118286,
      "logits/rejected": 3.3176345825195312,
      "logps/chosen": -0.4493131637573242,
      "logps/rejected": -0.8293434381484985,
      "loss": 0.7776,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -0.6739697456359863,
      "rewards/margins": 0.5700454115867615,
      "rewards/rejected": -1.244015097618103,
      "step": 250
    },
    {
      "epoch": 0.21486892995272883,
      "eval_logits/chosen": 2.590949058532715,
      "eval_logits/rejected": 2.2929749488830566,
      "eval_logps/chosen": -0.48714593052864075,
      "eval_logps/rejected": -0.9267774224281311,
      "eval_loss": 0.7469337582588196,
      "eval_rewards/accuracies": 0.6526315808296204,
      "eval_rewards/chosen": -0.7307189106941223,
      "eval_rewards/margins": 0.659447193145752,
      "eval_rewards/rejected": -1.390166163444519,
      "eval_runtime": 25.3944,
      "eval_samples_per_second": 29.652,
      "eval_steps_per_second": 3.741,
      "step": 250
    },
    {
      "epoch": 0.22346368715083798,
      "grad_norm": 0.39347293972969055,
      "learning_rate": 4.638410650401267e-06,
      "logits/chosen": 2.2975668907165527,
      "logits/rejected": 1.2855035066604614,
      "logps/chosen": -0.5228341817855835,
      "logps/rejected": -1.00227952003479,
      "loss": 0.6981,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": -0.78425133228302,
      "rewards/margins": 0.7191681265830994,
      "rewards/rejected": -1.5034195184707642,
      "step": 260
    },
    {
      "epoch": 0.23205844434894715,
      "grad_norm": 0.69575434923172,
      "learning_rate": 4.610819813755038e-06,
      "logits/chosen": 2.8782780170440674,
      "logits/rejected": 1.9394336938858032,
      "logps/chosen": -0.4982885718345642,
      "logps/rejected": -1.035541296005249,
      "loss": 0.7174,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": -0.7474328875541687,
      "rewards/margins": 0.8058789372444153,
      "rewards/rejected": -1.5533119440078735,
      "step": 270
    },
    {
      "epoch": 0.2406532015470563,
      "grad_norm": 0.7858326435089111,
      "learning_rate": 4.582303101775249e-06,
      "logits/chosen": 2.710908889770508,
      "logits/rejected": 1.6444288492202759,
      "logps/chosen": -0.600068211555481,
      "logps/rejected": -1.1271780729293823,
      "loss": 0.6972,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": -0.9001023173332214,
      "rewards/margins": 0.7906648516654968,
      "rewards/rejected": -1.6907672882080078,
      "step": 280
    },
    {
      "epoch": 0.24924795874516545,
      "grad_norm": 0.7384620904922485,
      "learning_rate": 4.55287302283426e-06,
      "logits/chosen": 1.5841500759124756,
      "logits/rejected": 0.640514612197876,
      "logps/chosen": -0.6465060710906982,
      "logps/rejected": -1.4245095252990723,
      "loss": 0.6192,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": -0.9697591066360474,
      "rewards/margins": 1.1670053005218506,
      "rewards/rejected": -2.1367642879486084,
      "step": 290
    },
    {
      "epoch": 0.2578427159432746,
      "grad_norm": 0.8262321352958679,
      "learning_rate": 4.522542485937369e-06,
      "logits/chosen": 1.7300422191619873,
      "logits/rejected": 0.7782856225967407,
      "logps/chosen": -0.7083590626716614,
      "logps/rejected": -1.6742557287216187,
      "loss": 0.5721,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": -1.062538504600525,
      "rewards/margins": 1.4488452672958374,
      "rewards/rejected": -2.511383533477783,
      "step": 300
    },
    {
      "epoch": 0.2578427159432746,
      "eval_logits/chosen": 1.3559931516647339,
      "eval_logits/rejected": 0.6592276096343994,
      "eval_logps/chosen": -0.7815767526626587,
      "eval_logps/rejected": -2.1176154613494873,
      "eval_loss": 0.5730626583099365,
      "eval_rewards/accuracies": 0.7052631378173828,
      "eval_rewards/chosen": -1.1723653078079224,
      "eval_rewards/margins": 2.0040581226348877,
      "eval_rewards/rejected": -3.1764233112335205,
      "eval_runtime": 25.539,
      "eval_samples_per_second": 29.484,
      "eval_steps_per_second": 3.72,
      "step": 300
    },
    {
      "epoch": 0.2664374731413838,
      "grad_norm": 0.8472572565078735,
      "learning_rate": 4.491324795060491e-06,
      "logits/chosen": 1.4461088180541992,
      "logits/rejected": 0.49669915437698364,
      "logps/chosen": -0.7694377899169922,
      "logps/rejected": -2.362783432006836,
      "loss": 0.5091,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": -1.1541565656661987,
      "rewards/margins": 2.390018939971924,
      "rewards/rejected": -3.544174909591675,
      "step": 310
    },
    {
      "epoch": 0.2750322303394929,
      "grad_norm": 0.41847530007362366,
      "learning_rate": 4.4592336433146e-06,
      "logits/chosen": 2.172646999359131,
      "logits/rejected": 1.0526962280273438,
      "logps/chosen": -0.7410945296287537,
      "logps/rejected": -1.9158353805541992,
      "loss": 0.5352,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": -1.1116416454315186,
      "rewards/margins": 1.7621114253997803,
      "rewards/rejected": -2.873753070831299,
      "step": 320
    },
    {
      "epoch": 0.28362698753760207,
      "grad_norm": 1.7422096729278564,
      "learning_rate": 4.426283106939474e-06,
      "logits/chosen": 2.611234188079834,
      "logits/rejected": 1.7068111896514893,
      "logps/chosen": -0.8319486379623413,
      "logps/rejected": -2.32024884223938,
      "loss": 0.5397,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": -1.2479230165481567,
      "rewards/margins": 2.232450008392334,
      "rewards/rejected": -3.480372905731201,
      "step": 330
    },
    {
      "epoch": 0.2922217447357112,
      "grad_norm": 0.8699240684509277,
      "learning_rate": 4.3924876391293915e-06,
      "logits/chosen": 1.996747612953186,
      "logits/rejected": 1.1473515033721924,
      "logps/chosen": -0.8445833921432495,
      "logps/rejected": -2.675687551498413,
      "loss": 0.4817,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": -1.2668750286102295,
      "rewards/margins": 2.7466559410095215,
      "rewards/rejected": -4.01353120803833,
      "step": 340
    },
    {
      "epoch": 0.30081650193382037,
      "grad_norm": 2.089289426803589,
      "learning_rate": 4.357862063693486e-06,
      "logits/chosen": 1.7134803533554077,
      "logits/rejected": 1.3000510931015015,
      "logps/chosen": -0.8976927995681763,
      "logps/rejected": -2.1593873500823975,
      "loss": 0.5098,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": -1.3465392589569092,
      "rewards/margins": 1.8925418853759766,
      "rewards/rejected": -3.2390809059143066,
      "step": 350
    },
    {
      "epoch": 0.30081650193382037,
      "eval_logits/chosen": 1.6772903203964233,
      "eval_logits/rejected": 1.2370609045028687,
      "eval_logps/chosen": -0.9737761616706848,
      "eval_logps/rejected": -3.1528680324554443,
      "eval_loss": 0.5162621736526489,
      "eval_rewards/accuracies": 0.7263157963752747,
      "eval_rewards/chosen": -1.46066415309906,
      "eval_rewards/margins": 3.2686376571655273,
      "eval_rewards/rejected": -4.729301929473877,
      "eval_runtime": 25.4163,
      "eval_samples_per_second": 29.627,
      "eval_steps_per_second": 3.738,
      "step": 350
    },
    {
      "epoch": 0.3094112591319295,
      "grad_norm": 0.47079572081565857,
      "learning_rate": 4.322421568553529e-06,
      "logits/chosen": 1.9561872482299805,
      "logits/rejected": 0.8960329294204712,
      "logps/chosen": -0.9378088712692261,
      "logps/rejected": -2.8065876960754395,
      "loss": 0.5046,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": -1.4067132472991943,
      "rewards/margins": 2.8031680583953857,
      "rewards/rejected": -4.209881782531738,
      "step": 360
    },
    {
      "epoch": 0.31800601633003867,
      "grad_norm": 0.6202365159988403,
      "learning_rate": 4.286181699082008e-06,
      "logits/chosen": 2.152726411819458,
      "logits/rejected": 1.4309433698654175,
      "logps/chosen": -1.007157564163208,
      "logps/rejected": -3.3813462257385254,
      "loss": 0.4526,
      "rewards/accuracies": 0.800000011920929,
      "rewards/chosen": -1.5107364654541016,
      "rewards/margins": 3.561283588409424,
      "rewards/rejected": -5.072019577026367,
      "step": 370
    },
    {
      "epoch": 0.3266007735281478,
      "grad_norm": 1.080393671989441,
      "learning_rate": 4.249158351283414e-06,
      "logits/chosen": 1.7528371810913086,
      "logits/rejected": 1.3293968439102173,
      "logps/chosen": -1.0258004665374756,
      "logps/rejected": -2.984057903289795,
      "loss": 0.4879,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": -1.5387006998062134,
      "rewards/margins": 2.9373860359191895,
      "rewards/rejected": -4.476086616516113,
      "step": 380
    },
    {
      "epoch": 0.33519553072625696,
      "grad_norm": 1.4520032405853271,
      "learning_rate": 4.211367764821722e-06,
      "logits/chosen": 3.061373233795166,
      "logits/rejected": 2.0103466510772705,
      "logps/chosen": -1.0191391706466675,
      "logps/rejected": -2.9054081439971924,
      "loss": 0.4776,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -1.5287089347839355,
      "rewards/margins": 2.8294031620025635,
      "rewards/rejected": -4.358112335205078,
      "step": 390
    },
    {
      "epoch": 0.3437902879243661,
      "grad_norm": 0.5479139089584351,
      "learning_rate": 4.172826515897146e-06,
      "logits/chosen": 2.8395092487335205,
      "logits/rejected": 2.0935282707214355,
      "logps/chosen": -1.0769506692886353,
      "logps/rejected": -3.11635160446167,
      "loss": 0.4686,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": -1.6154258251190186,
      "rewards/margins": 3.0591015815734863,
      "rewards/rejected": -4.674527168273926,
      "step": 400
    },
    {
      "epoch": 0.3437902879243661,
      "eval_logits/chosen": 2.5064592361450195,
      "eval_logits/rejected": 2.108433485031128,
      "eval_logps/chosen": -1.1957285404205322,
      "eval_logps/rejected": -3.7678382396698,
      "eval_loss": 0.46578800678253174,
      "eval_rewards/accuracies": 0.7368420958518982,
      "eval_rewards/chosen": -1.793592929840088,
      "eval_rewards/margins": 3.8581647872924805,
      "eval_rewards/rejected": -5.651757717132568,
      "eval_runtime": 25.415,
      "eval_samples_per_second": 29.628,
      "eval_steps_per_second": 3.738,
      "step": 400
    },
    {
      "epoch": 0.3523850451224753,
      "grad_norm": 0.9966821670532227,
      "learning_rate": 4.133551509975264e-06,
      "logits/chosen": 2.6411917209625244,
      "logits/rejected": 1.8634885549545288,
      "logps/chosen": -1.0934125185012817,
      "logps/rejected": -3.2207794189453125,
      "loss": 0.4335,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": -1.6401188373565674,
      "rewards/margins": 3.1910502910614014,
      "rewards/rejected": -4.831169128417969,
      "step": 410
    },
    {
      "epoch": 0.36097980232058446,
      "grad_norm": 0.6384722590446472,
      "learning_rate": 4.093559974371725e-06,
      "logits/chosen": 3.1368844509124756,
      "logits/rejected": 2.3800251483917236,
      "logps/chosen": -1.2108217477798462,
      "logps/rejected": -3.484806537628174,
      "loss": 0.4543,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": -1.816232681274414,
      "rewards/margins": 3.4109771251678467,
      "rewards/rejected": -5.227209568023682,
      "step": 420
    },
    {
      "epoch": 0.3695745595186936,
      "grad_norm": 0.856741726398468,
      "learning_rate": 4.052869450695776e-06,
      "logits/chosen": 3.155728816986084,
      "logits/rejected": 2.257838726043701,
      "logps/chosen": -1.4214586019515991,
      "logps/rejected": -4.186622619628906,
      "loss": 0.4091,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": -2.132187604904175,
      "rewards/margins": 4.1477460861206055,
      "rewards/rejected": -6.279933929443359,
      "step": 430
    },
    {
      "epoch": 0.37816931671680276,
      "grad_norm": 1.3310774564743042,
      "learning_rate": 4.011497787155938e-06,
      "logits/chosen": 1.9942185878753662,
      "logits/rejected": 1.6246827840805054,
      "logps/chosen": -1.8575637340545654,
      "logps/rejected": -4.5355329513549805,
      "loss": 0.3995,
      "rewards/accuracies": 0.862500011920929,
      "rewards/chosen": -2.7863457202911377,
      "rewards/margins": 4.016953945159912,
      "rewards/rejected": -6.8032989501953125,
      "step": 440
    },
    {
      "epoch": 0.3867640739149119,
      "grad_norm": 2.0849101543426514,
      "learning_rate": 3.969463130731183e-06,
      "logits/chosen": 2.406555652618408,
      "logits/rejected": 2.0490009784698486,
      "logps/chosen": -2.392570972442627,
      "logps/rejected": -5.055584907531738,
      "loss": 0.3671,
      "rewards/accuracies": 0.887499988079071,
      "rewards/chosen": -3.588855743408203,
      "rewards/margins": 3.994520902633667,
      "rewards/rejected": -7.583376884460449,
      "step": 450
    },
    {
      "epoch": 0.3867640739149119,
      "eval_logits/chosen": 2.2324020862579346,
      "eval_logits/rejected": 2.365755319595337,
      "eval_logps/chosen": -2.736898422241211,
      "eval_logps/rejected": -5.73967170715332,
      "eval_loss": 0.3965117633342743,
      "eval_rewards/accuracies": 0.8736842274665833,
      "eval_rewards/chosen": -4.105347633361816,
      "eval_rewards/margins": 4.504159927368164,
      "eval_rewards/rejected": -8.60950756072998,
      "eval_runtime": 25.428,
      "eval_samples_per_second": 29.613,
      "eval_steps_per_second": 3.736,
      "step": 450
    },
    {
      "epoch": 0.39535883111302106,
      "grad_norm": 2.223949432373047,
      "learning_rate": 3.92678391921108e-06,
      "logits/chosen": 2.651564598083496,
      "logits/rejected": 2.383842945098877,
      "logps/chosen": -2.591308355331421,
      "logps/rejected": -5.308972358703613,
      "loss": 0.3412,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": -3.886962413787842,
      "rewards/margins": 4.07649564743042,
      "rewards/rejected": -7.963458061218262,
      "step": 460
    },
    {
      "epoch": 0.4039535883111302,
      "grad_norm": 3.110624074935913,
      "learning_rate": 3.88347887310836e-06,
      "logits/chosen": 2.5435309410095215,
      "logits/rejected": 2.46763277053833,
      "logps/chosen": -2.413583993911743,
      "logps/rejected": -5.543262481689453,
      "loss": 0.3832,
      "rewards/accuracies": 0.824999988079071,
      "rewards/chosen": -3.620375871658325,
      "rewards/margins": 4.694517135620117,
      "rewards/rejected": -8.314892768859863,
      "step": 470
    },
    {
      "epoch": 0.41254834550923936,
      "grad_norm": 1.6255794763565063,
      "learning_rate": 3.839566987447492e-06,
      "logits/chosen": 3.842928409576416,
      "logits/rejected": 3.5797982215881348,
      "logps/chosen": -2.6448044776916504,
      "logps/rejected": -4.98160982131958,
      "loss": 0.3547,
      "rewards/accuracies": 0.8125,
      "rewards/chosen": -3.9672069549560547,
      "rewards/margins": 3.5052082538604736,
      "rewards/rejected": -7.472414493560791,
      "step": 480
    },
    {
      "epoch": 0.4211431027073485,
      "grad_norm": 2.9274284839630127,
      "learning_rate": 3.795067523432826e-06,
      "logits/chosen": 3.3297150135040283,
      "logits/rejected": 3.0205535888671875,
      "logps/chosen": -2.811923027038574,
      "logps/rejected": -6.040881156921387,
      "loss": 0.3097,
      "rewards/accuracies": 0.887499988079071,
      "rewards/chosen": -4.217884063720703,
      "rewards/margins": 4.843437194824219,
      "rewards/rejected": -9.061322212219238,
      "step": 490
    },
    {
      "epoch": 0.42973785990545765,
      "grad_norm": 2.9143636226654053,
      "learning_rate": 3.7500000000000005e-06,
      "logits/chosen": 2.760014772415161,
      "logits/rejected": 2.535520315170288,
      "logps/chosen": -3.068406820297241,
      "logps/rejected": -5.877435684204102,
      "loss": 0.3031,
      "rewards/accuracies": 0.875,
      "rewards/chosen": -4.602609634399414,
      "rewards/margins": 4.21354341506958,
      "rewards/rejected": -8.816153526306152,
      "step": 500
    },
    {
      "epoch": 0.42973785990545765,
      "eval_logits/chosen": 2.0952131748199463,
      "eval_logits/rejected": 2.1864659786224365,
      "eval_logps/chosen": -3.392296075820923,
      "eval_logps/rejected": -6.948195457458496,
      "eval_loss": 0.33660775423049927,
      "eval_rewards/accuracies": 0.9263157844543457,
      "eval_rewards/chosen": -5.088444232940674,
      "eval_rewards/margins": 5.3338494300842285,
      "eval_rewards/rejected": -10.422293663024902,
      "eval_runtime": 25.4226,
      "eval_samples_per_second": 29.619,
      "eval_steps_per_second": 3.737,
      "step": 500
    },
    {
      "epoch": 0.4383326171035668,
      "grad_norm": 2.563810348510742,
      "learning_rate": 3.7043841852542884e-06,
      "logits/chosen": 2.950286388397217,
      "logits/rejected": 2.619025945663452,
      "logps/chosen": -3.237391710281372,
      "logps/rejected": -5.953216552734375,
      "loss": 0.318,
      "rewards/accuracies": 0.9125000238418579,
      "rewards/chosen": -4.856087684631348,
      "rewards/margins": 4.073737144470215,
      "rewards/rejected": -8.929824829101562,
      "step": 510
    },
    {
      "epoch": 0.44692737430167595,
      "grad_norm": 2.0339434146881104,
      "learning_rate": 3.658240087799655e-06,
      "logits/chosen": 2.987595558166504,
      "logits/rejected": 2.6243975162506104,
      "logps/chosen": -3.5633530616760254,
      "logps/rejected": -7.0458879470825195,
      "loss": 0.3053,
      "rewards/accuracies": 0.8999999761581421,
      "rewards/chosen": -5.345029354095459,
      "rewards/margins": 5.223802089691162,
      "rewards/rejected": -10.568831443786621,
      "step": 520
    },
    {
      "epoch": 0.45552213149978515,
      "grad_norm": 4.091029644012451,
      "learning_rate": 3.611587947962319e-06,
      "logits/chosen": 2.297576904296875,
      "logits/rejected": 2.0218777656555176,
      "logps/chosen": -3.297245502471924,
      "logps/rejected": -6.101919651031494,
      "loss": 0.3255,
      "rewards/accuracies": 0.887499988079071,
      "rewards/chosen": -4.945868015289307,
      "rewards/margins": 4.207010746002197,
      "rewards/rejected": -9.152878761291504,
      "step": 530
    },
    {
      "epoch": 0.4641168886978943,
      "grad_norm": 2.7896900177001953,
      "learning_rate": 3.564448228912682e-06,
      "logits/chosen": 2.103950023651123,
      "logits/rejected": 1.9478647708892822,
      "logps/chosen": -2.9360263347625732,
      "logps/rejected": -6.406435489654541,
      "loss": 0.3361,
      "rewards/accuracies": 0.8999999761581421,
      "rewards/chosen": -4.40403938293457,
      "rewards/margins": 5.20561408996582,
      "rewards/rejected": -9.60965347290039,
      "step": 540
    },
    {
      "epoch": 0.47271164589600345,
      "grad_norm": 2.657970905303955,
      "learning_rate": 3.516841607689501e-06,
      "logits/chosen": 2.1658639907836914,
      "logits/rejected": 2.214900493621826,
      "logps/chosen": -3.084073066711426,
      "logps/rejected": -6.935500144958496,
      "loss": 0.2928,
      "rewards/accuracies": 0.862500011920929,
      "rewards/chosen": -4.626110076904297,
      "rewards/margins": 5.7771406173706055,
      "rewards/rejected": -10.403249740600586,
      "step": 550
{ | |
"epoch": 0.47271164589600345, | |
"eval_logits/chosen": 2.285294771194458, | |
"eval_logits/rejected": 2.3312103748321533, | |
"eval_logps/chosen": -3.35794997215271, | |
"eval_logps/rejected": -7.37537145614624, | |
"eval_loss": 0.3121817409992218, | |
"eval_rewards/accuracies": 0.9263157844543457, | |
"eval_rewards/chosen": -5.036925792694092, | |
"eval_rewards/margins": 6.026132106781006, | |
"eval_rewards/rejected": -11.063057899475098, | |
"eval_runtime": 25.4015, | |
"eval_samples_per_second": 29.644, | |
"eval_steps_per_second": 3.74, | |
"step": 550 | |
}, | |
{ | |
"epoch": 0.4813064030941126, | |
"grad_norm": 2.940019369125366, | |
"learning_rate": 3.4687889661302577e-06, | |
"logits/chosen": 1.9122416973114014, | |
"logits/rejected": 1.9943454265594482, | |
"logps/chosen": -3.27177095413208, | |
"logps/rejected": -7.023342132568359, | |
"loss": 0.3105, | |
"rewards/accuracies": 0.925000011920929, | |
"rewards/chosen": -4.907656669616699, | |
"rewards/margins": 5.6273579597473145, | |
"rewards/rejected": -10.535014152526855, | |
"step": 560 | |
}, | |
{ | |
"epoch": 0.48990116029222175, | |
"grad_norm": 1.8887412548065186, | |
"learning_rate": 3.4203113817116955e-06, | |
"logits/chosen": 2.274843692779541, | |
"logits/rejected": 2.392199993133545, | |
"logps/chosen": -3.383749008178711, | |
"logps/rejected": -7.265415191650391, | |
"loss": 0.3003, | |
"rewards/accuracies": 0.9750000238418579, | |
"rewards/chosen": -5.075623512268066, | |
"rewards/margins": 5.8224992752075195, | |
"rewards/rejected": -10.898123741149902, | |
"step": 570 | |
}, | |
{ | |
"epoch": 0.4984959174903309, | |
"grad_norm": 1.6364414691925049, | |
"learning_rate": 3.3714301183045382e-06, | |
"logits/chosen": 2.423910617828369, | |
"logits/rejected": 2.244985818862915, | |
"logps/chosen": -3.0959205627441406, | |
"logps/rejected": -6.822405815124512, | |
"loss": 0.2471, | |
"rewards/accuracies": 0.9624999761581421, | |
"rewards/chosen": -4.643880844116211, | |
"rewards/margins": 5.58972692489624, | |
"rewards/rejected": -10.233609199523926, | |
"step": 580 | |
}, | |
{ | |
"epoch": 0.50709067468844, | |
"grad_norm": 2.6540188789367676, | |
"learning_rate": 3.3221666168464584e-06, | |
"logits/chosen": 2.8146812915802, | |
"logits/rejected": 2.5971922874450684, | |
"logps/chosen": -4.139407157897949, | |
"logps/rejected": -7.71649694442749, | |
"loss": 0.2809, | |
"rewards/accuracies": 0.9624999761581421, | |
"rewards/chosen": -6.209111213684082, | |
"rewards/margins": 5.365634441375732, | |
"rewards/rejected": -11.574746131896973, | |
"step": 590 | |
}, | |
{ | |
"epoch": 0.5156854318865493, | |
"grad_norm": 4.229885578155518, | |
"learning_rate": 3.272542485937369e-06, | |
"logits/chosen": 2.2735249996185303, | |
"logits/rejected": 1.8577899932861328, | |
"logps/chosen": -3.731342315673828, | |
"logps/rejected": -7.2900390625, | |
"loss": 0.2956, | |
"rewards/accuracies": 0.887499988079071, | |
"rewards/chosen": -5.5970139503479, | |
"rewards/margins": 5.338044166564941, | |
"rewards/rejected": -10.93505859375, | |
"step": 600 | |
}, | |
{ | |
"epoch": 0.5156854318865493, | |
"eval_logits/chosen": 2.3333992958068848, | |
"eval_logits/rejected": 2.529745578765869, | |
"eval_logps/chosen": -3.679597854614258, | |
"eval_logps/rejected": -7.917842864990234, | |
"eval_loss": 0.3030374050140381, | |
"eval_rewards/accuracies": 0.9263157844543457, | |
"eval_rewards/chosen": -5.519396781921387, | |
"eval_rewards/margins": 6.357367992401123, | |
"eval_rewards/rejected": -11.876765251159668, | |
"eval_runtime": 25.5622, | |
"eval_samples_per_second": 29.458, | |
"eval_steps_per_second": 3.716, | |
"step": 600 | |
} | |
], | |
"logging_steps": 10, | |
"max_steps": 1500, | |
"num_input_tokens_seen": 0, | |
"num_train_epochs": 2, | |
"save_steps": 50, | |
"stateful_callbacks": { | |
"TrainerControl": { | |
"args": { | |
"should_epoch_stop": false, | |
"should_evaluate": false, | |
"should_log": false, | |
"should_save": true, | |
"should_training_stop": false | |
}, | |
"attributes": {} | |
} | |
}, | |
"total_flos": 1.3837243562275635e+18, | |
"train_batch_size": 1, | |
"trial_name": null, | |
"trial_params": null | |
} | |