|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 100, |
|
"global_step": 1065, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"dpo_losses": 0.6931471824645996, |
|
"epoch": 0.0, |
|
"grad_norm": 1.601693496929256, |
|
"learning_rate": 4.6728971962616815e-09, |
|
"logits/chosen": -2.861618995666504, |
|
"logits/rejected": -2.8205904960632324, |
|
"logps/chosen": -271.06011962890625, |
|
"logps/rejected": -211.1704559326172, |
|
"loss": 0.6931, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/margins_max": 0.0, |
|
"rewards/margins_min": 0.0, |
|
"rewards/margins_std": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"dpo_losses": 0.6933066844940186, |
|
"epoch": 0.03, |
|
"grad_norm": 20.410276988279243, |
|
"learning_rate": 4.672897196261682e-08, |
|
"logits/chosen": -2.8335423469543457, |
|
"logits/rejected": -2.7909910678863525, |
|
"logps/chosen": -325.0599365234375, |
|
"logps/rejected": -274.9435119628906, |
|
"loss": 0.6965, |
|
"positive_losses": 0.050453320145606995, |
|
"rewards/accuracies": 0.2638888955116272, |
|
"rewards/chosen": -9.644959936849773e-05, |
|
"rewards/margins": -0.00031749275512993336, |
|
"rewards/margins_max": 0.0014122920110821724, |
|
"rewards/margins_min": -0.0021905910689383745, |
|
"rewards/margins_std": 0.0016328593483194709, |
|
"rewards/rejected": 0.00022104315576143563, |
|
"step": 10 |
|
}, |
|
{ |
|
"dpo_losses": 0.6930336952209473, |
|
"epoch": 0.06, |
|
"grad_norm": 21.230310271372627, |
|
"learning_rate": 9.345794392523364e-08, |
|
"logits/chosen": -2.725196361541748, |
|
"logits/rejected": -2.706851005554199, |
|
"logps/chosen": -293.7933654785156, |
|
"logps/rejected": -215.7693328857422, |
|
"loss": 0.6994, |
|
"positive_losses": 0.07486093044281006, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.00032553955679759383, |
|
"rewards/margins": 0.00022889305546414107, |
|
"rewards/margins_max": 0.002858394058421254, |
|
"rewards/margins_min": -0.0026940270327031612, |
|
"rewards/margins_std": 0.0024744768161326647, |
|
"rewards/rejected": 9.664653771324083e-05, |
|
"step": 20 |
|
}, |
|
{ |
|
"dpo_losses": 0.6927663087844849, |
|
"epoch": 0.08, |
|
"grad_norm": 17.85805321688973, |
|
"learning_rate": 1.4018691588785045e-07, |
|
"logits/chosen": -2.819422960281372, |
|
"logits/rejected": -2.750398874282837, |
|
"logps/chosen": -303.5675048828125, |
|
"logps/rejected": -232.4523162841797, |
|
"loss": 0.6975, |
|
"positive_losses": 0.04014534875750542, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.0013837231090292335, |
|
"rewards/margins": 0.0007641493575647473, |
|
"rewards/margins_max": 0.003605480073019862, |
|
"rewards/margins_min": -0.0024470784701406956, |
|
"rewards/margins_std": 0.002705852035433054, |
|
"rewards/rejected": 0.000619573867879808, |
|
"step": 30 |
|
}, |
|
{ |
|
"dpo_losses": 0.6928492188453674, |
|
"epoch": 0.11, |
|
"grad_norm": 1.7332928692545067, |
|
"learning_rate": 1.8691588785046729e-07, |
|
"logits/chosen": -2.8429579734802246, |
|
"logits/rejected": -2.7627620697021484, |
|
"logps/chosen": -277.1645202636719, |
|
"logps/rejected": -225.4744415283203, |
|
"loss": 0.6937, |
|
"positive_losses": 0.007175350096076727, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.003201347542926669, |
|
"rewards/margins": 0.0005985596217215061, |
|
"rewards/margins_max": 0.003463293891400099, |
|
"rewards/margins_min": -0.00237638084217906, |
|
"rewards/margins_std": 0.0026152683421969414, |
|
"rewards/rejected": 0.0026027881540358067, |
|
"step": 40 |
|
}, |
|
{ |
|
"dpo_losses": 0.6924293637275696, |
|
"epoch": 0.14, |
|
"grad_norm": 2.2231645321380933, |
|
"learning_rate": 2.336448598130841e-07, |
|
"logits/chosen": -2.8112452030181885, |
|
"logits/rejected": -2.7390694618225098, |
|
"logps/chosen": -275.0422058105469, |
|
"logps/rejected": -233.49331665039062, |
|
"loss": 0.6929, |
|
"positive_losses": 0.007281684782356024, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.0052792662754654884, |
|
"rewards/margins": 0.001439323415979743, |
|
"rewards/margins_max": 0.0054040043614804745, |
|
"rewards/margins_min": -0.0021666125394403934, |
|
"rewards/margins_std": 0.003360858652740717, |
|
"rewards/rejected": 0.003839943092316389, |
|
"step": 50 |
|
}, |
|
{ |
|
"dpo_losses": 0.6919517517089844, |
|
"epoch": 0.17, |
|
"grad_norm": 2.413825162312939, |
|
"learning_rate": 2.803738317757009e-07, |
|
"logits/chosen": -2.7509729862213135, |
|
"logits/rejected": -2.7115964889526367, |
|
"logps/chosen": -312.9291076660156, |
|
"logps/rejected": -262.30059814453125, |
|
"loss": 0.6935, |
|
"positive_losses": 0.00518798828125, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.007105088327080011, |
|
"rewards/margins": 0.002395912306383252, |
|
"rewards/margins_max": 0.00661232927814126, |
|
"rewards/margins_min": -0.000950089015532285, |
|
"rewards/margins_std": 0.0033020805567502975, |
|
"rewards/rejected": 0.004709175787866116, |
|
"step": 60 |
|
}, |
|
{ |
|
"dpo_losses": 0.6914928555488586, |
|
"epoch": 0.2, |
|
"grad_norm": 1.8246267381191894, |
|
"learning_rate": 3.271028037383177e-07, |
|
"logits/chosen": -2.78471302986145, |
|
"logits/rejected": -2.722522258758545, |
|
"logps/chosen": -321.91546630859375, |
|
"logps/rejected": -235.0277557373047, |
|
"loss": 0.6913, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.009457603096961975, |
|
"rewards/margins": 0.0033192108385264874, |
|
"rewards/margins_max": 0.009357670322060585, |
|
"rewards/margins_min": -0.001532487804070115, |
|
"rewards/margins_std": 0.0049352385103702545, |
|
"rewards/rejected": 0.006138390861451626, |
|
"step": 70 |
|
}, |
|
{ |
|
"dpo_losses": 0.6906715631484985, |
|
"epoch": 0.23, |
|
"grad_norm": 1.8774986784177312, |
|
"learning_rate": 3.7383177570093457e-07, |
|
"logits/chosen": -2.8345515727996826, |
|
"logits/rejected": -2.7776432037353516, |
|
"logps/chosen": -320.06634521484375, |
|
"logps/rejected": -285.67156982421875, |
|
"loss": 0.6909, |
|
"positive_losses": 0.0006896972772665322, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.012041566893458366, |
|
"rewards/margins": 0.004970206413418055, |
|
"rewards/margins_max": 0.013391142711043358, |
|
"rewards/margins_min": -0.0014745177468284965, |
|
"rewards/margins_std": 0.006737919058650732, |
|
"rewards/rejected": 0.007071360945701599, |
|
"step": 80 |
|
}, |
|
{ |
|
"dpo_losses": 0.6892363429069519, |
|
"epoch": 0.25, |
|
"grad_norm": 2.6194297181523774, |
|
"learning_rate": 4.205607476635514e-07, |
|
"logits/chosen": -2.7694807052612305, |
|
"logits/rejected": -2.726945400238037, |
|
"logps/chosen": -274.4305725097656, |
|
"logps/rejected": -198.20901489257812, |
|
"loss": 0.6894, |
|
"positive_losses": 0.0005071639898233116, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.015641968697309494, |
|
"rewards/margins": 0.007860125042498112, |
|
"rewards/margins_max": 0.01880500093102455, |
|
"rewards/margins_min": -0.0005177496350370347, |
|
"rewards/margins_std": 0.008911145851016045, |
|
"rewards/rejected": 0.007781843654811382, |
|
"step": 90 |
|
}, |
|
{ |
|
"dpo_losses": 0.6890109777450562, |
|
"epoch": 0.28, |
|
"grad_norm": 1.9205518680432034, |
|
"learning_rate": 4.672897196261682e-07, |
|
"logits/chosen": -2.80806303024292, |
|
"logits/rejected": -2.7697219848632812, |
|
"logps/chosen": -279.78045654296875, |
|
"logps/rejected": -261.3872375488281, |
|
"loss": 0.6884, |
|
"positive_losses": 0.0013298034900799394, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.016312729567289352, |
|
"rewards/margins": 0.00831080786883831, |
|
"rewards/margins_max": 0.018694214522838593, |
|
"rewards/margins_min": 0.00036361132515594363, |
|
"rewards/margins_std": 0.00843831431120634, |
|
"rewards/rejected": 0.008001920767128468, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_dpo_losses": 0.6917725205421448, |
|
"eval_logits/chosen": -2.8041651248931885, |
|
"eval_logits/rejected": -2.7650842666625977, |
|
"eval_logps/chosen": -283.2318420410156, |
|
"eval_logps/rejected": -257.4949035644531, |
|
"eval_loss": 0.6931358575820923, |
|
"eval_positive_losses": 0.011085770092904568, |
|
"eval_rewards/accuracies": 0.6079999804496765, |
|
"eval_rewards/chosen": 0.0136156240478158, |
|
"eval_rewards/margins": 0.0027753659524023533, |
|
"eval_rewards/margins_max": 0.017850197851657867, |
|
"eval_rewards/margins_min": -0.010674619115889072, |
|
"eval_rewards/margins_std": 0.009409897960722446, |
|
"eval_rewards/rejected": 0.010840258561074734, |
|
"eval_runtime": 428.0491, |
|
"eval_samples_per_second": 4.672, |
|
"eval_steps_per_second": 0.292, |
|
"step": 100 |
|
}, |
|
{ |
|
"dpo_losses": 0.6845903396606445, |
|
"epoch": 0.31, |
|
"grad_norm": 2.377556980384867, |
|
"learning_rate": 4.999879018839287e-07, |
|
"logits/chosen": -2.8750226497650146, |
|
"logits/rejected": -2.7789218425750732, |
|
"logps/chosen": -372.0056457519531, |
|
"logps/rejected": -285.5588073730469, |
|
"loss": 0.6848, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.026000410318374634, |
|
"rewards/margins": 0.017250509932637215, |
|
"rewards/margins_max": 0.0338827520608902, |
|
"rewards/margins_min": 0.0025702244602143764, |
|
"rewards/margins_std": 0.01393597386777401, |
|
"rewards/rejected": 0.008749897591769695, |
|
"step": 110 |
|
}, |
|
{ |
|
"dpo_losses": 0.6825487613677979, |
|
"epoch": 0.34, |
|
"grad_norm": 2.115970951796753, |
|
"learning_rate": 4.997728568369408e-07, |
|
"logits/chosen": -2.844456911087036, |
|
"logits/rejected": -2.790116786956787, |
|
"logps/chosen": -345.28887939453125, |
|
"logps/rejected": -265.2372741699219, |
|
"loss": 0.6828, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.03097311221063137, |
|
"rewards/margins": 0.02140369825065136, |
|
"rewards/margins_max": 0.042990267276763916, |
|
"rewards/margins_min": 0.005240675527602434, |
|
"rewards/margins_std": 0.017378441989421844, |
|
"rewards/rejected": 0.009569412097334862, |
|
"step": 120 |
|
}, |
|
{ |
|
"dpo_losses": 0.679545521736145, |
|
"epoch": 0.37, |
|
"grad_norm": 1.8392631764823357, |
|
"learning_rate": 4.992892309373227e-07, |
|
"logits/chosen": -2.8388783931732178, |
|
"logits/rejected": -2.7566184997558594, |
|
"logps/chosen": -378.5120849609375, |
|
"logps/rejected": -299.33154296875, |
|
"loss": 0.6796, |
|
"positive_losses": 0.00618324289098382, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.0385219044983387, |
|
"rewards/margins": 0.027528375387191772, |
|
"rewards/margins_max": 0.05331053584814072, |
|
"rewards/margins_min": 0.003396461484953761, |
|
"rewards/margins_std": 0.021965984255075455, |
|
"rewards/rejected": 0.01099353563040495, |
|
"step": 130 |
|
}, |
|
{ |
|
"dpo_losses": 0.6778298020362854, |
|
"epoch": 0.39, |
|
"grad_norm": 1.4924163642849055, |
|
"learning_rate": 4.985375442281968e-07, |
|
"logits/chosen": -2.791003704071045, |
|
"logits/rejected": -2.751797914505005, |
|
"logps/chosen": -314.1080322265625, |
|
"logps/rejected": -268.6662902832031, |
|
"loss": 0.6798, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.04306010901927948, |
|
"rewards/margins": 0.03122769668698311, |
|
"rewards/margins_max": 0.06796383112668991, |
|
"rewards/margins_min": 0.004374162759631872, |
|
"rewards/margins_std": 0.02954345941543579, |
|
"rewards/rejected": 0.011832410469651222, |
|
"step": 140 |
|
}, |
|
{ |
|
"dpo_losses": 0.6768456101417542, |
|
"epoch": 0.42, |
|
"grad_norm": 2.003305207476629, |
|
"learning_rate": 4.975186049985817e-07, |
|
"logits/chosen": -2.7922985553741455, |
|
"logits/rejected": -2.752359628677368, |
|
"logps/chosen": -284.1427307128906, |
|
"logps/rejected": -211.85299682617188, |
|
"loss": 0.6759, |
|
"positive_losses": 0.000370025634765625, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.04542272537946701, |
|
"rewards/margins": 0.033125050365924835, |
|
"rewards/margins_max": 0.06794625520706177, |
|
"rewards/margins_min": 0.004188986029475927, |
|
"rewards/margins_std": 0.029155880212783813, |
|
"rewards/rejected": 0.012297680601477623, |
|
"step": 150 |
|
}, |
|
{ |
|
"dpo_losses": 0.6737908720970154, |
|
"epoch": 0.45, |
|
"grad_norm": 1.673622074152846, |
|
"learning_rate": 4.962335089142375e-07, |
|
"logits/chosen": -2.770982265472412, |
|
"logits/rejected": -2.716006278991699, |
|
"logps/chosen": -295.1300048828125, |
|
"logps/rejected": -232.91378784179688, |
|
"loss": 0.6732, |
|
"positive_losses": 0.00577545166015625, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.054168350994586945, |
|
"rewards/margins": 0.03937327116727829, |
|
"rewards/margins_max": 0.0770280510187149, |
|
"rewards/margins_min": 0.005630848463624716, |
|
"rewards/margins_std": 0.03246045857667923, |
|
"rewards/rejected": 0.014795074239373207, |
|
"step": 160 |
|
}, |
|
{ |
|
"dpo_losses": 0.6671137809753418, |
|
"epoch": 0.48, |
|
"grad_norm": 1.849316679565777, |
|
"learning_rate": 4.946836378394966e-07, |
|
"logits/chosen": -2.8234620094299316, |
|
"logits/rejected": -2.76485276222229, |
|
"logps/chosen": -323.1846923828125, |
|
"logps/rejected": -295.15374755859375, |
|
"loss": 0.6687, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.06436900794506073, |
|
"rewards/margins": 0.05324960872530937, |
|
"rewards/margins_max": 0.1005321741104126, |
|
"rewards/margins_min": 0.010768366977572441, |
|
"rewards/margins_std": 0.04033435881137848, |
|
"rewards/rejected": 0.011119391769170761, |
|
"step": 170 |
|
}, |
|
{ |
|
"dpo_losses": 0.6649172902107239, |
|
"epoch": 0.51, |
|
"grad_norm": 1.9724747678160601, |
|
"learning_rate": 4.92870658351344e-07, |
|
"logits/chosen": -2.815851926803589, |
|
"logits/rejected": -2.727149248123169, |
|
"logps/chosen": -324.7388916015625, |
|
"logps/rejected": -292.8143615722656, |
|
"loss": 0.6646, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.07161912322044373, |
|
"rewards/margins": 0.057913679629564285, |
|
"rewards/margins_max": 0.1177336573600769, |
|
"rewards/margins_min": 0.01562586799263954, |
|
"rewards/margins_std": 0.04572517052292824, |
|
"rewards/rejected": 0.013705444522202015, |
|
"step": 180 |
|
}, |
|
{ |
|
"dpo_losses": 0.6602508425712585, |
|
"epoch": 0.54, |
|
"grad_norm": 2.366543924703805, |
|
"learning_rate": 4.90796519947347e-07, |
|
"logits/chosen": -2.776212215423584, |
|
"logits/rejected": -2.6835803985595703, |
|
"logps/chosen": -313.767578125, |
|
"logps/rejected": -236.1117706298828, |
|
"loss": 0.6621, |
|
"positive_losses": 0.008851242251694202, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.08193298429250717, |
|
"rewards/margins": 0.0681738331913948, |
|
"rewards/margins_max": 0.1408631056547165, |
|
"rewards/margins_min": 0.011917511001229286, |
|
"rewards/margins_std": 0.059797000139951706, |
|
"rewards/rejected": 0.013759145513176918, |
|
"step": 190 |
|
}, |
|
{ |
|
"dpo_losses": 0.659784197807312, |
|
"epoch": 0.56, |
|
"grad_norm": 8.125986362153837, |
|
"learning_rate": 4.88463452949359e-07, |
|
"logits/chosen": -2.851494312286377, |
|
"logits/rejected": -2.774346113204956, |
|
"logps/chosen": -330.9751281738281, |
|
"logps/rejected": -263.9167175292969, |
|
"loss": 0.6627, |
|
"positive_losses": 0.008233070373535156, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.08557725697755814, |
|
"rewards/margins": 0.06877057254314423, |
|
"rewards/margins_max": 0.13887056708335876, |
|
"rewards/margins_min": 0.01435057632625103, |
|
"rewards/margins_std": 0.05443096160888672, |
|
"rewards/rejected": 0.016806693747639656, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_dpo_losses": 0.6858174800872803, |
|
"eval_logits/chosen": -2.7871391773223877, |
|
"eval_logits/rejected": -2.7485153675079346, |
|
"eval_logps/chosen": -279.9480895996094, |
|
"eval_logps/rejected": -255.46397399902344, |
|
"eval_loss": 0.6995190382003784, |
|
"eval_positive_losses": 0.1222764253616333, |
|
"eval_rewards/accuracies": 0.5960000157356262, |
|
"eval_rewards/chosen": 0.04645317792892456, |
|
"eval_rewards/margins": 0.015303360298275948, |
|
"eval_rewards/margins_max": 0.08988457173109055, |
|
"eval_rewards/margins_min": -0.049583785235881805, |
|
"eval_rewards/margins_std": 0.04650707170367241, |
|
"eval_rewards/rejected": 0.031149819493293762, |
|
"eval_runtime": 428.2785, |
|
"eval_samples_per_second": 4.67, |
|
"eval_steps_per_second": 0.292, |
|
"step": 200 |
|
}, |
|
{ |
|
"dpo_losses": 0.6529717445373535, |
|
"epoch": 0.59, |
|
"grad_norm": 1.9509652365320336, |
|
"learning_rate": 4.858739661052539e-07, |
|
"logits/chosen": -2.7506725788116455, |
|
"logits/rejected": -2.722893238067627, |
|
"logps/chosen": -310.4784240722656, |
|
"logps/rejected": -255.10678100585938, |
|
"loss": 0.6562, |
|
"positive_losses": 0.017461013048887253, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.09705875813961029, |
|
"rewards/margins": 0.08387880027294159, |
|
"rewards/margins_max": 0.17185379564762115, |
|
"rewards/margins_min": 0.010865801945328712, |
|
"rewards/margins_std": 0.07355490326881409, |
|
"rewards/rejected": 0.013179955072700977, |
|
"step": 210 |
|
}, |
|
{ |
|
"dpo_losses": 0.6484541893005371, |
|
"epoch": 0.62, |
|
"grad_norm": 1.8456619177361002, |
|
"learning_rate": 4.830308438912687e-07, |
|
"logits/chosen": -2.8496181964874268, |
|
"logits/rejected": -2.7542800903320312, |
|
"logps/chosen": -342.6438293457031, |
|
"logps/rejected": -272.774169921875, |
|
"loss": 0.6524, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.10812418162822723, |
|
"rewards/margins": 0.0925908237695694, |
|
"rewards/margins_max": 0.1654980480670929, |
|
"rewards/margins_min": 0.022400889545679092, |
|
"rewards/margins_std": 0.06456473469734192, |
|
"rewards/rejected": 0.015533369965851307, |
|
"step": 220 |
|
}, |
|
{ |
|
"dpo_losses": 0.6464593410491943, |
|
"epoch": 0.65, |
|
"grad_norm": 1.9535872255784013, |
|
"learning_rate": 4.799371435178545e-07, |
|
"logits/chosen": -2.849377393722534, |
|
"logits/rejected": -2.7559587955474854, |
|
"logps/chosen": -360.3794860839844, |
|
"logps/rejected": -260.52764892578125, |
|
"loss": 0.6444, |
|
"positive_losses": 0.015559768304228783, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.1129622682929039, |
|
"rewards/margins": 0.09730460494756699, |
|
"rewards/margins_max": 0.18746943771839142, |
|
"rewards/margins_min": 0.02147417888045311, |
|
"rewards/margins_std": 0.07491712272167206, |
|
"rewards/rejected": 0.015657661482691765, |
|
"step": 230 |
|
}, |
|
{ |
|
"dpo_losses": 0.653369128704071, |
|
"epoch": 0.68, |
|
"grad_norm": 1.576015913761948, |
|
"learning_rate": 4.765961916422574e-07, |
|
"logits/chosen": -2.7950048446655273, |
|
"logits/rejected": -2.725090742111206, |
|
"logps/chosen": -314.9557800292969, |
|
"logps/rejected": -281.58880615234375, |
|
"loss": 0.6537, |
|
"positive_losses": 0.016252517700195312, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.09841253608465195, |
|
"rewards/margins": 0.08255226910114288, |
|
"rewards/margins_max": 0.16673865914344788, |
|
"rewards/margins_min": 0.015284004621207714, |
|
"rewards/margins_std": 0.06914026290178299, |
|
"rewards/rejected": 0.015860268846154213, |
|
"step": 240 |
|
}, |
|
{ |
|
"dpo_losses": 0.6478245258331299, |
|
"epoch": 0.7, |
|
"grad_norm": 1.8968881320655915, |
|
"learning_rate": 4.730115807913626e-07, |
|
"logits/chosen": -2.792325258255005, |
|
"logits/rejected": -2.715862989425659, |
|
"logps/chosen": -300.74859619140625, |
|
"logps/rejected": -249.9875030517578, |
|
"loss": 0.6546, |
|
"positive_losses": 0.023810099810361862, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.1053255945444107, |
|
"rewards/margins": 0.09475791454315186, |
|
"rewards/margins_max": 0.19134044647216797, |
|
"rewards/margins_min": 0.01620624028146267, |
|
"rewards/margins_std": 0.08089859038591385, |
|
"rewards/rejected": 0.01056766789406538, |
|
"step": 250 |
|
}, |
|
{ |
|
"dpo_losses": 0.6441227197647095, |
|
"epoch": 0.73, |
|
"grad_norm": 1.7119013888667354, |
|
"learning_rate": 4.691871654986485e-07, |
|
"logits/chosen": -2.839354991912842, |
|
"logits/rejected": -2.756789445877075, |
|
"logps/chosen": -280.3956298828125, |
|
"logps/rejected": -257.5377197265625, |
|
"loss": 0.6395, |
|
"positive_losses": 0.004910898394882679, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.11697886139154434, |
|
"rewards/margins": 0.10304615646600723, |
|
"rewards/margins_max": 0.20939341187477112, |
|
"rewards/margins_min": 0.02202121913433075, |
|
"rewards/margins_std": 0.08740874379873276, |
|
"rewards/rejected": 0.01393270492553711, |
|
"step": 260 |
|
}, |
|
{ |
|
"dpo_losses": 0.6456023454666138, |
|
"epoch": 0.76, |
|
"grad_norm": 1.984892770095366, |
|
"learning_rate": 4.6512705815940536e-07, |
|
"logits/chosen": -2.783090591430664, |
|
"logits/rejected": -2.730664014816284, |
|
"logps/chosen": -297.0513000488281, |
|
"logps/rejected": -260.38958740234375, |
|
"loss": 0.6522, |
|
"positive_losses": 0.08494539558887482, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.12064293771982193, |
|
"rewards/margins": 0.09954321384429932, |
|
"rewards/margins_max": 0.18979784846305847, |
|
"rewards/margins_min": 0.028997600078582764, |
|
"rewards/margins_std": 0.07252895832061768, |
|
"rewards/rejected": 0.021099697798490524, |
|
"step": 270 |
|
}, |
|
{ |
|
"dpo_losses": 0.6418065428733826, |
|
"epoch": 0.79, |
|
"grad_norm": 1.7581310746474135, |
|
"learning_rate": 4.6083562460867544e-07, |
|
"logits/chosen": -2.75692081451416, |
|
"logits/rejected": -2.717991590499878, |
|
"logps/chosen": -290.9193420410156, |
|
"logps/rejected": -270.82281494140625, |
|
"loss": 0.6449, |
|
"positive_losses": 0.07803992927074432, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.1253766566514969, |
|
"rewards/margins": 0.10763327777385712, |
|
"rewards/margins_max": 0.20058684051036835, |
|
"rewards/margins_min": 0.02709307335317135, |
|
"rewards/margins_std": 0.07942862808704376, |
|
"rewards/rejected": 0.017743363976478577, |
|
"step": 280 |
|
}, |
|
{ |
|
"dpo_losses": 0.6451443433761597, |
|
"epoch": 0.82, |
|
"grad_norm": 1.741728444197785, |
|
"learning_rate": 4.563174794266683e-07, |
|
"logits/chosen": -2.7625350952148438, |
|
"logits/rejected": -2.7133007049560547, |
|
"logps/chosen": -275.4216003417969, |
|
"logps/rejected": -262.64422607421875, |
|
"loss": 0.655, |
|
"positive_losses": 0.12835732102394104, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.11494598537683487, |
|
"rewards/margins": 0.10120587050914764, |
|
"rewards/margins_max": 0.21776075661182404, |
|
"rewards/margins_min": 0.010339265689253807, |
|
"rewards/margins_std": 0.09577289968729019, |
|
"rewards/rejected": 0.013740080408751965, |
|
"step": 290 |
|
}, |
|
{ |
|
"dpo_losses": 0.6270988583564758, |
|
"epoch": 0.85, |
|
"grad_norm": 1.8935337862274761, |
|
"learning_rate": 4.515774809767012e-07, |
|
"logits/chosen": -2.8255112171173096, |
|
"logits/rejected": -2.748577833175659, |
|
"logps/chosen": -331.1516418457031, |
|
"logps/rejected": -288.11700439453125, |
|
"loss": 0.6293, |
|
"positive_losses": 0.02850809134542942, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.1536138951778412, |
|
"rewards/margins": 0.1404499113559723, |
|
"rewards/margins_max": 0.27252131700515747, |
|
"rewards/margins_min": 0.027482062578201294, |
|
"rewards/margins_std": 0.10937514156103134, |
|
"rewards/rejected": 0.01316398847848177, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_dpo_losses": 0.6802704930305481, |
|
"eval_logits/chosen": -2.768414258956909, |
|
"eval_logits/rejected": -2.730605363845825, |
|
"eval_logps/chosen": -277.8385009765625, |
|
"eval_logps/rejected": -254.60328674316406, |
|
"eval_loss": 0.7193401455879211, |
|
"eval_positive_losses": 0.35523319244384766, |
|
"eval_rewards/accuracies": 0.5960000157356262, |
|
"eval_rewards/chosen": 0.06754905730485916, |
|
"eval_rewards/margins": 0.027792593464255333, |
|
"eval_rewards/margins_max": 0.16008007526397705, |
|
"eval_rewards/margins_min": -0.08630798757076263, |
|
"eval_rewards/margins_std": 0.08258534967899323, |
|
"eval_rewards/rejected": 0.03975646197795868, |
|
"eval_runtime": 428.0151, |
|
"eval_samples_per_second": 4.673, |
|
"eval_steps_per_second": 0.292, |
|
"step": 300 |
|
}, |
|
{ |
|
"dpo_losses": 0.6279357671737671, |
|
"epoch": 0.87, |
|
"grad_norm": 1.7270459495090738, |
|
"learning_rate": 4.4662072618099887e-07, |
|
"logits/chosen": -2.820918560028076, |
|
"logits/rejected": -2.7266643047332764, |
|
"logps/chosen": -282.9930725097656, |
|
"logps/rejected": -213.27236938476562, |
|
"loss": 0.6267, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.14822781085968018, |
|
"rewards/margins": 0.1387120932340622, |
|
"rewards/margins_max": 0.27559134364128113, |
|
"rewards/margins_min": 0.03083965554833412, |
|
"rewards/margins_std": 0.10963471978902817, |
|
"rewards/rejected": 0.009515730664134026, |
|
"step": 310 |
|
}, |
|
{ |
|
"dpo_losses": 0.6319230794906616, |
|
"epoch": 0.9, |
|
"grad_norm": 7.736909208406228, |
|
"learning_rate": 4.414525450399712e-07, |
|
"logits/chosen": -2.78419828414917, |
|
"logits/rejected": -2.72727370262146, |
|
"logps/chosen": -294.29937744140625, |
|
"logps/rejected": -259.6752014160156, |
|
"loss": 0.6334, |
|
"positive_losses": 0.07355575263500214, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.13973799347877502, |
|
"rewards/margins": 0.12978938221931458, |
|
"rewards/margins_max": 0.26664164662361145, |
|
"rewards/margins_min": 0.02372616156935692, |
|
"rewards/margins_std": 0.10929105430841446, |
|
"rewards/rejected": 0.009948636405169964, |
|
"step": 320 |
|
}, |
|
{ |
|
"dpo_losses": 0.6180365085601807, |
|
"epoch": 0.93, |
|
"grad_norm": 1.9077047206667683, |
|
"learning_rate": 4.360784949008615e-07, |
|
"logits/chosen": -2.8559248447418213, |
|
"logits/rejected": -2.775313138961792, |
|
"logps/chosen": -338.38507080078125, |
|
"logps/rejected": -267.0349426269531, |
|
"loss": 0.6252, |
|
"positive_losses": 0.016361523419618607, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.15934768319129944, |
|
"rewards/margins": 0.16030099987983704, |
|
"rewards/margins_max": 0.2862982153892517, |
|
"rewards/margins_min": 0.039303008466959, |
|
"rewards/margins_std": 0.11248087882995605, |
|
"rewards/rejected": -0.0009533234988339245, |
|
"step": 330 |
|
}, |
|
{ |
|
"dpo_losses": 0.6223454475402832, |
|
"epoch": 0.96, |
|
"grad_norm": 1.835838607397251, |
|
"learning_rate": 4.305043544819289e-07, |
|
"logits/chosen": -2.741628646850586, |
|
"logits/rejected": -2.690565586090088, |
|
"logps/chosen": -283.5008239746094, |
|
"logps/rejected": -249.989013671875, |
|
"loss": 0.621, |
|
"positive_losses": 0.06820764392614365, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.16344675421714783, |
|
"rewards/margins": 0.15164029598236084, |
|
"rewards/margins_max": 0.32016128301620483, |
|
"rewards/margins_min": 0.039167340844869614, |
|
"rewards/margins_std": 0.12687677145004272, |
|
"rewards/rejected": 0.011806446127593517, |
|
"step": 340 |
|
}, |
|
{ |
|
"dpo_losses": 0.6281483173370361, |
|
"epoch": 0.99, |
|
"grad_norm": 5.3160905260769695, |
|
"learning_rate": 4.247361176585903e-07, |
|
"logits/chosen": -2.7237563133239746, |
|
"logits/rejected": -2.6812942028045654, |
|
"logps/chosen": -300.01416015625, |
|
"logps/rejected": -285.3011779785156, |
|
"loss": 0.6314, |
|
"positive_losses": 0.09978675842285156, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.1494494080543518, |
|
"rewards/margins": 0.1379416286945343, |
|
"rewards/margins_max": 0.26943984627723694, |
|
"rewards/margins_min": 0.03328308090567589, |
|
"rewards/margins_std": 0.10527817159891129, |
|
"rewards/rejected": 0.011507781222462654, |
|
"step": 350 |
|
}, |
|
{ |
|
"dpo_losses": 0.6205986142158508, |
|
"epoch": 1.01, |
|
"grad_norm": 1.8675515047500797, |
|
"learning_rate": 4.187799870182038e-07, |
|
"logits/chosen": -2.76216983795166, |
|
"logits/rejected": -2.704974412918091, |
|
"logps/chosen": -284.5892028808594, |
|
"logps/rejected": -252.9727783203125, |
|
"loss": 0.6217, |
|
"positive_losses": 0.02007141150534153, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.1552152931690216, |
|
"rewards/margins": 0.15586945414543152, |
|
"rewards/margins_max": 0.3017035126686096, |
|
"rewards/margins_min": 0.036038514226675034, |
|
"rewards/margins_std": 0.11902729421854019, |
|
"rewards/rejected": -0.0006541303591802716, |
|
"step": 360 |
|
}, |
|
{ |
|
"dpo_losses": 0.6088902354240417, |
|
"epoch": 1.04, |
|
"grad_norm": 1.9531792595002184, |
|
"learning_rate": 4.126423671904236e-07, |
|
"logits/chosen": -2.7716732025146484, |
|
"logits/rejected": -2.7078254222869873, |
|
"logps/chosen": -336.2352600097656, |
|
"logps/rejected": -244.32177734375, |
|
"loss": 0.6125, |
|
"positive_losses": 0.02638239786028862, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.17914316058158875, |
|
"rewards/margins": 0.18257828056812286, |
|
"rewards/margins_max": 0.3468923568725586, |
|
"rewards/margins_min": 0.04474998638033867, |
|
"rewards/margins_std": 0.1425740271806717, |
|
"rewards/rejected": -0.003435105085372925, |
|
"step": 370 |
|
}, |
|
{ |
|
"dpo_losses": 0.6105533838272095, |
|
"epoch": 1.07, |
|
"grad_norm": 1.9479255132083297, |
|
"learning_rate": 4.0632985796030007e-07, |
|
"logits/chosen": -2.7388248443603516, |
|
"logits/rejected": -2.6531803607940674, |
|
"logps/chosen": -268.4936218261719, |
|
"logps/rejected": -236.57095336914062, |
|
"loss": 0.61, |
|
"positive_losses": 0.008747863583266735, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.1682673990726471, |
|
"rewards/margins": 0.17755943536758423, |
|
"rewards/margins_max": 0.3241942226886749, |
|
"rewards/margins_min": 0.05380522087216377, |
|
"rewards/margins_std": 0.125107079744339, |
|
"rewards/rejected": -0.00929203350096941, |
|
"step": 380 |
|
}, |
|
{ |
|
"dpo_losses": 0.6172813773155212, |
|
"epoch": 1.1, |
|
"grad_norm": 8.407134849747322, |
|
"learning_rate": 3.9984924717152713e-07, |
|
"logits/chosen": -2.7552475929260254, |
|
"logits/rejected": -2.7269043922424316, |
|
"logps/chosen": -285.97760009765625, |
|
"logps/rejected": -262.98431396484375, |
|
"loss": 0.6282, |
|
"positive_losses": 0.06859800964593887, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.15436658263206482, |
|
"rewards/margins": 0.1631786823272705, |
|
"rewards/margins_max": 0.3312760889530182, |
|
"rewards/margins_min": 0.041763290762901306, |
|
"rewards/margins_std": 0.13355527818202972, |
|
"rewards/rejected": -0.008812101557850838, |
|
"step": 390 |
|
}, |
|
{ |
|
"dpo_losses": 0.6174243688583374, |
|
"epoch": 1.13, |
|
"grad_norm": 3.651013920669953, |
|
"learning_rate": 3.932075034274723e-07, |
|
"logits/chosen": -2.758373975753784, |
|
"logits/rejected": -2.6981711387634277, |
|
"logps/chosen": -276.49871826171875, |
|
"logps/rejected": -245.08224487304688, |
|
"loss": 0.6236, |
|
"positive_losses": 0.0625574141740799, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.15731404721736908, |
|
"rewards/margins": 0.16287846863269806, |
|
"rewards/margins_max": 0.3262539505958557, |
|
"rewards/margins_min": 0.043569326400756836, |
|
"rewards/margins_std": 0.13064977526664734, |
|
"rewards/rejected": -0.005564402788877487, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"eval_dpo_losses": 0.6756274700164795, |
|
"eval_logits/chosen": -2.749445915222168, |
|
"eval_logits/rejected": -2.711932420730591, |
|
"eval_logps/chosen": -276.5967712402344, |
|
"eval_logps/rejected": -254.45849609375, |
|
"eval_loss": 0.7519370317459106, |
|
"eval_positive_losses": 0.6894010305404663, |
|
"eval_rewards/accuracies": 0.609000027179718, |
|
"eval_rewards/chosen": 0.07996628433465958, |
|
"eval_rewards/margins": 0.03876199945807457, |
|
"eval_rewards/margins_max": 0.21824941039085388, |
|
"eval_rewards/margins_min": -0.11400224268436432, |
|
"eval_rewards/margins_std": 0.11133058369159698, |
|
"eval_rewards/rejected": 0.0412042960524559, |
|
"eval_runtime": 427.9231, |
|
"eval_samples_per_second": 4.674, |
|
"eval_steps_per_second": 0.292, |
|
"step": 400 |
|
}, |
|
{ |
|
"dpo_losses": 0.6167701482772827, |
|
"epoch": 1.15, |
|
"grad_norm": 1.9654856670345158, |
|
"learning_rate": 3.8641176859783383e-07, |
|
"logits/chosen": -2.7697155475616455, |
|
"logits/rejected": -2.707765817642212, |
|
"logps/chosen": -278.96673583984375, |
|
"logps/rejected": -248.4454345703125, |
|
"loss": 0.6213, |
|
"positive_losses": 0.21929892897605896, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.17709851264953613, |
|
"rewards/margins": 0.16643425822257996, |
|
"rewards/margins_max": 0.3442317843437195, |
|
"rewards/margins_min": 0.015078430995345116, |
|
"rewards/margins_std": 0.15533670783042908, |
|
"rewards/rejected": 0.010664242319762707, |
|
"step": 410 |
|
}, |
|
{ |
|
"dpo_losses": 0.6068592071533203, |
|
"epoch": 1.18, |
|
"grad_norm": 2.3100164325344315, |
|
"learning_rate": 3.7946935013898606e-07, |
|
"logits/chosen": -2.7368392944335938, |
|
"logits/rejected": -2.677018642425537, |
|
"logps/chosen": -298.12542724609375, |
|
"logps/rejected": -266.35406494140625, |
|
"loss": 0.6085, |
|
"positive_losses": 0.038385771214962006, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.19851230084896088, |
|
"rewards/margins": 0.18865473568439484, |
|
"rewards/margins_max": 0.4086156487464905, |
|
"rewards/margins_min": 0.0322859063744545, |
|
"rewards/margins_std": 0.16774147748947144, |
|
"rewards/rejected": 0.00985757913440466, |
|
"step": 420 |
|
}, |
|
{ |
|
"dpo_losses": 0.6006068587303162, |
|
"epoch": 1.21, |
|
"grad_norm": 6.525192737290529, |
|
"learning_rate": 3.7238771323626817e-07, |
|
"logits/chosen": -2.768768787384033, |
|
"logits/rejected": -2.681751012802124, |
|
"logps/chosen": -323.71514892578125, |
|
"logps/rejected": -260.93621826171875, |
|
"loss": 0.6082, |
|
"positive_losses": 0.09947166591882706, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.1950441300868988, |
|
"rewards/margins": 0.2026427984237671, |
|
"rewards/margins_max": 0.3679996430873871, |
|
"rewards/margins_min": 0.06226770952343941, |
|
"rewards/margins_std": 0.1420767605304718, |
|
"rewards/rejected": -0.007598669268190861, |
|
"step": 430 |
|
}, |
|
{ |
|
"dpo_losses": 0.6032605171203613, |
|
"epoch": 1.24, |
|
"grad_norm": 1.6385345947087144, |
|
"learning_rate": 3.651744727766676e-07, |
|
"logits/chosen": -2.768833637237549, |
|
"logits/rejected": -2.702613353729248, |
|
"logps/chosen": -273.583740234375, |
|
"logps/rejected": -246.775390625, |
|
"loss": 0.6049, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.17607031762599945, |
|
"rewards/margins": 0.1937752068042755, |
|
"rewards/margins_max": 0.34389567375183105, |
|
"rewards/margins_min": 0.06312290579080582, |
|
"rewards/margins_std": 0.1278633177280426, |
|
"rewards/rejected": -0.017704878002405167, |
|
"step": 440 |
|
}, |
|
{ |
|
"dpo_losses": 0.6007322072982788, |
|
"epoch": 1.27, |
|
"grad_norm": 1.9767106796026328, |
|
"learning_rate": 3.5783738516052897e-07, |
|
"logits/chosen": -2.7184462547302246, |
|
"logits/rejected": -2.658703327178955, |
|
"logps/chosen": -294.31878662109375, |
|
"logps/rejected": -261.46221923828125, |
|
"loss": 0.6054, |
|
"positive_losses": 0.05530524253845215, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.17491844296455383, |
|
"rewards/margins": 0.20017877221107483, |
|
"rewards/margins_max": 0.37646952271461487, |
|
"rewards/margins_min": 0.05014311149716377, |
|
"rewards/margins_std": 0.14955812692642212, |
|
"rewards/rejected": -0.02526034787297249, |
|
"step": 450 |
|
}, |
|
{ |
|
"dpo_losses": 0.5913182497024536, |
|
"epoch": 1.3, |
|
"grad_norm": 1.9303555587009904, |
|
"learning_rate": 3.5038433996109404e-07, |
|
"logits/chosen": -2.786879777908325, |
|
"logits/rejected": -2.722357749938965, |
|
"logps/chosen": -335.79766845703125, |
|
"logps/rejected": -299.84027099609375, |
|
"loss": 0.5936, |
|
"positive_losses": 0.03368115425109863, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.19536344707012177, |
|
"rewards/margins": 0.22316019237041473, |
|
"rewards/margins_max": 0.4146662652492523, |
|
"rewards/margins_min": 0.05830659344792366, |
|
"rewards/margins_std": 0.1629330813884735, |
|
"rewards/rejected": -0.027796756476163864, |
|
"step": 460 |
|
}, |
|
{ |
|
"dpo_losses": 0.5880565643310547, |
|
"epoch": 1.32, |
|
"grad_norm": 5.289024274003296, |
|
"learning_rate": 3.428233514408398e-07, |
|
"logits/chosen": -2.7258143424987793, |
|
"logits/rejected": -2.703859806060791, |
|
"logps/chosen": -296.7102355957031, |
|
"logps/rejected": -250.7404022216797, |
|
"loss": 0.6047, |
|
"positive_losses": 0.15512129664421082, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.19506558775901794, |
|
"rewards/margins": 0.23098190128803253, |
|
"rewards/margins_max": 0.4174633026123047, |
|
"rewards/margins_min": 0.0681539997458458, |
|
"rewards/margins_std": 0.15801438689231873, |
|
"rewards/rejected": -0.0359162911772728, |
|
"step": 470 |
|
}, |
|
{ |
|
"dpo_losses": 0.5843189358711243, |
|
"epoch": 1.35, |
|
"grad_norm": 1.5003059393102265, |
|
"learning_rate": 3.3516254993373945e-07, |
|
"logits/chosen": -2.755491018295288, |
|
"logits/rejected": -2.681509256362915, |
|
"logps/chosen": -311.5091247558594, |
|
"logps/rejected": -245.88937377929688, |
|
"loss": 0.5939, |
|
"positive_losses": 0.055457282811403275, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.19728389382362366, |
|
"rewards/margins": 0.23806998133659363, |
|
"rewards/margins_max": 0.4085385203361511, |
|
"rewards/margins_min": 0.06948045641183853, |
|
"rewards/margins_std": 0.15313410758972168, |
|
"rewards/rejected": -0.04078609496355057, |
|
"step": 480 |
|
}, |
|
{ |
|
"dpo_losses": 0.588597297668457, |
|
"epoch": 1.38, |
|
"grad_norm": 2.2336443793585277, |
|
"learning_rate": 3.274101731027105e-07, |
|
"logits/chosen": -2.709615707397461, |
|
"logits/rejected": -2.663390636444092, |
|
"logps/chosen": -318.6739807128906, |
|
"logps/rejected": -286.79541015625, |
|
"loss": 0.593, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.20303764939308167, |
|
"rewards/margins": 0.23158419132232666, |
|
"rewards/margins_max": 0.457578182220459, |
|
"rewards/margins_min": 0.04808598756790161, |
|
"rewards/margins_std": 0.18310314416885376, |
|
"rewards/rejected": -0.028546560555696487, |
|
"step": 490 |
|
}, |
|
{ |
|
"dpo_losses": 0.598129153251648, |
|
"epoch": 1.41, |
|
"grad_norm": 1.719544868467305, |
|
"learning_rate": 3.1957455708165314e-07, |
|
"logits/chosen": -2.6921727657318115, |
|
"logits/rejected": -2.678170680999756, |
|
"logps/chosen": -280.46063232421875, |
|
"logps/rejected": -249.769287109375, |
|
"loss": 0.6009, |
|
"positive_losses": 0.036452483385801315, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.17304545640945435, |
|
"rewards/margins": 0.2069230079650879, |
|
"rewards/margins_max": 0.40413007140159607, |
|
"rewards/margins_min": 0.054103028029203415, |
|
"rewards/margins_std": 0.15941500663757324, |
|
"rewards/rejected": -0.03387756645679474, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"eval_dpo_losses": 0.6718186140060425, |
|
"eval_logits/chosen": -2.7294745445251465, |
|
"eval_logits/rejected": -2.692014694213867, |
|
"eval_logps/chosen": -278.2060852050781, |
|
"eval_logps/rejected": -257.0343322753906, |
|
"eval_loss": 0.8434445858001709, |
|
"eval_positive_losses": 1.5495002269744873, |
|
"eval_rewards/accuracies": 0.609000027179718, |
|
"eval_rewards/chosen": 0.06387308984994888, |
|
"eval_rewards/margins": 0.04842698201537132, |
|
"eval_rewards/margins_max": 0.2708706855773926, |
|
"eval_rewards/margins_min": -0.1440476030111313, |
|
"eval_rewards/margins_std": 0.1388508826494217, |
|
"eval_rewards/rejected": 0.015446108765900135, |
|
"eval_runtime": 427.8728, |
|
"eval_samples_per_second": 4.674, |
|
"eval_steps_per_second": 0.292, |
|
"step": 500 |
|
}, |
|
{ |
|
"dpo_losses": 0.5843192338943481, |
|
"epoch": 1.44, |
|
"grad_norm": 3.509981635934115, |
|
"learning_rate": 3.116641275116018e-07, |
|
"logits/chosen": -2.7144105434417725, |
|
"logits/rejected": -2.628718614578247, |
|
"logps/chosen": -321.0252380371094, |
|
"logps/rejected": -263.8638000488281, |
|
"loss": 0.5918, |
|
"positive_losses": 0.06754092872142792, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.1961548924446106, |
|
"rewards/margins": 0.24090898036956787, |
|
"rewards/margins_max": 0.46518999338150024, |
|
"rewards/margins_min": 0.07308268547058105, |
|
"rewards/margins_std": 0.1766786277294159, |
|
"rewards/rejected": -0.04475412517786026, |
|
"step": 510 |
|
}, |
|
{ |
|
"dpo_losses": 0.5933550596237183, |
|
"epoch": 1.46, |
|
"grad_norm": 1.7887167117515541, |
|
"learning_rate": 3.036873904806295e-07, |
|
"logits/chosen": -2.7657430171966553, |
|
"logits/rejected": -2.6845974922180176, |
|
"logps/chosen": -281.34649658203125, |
|
"logps/rejected": -241.7858123779297, |
|
"loss": 0.6137, |
|
"positive_losses": 0.2598763406276703, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.19071096181869507, |
|
"rewards/margins": 0.22014352679252625, |
|
"rewards/margins_max": 0.44077128171920776, |
|
"rewards/margins_min": 0.0637890174984932, |
|
"rewards/margins_std": 0.17364180088043213, |
|
"rewards/rejected": -0.02943255938589573, |
|
"step": 520 |
|
}, |
|
{ |
|
"dpo_losses": 0.5821532011032104, |
|
"epoch": 1.49, |
|
"grad_norm": 10.11338724174373, |
|
"learning_rate": 2.956529233772492e-07, |
|
"logits/chosen": -2.769479513168335, |
|
"logits/rejected": -2.7147421836853027, |
|
"logps/chosen": -314.7546081542969, |
|
"logps/rejected": -300.4794921875, |
|
"loss": 0.618, |
|
"positive_losses": 0.35480666160583496, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.19147683680057526, |
|
"rewards/margins": 0.2443142831325531, |
|
"rewards/margins_max": 0.41471806168556213, |
|
"rewards/margins_min": 0.07710012048482895, |
|
"rewards/margins_std": 0.14999482035636902, |
|
"rewards/rejected": -0.05283746123313904, |
|
"step": 530 |
|
}, |
|
{ |
|
"dpo_losses": 0.5760180950164795, |
|
"epoch": 1.52, |
|
"grad_norm": 1.9182115700216993, |
|
"learning_rate": 2.875693656671431e-07, |
|
"logits/chosen": -2.761521577835083, |
|
"logits/rejected": -2.678863286972046, |
|
"logps/chosen": -320.120849609375, |
|
"logps/rejected": -249.3918914794922, |
|
"loss": 0.5887, |
|
"positive_losses": 0.12351331859827042, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.2093885838985443, |
|
"rewards/margins": 0.26241612434387207, |
|
"rewards/margins_max": 0.5384218692779541, |
|
"rewards/margins_min": 0.05588661879301071, |
|
"rewards/margins_std": 0.2135990560054779, |
|
"rewards/rejected": -0.05302751809358597, |
|
"step": 540 |
|
}, |
|
{ |
|
"dpo_losses": 0.5680991411209106, |
|
"epoch": 1.55, |
|
"grad_norm": 1.947863946973032, |
|
"learning_rate": 2.794454096031429e-07, |
|
"logits/chosen": -2.7631947994232178, |
|
"logits/rejected": -2.695374011993408, |
|
"logps/chosen": -349.5855407714844, |
|
"logps/rejected": -319.5202941894531, |
|
"loss": 0.5909, |
|
"positive_losses": 0.10299448668956757, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.23324398696422577, |
|
"rewards/margins": 0.2784574627876282, |
|
"rewards/margins_max": 0.5278440713882446, |
|
"rewards/margins_min": 0.08051635324954987, |
|
"rewards/margins_std": 0.20143046975135803, |
|
"rewards/rejected": -0.04521343857049942, |
|
"step": 550 |
|
}, |
|
{ |
|
"dpo_losses": 0.5882316827774048, |
|
"epoch": 1.58, |
|
"grad_norm": 6.883043057999946, |
|
"learning_rate": 2.7128979087844593e-07, |
|
"logits/chosen": -2.733025312423706, |
|
"logits/rejected": -2.6712567806243896, |
|
"logps/chosen": -285.25115966796875, |
|
"logps/rejected": -269.1498107910156, |
|
"loss": 0.5847, |
|
"positive_losses": 0.1969757080078125, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.1997162401676178, |
|
"rewards/margins": 0.23153042793273926, |
|
"rewards/margins_max": 0.4535094201564789, |
|
"rewards/margins_min": 0.05927438288927078, |
|
"rewards/margins_std": 0.17876756191253662, |
|
"rewards/rejected": -0.03181419149041176, |
|
"step": 560 |
|
}, |
|
{ |
|
"dpo_losses": 0.5802090764045715, |
|
"epoch": 1.61, |
|
"grad_norm": 1.7758243312216686, |
|
"learning_rate": 2.6311127923312153e-07, |
|
"logits/chosen": -2.7622435092926025, |
|
"logits/rejected": -2.6936147212982178, |
|
"logps/chosen": -315.955322265625, |
|
"logps/rejected": -297.02313232421875, |
|
"loss": 0.5892, |
|
"positive_losses": 0.21824970841407776, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.1967122107744217, |
|
"rewards/margins": 0.2509615123271942, |
|
"rewards/margins_max": 0.4743427336215973, |
|
"rewards/margins_min": 0.049542300403118134, |
|
"rewards/margins_std": 0.19265086948871613, |
|
"rewards/rejected": -0.05424932390451431, |
|
"step": 570 |
|
}, |
|
{ |
|
"dpo_losses": 0.5920171737670898, |
|
"epoch": 1.63, |
|
"grad_norm": 1.860643226808477, |
|
"learning_rate": 2.5491866902400565e-07, |
|
"logits/chosen": -2.7078280448913574, |
|
"logits/rejected": -2.6779098510742188, |
|
"logps/chosen": -269.2068786621094, |
|
"logps/rejected": -271.0436096191406, |
|
"loss": 0.5991, |
|
"positive_losses": 0.12158451229333878, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.17235751450061798, |
|
"rewards/margins": 0.22299370169639587, |
|
"rewards/margins_max": 0.43409761786460876, |
|
"rewards/margins_min": 0.05049045756459236, |
|
"rewards/margins_std": 0.1718008816242218, |
|
"rewards/rejected": -0.05063622072339058, |
|
"step": 580 |
|
}, |
|
{ |
|
"dpo_losses": 0.5816795229911804, |
|
"epoch": 1.66, |
|
"grad_norm": 24.09510524317517, |
|
"learning_rate": 2.4672076976812543e-07, |
|
"logits/chosen": -2.720360279083252, |
|
"logits/rejected": -2.6442887783050537, |
|
"logps/chosen": -288.3584289550781, |
|
"logps/rejected": -267.7664794921875, |
|
"loss": 0.5961, |
|
"positive_losses": 0.3148724436759949, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.183799147605896, |
|
"rewards/margins": 0.24748222529888153, |
|
"rewards/margins_max": 0.4496613144874573, |
|
"rewards/margins_min": 0.06704072654247284, |
|
"rewards/margins_std": 0.17722192406654358, |
|
"rewards/rejected": -0.06368308514356613, |
|
"step": 590 |
|
}, |
|
{ |
|
"dpo_losses": 0.576720118522644, |
|
"epoch": 1.69, |
|
"grad_norm": 2.1497917207479507, |
|
"learning_rate": 2.385263966698222e-07, |
|
"logits/chosen": -2.728609085083008, |
|
"logits/rejected": -2.6675384044647217, |
|
"logps/chosen": -273.47369384765625, |
|
"logps/rejected": -296.99310302734375, |
|
"loss": 0.6136, |
|
"positive_losses": 0.32597580552101135, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.19901221990585327, |
|
"rewards/margins": 0.26053741574287415, |
|
"rewards/margins_max": 0.5135586261749268, |
|
"rewards/margins_min": 0.06456376612186432, |
|
"rewards/margins_std": 0.20331530272960663, |
|
"rewards/rejected": -0.06152517348527908, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"eval_dpo_losses": 0.6691358685493469, |
|
"eval_logits/chosen": -2.7203145027160645, |
|
"eval_logits/rejected": -2.682708263397217, |
|
"eval_logps/chosen": -277.7243957519531, |
|
"eval_logps/rejected": -257.23602294921875, |
|
"eval_loss": 0.8727347254753113, |
|
"eval_positive_losses": 1.8302409648895264, |
|
"eval_rewards/accuracies": 0.6129999756813049, |
|
"eval_rewards/chosen": 0.06869003176689148, |
|
"eval_rewards/margins": 0.055261291563510895, |
|
"eval_rewards/margins_max": 0.30490291118621826, |
|
"eval_rewards/margins_min": -0.15945661067962646, |
|
"eval_rewards/margins_std": 0.1552800089120865, |
|
"eval_rewards/rejected": 0.013428742997348309, |
|
"eval_runtime": 427.6004, |
|
"eval_samples_per_second": 4.677, |
|
"eval_steps_per_second": 0.292, |
|
"step": 600 |
|
}, |
|
{ |
|
"dpo_losses": 0.5747733116149902, |
|
"epoch": 1.72, |
|
"grad_norm": 6.65096965403295, |
|
"learning_rate": 2.3034436114175838e-07, |
|
"logits/chosen": -2.7278988361358643, |
|
"logits/rejected": -2.6467108726501465, |
|
"logps/chosen": -305.2555847167969, |
|
"logps/rejected": -246.3316192626953, |
|
"loss": 0.5995, |
|
"positive_losses": 0.20225219428539276, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.22444753348827362, |
|
"rewards/margins": 0.2688870429992676, |
|
"rewards/margins_max": 0.5200773477554321, |
|
"rewards/margins_min": 0.04230537265539169, |
|
"rewards/margins_std": 0.21579697728157043, |
|
"rewards/rejected": -0.044439516961574554, |
|
"step": 610 |
|
}, |
|
{ |
|
"dpo_losses": 0.5641311407089233, |
|
"epoch": 1.75, |
|
"grad_norm": 11.81079993553305, |
|
"learning_rate": 2.2218346133000264e-07, |
|
"logits/chosen": -2.7867112159729004, |
|
"logits/rejected": -2.71598744392395, |
|
"logps/chosen": -318.68121337890625, |
|
"logps/rejected": -289.48309326171875, |
|
"loss": 0.6115, |
|
"positive_losses": 0.15610671043395996, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.22943755984306335, |
|
"rewards/margins": 0.29184073209762573, |
|
"rewards/margins_max": 0.5524952411651611, |
|
"rewards/margins_min": 0.07403835654258728, |
|
"rewards/margins_std": 0.2165592461824417, |
|
"rewards/rejected": -0.06240314990282059, |
|
"step": 620 |
|
}, |
|
{ |
|
"dpo_losses": 0.5642560720443726, |
|
"epoch": 1.77, |
|
"grad_norm": 7.357647930061899, |
|
"learning_rate": 2.1405247265337917e-07, |
|
"logits/chosen": -2.693892002105713, |
|
"logits/rejected": -2.6591708660125732, |
|
"logps/chosen": -349.5628356933594, |
|
"logps/rejected": -286.9067687988281, |
|
"loss": 0.5786, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.2449304610490799, |
|
"rewards/margins": 0.28937476873397827, |
|
"rewards/margins_max": 0.5107169151306152, |
|
"rewards/margins_min": 0.09730032831430435, |
|
"rewards/margins_std": 0.18998651206493378, |
|
"rewards/rejected": -0.04444430395960808, |
|
"step": 630 |
|
}, |
|
{ |
|
"dpo_losses": 0.5737847089767456, |
|
"epoch": 1.8, |
|
"grad_norm": 1.3349176272864234, |
|
"learning_rate": 2.0596013836725657e-07, |
|
"logits/chosen": -2.779600143432617, |
|
"logits/rejected": -2.700307846069336, |
|
"logps/chosen": -290.4892578125, |
|
"logps/rejected": -238.4832305908203, |
|
"loss": 0.5953, |
|
"positive_losses": 0.20754241943359375, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.2137231081724167, |
|
"rewards/margins": 0.2684459686279297, |
|
"rewards/margins_max": 0.4815581440925598, |
|
"rewards/margins_min": 0.08431941270828247, |
|
"rewards/margins_std": 0.1738905906677246, |
|
"rewards/rejected": -0.0547228567302227, |
|
"step": 640 |
|
}, |
|
{ |
|
"dpo_losses": 0.5745794773101807, |
|
"epoch": 1.83, |
|
"grad_norm": 2.223670875381027, |
|
"learning_rate": 1.9791516016192213e-07, |
|
"logits/chosen": -2.7533204555511475, |
|
"logits/rejected": -2.6931979656219482, |
|
"logps/chosen": -308.6822814941406, |
|
"logps/rejected": -296.13787841796875, |
|
"loss": 0.6091, |
|
"positive_losses": 0.41213884949684143, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.21507827937602997, |
|
"rewards/margins": 0.26380831003189087, |
|
"rewards/margins_max": 0.4691304564476013, |
|
"rewards/margins_min": 0.051321595907211304, |
|
"rewards/margins_std": 0.18508048355579376, |
|
"rewards/rejected": -0.04873000457882881, |
|
"step": 650 |
|
}, |
|
{ |
|
"dpo_losses": 0.5978478193283081, |
|
"epoch": 1.86, |
|
"grad_norm": 1.9229193850326198, |
|
"learning_rate": 1.8992618880565036e-07, |
|
"logits/chosen": -2.676647663116455, |
|
"logits/rejected": -2.6538872718811035, |
|
"logps/chosen": -228.5106658935547, |
|
"logps/rejected": -218.3628387451172, |
|
"loss": 0.6228, |
|
"positive_losses": 0.36836346983909607, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.17657354474067688, |
|
"rewards/margins": 0.20908644795417786, |
|
"rewards/margins_max": 0.38408344984054565, |
|
"rewards/margins_min": 0.0495840385556221, |
|
"rewards/margins_std": 0.1525198072195053, |
|
"rewards/rejected": -0.03251289203763008, |
|
"step": 660 |
|
}, |
|
{ |
|
"dpo_losses": 0.5676581859588623, |
|
"epoch": 1.89, |
|
"grad_norm": 10.31975804008308, |
|
"learning_rate": 1.8200181484252885e-07, |
|
"logits/chosen": -2.772998809814453, |
|
"logits/rejected": -2.742371082305908, |
|
"logps/chosen": -282.56695556640625, |
|
"logps/rejected": -246.2154083251953, |
|
"loss": 0.5836, |
|
"positive_losses": 0.19931070506572723, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.23336899280548096, |
|
"rewards/margins": 0.28042665123939514, |
|
"rewards/margins_max": 0.5176241397857666, |
|
"rewards/margins_min": 0.08305230736732483, |
|
"rewards/margins_std": 0.19908221065998077, |
|
"rewards/rejected": -0.047057636082172394, |
|
"step": 670 |
|
}, |
|
{ |
|
"dpo_losses": 0.570050835609436, |
|
"epoch": 1.92, |
|
"grad_norm": 6.2395285814671375, |
|
"learning_rate": 1.7415055935504233e-07, |
|
"logits/chosen": -2.791836738586426, |
|
"logits/rejected": -2.7229678630828857, |
|
"logps/chosen": -289.1046142578125, |
|
"logps/rejected": -257.9730224609375, |
|
"loss": 0.5814, |
|
"positive_losses": 0.15246662497520447, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.21605853736400604, |
|
"rewards/margins": 0.274807333946228, |
|
"rewards/margins_max": 0.46929216384887695, |
|
"rewards/margins_min": 0.07633324712514877, |
|
"rewards/margins_std": 0.17314451932907104, |
|
"rewards/rejected": -0.05874878168106079, |
|
"step": 680 |
|
}, |
|
{ |
|
"dpo_losses": 0.5896421074867249, |
|
"epoch": 1.94, |
|
"grad_norm": 9.413363887459912, |
|
"learning_rate": 1.6638086480134952e-07, |
|
"logits/chosen": -2.7295353412628174, |
|
"logits/rejected": -2.695042133331299, |
|
"logps/chosen": -219.7963409423828, |
|
"logps/rejected": -206.1276397705078, |
|
"loss": 0.5879, |
|
"positive_losses": 0.11815419048070908, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.18556587398052216, |
|
"rewards/margins": 0.22936753928661346, |
|
"rewards/margins_max": 0.44191575050354004, |
|
"rewards/margins_min": 0.05077395588159561, |
|
"rewards/margins_std": 0.17514900863170624, |
|
"rewards/rejected": -0.0438016876578331, |
|
"step": 690 |
|
}, |
|
{ |
|
"dpo_losses": 0.5733457207679749, |
|
"epoch": 1.97, |
|
"grad_norm": 5.98740527370756, |
|
"learning_rate": 1.5870108593710471e-07, |
|
"logits/chosen": -2.6704626083374023, |
|
"logits/rejected": -2.5912749767303467, |
|
"logps/chosen": -277.55157470703125, |
|
"logps/rejected": -214.6815948486328, |
|
"loss": 0.5918, |
|
"positive_losses": 0.17365257441997528, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.21979399025440216, |
|
"rewards/margins": 0.26772385835647583, |
|
"rewards/margins_max": 0.4699093699455261, |
|
"rewards/margins_min": 0.09799458831548691, |
|
"rewards/margins_std": 0.17043128609657288, |
|
"rewards/rejected": -0.047929856926202774, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_dpo_losses": 0.6676550507545471, |
|
"eval_logits/chosen": -2.7172420024871826, |
|
"eval_logits/rejected": -2.679697036743164, |
|
"eval_logps/chosen": -277.88079833984375, |
|
"eval_logps/rejected": -257.7734069824219, |
|
"eval_loss": 0.8997932076454163, |
|
"eval_positive_losses": 2.0811269283294678, |
|
"eval_rewards/accuracies": 0.621999979019165, |
|
"eval_rewards/chosen": 0.06712605804204941, |
|
"eval_rewards/margins": 0.0590706393122673, |
|
"eval_rewards/margins_max": 0.32306957244873047, |
|
"eval_rewards/margins_min": -0.16852563619613647, |
|
"eval_rewards/margins_std": 0.1640576869249344, |
|
"eval_rewards/rejected": 0.00805541779845953, |
|
"eval_runtime": 427.6887, |
|
"eval_samples_per_second": 4.676, |
|
"eval_steps_per_second": 0.292, |
|
"step": 700 |
|
}, |
|
{ |
|
"dpo_losses": 0.5846664309501648, |
|
"epoch": 2.0, |
|
"grad_norm": 2.012900588641833, |
|
"learning_rate": 1.5111948083158528e-07, |
|
"logits/chosen": -2.665996551513672, |
|
"logits/rejected": -2.6332955360412598, |
|
"logps/chosen": -233.13113403320312, |
|
"logps/rejected": -231.0898895263672, |
|
"loss": 0.5777, |
|
"positive_losses": 0.08777942508459091, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.17990438640117645, |
|
"rewards/margins": 0.23856917023658752, |
|
"rewards/margins_max": 0.4145428240299225, |
|
"rewards/margins_min": 0.06277941167354584, |
|
"rewards/margins_std": 0.15543320775032043, |
|
"rewards/rejected": -0.058664750307798386, |
|
"step": 710 |
|
}, |
|
{ |
|
"dpo_losses": 0.5619566440582275, |
|
"epoch": 2.03, |
|
"grad_norm": 8.927999929393918, |
|
"learning_rate": 1.4364420198778658e-07, |
|
"logits/chosen": -2.803697109222412, |
|
"logits/rejected": -2.725480556488037, |
|
"logps/chosen": -341.5400085449219, |
|
"logps/rejected": -310.74261474609375, |
|
"loss": 0.5941, |
|
"positive_losses": 0.07888917624950409, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.2251276969909668, |
|
"rewards/margins": 0.2960202395915985, |
|
"rewards/margins_max": 0.5476168394088745, |
|
"rewards/margins_min": 0.08313913643360138, |
|
"rewards/margins_std": 0.21061666309833527, |
|
"rewards/rejected": -0.0708925798535347, |
|
"step": 720 |
|
}, |
|
{ |
|
"dpo_losses": 0.5583115816116333, |
|
"epoch": 2.06, |
|
"grad_norm": 1.8517093519732466, |
|
"learning_rate": 1.3628328757603242e-07, |
|
"logits/chosen": -2.7375550270080566, |
|
"logits/rejected": -2.673107862472534, |
|
"logps/chosen": -314.8241271972656, |
|
"logps/rejected": -281.47967529296875, |
|
"loss": 0.5671, |
|
"positive_losses": 0.10358180850744247, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.23430731892585754, |
|
"rewards/margins": 0.3044988512992859, |
|
"rewards/margins_max": 0.5319117307662964, |
|
"rewards/margins_min": 0.0928182452917099, |
|
"rewards/margins_std": 0.199130579829216, |
|
"rewards/rejected": -0.07019157707691193, |
|
"step": 730 |
|
}, |
|
{ |
|
"dpo_losses": 0.5659884810447693, |
|
"epoch": 2.08, |
|
"grad_norm": 9.541706290572293, |
|
"learning_rate": 1.2904465279052723e-07, |
|
"logits/chosen": -2.7077741622924805, |
|
"logits/rejected": -2.6461472511291504, |
|
"logps/chosen": -299.73486328125, |
|
"logps/rejected": -277.62725830078125, |
|
"loss": 0.6025, |
|
"positive_losses": 0.5914154052734375, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.20683905482292175, |
|
"rewards/margins": 0.28546684980392456, |
|
"rewards/margins_max": 0.5176615715026855, |
|
"rewards/margins_min": 0.09277740865945816, |
|
"rewards/margins_std": 0.1960216760635376, |
|
"rewards/rejected": -0.07862778007984161, |
|
"step": 740 |
|
}, |
|
{ |
|
"dpo_losses": 0.5690917372703552, |
|
"epoch": 2.11, |
|
"grad_norm": 1.8282721044580463, |
|
"learning_rate": 1.219360813381446e-07, |
|
"logits/chosen": -2.6811976432800293, |
|
"logits/rejected": -2.6621005535125732, |
|
"logps/chosen": -266.5814514160156, |
|
"logps/rejected": -242.2976837158203, |
|
"loss": 0.5786, |
|
"positive_losses": 0.02016754075884819, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.20154385268688202, |
|
"rewards/margins": 0.27638477087020874, |
|
"rewards/margins_max": 0.4697951376438141, |
|
"rewards/margins_min": 0.1011359691619873, |
|
"rewards/margins_std": 0.16673585772514343, |
|
"rewards/rejected": -0.0748409777879715, |
|
"step": 750 |
|
}, |
|
{ |
|
"dpo_losses": 0.5634604096412659, |
|
"epoch": 2.14, |
|
"grad_norm": 8.095290107453106, |
|
"learning_rate": 1.149652170686039e-07, |
|
"logits/chosen": -2.648603916168213, |
|
"logits/rejected": -2.6005616188049316, |
|
"logps/chosen": -267.0931396484375, |
|
"logps/rejected": -269.1485595703125, |
|
"loss": 0.5637, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.2069512903690338, |
|
"rewards/margins": 0.2928617596626282, |
|
"rewards/margins_max": 0.559156596660614, |
|
"rewards/margins_min": 0.08480425924062729, |
|
"rewards/margins_std": 0.21423542499542236, |
|
"rewards/rejected": -0.08591042459011078, |
|
"step": 760 |
|
}, |
|
{ |
|
"dpo_losses": 0.5496206879615784, |
|
"epoch": 2.17, |
|
"grad_norm": 6.6085926070789, |
|
"learning_rate": 1.0813955575503587e-07, |
|
"logits/chosen": -2.6958391666412354, |
|
"logits/rejected": -2.6329185962677, |
|
"logps/chosen": -281.89190673828125, |
|
"logps/rejected": -222.1160125732422, |
|
"loss": 0.5739, |
|
"positive_losses": 0.15439815819263458, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.22945666313171387, |
|
"rewards/margins": 0.3267485499382019, |
|
"rewards/margins_max": 0.5543676018714905, |
|
"rewards/margins_min": 0.1200588196516037, |
|
"rewards/margins_std": 0.20475919544696808, |
|
"rewards/rejected": -0.09729186445474625, |
|
"step": 770 |
|
}, |
|
{ |
|
"dpo_losses": 0.5734694004058838, |
|
"epoch": 2.2, |
|
"grad_norm": 5.752056251728322, |
|
"learning_rate": 1.0146643703377486e-07, |
|
"logits/chosen": -2.751596212387085, |
|
"logits/rejected": -2.658079147338867, |
|
"logps/chosen": -297.37677001953125, |
|
"logps/rejected": -233.66806030273438, |
|
"loss": 0.6086, |
|
"positive_losses": 0.2737409174442291, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.20195269584655762, |
|
"rewards/margins": 0.268646776676178, |
|
"rewards/margins_max": 0.4993320405483246, |
|
"rewards/margins_min": 0.051596127450466156, |
|
"rewards/margins_std": 0.20203988254070282, |
|
"rewards/rejected": -0.06669410318136215, |
|
"step": 780 |
|
}, |
|
{ |
|
"dpo_losses": 0.5670967102050781, |
|
"epoch": 2.23, |
|
"grad_norm": 2.435776321138929, |
|
"learning_rate": 9.495303651204494e-08, |
|
"logits/chosen": -2.712763786315918, |
|
"logits/rejected": -2.630561113357544, |
|
"logps/chosen": -300.8298645019531, |
|
"logps/rejected": -252.09829711914062, |
|
"loss": 0.5701, |
|
"positive_losses": 0.03285813331604004, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.2152232676744461, |
|
"rewards/margins": 0.2839241027832031, |
|
"rewards/margins_max": 0.5225785970687866, |
|
"rewards/margins_min": 0.08764372766017914, |
|
"rewards/margins_std": 0.1984366476535797, |
|
"rewards/rejected": -0.06870082765817642, |
|
"step": 790 |
|
}, |
|
{ |
|
"dpo_losses": 0.5538218021392822, |
|
"epoch": 2.25, |
|
"grad_norm": 7.082501471048372, |
|
"learning_rate": 8.860635805202615e-08, |
|
"logits/chosen": -2.7329680919647217, |
|
"logits/rejected": -2.6982383728027344, |
|
"logps/chosen": -299.06622314453125, |
|
"logps/rejected": -320.72845458984375, |
|
"loss": 0.5636, |
|
"positive_losses": 0.2872983515262604, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.2248470038175583, |
|
"rewards/margins": 0.3167414963245392, |
|
"rewards/margins_max": 0.5594197511672974, |
|
"rewards/margins_min": 0.11445317417383194, |
|
"rewards/margins_std": 0.19880005717277527, |
|
"rewards/rejected": -0.0918944776058197, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"eval_dpo_losses": 0.6666676998138428, |
|
"eval_logits/chosen": -2.7115840911865234, |
|
"eval_logits/rejected": -2.6734206676483154, |
|
"eval_logps/chosen": -278.4820251464844, |
|
"eval_logps/rejected": -258.64727783203125, |
|
"eval_loss": 0.9370628595352173, |
|
"eval_positive_losses": 2.4201414585113525, |
|
"eval_rewards/accuracies": 0.6259999871253967, |
|
"eval_rewards/chosen": 0.06111405789852142, |
|
"eval_rewards/margins": 0.06179738789796829, |
|
"eval_rewards/margins_max": 0.3369947075843811, |
|
"eval_rewards/margins_min": -0.17766639590263367, |
|
"eval_rewards/margins_std": 0.17163515090942383, |
|
"eval_rewards/rejected": -0.0006833283696323633, |
|
"eval_runtime": 428.0428, |
|
"eval_samples_per_second": 4.672, |
|
"eval_steps_per_second": 0.292, |
|
"step": 800 |
|
}, |
|
{ |
|
"dpo_losses": 0.561680793762207, |
|
"epoch": 2.28, |
|
"grad_norm": 6.853436586712954, |
|
"learning_rate": 8.24332262395994e-08, |
|
"logits/chosen": -2.770097255706787, |
|
"logits/rejected": -2.7309253215789795, |
|
"logps/chosen": -273.7652587890625, |
|
"logps/rejected": -278.20025634765625, |
|
"loss": 0.5736, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.21009182929992676, |
|
"rewards/margins": 0.2937797009944916, |
|
"rewards/margins_max": 0.5182637572288513, |
|
"rewards/margins_min": 0.0909772664308548, |
|
"rewards/margins_std": 0.1924486607313156, |
|
"rewards/rejected": -0.0836879163980484, |
|
"step": 810 |
|
}, |
|
{ |
|
"dpo_losses": 0.5531629920005798, |
|
"epoch": 2.31, |
|
"grad_norm": 2.2576986633959164, |
|
"learning_rate": 7.644027904586586e-08, |
|
"logits/chosen": -2.707874059677124, |
|
"logits/rejected": -2.6698241233825684, |
|
"logps/chosen": -305.3839416503906, |
|
"logps/rejected": -288.03485107421875, |
|
"loss": 0.5736, |
|
"positive_losses": 0.12947359681129456, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.24521002173423767, |
|
"rewards/margins": 0.32028818130493164, |
|
"rewards/margins_max": 0.5808194875717163, |
|
"rewards/margins_min": 0.09252934157848358, |
|
"rewards/margins_std": 0.21892566978931427, |
|
"rewards/rejected": -0.07507814466953278, |
|
"step": 820 |
|
}, |
|
{ |
|
"dpo_losses": 0.5370634198188782, |
|
"epoch": 2.34, |
|
"grad_norm": 2.1285258942046736, |
|
"learning_rate": 7.063396068933469e-08, |
|
"logits/chosen": -2.7070274353027344, |
|
"logits/rejected": -2.6111221313476562, |
|
"logps/chosen": -358.33660888671875, |
|
"logps/rejected": -278.06463623046875, |
|
"loss": 0.5593, |
|
"positive_losses": 0.08015155792236328, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.2682201564311981, |
|
"rewards/margins": 0.35513558983802795, |
|
"rewards/margins_max": 0.5836988091468811, |
|
"rewards/margins_min": 0.14265316724777222, |
|
"rewards/margins_std": 0.19289033114910126, |
|
"rewards/rejected": -0.08691541850566864, |
|
"step": 830 |
|
}, |
|
{ |
|
"dpo_losses": 0.5690010786056519, |
|
"epoch": 2.37, |
|
"grad_norm": 8.70012334977324, |
|
"learning_rate": 6.502051470645148e-08, |
|
"logits/chosen": -2.7239556312561035, |
|
"logits/rejected": -2.6661264896392822, |
|
"logps/chosen": -305.3841857910156, |
|
"logps/rejected": -253.9221649169922, |
|
"loss": 0.5836, |
|
"positive_losses": 0.07451782375574112, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.20678095519542694, |
|
"rewards/margins": 0.2766776978969574, |
|
"rewards/margins_max": 0.5134538412094116, |
|
"rewards/margins_min": 0.10238063335418701, |
|
"rewards/margins_std": 0.18080976605415344, |
|
"rewards/rejected": -0.06989672034978867, |
|
"step": 840 |
|
}, |
|
{ |
|
"dpo_losses": 0.5521928071975708, |
|
"epoch": 2.39, |
|
"grad_norm": 2.0434453054659096, |
|
"learning_rate": 5.960597723792194e-08, |
|
"logits/chosen": -2.7380244731903076, |
|
"logits/rejected": -2.6666131019592285, |
|
"logps/chosen": -271.77508544921875, |
|
"logps/rejected": -247.48190307617188, |
|
"loss": 0.5717, |
|
"positive_losses": 0.22315779328346252, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.2434767186641693, |
|
"rewards/margins": 0.3210228681564331, |
|
"rewards/margins_max": 0.5941131711006165, |
|
"rewards/margins_min": 0.11233635246753693, |
|
"rewards/margins_std": 0.21944975852966309, |
|
"rewards/rejected": -0.077546127140522, |
|
"step": 850 |
|
}, |
|
{ |
|
"dpo_losses": 0.5401885509490967, |
|
"epoch": 2.42, |
|
"grad_norm": 2.1235783253077907, |
|
"learning_rate": 5.4396170538046486e-08, |
|
"logits/chosen": -2.7592415809631348, |
|
"logits/rejected": -2.7021079063415527, |
|
"logps/chosen": -321.6221923828125, |
|
"logps/rejected": -266.2222900390625, |
|
"loss": 0.5618, |
|
"positive_losses": 0.12075729668140411, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.2578734755516052, |
|
"rewards/margins": 0.35080546140670776, |
|
"rewards/margins_max": 0.625725269317627, |
|
"rewards/margins_min": 0.10036028921604156, |
|
"rewards/margins_std": 0.2362469732761383, |
|
"rewards/rejected": -0.09293195605278015, |
|
"step": 860 |
|
}, |
|
{ |
|
"dpo_losses": 0.5563656687736511, |
|
"epoch": 2.45, |
|
"grad_norm": 1.8366012996504069, |
|
"learning_rate": 4.93966967140487e-08, |
|
"logits/chosen": -2.7032082080841064, |
|
"logits/rejected": -2.6532039642333984, |
|
"logps/chosen": -290.3298034667969, |
|
"logps/rejected": -287.1317138671875, |
|
"loss": 0.5634, |
|
"positive_losses": 0.1806434690952301, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.2245173156261444, |
|
"rewards/margins": 0.3118094205856323, |
|
"rewards/margins_max": 0.5898565053939819, |
|
"rewards/margins_min": 0.10215729475021362, |
|
"rewards/margins_std": 0.22476975619792938, |
|
"rewards/rejected": -0.08729207515716553, |
|
"step": 870 |
|
}, |
|
{ |
|
"dpo_losses": 0.5541495084762573, |
|
"epoch": 2.48, |
|
"grad_norm": 6.44345291592453, |
|
"learning_rate": 4.4612931702126433e-08, |
|
"logits/chosen": -2.8107595443725586, |
|
"logits/rejected": -2.7267684936523438, |
|
"logps/chosen": -304.7294006347656, |
|
"logps/rejected": -270.358642578125, |
|
"loss": 0.5863, |
|
"positive_losses": 0.24675989151000977, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2371862679719925, |
|
"rewards/margins": 0.31598663330078125, |
|
"rewards/margins_max": 0.5958508253097534, |
|
"rewards/margins_min": 0.08327536284923553, |
|
"rewards/margins_std": 0.23089298605918884, |
|
"rewards/rejected": -0.07880039513111115, |
|
"step": 880 |
|
}, |
|
{ |
|
"dpo_losses": 0.5567336082458496, |
|
"epoch": 2.51, |
|
"grad_norm": 7.843101687444187, |
|
"learning_rate": 4.005001948670605e-08, |
|
"logits/chosen": -2.775538682937622, |
|
"logits/rejected": -2.697503089904785, |
|
"logps/chosen": -322.6019287109375, |
|
"logps/rejected": -273.635986328125, |
|
"loss": 0.5736, |
|
"positive_losses": 0.37784984707832336, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.2152644395828247, |
|
"rewards/margins": 0.3087191581726074, |
|
"rewards/margins_max": 0.5784704685211182, |
|
"rewards/margins_min": 0.09841950982809067, |
|
"rewards/margins_std": 0.21595220267772675, |
|
"rewards/rejected": -0.09345470368862152, |
|
"step": 890 |
|
}, |
|
{ |
|
"dpo_losses": 0.543450117111206, |
|
"epoch": 2.54, |
|
"grad_norm": 8.016670976771136, |
|
"learning_rate": 3.571286656911376e-08, |
|
"logits/chosen": -2.77311635017395, |
|
"logits/rejected": -2.6821038722991943, |
|
"logps/chosen": -304.2124938964844, |
|
"logps/rejected": -293.931884765625, |
|
"loss": 0.5736, |
|
"positive_losses": 0.34936437010765076, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.2413357049226761, |
|
"rewards/margins": 0.34193912148475647, |
|
"rewards/margins_max": 0.5774747133255005, |
|
"rewards/margins_min": 0.10269900411367416, |
|
"rewards/margins_std": 0.21173134446144104, |
|
"rewards/rejected": -0.10060342401266098, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"eval_dpo_losses": 0.6658776998519897, |
|
"eval_logits/chosen": -2.7107093334198, |
|
"eval_logits/rejected": -2.672572612762451, |
|
"eval_logps/chosen": -278.8089904785156, |
|
"eval_logps/rejected": -259.1817321777344, |
|
"eval_loss": 0.9591056704521179, |
|
"eval_positive_losses": 2.626781702041626, |
|
"eval_rewards/accuracies": 0.6320000290870667, |
|
"eval_rewards/chosen": 0.0578441321849823, |
|
"eval_rewards/margins": 0.06387220323085785, |
|
"eval_rewards/margins_max": 0.34665346145629883, |
|
"eval_rewards/margins_min": -0.18229486048221588, |
|
"eval_rewards/margins_std": 0.1764223277568817, |
|
"eval_rewards/rejected": -0.006028064992278814, |
|
"eval_runtime": 427.6906, |
|
"eval_samples_per_second": 4.676, |
|
"eval_steps_per_second": 0.292, |
|
"step": 900 |
|
}, |
|
{ |
|
"dpo_losses": 0.5519391894340515, |
|
"epoch": 2.56, |
|
"grad_norm": 25.625793147024005, |
|
"learning_rate": 3.160613669161255e-08, |
|
"logits/chosen": -2.73115873336792, |
|
"logits/rejected": -2.670806407928467, |
|
"logps/chosen": -299.96832275390625, |
|
"logps/rejected": -239.826904296875, |
|
"loss": 0.5768, |
|
"positive_losses": 0.3415166437625885, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.23844274878501892, |
|
"rewards/margins": 0.3223797678947449, |
|
"rewards/margins_max": 0.5813696980476379, |
|
"rewards/margins_min": 0.09977176040410995, |
|
"rewards/margins_std": 0.21656760573387146, |
|
"rewards/rejected": -0.08393705636262894, |
|
"step": 910 |
|
}, |
|
{ |
|
"dpo_losses": 0.5531378388404846, |
|
"epoch": 2.59, |
|
"grad_norm": 2.1656044654918523, |
|
"learning_rate": 2.7734245822478436e-08, |
|
"logits/chosen": -2.7032344341278076, |
|
"logits/rejected": -2.612525463104248, |
|
"logps/chosen": -289.3112487792969, |
|
"logps/rejected": -218.96713256835938, |
|
"loss": 0.5749, |
|
"positive_losses": 0.18140992522239685, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.2416727989912033, |
|
"rewards/margins": 0.32405149936676025, |
|
"rewards/margins_max": 0.6709355115890503, |
|
"rewards/margins_min": 0.08473904430866241, |
|
"rewards/margins_std": 0.26692530512809753, |
|
"rewards/rejected": -0.08237870782613754, |
|
"step": 920 |
|
}, |
|
{ |
|
"dpo_losses": 0.5606621503829956, |
|
"epoch": 2.62, |
|
"grad_norm": 8.473170854444827, |
|
"learning_rate": 2.410135740750821e-08, |
|
"logits/chosen": -2.735752582550049, |
|
"logits/rejected": -2.671776294708252, |
|
"logps/chosen": -259.04132080078125, |
|
"logps/rejected": -235.8688201904297, |
|
"loss": 0.5834, |
|
"positive_losses": 0.36757582426071167, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.22701683640480042, |
|
"rewards/margins": 0.30118703842163086, |
|
"rewards/margins_max": 0.5452762246131897, |
|
"rewards/margins_min": 0.08702322095632553, |
|
"rewards/margins_std": 0.20802298188209534, |
|
"rewards/rejected": -0.07417017966508865, |
|
"step": 930 |
|
}, |
|
{ |
|
"dpo_losses": 0.5595995187759399, |
|
"epoch": 2.65, |
|
"grad_norm": 6.88019656676779, |
|
"learning_rate": 2.071137789306418e-08, |
|
"logits/chosen": -2.7638261318206787, |
|
"logits/rejected": -2.685157537460327, |
|
"logps/chosen": -326.85858154296875, |
|
"logps/rejected": -302.33209228515625, |
|
"loss": 0.572, |
|
"positive_losses": 0.20179709792137146, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.2141149938106537, |
|
"rewards/margins": 0.3031490445137024, |
|
"rewards/margins_max": 0.5786975026130676, |
|
"rewards/margins_min": 0.07930903881788254, |
|
"rewards/margins_std": 0.2305254191160202, |
|
"rewards/rejected": -0.08903402090072632, |
|
"step": 940 |
|
}, |
|
{ |
|
"dpo_losses": 0.5905728936195374, |
|
"epoch": 2.68, |
|
"grad_norm": 2.0169170880957616, |
|
"learning_rate": 1.7567952525471107e-08, |
|
"logits/chosen": -2.696136951446533, |
|
"logits/rejected": -2.6454825401306152, |
|
"logps/chosen": -245.9570770263672, |
|
"logps/rejected": -213.52334594726562, |
|
"loss": 0.6002, |
|
"positive_losses": 0.25799694657325745, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.1696554720401764, |
|
"rewards/margins": 0.22757229208946228, |
|
"rewards/margins_max": 0.4704417288303375, |
|
"rewards/margins_min": 0.05507839843630791, |
|
"rewards/margins_std": 0.18926933407783508, |
|
"rewards/rejected": -0.057916827499866486, |
|
"step": 950 |
|
}, |
|
{ |
|
"dpo_losses": 0.560580313205719, |
|
"epoch": 2.7, |
|
"grad_norm": 7.04290537877064, |
|
"learning_rate": 1.467446143128101e-08, |
|
"logits/chosen": -2.787949800491333, |
|
"logits/rejected": -2.7239291667938232, |
|
"logps/chosen": -260.6053161621094, |
|
"logps/rejected": -223.7249755859375, |
|
"loss": 0.5671, |
|
"positive_losses": 0.06990127265453339, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.22634287178516388, |
|
"rewards/margins": 0.29788738489151, |
|
"rewards/margins_max": 0.5511468648910522, |
|
"rewards/margins_min": 0.08614132553339005, |
|
"rewards/margins_std": 0.2106626331806183, |
|
"rewards/rejected": -0.07154452800750732, |
|
"step": 960 |
|
}, |
|
{ |
|
"dpo_losses": 0.5514933466911316, |
|
"epoch": 2.73, |
|
"grad_norm": 1.9051880301646325, |
|
"learning_rate": 1.2034015982622243e-08, |
|
"logits/chosen": -2.7561697959899902, |
|
"logits/rejected": -2.6536028385162354, |
|
"logps/chosen": -329.5728759765625, |
|
"logps/rejected": -290.1389465332031, |
|
"loss": 0.5909, |
|
"positive_losses": 0.08531048148870468, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.22805485129356384, |
|
"rewards/margins": 0.3209593594074249, |
|
"rewards/margins_max": 0.5957485437393188, |
|
"rewards/margins_min": 0.09393687546253204, |
|
"rewards/margins_std": 0.2260352075099945, |
|
"rewards/rejected": -0.09290449321269989, |
|
"step": 970 |
|
}, |
|
{ |
|
"dpo_losses": 0.5581797957420349, |
|
"epoch": 2.76, |
|
"grad_norm": 13.216663962544713, |
|
"learning_rate": 9.649455451539418e-09, |
|
"logits/chosen": -2.6970033645629883, |
|
"logits/rejected": -2.651496410369873, |
|
"logps/chosen": -266.47296142578125, |
|
"logps/rejected": -233.91098022460938, |
|
"loss": 0.5837, |
|
"positive_losses": 0.05353069305419922, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21983060240745544, |
|
"rewards/margins": 0.30383530259132385, |
|
"rewards/margins_max": 0.517738401889801, |
|
"rewards/margins_min": 0.09517470002174377, |
|
"rewards/margins_std": 0.19478817284107208, |
|
"rewards/rejected": -0.08400467783212662, |
|
"step": 980 |
|
}, |
|
{ |
|
"dpo_losses": 0.5490652322769165, |
|
"epoch": 2.79, |
|
"grad_norm": 2.0486277290431736, |
|
"learning_rate": 7.523343956923194e-09, |
|
"logits/chosen": -2.7556817531585693, |
|
"logits/rejected": -2.6662416458129883, |
|
"logps/chosen": -294.7388916015625, |
|
"logps/rejected": -267.9339904785156, |
|
"loss": 0.5647, |
|
"positive_losses": 0.10996627807617188, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.2385723888874054, |
|
"rewards/margins": 0.3286248743534088, |
|
"rewards/margins_max": 0.6235631108283997, |
|
"rewards/margins_min": 0.11854185163974762, |
|
"rewards/margins_std": 0.2294415682554245, |
|
"rewards/rejected": -0.09005247056484222, |
|
"step": 990 |
|
}, |
|
{ |
|
"dpo_losses": 0.5738095045089722, |
|
"epoch": 2.82, |
|
"grad_norm": 6.03858746514603, |
|
"learning_rate": 5.6579677073121945e-09, |
|
"logits/chosen": -2.7139980792999268, |
|
"logits/rejected": -2.6967344284057617, |
|
"logps/chosen": -273.75885009765625, |
|
"logps/rejected": -276.884033203125, |
|
"loss": 0.5825, |
|
"positive_losses": 0.2053932249546051, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.1967349648475647, |
|
"rewards/margins": 0.2664409279823303, |
|
"rewards/margins_max": 0.49395784735679626, |
|
"rewards/margins_min": 0.09026306867599487, |
|
"rewards/margins_std": 0.1819494068622589, |
|
"rewards/rejected": -0.06970598548650742, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"eval_dpo_losses": 0.665816068649292, |
|
"eval_logits/chosen": -2.712677001953125, |
|
"eval_logits/rejected": -2.674919366836548, |
|
"eval_logps/chosen": -278.6134033203125, |
|
"eval_logps/rejected": -259.0028076171875, |
|
"eval_loss": 0.9543355107307434, |
|
"eval_positive_losses": 2.581048011779785, |
|
"eval_rewards/accuracies": 0.6290000081062317, |
|
"eval_rewards/chosen": 0.0598001554608345, |
|
"eval_rewards/margins": 0.06403880566358566, |
|
"eval_rewards/margins_max": 0.34747231006622314, |
|
"eval_rewards/margins_min": -0.1826418787240982, |
|
"eval_rewards/margins_std": 0.17671293020248413, |
|
"eval_rewards/rejected": -0.004238648805767298, |
|
"eval_runtime": 427.7348, |
|
"eval_samples_per_second": 4.676, |
|
"eval_steps_per_second": 0.292, |
|
"step": 1000 |
|
}, |
|
{ |
|
"dpo_losses": 0.5594234466552734, |
|
"epoch": 2.85, |
|
"grad_norm": 10.058501971668669, |
|
"learning_rate": 4.0553325425319585e-09, |
|
"logits/chosen": -2.789537191390991, |
|
"logits/rejected": -2.7190775871276855, |
|
"logps/chosen": -301.9007568359375, |
|
"logps/rejected": -313.83831787109375, |
|
"loss": 0.6005, |
|
"positive_losses": 0.10010738670825958, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.21791288256645203, |
|
"rewards/margins": 0.30005866289138794, |
|
"rewards/margins_max": 0.5106627345085144, |
|
"rewards/margins_min": 0.13350318372249603, |
|
"rewards/margins_std": 0.18063752353191376, |
|
"rewards/rejected": -0.0821457952260971, |
|
"step": 1010 |
|
}, |
|
{ |
|
"dpo_losses": 0.5577588081359863, |
|
"epoch": 2.87, |
|
"grad_norm": 1.8795598215831135, |
|
"learning_rate": 2.717161776814747e-09, |
|
"logits/chosen": -2.7825233936309814, |
|
"logits/rejected": -2.7229418754577637, |
|
"logps/chosen": -291.11932373046875, |
|
"logps/rejected": -272.02923583984375, |
|
"loss": 0.5722, |
|
"positive_losses": 0.11093978583812714, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.22510568797588348, |
|
"rewards/margins": 0.30276286602020264, |
|
"rewards/margins_max": 0.5086154341697693, |
|
"rewards/margins_min": 0.1081671267747879, |
|
"rewards/margins_std": 0.1743108481168747, |
|
"rewards/rejected": -0.07765716314315796, |
|
"step": 1020 |
|
}, |
|
{ |
|
"dpo_losses": 0.555158257484436, |
|
"epoch": 2.9, |
|
"grad_norm": 1.7202517104603963, |
|
"learning_rate": 1.6448943457189613e-09, |
|
"logits/chosen": -2.6988253593444824, |
|
"logits/rejected": -2.6433205604553223, |
|
"logps/chosen": -286.4718322753906, |
|
"logps/rejected": -265.35052490234375, |
|
"loss": 0.5888, |
|
"positive_losses": 0.37335652112960815, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.23134306073188782, |
|
"rewards/margins": 0.31557995080947876, |
|
"rewards/margins_max": 0.594192624092102, |
|
"rewards/margins_min": 0.10240230709314346, |
|
"rewards/margins_std": 0.2274932563304901, |
|
"rewards/rejected": -0.08423684537410736, |
|
"step": 1030 |
|
}, |
|
{ |
|
"dpo_losses": 0.5689266324043274, |
|
"epoch": 2.93, |
|
"grad_norm": 15.663649464629284, |
|
"learning_rate": 8.396832588411229e-10, |
|
"logits/chosen": -2.6604883670806885, |
|
"logits/rejected": -2.586012840270996, |
|
"logps/chosen": -273.3360290527344, |
|
"logps/rejected": -243.2672882080078, |
|
"loss": 0.5953, |
|
"positive_losses": 0.21942074596881866, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.21510818600654602, |
|
"rewards/margins": 0.2806702256202698, |
|
"rewards/margins_max": 0.5532082319259644, |
|
"rewards/margins_min": 0.06844080239534378, |
|
"rewards/margins_std": 0.22315998375415802, |
|
"rewards/rejected": -0.06556206941604614, |
|
"step": 1040 |
|
}, |
|
{ |
|
"dpo_losses": 0.5607948303222656, |
|
"epoch": 2.96, |
|
"grad_norm": 7.720480782752179, |
|
"learning_rate": 3.0239435998430374e-10, |
|
"logits/chosen": -2.7420949935913086, |
|
"logits/rejected": -2.663933038711548, |
|
"logps/chosen": -278.32171630859375, |
|
"logps/rejected": -232.65087890625, |
|
"loss": 0.58, |
|
"positive_losses": 0.3233460783958435, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.23241767287254333, |
|
"rewards/margins": 0.2982451915740967, |
|
"rewards/margins_max": 0.537132978439331, |
|
"rewards/margins_min": 0.06783739477396011, |
|
"rewards/margins_std": 0.2153671681880951, |
|
"rewards/rejected": -0.06582748889923096, |
|
"step": 1050 |
|
}, |
|
{ |
|
"dpo_losses": 0.5575666427612305, |
|
"epoch": 2.99, |
|
"grad_norm": 1.9829415978985032, |
|
"learning_rate": 3.360539611582669e-11, |
|
"logits/chosen": -2.6903703212738037, |
|
"logits/rejected": -2.675687789916992, |
|
"logps/chosen": -256.32183837890625, |
|
"logps/rejected": -261.22113037109375, |
|
"loss": 0.5684, |
|
"positive_losses": 0.07837722450494766, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.23069465160369873, |
|
"rewards/margins": 0.30815768241882324, |
|
"rewards/margins_max": 0.5907589793205261, |
|
"rewards/margins_min": 0.066887766122818, |
|
"rewards/margins_std": 0.2346896380186081, |
|
"rewards/rejected": -0.0774630457162857, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 1065, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6146632369135467, |
|
"train_runtime": 13546.83, |
|
"train_samples_per_second": 1.257, |
|
"train_steps_per_second": 0.079 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1065, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|