{ "best_metric": null, "best_model_checkpoint": null, "epoch": 6.259780907668231, "eval_steps": 50, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.40062597809076683, "grad_norm": 54.871246337890625, "learning_rate": 3.2e-07, "log_odds_chosen": -0.2863271236419678, "log_odds_ratio": -0.8682101964950562, "logits/chosen": -2.625612735748291, "logits/rejected": -2.950411558151245, "logps/chosen": -1.0862526893615723, "logps/rejected": -0.9258921146392822, "loss": 1.5757, "nll_loss": 1.4893277883529663, "rewards/accuracies": 0.26953125, "rewards/chosen": -0.10862527787685394, "rewards/margins": -0.01603606529533863, "rewards/rejected": -0.09258921444416046, "step": 32 }, { "epoch": 0.6259780907668232, "eval_log_odds_chosen": -0.008341665379703045, "eval_log_odds_ratio": -0.7013140320777893, "eval_logits/chosen": -2.989178419113159, "eval_logits/rejected": -3.091775417327881, "eval_logps/chosen": -0.919834554195404, "eval_logps/rejected": -0.9251189827919006, "eval_loss": 1.1759616136550903, "eval_nll_loss": 1.0507723093032837, "eval_rewards/accuracies": 0.5714285969734192, "eval_rewards/chosen": -0.09198347479104996, "eval_rewards/margins": 0.0005284372018650174, "eval_rewards/rejected": -0.09251189976930618, "eval_runtime": 3.582, "eval_samples_per_second": 14.517, "eval_steps_per_second": 1.954, "step": 50 }, { "epoch": 0.8012519561815337, "grad_norm": 14.854985237121582, "learning_rate": 4.988068499954577e-07, "log_odds_chosen": -0.08584073185920715, "log_odds_ratio": -0.7622759342193604, "logits/chosen": -2.6125504970550537, "logits/rejected": -2.8110339641571045, "logps/chosen": -0.7727512121200562, "logps/rejected": -0.7502321004867554, "loss": 0.9889, "nll_loss": 0.9098491668701172, "rewards/accuracies": 0.43359375, "rewards/chosen": -0.0772751197218895, "rewards/margins": -0.0022519100457429886, "rewards/rejected": -0.07502321898937225, "step": 64 }, { "epoch": 1.2018779342723005, "grad_norm": 12.362813949584961, "learning_rate": 4.872190029111241e-07, "log_odds_chosen": 0.6338525414466858, "log_odds_ratio": -0.46056824922561646, "logits/chosen": -2.487048387527466, "logits/rejected": -2.679857015609741, "logps/chosen": -0.6807280778884888, "logps/rejected": -1.0647395849227905, "loss": 0.7611, "nll_loss": 0.7052887082099915, "rewards/accuracies": 0.8984375, "rewards/chosen": -0.06807281076908112, "rewards/margins": 0.038401152938604355, "rewards/rejected": -0.10647396743297577, "step": 96 }, { "epoch": 1.2519561815336462, "eval_log_odds_chosen": 1.3032406568527222, "eval_log_odds_ratio": -0.27346786856651306, "eval_logits/chosen": -2.8948192596435547, "eval_logits/rejected": -2.813701868057251, "eval_logps/chosen": -0.8319589495658875, "eval_logps/rejected": -1.7709095478057861, "eval_loss": 0.874918520450592, "eval_nll_loss": 0.8324368596076965, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.08319590240716934, "eval_rewards/margins": 0.0938950628042221, "eval_rewards/rejected": -0.17709095776081085, "eval_runtime": 3.5651, "eval_samples_per_second": 14.586, "eval_steps_per_second": 1.963, "step": 100 }, { "epoch": 1.6025039123630673, "grad_norm": 11.88025188446045, "learning_rate": 4.6384106504012665e-07, "log_odds_chosen": 1.6872429847717285, "log_odds_ratio": -0.21468885242938995, "logits/chosen": -2.4869632720947266, "logits/rejected": -2.536886215209961, "logps/chosen": -0.634860098361969, "logps/rejected": -1.7767176628112793, "loss": 0.6843, "nll_loss": 0.6487288475036621, "rewards/accuracies": 0.99609375, "rewards/chosen": -0.0634860098361969, "rewards/margins": 0.11418575048446655, "rewards/rejected": -0.17767177522182465, "step": 128 }, { "epoch": 1.8779342723004695, "eval_log_odds_chosen": 2.711949586868286, "eval_log_odds_ratio": -0.10135732591152191, "eval_logits/chosen": -2.8942391872406006, "eval_logits/rejected": -2.879112482070923, "eval_logps/chosen": -0.8020210862159729, "eval_logps/rejected": -2.9775755405426025, "eval_loss": 0.8163785338401794, "eval_nll_loss": 0.795821487903595, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.08020210266113281, "eval_rewards/margins": 0.217555433511734, "eval_rewards/rejected": -0.2977575361728668, "eval_runtime": 3.5856, "eval_samples_per_second": 14.502, "eval_steps_per_second": 1.952, "step": 150 }, { "epoch": 2.003129890453834, "grad_norm": 11.101346969604492, "learning_rate": 4.2983495008466273e-07, "log_odds_chosen": 2.479166030883789, "log_odds_ratio": -0.11427275836467743, "logits/chosen": -2.462007522583008, "logits/rejected": -2.5404274463653564, "logps/chosen": -0.6245267391204834, "logps/rejected": -2.437384843826294, "loss": 0.6521, "nll_loss": 0.6418842077255249, "rewards/accuracies": 1.0, "rewards/chosen": -0.06245267391204834, "rewards/margins": 0.18128584325313568, "rewards/rejected": -0.24373850226402283, "step": 160 }, { "epoch": 2.403755868544601, "grad_norm": 9.568058013916016, "learning_rate": 3.8689080587313755e-07, "log_odds_chosen": 2.8940343856811523, "log_odds_ratio": -0.08038710057735443, "logits/chosen": -2.4281036853790283, "logits/rejected": -2.5184569358825684, "logps/chosen": -0.587062418460846, "logps/rejected": -2.7329537868499756, "loss": 0.6314, "nll_loss": 0.5980546474456787, "rewards/accuracies": 1.0, "rewards/chosen": -0.05870624631643295, "rewards/margins": 0.21458914875984192, "rewards/rejected": -0.27329540252685547, "step": 192 }, { "epoch": 2.5039123630672924, "eval_log_odds_chosen": 3.3576512336730957, "eval_log_odds_ratio": -0.06285899877548218, "eval_logits/chosen": -2.901575803756714, "eval_logits/rejected": -2.8905088901519775, "eval_logps/chosen": -0.7869912385940552, "eval_logps/rejected": -3.557527780532837, "eval_loss": 0.7937864065170288, "eval_nll_loss": 0.7795100808143616, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.07869912683963776, "eval_rewards/margins": 0.2770536541938782, "eval_rewards/rejected": -0.3557527959346771, "eval_runtime": 3.5965, "eval_samples_per_second": 14.458, "eval_steps_per_second": 1.946, "step": 200 }, { "epoch": 2.804381846635368, "grad_norm": 10.476876258850098, "learning_rate": 3.371430118304538e-07, "log_odds_chosen": 3.5498757362365723, "log_odds_ratio": -0.05812463164329529, "logits/chosen": -2.4844484329223633, "logits/rejected": -2.5605552196502686, "logps/chosen": -0.6014833450317383, "logps/rejected": -3.397700786590576, "loss": 0.6194, "nll_loss": 0.6138021945953369, "rewards/accuracies": 1.0, "rewards/chosen": -0.06014833599328995, "rewards/margins": 0.27962177991867065, "rewards/rejected": -0.3397701382637024, "step": 224 }, { "epoch": 3.1298904538341157, "eval_log_odds_chosen": 4.2982916831970215, "eval_log_odds_ratio": -0.03043905831873417, "eval_logits/chosen": -2.9097611904144287, "eval_logits/rejected": -2.854037046432495, "eval_logps/chosen": -0.7769914865493774, "eval_logps/rejected": -4.445803165435791, "eval_loss": 0.7800766825675964, "eval_nll_loss": 0.7686944603919983, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.07769914716482162, "eval_rewards/margins": 0.3668811619281769, "eval_rewards/rejected": -0.4445803463459015, "eval_runtime": 3.5656, "eval_samples_per_second": 14.584, "eval_steps_per_second": 1.963, "step": 250 }, { "epoch": 3.2050078247261347, "grad_norm": 10.105437278747559, "learning_rate": 2.830640975642806e-07, "log_odds_chosen": 4.234708786010742, "log_odds_ratio": -0.03429976850748062, "logits/chosen": -2.4910290241241455, "logits/rejected": -2.5494701862335205, "logps/chosen": -0.6145447492599487, "logps/rejected": -4.0717267990112305, "loss": 0.6159, "nll_loss": 0.6162381768226624, "rewards/accuracies": 1.0, "rewards/chosen": -0.06145448237657547, "rewards/margins": 0.3457182049751282, "rewards/rejected": -0.40717267990112305, "step": 256 }, { "epoch": 3.6056338028169015, "grad_norm": 9.233214378356934, "learning_rate": 2.2734185495055498e-07, "log_odds_chosen": 4.952095031738281, "log_odds_ratio": -0.01972360536456108, "logits/chosen": -2.4912912845611572, "logits/rejected": -2.502811908721924, "logps/chosen": -0.594947874546051, "logps/rejected": -4.724546432495117, "loss": 0.6043, "nll_loss": 0.6036252379417419, "rewards/accuracies": 1.0, "rewards/chosen": -0.05949478596448898, "rewards/margins": 0.41295987367630005, "rewards/rejected": -0.47245466709136963, "step": 288 }, { "epoch": 3.755868544600939, "eval_log_odds_chosen": 5.4648847579956055, "eval_log_odds_ratio": -0.010219605639576912, "eval_logits/chosen": -2.894416093826294, "eval_logits/rejected": -2.8168509006500244, "eval_logps/chosen": -0.772580623626709, "eval_logps/rejected": -5.580881595611572, "eval_loss": 0.7731113433837891, "eval_nll_loss": 0.7631542086601257, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.07725805789232254, "eval_rewards/margins": 0.48083004355430603, "eval_rewards/rejected": -0.5580881237983704, "eval_runtime": 3.5679, "eval_samples_per_second": 14.574, "eval_steps_per_second": 1.962, "step": 300 }, { "epoch": 4.006259780907668, "grad_norm": 10.460640907287598, "learning_rate": 1.7274575140626315e-07, "log_odds_chosen": 5.956634998321533, "log_odds_ratio": -0.01081022247672081, "logits/chosen": -2.474257707595825, "logits/rejected": -2.4944746494293213, "logps/chosen": -0.584967315196991, "logps/rejected": -5.693123817443848, "loss": 0.5961, "nll_loss": 0.591893196105957, "rewards/accuracies": 1.0, "rewards/chosen": -0.058496732264757156, "rewards/margins": 0.5108156800270081, "rewards/rejected": -0.5693123936653137, "step": 320 }, { "epoch": 4.381846635367762, "eval_log_odds_chosen": 6.661163330078125, "eval_log_odds_ratio": -0.003369454061612487, "eval_logits/chosen": -2.8870294094085693, "eval_logits/rejected": -2.8113913536071777, "eval_logps/chosen": -0.7696248888969421, "eval_logps/rejected": -6.763743877410889, "eval_loss": 0.769009530544281, "eval_nll_loss": 0.7600502967834473, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.07696248590946198, "eval_rewards/margins": 0.5994119644165039, "eval_rewards/rejected": -0.6763744354248047, "eval_runtime": 3.5818, "eval_samples_per_second": 14.518, "eval_steps_per_second": 1.954, "step": 350 }, { "epoch": 4.406885758998435, "grad_norm": 10.492298126220703, "learning_rate": 1.2198928378235715e-07, "log_odds_chosen": 6.660679817199707, "log_odds_ratio": -0.00656685046851635, "logits/chosen": -2.4854841232299805, "logits/rejected": -2.487821102142334, "logps/chosen": -0.5842890739440918, "logps/rejected": -6.385722637176514, "loss": 0.5976, "nll_loss": 0.5909620523452759, "rewards/accuracies": 1.0, "rewards/chosen": -0.05842890590429306, "rewards/margins": 0.5801433324813843, "rewards/rejected": -0.6385722160339355, "step": 352 }, { "epoch": 4.807511737089202, "grad_norm": 10.00313949584961, "learning_rate": 7.759511406608255e-08, "log_odds_chosen": 6.706086158752441, "log_odds_ratio": -0.0058883922174572945, "logits/chosen": -2.4211370944976807, "logits/rejected": -2.4340803623199463, "logps/chosen": -0.570120632648468, "logps/rejected": -6.413407325744629, "loss": 0.584, "nll_loss": 0.5762451887130737, "rewards/accuracies": 1.0, "rewards/chosen": -0.05701206251978874, "rewards/margins": 0.5843286514282227, "rewards/rejected": -0.6413407921791077, "step": 384 }, { "epoch": 5.007824726134586, "eval_log_odds_chosen": 7.109870910644531, "eval_log_odds_ratio": -0.002329548355191946, "eval_logits/chosen": -2.9075374603271484, "eval_logits/rejected": -2.810256242752075, "eval_logps/chosen": -0.7671002149581909, "eval_logps/rejected": -7.206047534942627, "eval_loss": 0.7668870091438293, "eval_nll_loss": 0.7577933073043823, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.07671000808477402, "eval_rewards/margins": 0.6438947319984436, "eval_rewards/rejected": -0.7206048369407654, "eval_runtime": 3.5587, "eval_samples_per_second": 14.612, "eval_steps_per_second": 1.967, "step": 400 }, { "epoch": 5.208137715179968, "grad_norm": 12.630816459655762, "learning_rate": 4.176968982247514e-08, "log_odds_chosen": 7.1072587966918945, "log_odds_ratio": -0.0049699898809194565, "logits/chosen": -2.4647462368011475, "logits/rejected": -2.4573941230773926, "logps/chosen": -0.5844926834106445, "logps/rejected": -6.838533401489258, "loss": 0.5949, "nll_loss": 0.5898107290267944, "rewards/accuracies": 1.0, "rewards/chosen": -0.05844927579164505, "rewards/margins": 0.6254041194915771, "rewards/rejected": -0.6838533878326416, "step": 416 }, { "epoch": 5.608763693270736, "grad_norm": 50.02872848510742, "learning_rate": 1.629358090099639e-08, "log_odds_chosen": 7.274372577667236, "log_odds_ratio": -0.004320599138736725, "logits/chosen": -2.405644178390503, "logits/rejected": -2.42146635055542, "logps/chosen": -0.5744296908378601, "logps/rejected": -6.983857154846191, "loss": 0.5954, "nll_loss": 0.5800217986106873, "rewards/accuracies": 1.0, "rewards/chosen": -0.05744296684861183, "rewards/margins": 0.6409427523612976, "rewards/rejected": -0.6983856558799744, "step": 448 }, { "epoch": 5.633802816901408, "eval_log_odds_chosen": 7.28384256362915, "eval_log_odds_ratio": -0.0020152912475168705, "eval_logits/chosen": -2.903193235397339, "eval_logits/rejected": -2.808192014694214, "eval_logps/chosen": -0.767649233341217, "eval_logps/rejected": -7.380805969238281, "eval_loss": 0.7667036652565002, "eval_nll_loss": 0.7581475377082825, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.07676493376493454, "eval_rewards/margins": 0.6613157391548157, "eval_rewards/rejected": -0.738080620765686, "eval_runtime": 3.589, "eval_samples_per_second": 14.489, "eval_steps_per_second": 1.95, "step": 450 }, { "epoch": 6.009389671361502, "grad_norm": 19.15870475769043, "learning_rate": 2.4329828146074096e-09, "log_odds_chosen": 7.12039852142334, "log_odds_ratio": -0.0058927275240421295, "logits/chosen": -2.4525253772735596, "logits/rejected": -2.446058988571167, "logps/chosen": -0.5847591757774353, "logps/rejected": -6.854161739349365, "loss": 0.589, "nll_loss": 0.589131236076355, "rewards/accuracies": 1.0, "rewards/chosen": -0.05847591161727905, "rewards/margins": 0.6269403100013733, "rewards/rejected": -0.6854162216186523, "step": 480 }, { "epoch": 6.259780907668231, "grad_norm": 9.93666934967041, "learning_rate": 0.0, "log_odds_chosen": 7.366458892822266, "log_odds_ratio": -0.00489471573382616, "logits/chosen": -2.495069980621338, "logits/rejected": -2.4710330963134766, "logps/chosen": -0.5860047936439514, "logps/rejected": -7.1015305519104, "loss": 0.589, "nll_loss": 0.593083381652832, "rewards/accuracies": 1.0, "rewards/chosen": -0.058600474148988724, "rewards/margins": 0.6515525579452515, "rewards/rejected": -0.7101531028747559, "step": 500 }, { "epoch": 6.259780907668231, "eval_log_odds_chosen": 7.277224063873291, "eval_log_odds_ratio": -0.0020051717292517424, "eval_logits/chosen": -2.901961088180542, "eval_logits/rejected": -2.8077356815338135, "eval_logps/chosen": -0.7674554586410522, "eval_logps/rejected": -7.3736891746521, "eval_loss": 0.7665765285491943, "eval_nll_loss": 0.7579033970832825, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.07674554735422134, "eval_rewards/margins": 0.6606234312057495, "eval_rewards/rejected": -0.7373689413070679, "eval_runtime": 3.5704, "eval_samples_per_second": 14.564, "eval_steps_per_second": 1.961, "step": 500 } ], "logging_steps": 32, "max_steps": 500, "num_input_tokens_seen": 0, "num_train_epochs": 7, "save_steps": 1, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }