|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 100, |
|
"global_step": 1065, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"dpo_losses": 0.6931471824645996, |
|
"epoch": 0.0, |
|
"grad_norm": 2.1182166269339366, |
|
"learning_rate": 4.672897196261682e-08, |
|
"logits/chosen": -2.8477635383605957, |
|
"logits/rejected": -2.8469698429107666, |
|
"logps/chosen": -522.6112670898438, |
|
"logps/rejected": -359.48583984375, |
|
"loss": 0.6931, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/margins_max": 0.0, |
|
"rewards/margins_min": 0.0, |
|
"rewards/margins_std": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"dpo_losses": 0.6931488513946533, |
|
"epoch": 0.03, |
|
"grad_norm": 18.90606759984328, |
|
"learning_rate": 4.6728971962616824e-07, |
|
"logits/chosen": -2.921452522277832, |
|
"logits/rejected": -2.7972779273986816, |
|
"logps/chosen": -313.4413146972656, |
|
"logps/rejected": -170.33502197265625, |
|
"loss": 0.698, |
|
"positive_losses": 0.056756019592285156, |
|
"rewards/accuracies": 0.5555555820465088, |
|
"rewards/chosen": 0.00029136601369827986, |
|
"rewards/margins": -2.1007015220675385e-06, |
|
"rewards/margins_max": 0.0011362881632521749, |
|
"rewards/margins_min": -0.001140489592216909, |
|
"rewards/margins_std": 0.0016099249478429556, |
|
"rewards/rejected": 0.0002934667863883078, |
|
"step": 10 |
|
}, |
|
{ |
|
"dpo_losses": 0.6920409202575684, |
|
"epoch": 0.06, |
|
"grad_norm": 2.048573611790903, |
|
"learning_rate": 9.345794392523365e-07, |
|
"logits/chosen": -2.7631053924560547, |
|
"logits/rejected": -2.7107467651367188, |
|
"logps/chosen": -380.45953369140625, |
|
"logps/rejected": -243.90365600585938, |
|
"loss": 0.6928, |
|
"positive_losses": 0.02124938927590847, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.006503595970571041, |
|
"rewards/margins": 0.0022183754481375217, |
|
"rewards/margins_max": 0.004751545377075672, |
|
"rewards/margins_min": -0.00031479448080062866, |
|
"rewards/margins_std": 0.0035824428778141737, |
|
"rewards/rejected": 0.004285220988094807, |
|
"step": 20 |
|
}, |
|
{ |
|
"dpo_losses": 0.6899846792221069, |
|
"epoch": 0.08, |
|
"grad_norm": 2.2814863944700297, |
|
"learning_rate": 1.4018691588785047e-06, |
|
"logits/chosen": -2.87530779838562, |
|
"logits/rejected": -2.823071002960205, |
|
"logps/chosen": -374.14593505859375, |
|
"logps/rejected": -251.3402099609375, |
|
"loss": 0.6884, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.01957734487950802, |
|
"rewards/margins": 0.0063508180901408195, |
|
"rewards/margins_max": 0.011502384208142757, |
|
"rewards/margins_min": 0.0011992522049695253, |
|
"rewards/margins_std": 0.007285414729267359, |
|
"rewards/rejected": 0.013226528652012348, |
|
"step": 30 |
|
}, |
|
{ |
|
"dpo_losses": 0.6816079616546631, |
|
"epoch": 0.11, |
|
"grad_norm": 1.9338584641637362, |
|
"learning_rate": 1.869158878504673e-06, |
|
"logits/chosen": -2.732996940612793, |
|
"logits/rejected": -2.7668612003326416, |
|
"logps/chosen": -302.9008483886719, |
|
"logps/rejected": -315.843505859375, |
|
"loss": 0.6807, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.04024823382496834, |
|
"rewards/margins": 0.023371612653136253, |
|
"rewards/margins_max": 0.03446139022707939, |
|
"rewards/margins_min": 0.01228183414787054, |
|
"rewards/margins_std": 0.015683313831686974, |
|
"rewards/rejected": 0.016876617446541786, |
|
"step": 40 |
|
}, |
|
{ |
|
"dpo_losses": 0.6704033613204956, |
|
"epoch": 0.14, |
|
"grad_norm": 2.241837849751868, |
|
"learning_rate": 2.3364485981308413e-06, |
|
"logits/chosen": -2.7844934463500977, |
|
"logits/rejected": -2.696681022644043, |
|
"logps/chosen": -238.6322784423828, |
|
"logps/rejected": -172.29949951171875, |
|
"loss": 0.6657, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.06590737402439117, |
|
"rewards/margins": 0.046577345579862595, |
|
"rewards/margins_max": 0.06979880481958389, |
|
"rewards/margins_min": 0.023355895653367043, |
|
"rewards/margins_std": 0.03284009173512459, |
|
"rewards/rejected": 0.019330020993947983, |
|
"step": 50 |
|
}, |
|
{ |
|
"dpo_losses": 0.6440416574478149, |
|
"epoch": 0.17, |
|
"grad_norm": 2.210137428049826, |
|
"learning_rate": 2.8037383177570094e-06, |
|
"logits/chosen": -2.7359554767608643, |
|
"logits/rejected": -2.6721925735473633, |
|
"logps/chosen": -255.8380584716797, |
|
"logps/rejected": -234.24276733398438, |
|
"loss": 0.6442, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.11597372591495514, |
|
"rewards/margins": 0.10343287885189056, |
|
"rewards/margins_max": 0.15465359389781952, |
|
"rewards/margins_min": 0.0522121861577034, |
|
"rewards/margins_std": 0.07243702560663223, |
|
"rewards/rejected": 0.012540824711322784, |
|
"step": 60 |
|
}, |
|
{ |
|
"dpo_losses": 0.6189829111099243, |
|
"epoch": 0.2, |
|
"grad_norm": 1.721316554071705, |
|
"learning_rate": 3.2710280373831774e-06, |
|
"logits/chosen": -2.6506264209747314, |
|
"logits/rejected": -2.65425443649292, |
|
"logps/chosen": -316.86090087890625, |
|
"logps/rejected": -213.46066284179688, |
|
"loss": 0.6213, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.17079836130142212, |
|
"rewards/margins": 0.15738530457019806, |
|
"rewards/margins_max": 0.23612920939922333, |
|
"rewards/margins_min": 0.078641377389431, |
|
"rewards/margins_std": 0.11136071383953094, |
|
"rewards/rejected": 0.013413062319159508, |
|
"step": 70 |
|
}, |
|
{ |
|
"dpo_losses": 0.59584641456604, |
|
"epoch": 0.23, |
|
"grad_norm": 5.833494928937326, |
|
"learning_rate": 3.738317757009346e-06, |
|
"logits/chosen": -2.8657782077789307, |
|
"logits/rejected": -2.7893776893615723, |
|
"logps/chosen": -318.1582946777344, |
|
"logps/rejected": -289.5111999511719, |
|
"loss": 0.6052, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.18891094624996185, |
|
"rewards/margins": 0.2127668410539627, |
|
"rewards/margins_max": 0.30381911993026733, |
|
"rewards/margins_min": 0.12171456962823868, |
|
"rewards/margins_std": 0.12876734137535095, |
|
"rewards/rejected": -0.023855898529291153, |
|
"step": 80 |
|
}, |
|
{ |
|
"dpo_losses": 0.5957463383674622, |
|
"epoch": 0.25, |
|
"grad_norm": 2.2476761791524478, |
|
"learning_rate": 4.205607476635514e-06, |
|
"logits/chosen": -2.668989658355713, |
|
"logits/rejected": -2.6688218116760254, |
|
"logps/chosen": -256.0438537597656, |
|
"logps/rejected": -203.91128540039062, |
|
"loss": 0.5735, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.20931395888328552, |
|
"rewards/margins": 0.21628114581108093, |
|
"rewards/margins_max": 0.35886088013648987, |
|
"rewards/margins_min": 0.07370143383741379, |
|
"rewards/margins_std": 0.20163817703723907, |
|
"rewards/rejected": -0.00696719903498888, |
|
"step": 90 |
|
}, |
|
{ |
|
"dpo_losses": 0.5217684507369995, |
|
"epoch": 0.28, |
|
"grad_norm": 1.9962825797807737, |
|
"learning_rate": 4.6728971962616825e-06, |
|
"logits/chosen": -2.8356316089630127, |
|
"logits/rejected": -2.7704596519470215, |
|
"logps/chosen": -424.61712646484375, |
|
"logps/rejected": -347.96246337890625, |
|
"loss": 0.5432, |
|
"positive_losses": 0.040345001965761185, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.336413711309433, |
|
"rewards/margins": 0.3945561945438385, |
|
"rewards/margins_max": 0.5599964261054993, |
|
"rewards/margins_min": 0.22911591827869415, |
|
"rewards/margins_std": 0.2339678257703781, |
|
"rewards/rejected": -0.058142442256212234, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_dpo_losses": 0.6723248958587646, |
|
"eval_logits/chosen": -2.7099392414093018, |
|
"eval_logits/rejected": -2.6677489280700684, |
|
"eval_logps/chosen": -290.291748046875, |
|
"eval_logps/rejected": -269.3353576660156, |
|
"eval_loss": 1.5490143299102783, |
|
"eval_positive_losses": 8.368339538574219, |
|
"eval_rewards/accuracies": 0.5992063283920288, |
|
"eval_rewards/chosen": -0.05070570856332779, |
|
"eval_rewards/margins": 0.05082136392593384, |
|
"eval_rewards/margins_max": 0.25667881965637207, |
|
"eval_rewards/margins_min": -0.14138472080230713, |
|
"eval_rewards/margins_std": 0.17566871643066406, |
|
"eval_rewards/rejected": -0.10152707248926163, |
|
"eval_runtime": 282.2019, |
|
"eval_samples_per_second": 7.087, |
|
"eval_steps_per_second": 0.223, |
|
"step": 100 |
|
}, |
|
{ |
|
"dpo_losses": 0.5300568342208862, |
|
"epoch": 0.31, |
|
"grad_norm": 2.4142164656542064, |
|
"learning_rate": 4.999879018839288e-06, |
|
"logits/chosen": -2.694153070449829, |
|
"logits/rejected": -2.5792508125305176, |
|
"logps/chosen": -301.54388427734375, |
|
"logps/rejected": -258.5564270019531, |
|
"loss": 0.5372, |
|
"positive_losses": 0.238017275929451, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.26155275106430054, |
|
"rewards/margins": 0.3829374313354492, |
|
"rewards/margins_max": 0.5613982677459717, |
|
"rewards/margins_min": 0.20447655022144318, |
|
"rewards/margins_std": 0.2523817718029022, |
|
"rewards/rejected": -0.12138471752405167, |
|
"step": 110 |
|
}, |
|
{ |
|
"dpo_losses": 0.488577663898468, |
|
"epoch": 0.34, |
|
"grad_norm": 9.3268800860026, |
|
"learning_rate": 4.99772856836941e-06, |
|
"logits/chosen": -2.8129353523254395, |
|
"logits/rejected": -2.79107403755188, |
|
"logps/chosen": -337.3607177734375, |
|
"logps/rejected": -347.9256286621094, |
|
"loss": 0.5066, |
|
"positive_losses": 0.0004665374872274697, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.345800518989563, |
|
"rewards/margins": 0.5089500546455383, |
|
"rewards/margins_max": 0.725378692150116, |
|
"rewards/margins_min": 0.29252126812934875, |
|
"rewards/margins_std": 0.3060764968395233, |
|
"rewards/rejected": -0.16314946115016937, |
|
"step": 120 |
|
}, |
|
{ |
|
"dpo_losses": 0.42630523443222046, |
|
"epoch": 0.37, |
|
"grad_norm": 6.838233431406268, |
|
"learning_rate": 4.992892309373227e-06, |
|
"logits/chosen": -2.64375376701355, |
|
"logits/rejected": -2.5765233039855957, |
|
"logps/chosen": -352.48126220703125, |
|
"logps/rejected": -287.73388671875, |
|
"loss": 0.4592, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3849102854728699, |
|
"rewards/margins": 0.679017186164856, |
|
"rewards/margins_max": 0.8494507670402527, |
|
"rewards/margins_min": 0.5085835456848145, |
|
"rewards/margins_std": 0.24102959036827087, |
|
"rewards/rejected": -0.2941069006919861, |
|
"step": 130 |
|
}, |
|
{ |
|
"dpo_losses": 0.4316105842590332, |
|
"epoch": 0.39, |
|
"grad_norm": 24.291983530934388, |
|
"learning_rate": 4.985375442281969e-06, |
|
"logits/chosen": -2.6181836128234863, |
|
"logits/rejected": -2.5858638286590576, |
|
"logps/chosen": -320.45770263671875, |
|
"logps/rejected": -268.80950927734375, |
|
"loss": 0.4791, |
|
"positive_losses": 0.11783752590417862, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.3075354993343353, |
|
"rewards/margins": 0.6785004138946533, |
|
"rewards/margins_max": 0.9275323748588562, |
|
"rewards/margins_min": 0.42946839332580566, |
|
"rewards/margins_std": 0.35218438506126404, |
|
"rewards/rejected": -0.37096482515335083, |
|
"step": 140 |
|
}, |
|
{ |
|
"dpo_losses": 0.43038830161094666, |
|
"epoch": 0.42, |
|
"grad_norm": 16.31539388542976, |
|
"learning_rate": 4.9751860499858175e-06, |
|
"logits/chosen": -2.6524252891540527, |
|
"logits/rejected": -2.582724094390869, |
|
"logps/chosen": -269.40289306640625, |
|
"logps/rejected": -286.25921630859375, |
|
"loss": 0.4405, |
|
"positive_losses": 0.45142823457717896, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.27419334650039673, |
|
"rewards/margins": 0.6590893268585205, |
|
"rewards/margins_max": 0.8764106631278992, |
|
"rewards/margins_min": 0.44176802039146423, |
|
"rewards/margins_std": 0.307338684797287, |
|
"rewards/rejected": -0.3848959803581238, |
|
"step": 150 |
|
}, |
|
{ |
|
"dpo_losses": 0.40865588188171387, |
|
"epoch": 0.45, |
|
"grad_norm": 9.196914105554178, |
|
"learning_rate": 4.962335089142376e-06, |
|
"logits/chosen": -2.671811580657959, |
|
"logits/rejected": -2.557887554168701, |
|
"logps/chosen": -298.1056823730469, |
|
"logps/rejected": -286.0025329589844, |
|
"loss": 0.4273, |
|
"positive_losses": 1.3531090021133423, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3361397683620453, |
|
"rewards/margins": 0.7648395299911499, |
|
"rewards/margins_max": 1.1010096073150635, |
|
"rewards/margins_min": 0.42866945266723633, |
|
"rewards/margins_std": 0.47541624307632446, |
|
"rewards/rejected": -0.4286997318267822, |
|
"step": 160 |
|
}, |
|
{ |
|
"dpo_losses": 0.36792081594467163, |
|
"epoch": 0.48, |
|
"grad_norm": 3.4842547066626572, |
|
"learning_rate": 4.946836378394967e-06, |
|
"logits/chosen": -2.719280242919922, |
|
"logits/rejected": -2.560034990310669, |
|
"logps/chosen": -358.5220642089844, |
|
"logps/rejected": -287.3617858886719, |
|
"loss": 0.4612, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4195915162563324, |
|
"rewards/margins": 0.9577536582946777, |
|
"rewards/margins_max": 1.3000526428222656, |
|
"rewards/margins_min": 0.6154545545578003, |
|
"rewards/margins_std": 0.48408395051956177, |
|
"rewards/rejected": -0.5381620526313782, |
|
"step": 170 |
|
}, |
|
{ |
|
"dpo_losses": 0.3783532977104187, |
|
"epoch": 0.51, |
|
"grad_norm": 2.2304162655616295, |
|
"learning_rate": 4.928706583513441e-06, |
|
"logits/chosen": -2.599997043609619, |
|
"logits/rejected": -2.5286357402801514, |
|
"logps/chosen": -297.5575866699219, |
|
"logps/rejected": -439.0782165527344, |
|
"loss": 0.4178, |
|
"positive_losses": 0.7186470031738281, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.26578420400619507, |
|
"rewards/margins": 0.8594935536384583, |
|
"rewards/margins_max": 1.1510531902313232, |
|
"rewards/margins_min": 0.5679339170455933, |
|
"rewards/margins_std": 0.4123275876045227, |
|
"rewards/rejected": -0.5937093496322632, |
|
"step": 180 |
|
}, |
|
{ |
|
"dpo_losses": 0.3120216727256775, |
|
"epoch": 0.54, |
|
"grad_norm": 2.502842717440193, |
|
"learning_rate": 4.907965199473471e-06, |
|
"logits/chosen": -2.5457229614257812, |
|
"logits/rejected": -2.3784427642822266, |
|
"logps/chosen": -403.69097900390625, |
|
"logps/rejected": -277.13079833984375, |
|
"loss": 0.4291, |
|
"positive_losses": 0.1399887055158615, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.524212658405304, |
|
"rewards/margins": 1.0702520608901978, |
|
"rewards/margins_max": 1.2478736639022827, |
|
"rewards/margins_min": 0.892630398273468, |
|
"rewards/margins_std": 0.25119495391845703, |
|
"rewards/rejected": -0.546039342880249, |
|
"step": 190 |
|
}, |
|
{ |
|
"dpo_losses": 0.340221643447876, |
|
"epoch": 0.56, |
|
"grad_norm": 39.20326200653621, |
|
"learning_rate": 4.884634529493591e-06, |
|
"logits/chosen": -2.686732769012451, |
|
"logits/rejected": -2.5898447036743164, |
|
"logps/chosen": -290.22259521484375, |
|
"logps/rejected": -267.71923828125, |
|
"loss": 0.4843, |
|
"positive_losses": 1.2471590042114258, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.405730664730072, |
|
"rewards/margins": 1.0374891757965088, |
|
"rewards/margins_max": 1.3196719884872437, |
|
"rewards/margins_min": 0.7553063035011292, |
|
"rewards/margins_std": 0.3990669250488281, |
|
"rewards/rejected": -0.6317585110664368, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_dpo_losses": 0.6414885520935059, |
|
"eval_logits/chosen": -2.6000208854675293, |
|
"eval_logits/rejected": -2.5589327812194824, |
|
"eval_logps/chosen": -310.5942687988281, |
|
"eval_logps/rejected": -302.1485595703125, |
|
"eval_loss": 3.6353535652160645, |
|
"eval_positive_losses": 28.93216323852539, |
|
"eval_rewards/accuracies": 0.6349206566810608, |
|
"eval_rewards/chosen": -0.2537309527397156, |
|
"eval_rewards/margins": 0.17592783272266388, |
|
"eval_rewards/margins_max": 0.7364258766174316, |
|
"eval_rewards/margins_min": -0.35331711173057556, |
|
"eval_rewards/margins_std": 0.48577553033828735, |
|
"eval_rewards/rejected": -0.42965877056121826, |
|
"eval_runtime": 280.7482, |
|
"eval_samples_per_second": 7.124, |
|
"eval_steps_per_second": 0.224, |
|
"step": 200 |
|
}, |
|
{ |
|
"dpo_losses": 0.4035143256187439, |
|
"epoch": 0.59, |
|
"grad_norm": 2.0960462134704727, |
|
"learning_rate": 4.858739661052539e-06, |
|
"logits/chosen": -2.449962854385376, |
|
"logits/rejected": -2.364846706390381, |
|
"logps/chosen": -316.26300048828125, |
|
"logps/rejected": -311.43115234375, |
|
"loss": 0.3993, |
|
"positive_losses": 0.28444308042526245, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.4057347774505615, |
|
"rewards/margins": 0.8319534063339233, |
|
"rewards/margins_max": 1.267531156539917, |
|
"rewards/margins_min": 0.3963755965232849, |
|
"rewards/margins_std": 0.6160000562667847, |
|
"rewards/rejected": -0.4262186586856842, |
|
"step": 210 |
|
}, |
|
{ |
|
"dpo_losses": 0.2952147424221039, |
|
"epoch": 0.62, |
|
"grad_norm": 2.161286043094922, |
|
"learning_rate": 4.830308438912687e-06, |
|
"logits/chosen": -2.7107901573181152, |
|
"logits/rejected": -2.560708522796631, |
|
"logps/chosen": -364.57659912109375, |
|
"logps/rejected": -361.52545166015625, |
|
"loss": 0.3728, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4469282031059265, |
|
"rewards/margins": 1.2792309522628784, |
|
"rewards/margins_max": 1.8157857656478882, |
|
"rewards/margins_min": 0.7426761388778687, |
|
"rewards/margins_std": 0.7588031888008118, |
|
"rewards/rejected": -0.8323026895523071, |
|
"step": 220 |
|
}, |
|
{ |
|
"dpo_losses": 0.26847127079963684, |
|
"epoch": 0.65, |
|
"grad_norm": 14.213158348490241, |
|
"learning_rate": 4.799371435178544e-06, |
|
"logits/chosen": -2.694364070892334, |
|
"logits/rejected": -2.611956834793091, |
|
"logps/chosen": -353.65203857421875, |
|
"logps/rejected": -447.9458923339844, |
|
"loss": 0.4538, |
|
"positive_losses": 0.512774646282196, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2710634768009186, |
|
"rewards/margins": 1.3846604824066162, |
|
"rewards/margins_max": 1.9593979120254517, |
|
"rewards/margins_min": 0.809922993183136, |
|
"rewards/margins_std": 0.8128012418746948, |
|
"rewards/rejected": -1.113596796989441, |
|
"step": 230 |
|
}, |
|
{ |
|
"dpo_losses": 0.3169272541999817, |
|
"epoch": 0.68, |
|
"grad_norm": 19.218706332521528, |
|
"learning_rate": 4.765961916422575e-06, |
|
"logits/chosen": -2.664609670639038, |
|
"logits/rejected": -2.5199811458587646, |
|
"logps/chosen": -309.8558349609375, |
|
"logps/rejected": -378.1949157714844, |
|
"loss": 0.4789, |
|
"positive_losses": 2.6823112964630127, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.3129195272922516, |
|
"rewards/margins": 1.156752586364746, |
|
"rewards/margins_max": 1.4397625923156738, |
|
"rewards/margins_min": 0.8737425804138184, |
|
"rewards/margins_std": 0.4002366065979004, |
|
"rewards/rejected": -0.8438330888748169, |
|
"step": 240 |
|
}, |
|
{ |
|
"dpo_losses": 0.2593916356563568, |
|
"epoch": 0.7, |
|
"grad_norm": 10.943604253629541, |
|
"learning_rate": 4.730115807913627e-06, |
|
"logits/chosen": -2.6513824462890625, |
|
"logits/rejected": -2.482832431793213, |
|
"logps/chosen": -369.9505920410156, |
|
"logps/rejected": -348.71759033203125, |
|
"loss": 0.3659, |
|
"positive_losses": 0.7899643182754517, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.45276421308517456, |
|
"rewards/margins": 1.3846436738967896, |
|
"rewards/margins_max": 1.6470266580581665, |
|
"rewards/margins_min": 1.1222608089447021, |
|
"rewards/margins_std": 0.3710656762123108, |
|
"rewards/rejected": -0.9318795204162598, |
|
"step": 250 |
|
}, |
|
{ |
|
"dpo_losses": 0.3340635895729065, |
|
"epoch": 0.73, |
|
"grad_norm": 12.721859863392186, |
|
"learning_rate": 4.691871654986485e-06, |
|
"logits/chosen": -2.592149019241333, |
|
"logits/rejected": -2.553668975830078, |
|
"logps/chosen": -290.64971923828125, |
|
"logps/rejected": -312.02752685546875, |
|
"loss": 0.5868, |
|
"positive_losses": 1.0232231616973877, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3117257356643677, |
|
"rewards/margins": 1.1895887851715088, |
|
"rewards/margins_max": 1.7240755558013916, |
|
"rewards/margins_min": 0.6551022529602051, |
|
"rewards/margins_std": 0.755878210067749, |
|
"rewards/rejected": -0.8778631091117859, |
|
"step": 260 |
|
}, |
|
{ |
|
"dpo_losses": 0.2871754467487335, |
|
"epoch": 0.76, |
|
"grad_norm": 20.74321957766599, |
|
"learning_rate": 4.651270581594054e-06, |
|
"logits/chosen": -2.6253020763397217, |
|
"logits/rejected": -2.486907482147217, |
|
"logps/chosen": -381.5191955566406, |
|
"logps/rejected": -312.0382995605469, |
|
"loss": 0.4914, |
|
"positive_losses": 0.877484142780304, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4747743606567383, |
|
"rewards/margins": 1.2697714567184448, |
|
"rewards/margins_max": 1.6828867197036743, |
|
"rewards/margins_min": 0.8566561937332153, |
|
"rewards/margins_std": 0.5842332243919373, |
|
"rewards/rejected": -0.7949970960617065, |
|
"step": 270 |
|
}, |
|
{ |
|
"dpo_losses": 0.325231671333313, |
|
"epoch": 0.79, |
|
"grad_norm": 24.63939637076414, |
|
"learning_rate": 4.6083562460867545e-06, |
|
"logits/chosen": -2.4967987537384033, |
|
"logits/rejected": -2.4599080085754395, |
|
"logps/chosen": -295.0630798339844, |
|
"logps/rejected": -352.3880615234375, |
|
"loss": 0.4194, |
|
"positive_losses": 1.5066649913787842, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.34013646841049194, |
|
"rewards/margins": 1.2071036100387573, |
|
"rewards/margins_max": 1.8113868236541748, |
|
"rewards/margins_min": 0.6028203964233398, |
|
"rewards/margins_std": 0.8545855283737183, |
|
"rewards/rejected": -0.8669670820236206, |
|
"step": 280 |
|
}, |
|
{ |
|
"dpo_losses": 0.29185962677001953, |
|
"epoch": 0.82, |
|
"grad_norm": 49.4876607292477, |
|
"learning_rate": 4.563174794266684e-06, |
|
"logits/chosen": -2.725825071334839, |
|
"logits/rejected": -2.5793709754943848, |
|
"logps/chosen": -316.76153564453125, |
|
"logps/rejected": -357.36334228515625, |
|
"loss": 0.4944, |
|
"positive_losses": 1.7035411596298218, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.45084109902381897, |
|
"rewards/margins": 1.3104431629180908, |
|
"rewards/margins_max": 1.7685630321502686, |
|
"rewards/margins_min": 0.8523231744766235, |
|
"rewards/margins_std": 0.6478795409202576, |
|
"rewards/rejected": -0.859602153301239, |
|
"step": 290 |
|
}, |
|
{ |
|
"dpo_losses": 0.24671301245689392, |
|
"epoch": 0.85, |
|
"grad_norm": 38.65216195794793, |
|
"learning_rate": 4.5157748097670125e-06, |
|
"logits/chosen": -2.6136484146118164, |
|
"logits/rejected": -2.5544955730438232, |
|
"logps/chosen": -294.6103820800781, |
|
"logps/rejected": -422.00531005859375, |
|
"loss": 0.2828, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.43132978677749634, |
|
"rewards/margins": 1.5242297649383545, |
|
"rewards/margins_max": 1.9953060150146484, |
|
"rewards/margins_min": 1.0531535148620605, |
|
"rewards/margins_std": 0.6662023067474365, |
|
"rewards/rejected": -1.092900037765503, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_dpo_losses": 0.6346198320388794, |
|
"eval_logits/chosen": -2.5758955478668213, |
|
"eval_logits/rejected": -2.534869909286499, |
|
"eval_logps/chosen": -345.25262451171875, |
|
"eval_logps/rejected": -344.2117004394531, |
|
"eval_loss": 6.804558753967285, |
|
"eval_positive_losses": 61.76890182495117, |
|
"eval_rewards/accuracies": 0.6507936716079712, |
|
"eval_rewards/chosen": -0.6003143787384033, |
|
"eval_rewards/margins": 0.24997644126415253, |
|
"eval_rewards/margins_max": 1.0084712505340576, |
|
"eval_rewards/margins_min": -0.48679542541503906, |
|
"eval_rewards/margins_std": 0.6679101586341858, |
|
"eval_rewards/rejected": -0.8502907156944275, |
|
"eval_runtime": 281.0888, |
|
"eval_samples_per_second": 7.115, |
|
"eval_steps_per_second": 0.224, |
|
"step": 300 |
|
}, |
|
{ |
|
"dpo_losses": 0.2701479196548462, |
|
"epoch": 0.87, |
|
"grad_norm": 2.3545752201762977, |
|
"learning_rate": 4.466207261809989e-06, |
|
"logits/chosen": -2.7792999744415283, |
|
"logits/rejected": -2.558169364929199, |
|
"logps/chosen": -376.7308044433594, |
|
"logps/rejected": -358.2815246582031, |
|
"loss": 0.4216, |
|
"positive_losses": 0.41081467270851135, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.47516530752182007, |
|
"rewards/margins": 1.4481419324874878, |
|
"rewards/margins_max": 1.9792436361312866, |
|
"rewards/margins_min": 0.9170401692390442, |
|
"rewards/margins_std": 0.7510912418365479, |
|
"rewards/rejected": -0.972976565361023, |
|
"step": 310 |
|
}, |
|
{ |
|
"dpo_losses": 0.2924317717552185, |
|
"epoch": 0.9, |
|
"grad_norm": 12.007334692835927, |
|
"learning_rate": 4.414525450399713e-06, |
|
"logits/chosen": -2.6015143394470215, |
|
"logits/rejected": -2.5146775245666504, |
|
"logps/chosen": -315.1227111816406, |
|
"logps/rejected": -350.9872131347656, |
|
"loss": 0.3361, |
|
"positive_losses": 2.5894155502319336, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.3113010823726654, |
|
"rewards/margins": 1.4402223825454712, |
|
"rewards/margins_max": 2.083007335662842, |
|
"rewards/margins_min": 0.7974374890327454, |
|
"rewards/margins_std": 0.9090349078178406, |
|
"rewards/rejected": -1.128921389579773, |
|
"step": 320 |
|
}, |
|
{ |
|
"dpo_losses": 0.2676263153553009, |
|
"epoch": 0.93, |
|
"grad_norm": 29.603619163509805, |
|
"learning_rate": 4.360784949008615e-06, |
|
"logits/chosen": -2.705920934677124, |
|
"logits/rejected": -2.5720715522766113, |
|
"logps/chosen": -338.70623779296875, |
|
"logps/rejected": -376.7459716796875, |
|
"loss": 0.4384, |
|
"positive_losses": 2.4138710498809814, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.40980419516563416, |
|
"rewards/margins": 1.7029374837875366, |
|
"rewards/margins_max": 2.3179752826690674, |
|
"rewards/margins_min": 1.087899923324585, |
|
"rewards/margins_std": 0.8697945475578308, |
|
"rewards/rejected": -1.2931334972381592, |
|
"step": 330 |
|
}, |
|
{ |
|
"dpo_losses": 0.3142815828323364, |
|
"epoch": 0.96, |
|
"grad_norm": 18.502276306325797, |
|
"learning_rate": 4.30504354481929e-06, |
|
"logits/chosen": -2.5686209201812744, |
|
"logits/rejected": -2.424360990524292, |
|
"logps/chosen": -333.12530517578125, |
|
"logps/rejected": -292.06707763671875, |
|
"loss": 0.3621, |
|
"positive_losses": 0.5738517642021179, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3537023365497589, |
|
"rewards/margins": 1.2313454151153564, |
|
"rewards/margins_max": 1.7322998046875, |
|
"rewards/margins_min": 0.7303910255432129, |
|
"rewards/margins_std": 0.7084565758705139, |
|
"rewards/rejected": -0.8776431083679199, |
|
"step": 340 |
|
}, |
|
{ |
|
"dpo_losses": 0.18595710396766663, |
|
"epoch": 0.99, |
|
"grad_norm": 137.9970915114699, |
|
"learning_rate": 4.247361176585904e-06, |
|
"logits/chosen": -2.5403497219085693, |
|
"logits/rejected": -2.4443917274475098, |
|
"logps/chosen": -386.3079833984375, |
|
"logps/rejected": -441.9620056152344, |
|
"loss": 0.4456, |
|
"positive_losses": 0.9461520910263062, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4209546446800232, |
|
"rewards/margins": 1.8566910028457642, |
|
"rewards/margins_max": 2.370370388031006, |
|
"rewards/margins_min": 1.3430118560791016, |
|
"rewards/margins_std": 0.7264522910118103, |
|
"rewards/rejected": -1.4357364177703857, |
|
"step": 350 |
|
}, |
|
{ |
|
"dpo_losses": 0.24505066871643066, |
|
"epoch": 1.01, |
|
"grad_norm": 3.181861527878417, |
|
"learning_rate": 4.187799870182038e-06, |
|
"logits/chosen": -2.5239622592926025, |
|
"logits/rejected": -2.4095091819763184, |
|
"logps/chosen": -305.76092529296875, |
|
"logps/rejected": -311.29388427734375, |
|
"loss": 0.3141, |
|
"positive_losses": 0.24010220170021057, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4044499397277832, |
|
"rewards/margins": 1.4896382093429565, |
|
"rewards/margins_max": 2.010220527648926, |
|
"rewards/margins_min": 0.9690560102462769, |
|
"rewards/margins_std": 0.7362144589424133, |
|
"rewards/rejected": -1.0851882696151733, |
|
"step": 360 |
|
}, |
|
{ |
|
"dpo_losses": 0.24201233685016632, |
|
"epoch": 1.04, |
|
"grad_norm": 54.166595052035994, |
|
"learning_rate": 4.1264236719042365e-06, |
|
"logits/chosen": -2.362025737762451, |
|
"logits/rejected": -2.3736584186553955, |
|
"logps/chosen": -307.57098388671875, |
|
"logps/rejected": -404.64447021484375, |
|
"loss": 0.3927, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.44086480140686035, |
|
"rewards/margins": 1.8494741916656494, |
|
"rewards/margins_max": 2.651545763015747, |
|
"rewards/margins_min": 1.0474025011062622, |
|
"rewards/margins_std": 1.1343004703521729, |
|
"rewards/rejected": -1.40860915184021, |
|
"step": 370 |
|
}, |
|
{ |
|
"dpo_losses": 0.22759906947612762, |
|
"epoch": 1.07, |
|
"grad_norm": 38.27158679057807, |
|
"learning_rate": 4.063298579603001e-06, |
|
"logits/chosen": -2.472991466522217, |
|
"logits/rejected": -2.2599282264709473, |
|
"logps/chosen": -351.54620361328125, |
|
"logps/rejected": -319.0135803222656, |
|
"loss": 0.3817, |
|
"positive_losses": 3.08628511428833, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3943432867527008, |
|
"rewards/margins": 1.7396957874298096, |
|
"rewards/margins_max": 2.3970978260040283, |
|
"rewards/margins_min": 1.0822933912277222, |
|
"rewards/margins_std": 0.9297070503234863, |
|
"rewards/rejected": -1.3453524112701416, |
|
"step": 380 |
|
}, |
|
{ |
|
"dpo_losses": 0.134817972779274, |
|
"epoch": 1.1, |
|
"grad_norm": 2.791305623027733, |
|
"learning_rate": 3.998492471715272e-06, |
|
"logits/chosen": -2.496638059616089, |
|
"logits/rejected": -2.455773115158081, |
|
"logps/chosen": -349.44073486328125, |
|
"logps/rejected": -536.924560546875, |
|
"loss": 0.3072, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4807213246822357, |
|
"rewards/margins": 2.533548355102539, |
|
"rewards/margins_max": 3.3716654777526855, |
|
"rewards/margins_min": 1.695431113243103, |
|
"rewards/margins_std": 1.185276746749878, |
|
"rewards/rejected": -2.0528271198272705, |
|
"step": 390 |
|
}, |
|
{ |
|
"dpo_losses": 0.23253802955150604, |
|
"epoch": 1.13, |
|
"grad_norm": 1.567698262162245, |
|
"learning_rate": 3.932075034274723e-06, |
|
"logits/chosen": -2.385937213897705, |
|
"logits/rejected": -2.3955626487731934, |
|
"logps/chosen": -266.9366455078125, |
|
"logps/rejected": -367.39764404296875, |
|
"loss": 0.3355, |
|
"positive_losses": 0.33393630385398865, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3180273175239563, |
|
"rewards/margins": 1.719641923904419, |
|
"rewards/margins_max": 2.0190768241882324, |
|
"rewards/margins_min": 1.4202073812484741, |
|
"rewards/margins_std": 0.4234645366668701, |
|
"rewards/rejected": -1.401614785194397, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"eval_dpo_losses": 0.6571967601776123, |
|
"eval_logits/chosen": -2.415469169616699, |
|
"eval_logits/rejected": -2.3773038387298584, |
|
"eval_logps/chosen": -392.8340759277344, |
|
"eval_logps/rejected": -401.27020263671875, |
|
"eval_loss": 11.415841102600098, |
|
"eval_positive_losses": 108.73988342285156, |
|
"eval_rewards/accuracies": 0.6547619104385376, |
|
"eval_rewards/chosen": -1.0761287212371826, |
|
"eval_rewards/margins": 0.34474655985832214, |
|
"eval_rewards/margins_max": 1.462611198425293, |
|
"eval_rewards/margins_min": -0.7661140561103821, |
|
"eval_rewards/margins_std": 0.9968340992927551, |
|
"eval_rewards/rejected": -1.4208753108978271, |
|
"eval_runtime": 282.7743, |
|
"eval_samples_per_second": 7.073, |
|
"eval_steps_per_second": 0.223, |
|
"step": 400 |
|
}, |
|
{ |
|
"dpo_losses": 0.23369018733501434, |
|
"epoch": 1.15, |
|
"grad_norm": 1.6752309896133104, |
|
"learning_rate": 3.864117685978339e-06, |
|
"logits/chosen": -2.4605586528778076, |
|
"logits/rejected": -2.4261014461517334, |
|
"logps/chosen": -264.14556884765625, |
|
"logps/rejected": -375.3333740234375, |
|
"loss": 0.3689, |
|
"positive_losses": 3.3056740760803223, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.3054488003253937, |
|
"rewards/margins": 1.9768092632293701, |
|
"rewards/margins_max": 2.963430166244507, |
|
"rewards/margins_min": 0.990188479423523, |
|
"rewards/margins_std": 1.3952926397323608, |
|
"rewards/rejected": -1.6713603734970093, |
|
"step": 410 |
|
}, |
|
{ |
|
"dpo_losses": 0.1611286997795105, |
|
"epoch": 1.18, |
|
"grad_norm": 19.99801702205629, |
|
"learning_rate": 3.794693501389861e-06, |
|
"logits/chosen": -2.549091100692749, |
|
"logits/rejected": -2.4567952156066895, |
|
"logps/chosen": -349.955810546875, |
|
"logps/rejected": -440.55987548828125, |
|
"loss": 0.3233, |
|
"positive_losses": 0.5550443530082703, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4979848861694336, |
|
"rewards/margins": 2.247861385345459, |
|
"rewards/margins_max": 3.1490349769592285, |
|
"rewards/margins_min": 1.3466877937316895, |
|
"rewards/margins_std": 1.2744518518447876, |
|
"rewards/rejected": -1.7498763799667358, |
|
"step": 420 |
|
}, |
|
{ |
|
"dpo_losses": 0.18394428491592407, |
|
"epoch": 1.21, |
|
"grad_norm": 89.7451007324007, |
|
"learning_rate": 3.7238771323626822e-06, |
|
"logits/chosen": -2.5034220218658447, |
|
"logits/rejected": -2.4015450477600098, |
|
"logps/chosen": -339.3304748535156, |
|
"logps/rejected": -436.32769775390625, |
|
"loss": 0.3026, |
|
"positive_losses": 0.09120301902294159, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4775872230529785, |
|
"rewards/margins": 2.2062175273895264, |
|
"rewards/margins_max": 2.9012348651885986, |
|
"rewards/margins_min": 1.5112000703811646, |
|
"rewards/margins_std": 0.9829031229019165, |
|
"rewards/rejected": -1.7286304235458374, |
|
"step": 430 |
|
}, |
|
{ |
|
"dpo_losses": 0.18616007268428802, |
|
"epoch": 1.24, |
|
"grad_norm": 3.3128247371778987, |
|
"learning_rate": 3.651744727766676e-06, |
|
"logits/chosen": -2.5054969787597656, |
|
"logits/rejected": -2.38596773147583, |
|
"logps/chosen": -275.51483154296875, |
|
"logps/rejected": -352.8285827636719, |
|
"loss": 0.3883, |
|
"positive_losses": 0.630480945110321, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.39738765358924866, |
|
"rewards/margins": 1.9772555828094482, |
|
"rewards/margins_max": 2.764988422393799, |
|
"rewards/margins_min": 1.1895228624343872, |
|
"rewards/margins_std": 1.1140224933624268, |
|
"rewards/rejected": -1.579867959022522, |
|
"step": 440 |
|
}, |
|
{ |
|
"dpo_losses": 0.16775104403495789, |
|
"epoch": 1.27, |
|
"grad_norm": 2.373995116616861, |
|
"learning_rate": 3.57837385160529e-06, |
|
"logits/chosen": -2.431580066680908, |
|
"logits/rejected": -2.3474247455596924, |
|
"logps/chosen": -304.8125915527344, |
|
"logps/rejected": -460.9014587402344, |
|
"loss": 0.3246, |
|
"positive_losses": 2.069793224334717, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.33361926674842834, |
|
"rewards/margins": 2.2662878036499023, |
|
"rewards/margins_max": 2.9109106063842773, |
|
"rewards/margins_min": 1.6216650009155273, |
|
"rewards/margins_std": 0.9116341471672058, |
|
"rewards/rejected": -1.9326684474945068, |
|
"step": 450 |
|
}, |
|
{ |
|
"dpo_losses": 0.13223373889923096, |
|
"epoch": 1.3, |
|
"grad_norm": 43.7205383502172, |
|
"learning_rate": 3.503843399610941e-06, |
|
"logits/chosen": -2.416710615158081, |
|
"logits/rejected": -2.4022932052612305, |
|
"logps/chosen": -355.0602111816406, |
|
"logps/rejected": -625.78369140625, |
|
"loss": 0.3215, |
|
"positive_losses": 3.091512680053711, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.40217727422714233, |
|
"rewards/margins": 2.6340858936309814, |
|
"rewards/margins_max": 3.5210165977478027, |
|
"rewards/margins_min": 1.7471544742584229, |
|
"rewards/margins_std": 1.2543103694915771, |
|
"rewards/rejected": -2.2319083213806152, |
|
"step": 460 |
|
}, |
|
{ |
|
"dpo_losses": 0.13370443880558014, |
|
"epoch": 1.32, |
|
"grad_norm": 93.10254553112391, |
|
"learning_rate": 3.4282335144083985e-06, |
|
"logits/chosen": -2.4071993827819824, |
|
"logits/rejected": -2.301910638809204, |
|
"logps/chosen": -323.6948547363281, |
|
"logps/rejected": -422.40106201171875, |
|
"loss": 0.3844, |
|
"positive_losses": 0.8020246624946594, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.33555978536605835, |
|
"rewards/margins": 2.352768659591675, |
|
"rewards/margins_max": 2.6321401596069336, |
|
"rewards/margins_min": 2.073397159576416, |
|
"rewards/margins_std": 0.395090788602829, |
|
"rewards/rejected": -2.017209053039551, |
|
"step": 470 |
|
}, |
|
{ |
|
"dpo_losses": 0.16256804764270782, |
|
"epoch": 1.35, |
|
"grad_norm": 25.781294981021368, |
|
"learning_rate": 3.351625499337395e-06, |
|
"logits/chosen": -2.5639257431030273, |
|
"logits/rejected": -2.3825831413269043, |
|
"logps/chosen": -368.14764404296875, |
|
"logps/rejected": -455.31170654296875, |
|
"loss": 0.3346, |
|
"positive_losses": 5.322105407714844, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.47703951597213745, |
|
"rewards/margins": 2.266021251678467, |
|
"rewards/margins_max": 2.826010227203369, |
|
"rewards/margins_min": 1.706032156944275, |
|
"rewards/margins_std": 0.791944146156311, |
|
"rewards/rejected": -1.7889817953109741, |
|
"step": 480 |
|
}, |
|
{ |
|
"dpo_losses": 0.27087026834487915, |
|
"epoch": 1.38, |
|
"grad_norm": 1.9694210301585493, |
|
"learning_rate": 3.2741017310271056e-06, |
|
"logits/chosen": -2.434044361114502, |
|
"logits/rejected": -2.2838988304138184, |
|
"logps/chosen": -185.84713745117188, |
|
"logps/rejected": -353.2933044433594, |
|
"loss": 0.4342, |
|
"positive_losses": 2.383211851119995, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.31434357166290283, |
|
"rewards/margins": 1.6815185546875, |
|
"rewards/margins_max": 2.3663582801818848, |
|
"rewards/margins_min": 0.9966787099838257, |
|
"rewards/margins_std": 0.9685096740722656, |
|
"rewards/rejected": -1.3671748638153076, |
|
"step": 490 |
|
}, |
|
{ |
|
"dpo_losses": 0.1878516972064972, |
|
"epoch": 1.41, |
|
"grad_norm": 37.46970781719882, |
|
"learning_rate": 3.195745570816532e-06, |
|
"logits/chosen": -2.3778724670410156, |
|
"logits/rejected": -2.298856258392334, |
|
"logps/chosen": -339.0552978515625, |
|
"logps/rejected": -400.7374572753906, |
|
"loss": 0.3438, |
|
"positive_losses": 1.1473572254180908, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4514932632446289, |
|
"rewards/margins": 2.0797715187072754, |
|
"rewards/margins_max": 2.5511765480041504, |
|
"rewards/margins_min": 1.6083663702011108, |
|
"rewards/margins_std": 0.6666676998138428, |
|
"rewards/rejected": -1.628278136253357, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"eval_dpo_losses": 0.6380993127822876, |
|
"eval_logits/chosen": -2.490675449371338, |
|
"eval_logits/rejected": -2.4471161365509033, |
|
"eval_logps/chosen": -385.29376220703125, |
|
"eval_logps/rejected": -393.24566650390625, |
|
"eval_loss": 10.641342163085938, |
|
"eval_positive_losses": 101.35254669189453, |
|
"eval_rewards/accuracies": 0.6865079402923584, |
|
"eval_rewards/chosen": -1.0007256269454956, |
|
"eval_rewards/margins": 0.33990418910980225, |
|
"eval_rewards/margins_max": 1.3353358507156372, |
|
"eval_rewards/margins_min": -0.6337663531303406, |
|
"eval_rewards/margins_std": 0.8805232644081116, |
|
"eval_rewards/rejected": -1.3406296968460083, |
|
"eval_runtime": 281.3613, |
|
"eval_samples_per_second": 7.108, |
|
"eval_steps_per_second": 0.224, |
|
"step": 500 |
|
}, |
|
{ |
|
"dpo_losses": 0.15203723311424255, |
|
"epoch": 1.44, |
|
"grad_norm": 7.547244740150183, |
|
"learning_rate": 3.116641275116018e-06, |
|
"logits/chosen": -2.2004570960998535, |
|
"logits/rejected": -2.1605918407440186, |
|
"logps/chosen": -256.17730712890625, |
|
"logps/rejected": -509.5887756347656, |
|
"loss": 0.3429, |
|
"positive_losses": 2.6612415313720703, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3650303781032562, |
|
"rewards/margins": 2.308377504348755, |
|
"rewards/margins_max": 3.1350021362304688, |
|
"rewards/margins_min": 1.481752634048462, |
|
"rewards/margins_std": 1.1690237522125244, |
|
"rewards/rejected": -1.9433467388153076, |
|
"step": 510 |
|
}, |
|
{ |
|
"dpo_losses": 0.12489266693592072, |
|
"epoch": 1.46, |
|
"grad_norm": 145.8887364997723, |
|
"learning_rate": 3.0368739048062956e-06, |
|
"logits/chosen": -2.448385238647461, |
|
"logits/rejected": -2.354788064956665, |
|
"logps/chosen": -307.94976806640625, |
|
"logps/rejected": -450.015380859375, |
|
"loss": 0.5303, |
|
"positive_losses": 3.1661620140075684, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3366628587245941, |
|
"rewards/margins": 2.504631519317627, |
|
"rewards/margins_max": 3.252946138381958, |
|
"rewards/margins_min": 1.7563165426254272, |
|
"rewards/margins_std": 1.058276891708374, |
|
"rewards/rejected": -2.16796875, |
|
"step": 520 |
|
}, |
|
{ |
|
"dpo_losses": 0.14785408973693848, |
|
"epoch": 1.49, |
|
"grad_norm": 19.752652919627817, |
|
"learning_rate": 2.956529233772492e-06, |
|
"logits/chosen": -2.3587186336517334, |
|
"logits/rejected": -2.3671178817749023, |
|
"logps/chosen": -339.9403076171875, |
|
"logps/rejected": -447.1279296875, |
|
"loss": 0.2974, |
|
"positive_losses": 0.8550773859024048, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.39065319299697876, |
|
"rewards/margins": 2.165048122406006, |
|
"rewards/margins_max": 2.7594637870788574, |
|
"rewards/margins_min": 1.5706324577331543, |
|
"rewards/margins_std": 0.8406306505203247, |
|
"rewards/rejected": -1.7743949890136719, |
|
"step": 530 |
|
}, |
|
{ |
|
"dpo_losses": 0.1259264200925827, |
|
"epoch": 1.52, |
|
"grad_norm": 75.31648718698649, |
|
"learning_rate": 2.8756936566714317e-06, |
|
"logits/chosen": -2.5206494331359863, |
|
"logits/rejected": -2.379965305328369, |
|
"logps/chosen": -351.1200256347656, |
|
"logps/rejected": -426.8377990722656, |
|
"loss": 0.4729, |
|
"positive_losses": 1.8278591632843018, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.48433223366737366, |
|
"rewards/margins": 2.3780269622802734, |
|
"rewards/margins_max": 2.926578998565674, |
|
"rewards/margins_min": 1.8294748067855835, |
|
"rewards/margins_std": 0.7757696509361267, |
|
"rewards/rejected": -1.893694281578064, |
|
"step": 540 |
|
}, |
|
{ |
|
"dpo_losses": 0.20844757556915283, |
|
"epoch": 1.55, |
|
"grad_norm": 3.349676851087858, |
|
"learning_rate": 2.794454096031429e-06, |
|
"logits/chosen": -2.4806084632873535, |
|
"logits/rejected": -2.4055755138397217, |
|
"logps/chosen": -274.53900146484375, |
|
"logps/rejected": -464.46307373046875, |
|
"loss": 0.2124, |
|
"positive_losses": 1.989834189414978, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.36409634351730347, |
|
"rewards/margins": 2.338207960128784, |
|
"rewards/margins_max": 3.5889182090759277, |
|
"rewards/margins_min": 1.0874969959259033, |
|
"rewards/margins_std": 1.7687723636627197, |
|
"rewards/rejected": -1.974111557006836, |
|
"step": 550 |
|
}, |
|
{ |
|
"dpo_losses": 0.18082240223884583, |
|
"epoch": 1.58, |
|
"grad_norm": 2.0680076581049485, |
|
"learning_rate": 2.71289790878446e-06, |
|
"logits/chosen": -2.375046730041504, |
|
"logits/rejected": -2.316608190536499, |
|
"logps/chosen": -295.595703125, |
|
"logps/rejected": -556.7249755859375, |
|
"loss": 0.3351, |
|
"positive_losses": 4.852077484130859, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.21087315678596497, |
|
"rewards/margins": 2.390120267868042, |
|
"rewards/margins_max": 3.270312786102295, |
|
"rewards/margins_min": 1.5099279880523682, |
|
"rewards/margins_std": 1.24478018283844, |
|
"rewards/rejected": -2.1792471408843994, |
|
"step": 560 |
|
}, |
|
{ |
|
"dpo_losses": 0.11622228473424911, |
|
"epoch": 1.61, |
|
"grad_norm": 53.09814348729035, |
|
"learning_rate": 2.6311127923312156e-06, |
|
"logits/chosen": -2.38314151763916, |
|
"logits/rejected": -2.2578694820404053, |
|
"logps/chosen": -386.07366943359375, |
|
"logps/rejected": -540.0533447265625, |
|
"loss": 0.2485, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4612743854522705, |
|
"rewards/margins": 2.703238010406494, |
|
"rewards/margins_max": 3.3717334270477295, |
|
"rewards/margins_min": 2.0347423553466797, |
|
"rewards/margins_std": 0.9453955888748169, |
|
"rewards/rejected": -2.2419633865356445, |
|
"step": 570 |
|
}, |
|
{ |
|
"dpo_losses": 0.11324157565832138, |
|
"epoch": 1.63, |
|
"grad_norm": 28.84783476798183, |
|
"learning_rate": 2.549186690240057e-06, |
|
"logits/chosen": -2.3538641929626465, |
|
"logits/rejected": -2.2661383152008057, |
|
"logps/chosen": -265.77618408203125, |
|
"logps/rejected": -461.02362060546875, |
|
"loss": 0.3852, |
|
"positive_losses": 0.5109559893608093, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.326893150806427, |
|
"rewards/margins": 2.722642660140991, |
|
"rewards/margins_max": 3.37109375, |
|
"rewards/margins_min": 2.0741915702819824, |
|
"rewards/margins_std": 0.9170483350753784, |
|
"rewards/rejected": -2.395749568939209, |
|
"step": 580 |
|
}, |
|
{ |
|
"dpo_losses": 0.19121481478214264, |
|
"epoch": 1.66, |
|
"grad_norm": 3.235430116905917, |
|
"learning_rate": 2.4672076976812548e-06, |
|
"logits/chosen": -2.256422281265259, |
|
"logits/rejected": -2.129117965698242, |
|
"logps/chosen": -335.0826721191406, |
|
"logps/rejected": -496.73138427734375, |
|
"loss": 0.3539, |
|
"positive_losses": 0.32223206758499146, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3417799472808838, |
|
"rewards/margins": 2.386916399002075, |
|
"rewards/margins_max": 3.4707369804382324, |
|
"rewards/margins_min": 1.3030953407287598, |
|
"rewards/margins_std": 1.5327543020248413, |
|
"rewards/rejected": -2.0451362133026123, |
|
"step": 590 |
|
}, |
|
{ |
|
"dpo_losses": 0.2605344355106354, |
|
"epoch": 1.69, |
|
"grad_norm": 13.287666189135933, |
|
"learning_rate": 2.3852639666982218e-06, |
|
"logits/chosen": -2.4083638191223145, |
|
"logits/rejected": -2.3480491638183594, |
|
"logps/chosen": -243.49948120117188, |
|
"logps/rejected": -401.8130798339844, |
|
"loss": 0.2144, |
|
"positive_losses": 0.026648616418242455, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.35153132677078247, |
|
"rewards/margins": 1.764525055885315, |
|
"rewards/margins_max": 2.218550205230713, |
|
"rewards/margins_min": 1.310499668121338, |
|
"rewards/margins_std": 0.642088770866394, |
|
"rewards/rejected": -1.4129936695098877, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"eval_dpo_losses": 0.6267468929290771, |
|
"eval_logits/chosen": -2.3438363075256348, |
|
"eval_logits/rejected": -2.302872896194458, |
|
"eval_logps/chosen": -363.3914489746094, |
|
"eval_logps/rejected": -380.5317687988281, |
|
"eval_loss": 8.589625358581543, |
|
"eval_positive_losses": 79.79975891113281, |
|
"eval_rewards/accuracies": 0.6865079402923584, |
|
"eval_rewards/chosen": -0.7817028760910034, |
|
"eval_rewards/margins": 0.4317886233329773, |
|
"eval_rewards/margins_max": 1.5951305627822876, |
|
"eval_rewards/margins_min": -0.6660595536231995, |
|
"eval_rewards/margins_std": 1.0046793222427368, |
|
"eval_rewards/rejected": -1.2134915590286255, |
|
"eval_runtime": 280.6736, |
|
"eval_samples_per_second": 7.126, |
|
"eval_steps_per_second": 0.224, |
|
"step": 600 |
|
}, |
|
{ |
|
"dpo_losses": 0.21777740120887756, |
|
"epoch": 1.72, |
|
"grad_norm": 3.6630257967936934, |
|
"learning_rate": 2.303443611417584e-06, |
|
"logits/chosen": -2.2210001945495605, |
|
"logits/rejected": -2.135100841522217, |
|
"logps/chosen": -324.7200622558594, |
|
"logps/rejected": -427.8872985839844, |
|
"loss": 0.4399, |
|
"positive_losses": 1.8486969470977783, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4076939523220062, |
|
"rewards/margins": 2.0033507347106934, |
|
"rewards/margins_max": 2.8805928230285645, |
|
"rewards/margins_min": 1.12610924243927, |
|
"rewards/margins_std": 1.2406072616577148, |
|
"rewards/rejected": -1.5956569910049438, |
|
"step": 610 |
|
}, |
|
{ |
|
"dpo_losses": 0.16951796412467957, |
|
"epoch": 1.75, |
|
"grad_norm": 171.16064616866518, |
|
"learning_rate": 2.2218346133000264e-06, |
|
"logits/chosen": -2.2120890617370605, |
|
"logits/rejected": -2.1098124980926514, |
|
"logps/chosen": -283.6782531738281, |
|
"logps/rejected": -384.42193603515625, |
|
"loss": 0.445, |
|
"positive_losses": 6.724704742431641, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.2760510742664337, |
|
"rewards/margins": 2.14823317527771, |
|
"rewards/margins_max": 2.824097156524658, |
|
"rewards/margins_min": 1.4723690748214722, |
|
"rewards/margins_std": 0.9558159708976746, |
|
"rewards/rejected": -1.8721821308135986, |
|
"step": 620 |
|
}, |
|
{ |
|
"dpo_losses": 0.15758368372917175, |
|
"epoch": 1.77, |
|
"grad_norm": 24.7162522936105, |
|
"learning_rate": 2.140524726533792e-06, |
|
"logits/chosen": -2.237668514251709, |
|
"logits/rejected": -2.1284663677215576, |
|
"logps/chosen": -338.96942138671875, |
|
"logps/rejected": -402.83697509765625, |
|
"loss": 0.3769, |
|
"positive_losses": 0.14499235153198242, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5579421520233154, |
|
"rewards/margins": 2.344921350479126, |
|
"rewards/margins_max": 2.9339561462402344, |
|
"rewards/margins_min": 1.7558867931365967, |
|
"rewards/margins_std": 0.833020806312561, |
|
"rewards/rejected": -1.7869793176651, |
|
"step": 630 |
|
}, |
|
{ |
|
"dpo_losses": 0.18143755197525024, |
|
"epoch": 1.8, |
|
"grad_norm": 28.54900764303978, |
|
"learning_rate": 2.059601383672566e-06, |
|
"logits/chosen": -2.421058177947998, |
|
"logits/rejected": -2.359043598175049, |
|
"logps/chosen": -261.0716857910156, |
|
"logps/rejected": -371.5032653808594, |
|
"loss": 0.2764, |
|
"positive_losses": 0.5634332895278931, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4140376150608063, |
|
"rewards/margins": 2.1301121711730957, |
|
"rewards/margins_max": 2.458400011062622, |
|
"rewards/margins_min": 1.8018243312835693, |
|
"rewards/margins_std": 0.46426907181739807, |
|
"rewards/rejected": -1.7160745859146118, |
|
"step": 640 |
|
}, |
|
{ |
|
"dpo_losses": 0.21564999222755432, |
|
"epoch": 1.83, |
|
"grad_norm": 43.347219579951144, |
|
"learning_rate": 1.9791516016192214e-06, |
|
"logits/chosen": -2.4876837730407715, |
|
"logits/rejected": -2.327549457550049, |
|
"logps/chosen": -288.0809020996094, |
|
"logps/rejected": -400.9809875488281, |
|
"loss": 0.2601, |
|
"positive_losses": 2.1945955753326416, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.4631883203983307, |
|
"rewards/margins": 2.244870662689209, |
|
"rewards/margins_max": 3.217092990875244, |
|
"rewards/margins_min": 1.2726480960845947, |
|
"rewards/margins_std": 1.3749301433563232, |
|
"rewards/rejected": -1.7816823720932007, |
|
"step": 650 |
|
}, |
|
{ |
|
"dpo_losses": 0.21185970306396484, |
|
"epoch": 1.86, |
|
"grad_norm": 4.895621870086889, |
|
"learning_rate": 1.8992618880565039e-06, |
|
"logits/chosen": -2.147622585296631, |
|
"logits/rejected": -2.0906789302825928, |
|
"logps/chosen": -282.22711181640625, |
|
"logps/rejected": -362.41119384765625, |
|
"loss": 0.4916, |
|
"positive_losses": 3.9535961151123047, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4179634153842926, |
|
"rewards/margins": 2.141587495803833, |
|
"rewards/margins_max": 3.0650925636291504, |
|
"rewards/margins_min": 1.218082308769226, |
|
"rewards/margins_std": 1.3060333728790283, |
|
"rewards/rejected": -1.7236239910125732, |
|
"step": 660 |
|
}, |
|
{ |
|
"dpo_losses": 0.11567674577236176, |
|
"epoch": 1.89, |
|
"grad_norm": 259.9155211313861, |
|
"learning_rate": 1.8200181484252888e-06, |
|
"logits/chosen": -2.390129566192627, |
|
"logits/rejected": -2.4018349647521973, |
|
"logps/chosen": -326.493896484375, |
|
"logps/rejected": -549.7818603515625, |
|
"loss": 0.2561, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.562630295753479, |
|
"rewards/margins": 2.886990785598755, |
|
"rewards/margins_max": 3.560823440551758, |
|
"rewards/margins_min": 2.2131576538085938, |
|
"rewards/margins_std": 0.9529436826705933, |
|
"rewards/rejected": -2.3243603706359863, |
|
"step": 670 |
|
}, |
|
{ |
|
"dpo_losses": 0.1450214684009552, |
|
"epoch": 1.92, |
|
"grad_norm": 3.1779421953498246, |
|
"learning_rate": 1.7415055935504234e-06, |
|
"logits/chosen": -2.4584078788757324, |
|
"logits/rejected": -2.2816786766052246, |
|
"logps/chosen": -351.3116760253906, |
|
"logps/rejected": -523.8319091796875, |
|
"loss": 0.4032, |
|
"positive_losses": 3.919013500213623, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4204772114753723, |
|
"rewards/margins": 2.6883883476257324, |
|
"rewards/margins_max": 3.6081409454345703, |
|
"rewards/margins_min": 1.7686359882354736, |
|
"rewards/margins_std": 1.300726294517517, |
|
"rewards/rejected": -2.267911434173584, |
|
"step": 680 |
|
}, |
|
{ |
|
"dpo_losses": 0.22765210270881653, |
|
"epoch": 1.94, |
|
"grad_norm": 290.4438144891366, |
|
"learning_rate": 1.6638086480134954e-06, |
|
"logits/chosen": -2.2493913173675537, |
|
"logits/rejected": -2.2027366161346436, |
|
"logps/chosen": -208.2427520751953, |
|
"logps/rejected": -294.7981872558594, |
|
"loss": 0.3929, |
|
"positive_losses": 4.185807228088379, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.2872256338596344, |
|
"rewards/margins": 1.9401639699935913, |
|
"rewards/margins_max": 2.823594331741333, |
|
"rewards/margins_min": 1.0567338466644287, |
|
"rewards/margins_std": 1.249358892440796, |
|
"rewards/rejected": -1.6529382467269897, |
|
"step": 690 |
|
}, |
|
{ |
|
"dpo_losses": 0.11677996069192886, |
|
"epoch": 1.97, |
|
"grad_norm": 2.93724058913164, |
|
"learning_rate": 1.5870108593710473e-06, |
|
"logits/chosen": -2.169877290725708, |
|
"logits/rejected": -2.062708854675293, |
|
"logps/chosen": -379.1634826660156, |
|
"logps/rejected": -440.5103454589844, |
|
"loss": 0.3314, |
|
"positive_losses": 0.9755552411079407, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.478015273809433, |
|
"rewards/margins": 2.7329649925231934, |
|
"rewards/margins_max": 3.358954906463623, |
|
"rewards/margins_min": 2.1069746017456055, |
|
"rewards/margins_std": 0.8852837681770325, |
|
"rewards/rejected": -2.2549493312835693, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_dpo_losses": 0.6525446176528931, |
|
"eval_logits/chosen": -2.343222141265869, |
|
"eval_logits/rejected": -2.30248761177063, |
|
"eval_logps/chosen": -391.17047119140625, |
|
"eval_logps/rejected": -410.6868591308594, |
|
"eval_loss": 11.165140151977539, |
|
"eval_positive_losses": 107.29693603515625, |
|
"eval_rewards/accuracies": 0.6626983880996704, |
|
"eval_rewards/chosen": -1.0594924688339233, |
|
"eval_rewards/margins": 0.4555494785308838, |
|
"eval_rewards/margins_max": 1.7776461839675903, |
|
"eval_rewards/margins_min": -0.8450111150741577, |
|
"eval_rewards/margins_std": 1.16599440574646, |
|
"eval_rewards/rejected": -1.5150419473648071, |
|
"eval_runtime": 281.451, |
|
"eval_samples_per_second": 7.106, |
|
"eval_steps_per_second": 0.224, |
|
"step": 700 |
|
}, |
|
{ |
|
"dpo_losses": 0.2008899748325348, |
|
"epoch": 2.0, |
|
"grad_norm": 1.5771255349778672, |
|
"learning_rate": 1.511194808315853e-06, |
|
"logits/chosen": -2.2691843509674072, |
|
"logits/rejected": -2.16029691696167, |
|
"logps/chosen": -264.2540588378906, |
|
"logps/rejected": -368.97308349609375, |
|
"loss": 0.2577, |
|
"positive_losses": 1.1838890314102173, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4201991558074951, |
|
"rewards/margins": 2.272444248199463, |
|
"rewards/margins_max": 3.1316885948181152, |
|
"rewards/margins_min": 1.413199782371521, |
|
"rewards/margins_std": 1.2151552438735962, |
|
"rewards/rejected": -1.8522450923919678, |
|
"step": 710 |
|
}, |
|
{ |
|
"dpo_losses": 0.11133086681365967, |
|
"epoch": 2.03, |
|
"grad_norm": 39.920300238021134, |
|
"learning_rate": 1.4364420198778662e-06, |
|
"logits/chosen": -2.3695783615112305, |
|
"logits/rejected": -2.292491912841797, |
|
"logps/chosen": -303.01556396484375, |
|
"logps/rejected": -584.0543212890625, |
|
"loss": 0.2033, |
|
"positive_losses": 1.3720115423202515, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.44599366188049316, |
|
"rewards/margins": 2.9826126098632812, |
|
"rewards/margins_max": 3.897780179977417, |
|
"rewards/margins_min": 2.0674448013305664, |
|
"rewards/margins_std": 1.2942426204681396, |
|
"rewards/rejected": -2.536618709564209, |
|
"step": 720 |
|
}, |
|
{ |
|
"dpo_losses": 0.06813247501850128, |
|
"epoch": 2.06, |
|
"grad_norm": 61.35667528119903, |
|
"learning_rate": 1.3628328757603243e-06, |
|
"logits/chosen": -2.3125011920928955, |
|
"logits/rejected": -2.234978437423706, |
|
"logps/chosen": -380.5849609375, |
|
"logps/rejected": -546.2708129882812, |
|
"loss": 0.2931, |
|
"positive_losses": 3.713000535964966, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.42165422439575195, |
|
"rewards/margins": 3.356724500656128, |
|
"rewards/margins_max": 3.9914348125457764, |
|
"rewards/margins_min": 2.7220141887664795, |
|
"rewards/margins_std": 0.8976157903671265, |
|
"rewards/rejected": -2.935070276260376, |
|
"step": 730 |
|
}, |
|
{ |
|
"dpo_losses": 0.1471334546804428, |
|
"epoch": 2.08, |
|
"grad_norm": 81.02956274757072, |
|
"learning_rate": 1.2904465279052725e-06, |
|
"logits/chosen": -2.3687386512756348, |
|
"logits/rejected": -2.246112585067749, |
|
"logps/chosen": -307.13690185546875, |
|
"logps/rejected": -456.7256774902344, |
|
"loss": 0.2511, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.44816774129867554, |
|
"rewards/margins": 2.7628419399261475, |
|
"rewards/margins_max": 3.8002758026123047, |
|
"rewards/margins_min": 1.7254081964492798, |
|
"rewards/margins_std": 1.4671531915664673, |
|
"rewards/rejected": -2.314674139022827, |
|
"step": 740 |
|
}, |
|
{ |
|
"dpo_losses": 0.18885108828544617, |
|
"epoch": 2.11, |
|
"grad_norm": 37.34403765579587, |
|
"learning_rate": 1.219360813381446e-06, |
|
"logits/chosen": -2.1814396381378174, |
|
"logits/rejected": -2.133742570877075, |
|
"logps/chosen": -167.7379913330078, |
|
"logps/rejected": -342.445068359375, |
|
"loss": 0.2246, |
|
"positive_losses": 3.9296765327453613, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.2083771526813507, |
|
"rewards/margins": 2.266416072845459, |
|
"rewards/margins_max": 3.0310301780700684, |
|
"rewards/margins_min": 1.501802682876587, |
|
"rewards/margins_std": 1.0813268423080444, |
|
"rewards/rejected": -2.0580391883850098, |
|
"step": 750 |
|
}, |
|
{ |
|
"dpo_losses": 0.07930545508861542, |
|
"epoch": 2.14, |
|
"grad_norm": 53.31665337719304, |
|
"learning_rate": 1.1496521706860392e-06, |
|
"logits/chosen": -2.3485515117645264, |
|
"logits/rejected": -2.2168877124786377, |
|
"logps/chosen": -298.58966064453125, |
|
"logps/rejected": -549.2097778320312, |
|
"loss": 0.2258, |
|
"positive_losses": 3.5301315784454346, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.344715416431427, |
|
"rewards/margins": 3.2088115215301514, |
|
"rewards/margins_max": 4.005995750427246, |
|
"rewards/margins_min": 2.411628246307373, |
|
"rewards/margins_std": 1.1273881196975708, |
|
"rewards/rejected": -2.8640968799591064, |
|
"step": 760 |
|
}, |
|
{ |
|
"dpo_losses": 0.13964664936065674, |
|
"epoch": 2.17, |
|
"grad_norm": 1.2653188452887754, |
|
"learning_rate": 1.0813955575503588e-06, |
|
"logits/chosen": -2.278029680252075, |
|
"logits/rejected": -2.2471253871917725, |
|
"logps/chosen": -296.67205810546875, |
|
"logps/rejected": -532.7224731445312, |
|
"loss": 0.2746, |
|
"positive_losses": 1.8862769603729248, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.36964336037635803, |
|
"rewards/margins": 3.1202445030212402, |
|
"rewards/margins_max": 4.3073296546936035, |
|
"rewards/margins_min": 1.9331591129302979, |
|
"rewards/margins_std": 1.6787922382354736, |
|
"rewards/rejected": -2.750600814819336, |
|
"step": 770 |
|
}, |
|
{ |
|
"dpo_losses": 0.18134805560112, |
|
"epoch": 2.2, |
|
"grad_norm": 49.5060636871573, |
|
"learning_rate": 1.0146643703377488e-06, |
|
"logits/chosen": -2.3735833168029785, |
|
"logits/rejected": -2.250998020172119, |
|
"logps/chosen": -292.6219177246094, |
|
"logps/rejected": -449.14068603515625, |
|
"loss": 0.2041, |
|
"positive_losses": 1.1141910552978516, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3442000448703766, |
|
"rewards/margins": 2.5554795265197754, |
|
"rewards/margins_max": 3.6242382526397705, |
|
"rewards/margins_min": 1.4867204427719116, |
|
"rewards/margins_std": 1.51145339012146, |
|
"rewards/rejected": -2.2112793922424316, |
|
"step": 780 |
|
}, |
|
{ |
|
"dpo_losses": 0.09527120739221573, |
|
"epoch": 2.23, |
|
"grad_norm": 22.61766042791389, |
|
"learning_rate": 9.495303651204496e-07, |
|
"logits/chosen": -2.317281484603882, |
|
"logits/rejected": -2.2382774353027344, |
|
"logps/chosen": -320.7598571777344, |
|
"logps/rejected": -518.2537841796875, |
|
"loss": 0.1938, |
|
"positive_losses": 0.0011909485328942537, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4521896243095398, |
|
"rewards/margins": 2.8671340942382812, |
|
"rewards/margins_max": 3.515406847000122, |
|
"rewards/margins_min": 2.2188615798950195, |
|
"rewards/margins_std": 0.9167959094047546, |
|
"rewards/rejected": -2.414944648742676, |
|
"step": 790 |
|
}, |
|
{ |
|
"dpo_losses": 0.09652134031057358, |
|
"epoch": 2.25, |
|
"grad_norm": 55.89571807239017, |
|
"learning_rate": 8.860635805202616e-07, |
|
"logits/chosen": -2.334362506866455, |
|
"logits/rejected": -2.2386062145233154, |
|
"logps/chosen": -323.18060302734375, |
|
"logps/rejected": -481.80712890625, |
|
"loss": 0.1352, |
|
"positive_losses": 0.3718675673007965, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4021781086921692, |
|
"rewards/margins": 2.7669034004211426, |
|
"rewards/margins_max": 3.376081943511963, |
|
"rewards/margins_min": 2.1577250957489014, |
|
"rewards/margins_std": 0.8615081906318665, |
|
"rewards/rejected": -2.364725112915039, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"eval_dpo_losses": 0.6700040102005005, |
|
"eval_logits/chosen": -2.3320279121398926, |
|
"eval_logits/rejected": -2.290130376815796, |
|
"eval_logps/chosen": -415.07861328125, |
|
"eval_logps/rejected": -441.023681640625, |
|
"eval_loss": 13.35706615447998, |
|
"eval_positive_losses": 130.9070281982422, |
|
"eval_rewards/accuracies": 0.6626983880996704, |
|
"eval_rewards/chosen": -1.2985737323760986, |
|
"eval_rewards/margins": 0.5198364853858948, |
|
"eval_rewards/margins_max": 2.022522211074829, |
|
"eval_rewards/margins_min": -0.9602744579315186, |
|
"eval_rewards/margins_std": 1.3295913934707642, |
|
"eval_rewards/rejected": -1.8184101581573486, |
|
"eval_runtime": 281.6798, |
|
"eval_samples_per_second": 7.1, |
|
"eval_steps_per_second": 0.224, |
|
"step": 800 |
|
}, |
|
{ |
|
"dpo_losses": 0.11107480525970459, |
|
"epoch": 2.28, |
|
"grad_norm": 1.211879634445009, |
|
"learning_rate": 8.24332262395994e-07, |
|
"logits/chosen": -2.4299230575561523, |
|
"logits/rejected": -2.3509979248046875, |
|
"logps/chosen": -259.64581298828125, |
|
"logps/rejected": -510.4208984375, |
|
"loss": 0.4972, |
|
"positive_losses": 1.13177490234375, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.34921571612358093, |
|
"rewards/margins": 3.014543056488037, |
|
"rewards/margins_max": 4.297389984130859, |
|
"rewards/margins_min": 1.7316957712173462, |
|
"rewards/margins_std": 1.8142198324203491, |
|
"rewards/rejected": -2.665327310562134, |
|
"step": 810 |
|
}, |
|
{ |
|
"dpo_losses": 0.15867747366428375, |
|
"epoch": 2.31, |
|
"grad_norm": 1.9974082008055192, |
|
"learning_rate": 7.644027904586587e-07, |
|
"logits/chosen": -2.368762731552124, |
|
"logits/rejected": -2.2775216102600098, |
|
"logps/chosen": -270.0508728027344, |
|
"logps/rejected": -446.8939514160156, |
|
"loss": 0.3191, |
|
"positive_losses": 0.852569580078125, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3547489047050476, |
|
"rewards/margins": 2.747641086578369, |
|
"rewards/margins_max": 3.639145612716675, |
|
"rewards/margins_min": 1.8561369180679321, |
|
"rewards/margins_std": 1.2607777118682861, |
|
"rewards/rejected": -2.392892360687256, |
|
"step": 820 |
|
}, |
|
{ |
|
"dpo_losses": 0.06032683700323105, |
|
"epoch": 2.34, |
|
"grad_norm": 79.25640130890059, |
|
"learning_rate": 7.06339606893347e-07, |
|
"logits/chosen": -2.3674511909484863, |
|
"logits/rejected": -2.254099130630493, |
|
"logps/chosen": -410.2024841308594, |
|
"logps/rejected": -582.8692016601562, |
|
"loss": 0.212, |
|
"positive_losses": 3.602843761444092, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4369390904903412, |
|
"rewards/margins": 3.7233974933624268, |
|
"rewards/margins_max": 4.673032283782959, |
|
"rewards/margins_min": 2.7737619876861572, |
|
"rewards/margins_std": 1.3429871797561646, |
|
"rewards/rejected": -3.2864582538604736, |
|
"step": 830 |
|
}, |
|
{ |
|
"dpo_losses": 0.12710335850715637, |
|
"epoch": 2.37, |
|
"grad_norm": 5.277843753781562, |
|
"learning_rate": 6.502051470645149e-07, |
|
"logits/chosen": -2.486243486404419, |
|
"logits/rejected": -2.29672908782959, |
|
"logps/chosen": -348.5108947753906, |
|
"logps/rejected": -525.3292846679688, |
|
"loss": 0.346, |
|
"positive_losses": 0.0202178955078125, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4295215606689453, |
|
"rewards/margins": 2.8306221961975098, |
|
"rewards/margins_max": 3.4442856311798096, |
|
"rewards/margins_min": 2.216958522796631, |
|
"rewards/margins_std": 0.8678513765335083, |
|
"rewards/rejected": -2.4011006355285645, |
|
"step": 840 |
|
}, |
|
{ |
|
"dpo_losses": 0.10362281650304794, |
|
"epoch": 2.39, |
|
"grad_norm": 172.11522416865105, |
|
"learning_rate": 5.960597723792194e-07, |
|
"logits/chosen": -2.351360559463501, |
|
"logits/rejected": -2.166510581970215, |
|
"logps/chosen": -337.3572692871094, |
|
"logps/rejected": -561.3741455078125, |
|
"loss": 0.2227, |
|
"positive_losses": 0.215586856007576, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.47708138823509216, |
|
"rewards/margins": 3.5102126598358154, |
|
"rewards/margins_max": 4.734680652618408, |
|
"rewards/margins_min": 2.28574538230896, |
|
"rewards/margins_std": 1.731658697128296, |
|
"rewards/rejected": -3.0331313610076904, |
|
"step": 850 |
|
}, |
|
{ |
|
"dpo_losses": 0.10238895565271378, |
|
"epoch": 2.42, |
|
"grad_norm": 82.67868906193421, |
|
"learning_rate": 5.43961705380465e-07, |
|
"logits/chosen": -2.389751434326172, |
|
"logits/rejected": -2.2885704040527344, |
|
"logps/chosen": -315.6968078613281, |
|
"logps/rejected": -601.2724609375, |
|
"loss": 0.2321, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.3972366154193878, |
|
"rewards/margins": 3.726912260055542, |
|
"rewards/margins_max": 4.830050945281982, |
|
"rewards/margins_min": 2.6237740516662598, |
|
"rewards/margins_std": 1.5600733757019043, |
|
"rewards/rejected": -3.3296761512756348, |
|
"step": 860 |
|
}, |
|
{ |
|
"dpo_losses": 0.10260417312383652, |
|
"epoch": 2.45, |
|
"grad_norm": 24.22347862103844, |
|
"learning_rate": 4.939669671404871e-07, |
|
"logits/chosen": -2.3038783073425293, |
|
"logits/rejected": -2.185800552368164, |
|
"logps/chosen": -305.2945556640625, |
|
"logps/rejected": -611.5335693359375, |
|
"loss": 0.1672, |
|
"positive_losses": 0.3565734922885895, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.35259321331977844, |
|
"rewards/margins": 3.309342861175537, |
|
"rewards/margins_max": 4.305205345153809, |
|
"rewards/margins_min": 2.313480854034424, |
|
"rewards/margins_std": 1.4083621501922607, |
|
"rewards/rejected": -2.956749439239502, |
|
"step": 870 |
|
}, |
|
{ |
|
"dpo_losses": 0.0972672775387764, |
|
"epoch": 2.48, |
|
"grad_norm": 2.5325654932498978, |
|
"learning_rate": 4.461293170212644e-07, |
|
"logits/chosen": -2.366685390472412, |
|
"logits/rejected": -2.244584798812866, |
|
"logps/chosen": -284.2767028808594, |
|
"logps/rejected": -525.98681640625, |
|
"loss": 0.1957, |
|
"positive_losses": 4.633613586425781, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3293871283531189, |
|
"rewards/margins": 3.1354575157165527, |
|
"rewards/margins_max": 4.131924629211426, |
|
"rewards/margins_min": 2.138990640640259, |
|
"rewards/margins_std": 1.409217119216919, |
|
"rewards/rejected": -2.806070327758789, |
|
"step": 880 |
|
}, |
|
{ |
|
"dpo_losses": 0.07743240892887115, |
|
"epoch": 2.51, |
|
"grad_norm": 246.3549744525164, |
|
"learning_rate": 4.005001948670606e-07, |
|
"logits/chosen": -2.3962912559509277, |
|
"logits/rejected": -2.299287796020508, |
|
"logps/chosen": -425.3636169433594, |
|
"logps/rejected": -637.3418579101562, |
|
"loss": 0.2586, |
|
"positive_losses": 1.6243082284927368, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5158067941665649, |
|
"rewards/margins": 3.6285767555236816, |
|
"rewards/margins_max": 4.66379451751709, |
|
"rewards/margins_min": 2.593358278274536, |
|
"rewards/margins_std": 1.464019536972046, |
|
"rewards/rejected": -3.112769603729248, |
|
"step": 890 |
|
}, |
|
{ |
|
"dpo_losses": 0.0913797914981842, |
|
"epoch": 2.54, |
|
"grad_norm": 31.034651555446352, |
|
"learning_rate": 3.571286656911377e-07, |
|
"logits/chosen": -2.356581211090088, |
|
"logits/rejected": -2.1674458980560303, |
|
"logps/chosen": -353.3656005859375, |
|
"logps/rejected": -581.6492309570312, |
|
"loss": 0.2348, |
|
"positive_losses": 2.3227431774139404, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.46973830461502075, |
|
"rewards/margins": 3.632810115814209, |
|
"rewards/margins_max": 5.023132801055908, |
|
"rewards/margins_min": 2.242488145828247, |
|
"rewards/margins_std": 1.966212511062622, |
|
"rewards/rejected": -3.163072109222412, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"eval_dpo_losses": 0.6903655529022217, |
|
"eval_logits/chosen": -2.3085381984710693, |
|
"eval_logits/rejected": -2.2660651206970215, |
|
"eval_logps/chosen": -430.1044006347656, |
|
"eval_logps/rejected": -459.71075439453125, |
|
"eval_loss": 14.72413444519043, |
|
"eval_positive_losses": 145.90809631347656, |
|
"eval_rewards/accuracies": 0.670634925365448, |
|
"eval_rewards/chosen": -1.4488320350646973, |
|
"eval_rewards/margins": 0.5564488768577576, |
|
"eval_rewards/margins_max": 2.180058240890503, |
|
"eval_rewards/margins_min": -1.0958278179168701, |
|
"eval_rewards/margins_std": 1.4586230516433716, |
|
"eval_rewards/rejected": -2.0052807331085205, |
|
"eval_runtime": 280.6688, |
|
"eval_samples_per_second": 7.126, |
|
"eval_steps_per_second": 0.224, |
|
"step": 900 |
|
}, |
|
{ |
|
"dpo_losses": 0.10229980945587158, |
|
"epoch": 2.56, |
|
"grad_norm": 1.3207112449434741, |
|
"learning_rate": 3.1606136691612555e-07, |
|
"logits/chosen": -2.450596809387207, |
|
"logits/rejected": -2.320343255996704, |
|
"logps/chosen": -373.2110900878906, |
|
"logps/rejected": -512.347412109375, |
|
"loss": 0.2148, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.45970940589904785, |
|
"rewards/margins": 3.199089527130127, |
|
"rewards/margins_max": 4.215831756591797, |
|
"rewards/margins_min": 2.182347297668457, |
|
"rewards/margins_std": 1.4378905296325684, |
|
"rewards/rejected": -2.739380121231079, |
|
"step": 910 |
|
}, |
|
{ |
|
"dpo_losses": 0.10751441866159439, |
|
"epoch": 2.59, |
|
"grad_norm": 50.58623371052272, |
|
"learning_rate": 2.773424582247844e-07, |
|
"logits/chosen": -2.3134539127349854, |
|
"logits/rejected": -2.1395092010498047, |
|
"logps/chosen": -311.353271484375, |
|
"logps/rejected": -480.4183654785156, |
|
"loss": 0.1833, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4951690137386322, |
|
"rewards/margins": 3.3741965293884277, |
|
"rewards/margins_max": 4.4515910148620605, |
|
"rewards/margins_min": 2.296802043914795, |
|
"rewards/margins_std": 1.5236659049987793, |
|
"rewards/rejected": -2.8790273666381836, |
|
"step": 920 |
|
}, |
|
{ |
|
"dpo_losses": 0.08198712766170502, |
|
"epoch": 2.62, |
|
"grad_norm": 34.59409580902137, |
|
"learning_rate": 2.410135740750821e-07, |
|
"logits/chosen": -2.3756449222564697, |
|
"logits/rejected": -2.2801589965820312, |
|
"logps/chosen": -309.6001892089844, |
|
"logps/rejected": -594.0984497070312, |
|
"loss": 0.2129, |
|
"positive_losses": 0.7936180233955383, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.37005850672721863, |
|
"rewards/margins": 3.6145331859588623, |
|
"rewards/margins_max": 4.759096145629883, |
|
"rewards/margins_min": 2.4699695110321045, |
|
"rewards/margins_std": 1.618657112121582, |
|
"rewards/rejected": -3.244474411010742, |
|
"step": 930 |
|
}, |
|
{ |
|
"dpo_losses": 0.18878915905952454, |
|
"epoch": 2.65, |
|
"grad_norm": 127.23596281859544, |
|
"learning_rate": 2.0711377893064182e-07, |
|
"logits/chosen": -2.362793207168579, |
|
"logits/rejected": -2.2321505546569824, |
|
"logps/chosen": -313.73736572265625, |
|
"logps/rejected": -428.6400451660156, |
|
"loss": 0.2036, |
|
"positive_losses": 2.974576711654663, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.2547670602798462, |
|
"rewards/margins": 2.439239978790283, |
|
"rewards/margins_max": 3.555748701095581, |
|
"rewards/margins_min": 1.3227306604385376, |
|
"rewards/margins_std": 1.5789823532104492, |
|
"rewards/rejected": -2.1844725608825684, |
|
"step": 940 |
|
}, |
|
{ |
|
"dpo_losses": 0.1424863040447235, |
|
"epoch": 2.68, |
|
"grad_norm": 3.7675671139759395, |
|
"learning_rate": 1.756795252547111e-07, |
|
"logits/chosen": -2.324215888977051, |
|
"logits/rejected": -2.20219087600708, |
|
"logps/chosen": -260.1328430175781, |
|
"logps/rejected": -470.79412841796875, |
|
"loss": 0.3619, |
|
"positive_losses": 0.20402908325195312, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.42012229561805725, |
|
"rewards/margins": 3.099902629852295, |
|
"rewards/margins_max": 4.072454452514648, |
|
"rewards/margins_min": 2.1273510456085205, |
|
"rewards/margins_std": 1.3753960132598877, |
|
"rewards/rejected": -2.6797804832458496, |
|
"step": 950 |
|
}, |
|
{ |
|
"dpo_losses": 0.10174594074487686, |
|
"epoch": 2.7, |
|
"grad_norm": 1.8085505107547237, |
|
"learning_rate": 1.4674461431281013e-07, |
|
"logits/chosen": -2.525442361831665, |
|
"logits/rejected": -2.4085853099823, |
|
"logps/chosen": -290.04345703125, |
|
"logps/rejected": -531.1943969726562, |
|
"loss": 0.3038, |
|
"positive_losses": 0.1788475066423416, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3685051202774048, |
|
"rewards/margins": 3.2411983013153076, |
|
"rewards/margins_max": 4.235571384429932, |
|
"rewards/margins_min": 2.246825695037842, |
|
"rewards/margins_std": 1.4062554836273193, |
|
"rewards/rejected": -2.8726933002471924, |
|
"step": 960 |
|
}, |
|
{ |
|
"dpo_losses": 0.09436773508787155, |
|
"epoch": 2.73, |
|
"grad_norm": 38.21579384775439, |
|
"learning_rate": 1.2034015982622243e-07, |
|
"logits/chosen": -2.403714418411255, |
|
"logits/rejected": -2.282444477081299, |
|
"logps/chosen": -336.4191589355469, |
|
"logps/rejected": -650.2848510742188, |
|
"loss": 0.2727, |
|
"positive_losses": 4.7762908935546875, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.38717547059059143, |
|
"rewards/margins": 3.8552498817443848, |
|
"rewards/margins_max": 5.095088481903076, |
|
"rewards/margins_min": 2.615410327911377, |
|
"rewards/margins_std": 1.7533977031707764, |
|
"rewards/rejected": -3.468074083328247, |
|
"step": 970 |
|
}, |
|
{ |
|
"dpo_losses": 0.0995684489607811, |
|
"epoch": 2.76, |
|
"grad_norm": 34.214640164153415, |
|
"learning_rate": 9.649455451539419e-08, |
|
"logits/chosen": -2.195786714553833, |
|
"logits/rejected": -2.1557459831237793, |
|
"logps/chosen": -205.28829956054688, |
|
"logps/rejected": -449.69903564453125, |
|
"loss": 0.362, |
|
"positive_losses": 1.5731815099716187, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.3235073983669281, |
|
"rewards/margins": 3.076923131942749, |
|
"rewards/margins_max": 3.9705424308776855, |
|
"rewards/margins_min": 2.1833040714263916, |
|
"rewards/margins_std": 1.2637684345245361, |
|
"rewards/rejected": -2.753415584564209, |
|
"step": 980 |
|
}, |
|
{ |
|
"dpo_losses": 0.09841219335794449, |
|
"epoch": 2.79, |
|
"grad_norm": 3.551041227418815, |
|
"learning_rate": 7.523343956923196e-08, |
|
"logits/chosen": -2.4305670261383057, |
|
"logits/rejected": -2.3597071170806885, |
|
"logps/chosen": -302.9432678222656, |
|
"logps/rejected": -600.987060546875, |
|
"loss": 0.2684, |
|
"positive_losses": 1.013157606124878, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.49134930968284607, |
|
"rewards/margins": 3.6477818489074707, |
|
"rewards/margins_max": 4.790149688720703, |
|
"rewards/margins_min": 2.5054140090942383, |
|
"rewards/margins_std": 1.6155517101287842, |
|
"rewards/rejected": -3.156432628631592, |
|
"step": 990 |
|
}, |
|
{ |
|
"dpo_losses": 0.11016283929347992, |
|
"epoch": 2.82, |
|
"grad_norm": 17.885804652199482, |
|
"learning_rate": 5.657967707312195e-08, |
|
"logits/chosen": -2.2564501762390137, |
|
"logits/rejected": -2.223057985305786, |
|
"logps/chosen": -243.24911499023438, |
|
"logps/rejected": -581.9591064453125, |
|
"loss": 0.1369, |
|
"positive_losses": 0.5390418171882629, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3867679238319397, |
|
"rewards/margins": 3.3442111015319824, |
|
"rewards/margins_max": 4.438827991485596, |
|
"rewards/margins_min": 2.2495944499969482, |
|
"rewards/margins_std": 1.5480217933654785, |
|
"rewards/rejected": -2.9574437141418457, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"eval_dpo_losses": 0.6868842244148254, |
|
"eval_logits/chosen": -2.3165292739868164, |
|
"eval_logits/rejected": -2.2738356590270996, |
|
"eval_logps/chosen": -428.1341857910156, |
|
"eval_logps/rejected": -461.68865966796875, |
|
"eval_loss": 14.59554386138916, |
|
"eval_positive_losses": 143.93893432617188, |
|
"eval_rewards/accuracies": 0.6626983880996704, |
|
"eval_rewards/chosen": -1.4291293621063232, |
|
"eval_rewards/margins": 0.5959304571151733, |
|
"eval_rewards/margins_max": 2.2952685356140137, |
|
"eval_rewards/margins_min": -1.107314109802246, |
|
"eval_rewards/margins_std": 1.5052008628845215, |
|
"eval_rewards/rejected": -2.025059700012207, |
|
"eval_runtime": 280.8142, |
|
"eval_samples_per_second": 7.122, |
|
"eval_steps_per_second": 0.224, |
|
"step": 1000 |
|
}, |
|
{ |
|
"dpo_losses": 0.13428188860416412, |
|
"epoch": 2.85, |
|
"grad_norm": 223.7670035438648, |
|
"learning_rate": 4.055332542531959e-08, |
|
"logits/chosen": -2.418339490890503, |
|
"logits/rejected": -2.3237392902374268, |
|
"logps/chosen": -274.25958251953125, |
|
"logps/rejected": -552.5701904296875, |
|
"loss": 0.4402, |
|
"positive_losses": 0.35679930448532104, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.41407138109207153, |
|
"rewards/margins": 3.3448989391326904, |
|
"rewards/margins_max": 4.5948896408081055, |
|
"rewards/margins_min": 2.094907522201538, |
|
"rewards/margins_std": 1.7677549123764038, |
|
"rewards/rejected": -2.9308273792266846, |
|
"step": 1010 |
|
}, |
|
{ |
|
"dpo_losses": 0.09611718356609344, |
|
"epoch": 2.87, |
|
"grad_norm": 4.2164881541545896, |
|
"learning_rate": 2.7171617768147472e-08, |
|
"logits/chosen": -2.3242077827453613, |
|
"logits/rejected": -2.20931339263916, |
|
"logps/chosen": -273.04669189453125, |
|
"logps/rejected": -532.8912963867188, |
|
"loss": 0.2835, |
|
"positive_losses": 1.0188411474227905, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.38434693217277527, |
|
"rewards/margins": 3.382856845855713, |
|
"rewards/margins_max": 4.1625471115112305, |
|
"rewards/margins_min": 2.6031665802001953, |
|
"rewards/margins_std": 1.1026487350463867, |
|
"rewards/rejected": -2.9985098838806152, |
|
"step": 1020 |
|
}, |
|
{ |
|
"dpo_losses": 0.09702328592538834, |
|
"epoch": 2.9, |
|
"grad_norm": 37.444607526485036, |
|
"learning_rate": 1.6448943457189616e-08, |
|
"logits/chosen": -2.3770041465759277, |
|
"logits/rejected": -2.312561511993408, |
|
"logps/chosen": -323.7328186035156, |
|
"logps/rejected": -590.1715698242188, |
|
"loss": 0.1692, |
|
"positive_losses": 0.22979411482810974, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4143516421318054, |
|
"rewards/margins": 3.550870418548584, |
|
"rewards/margins_max": 4.831820487976074, |
|
"rewards/margins_min": 2.269920825958252, |
|
"rewards/margins_std": 1.8115367889404297, |
|
"rewards/rejected": -3.136518955230713, |
|
"step": 1030 |
|
}, |
|
{ |
|
"dpo_losses": 0.05908944085240364, |
|
"epoch": 2.93, |
|
"grad_norm": 4.414870466706013, |
|
"learning_rate": 8.39683258841123e-09, |
|
"logits/chosen": -2.2308764457702637, |
|
"logits/rejected": -2.1003758907318115, |
|
"logps/chosen": -295.9768371582031, |
|
"logps/rejected": -529.7901611328125, |
|
"loss": 0.2377, |
|
"positive_losses": 3.635768175125122, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5024327039718628, |
|
"rewards/margins": 3.5135796070098877, |
|
"rewards/margins_max": 4.2515668869018555, |
|
"rewards/margins_min": 2.7755913734436035, |
|
"rewards/margins_std": 1.0436723232269287, |
|
"rewards/rejected": -3.0111465454101562, |
|
"step": 1040 |
|
}, |
|
{ |
|
"dpo_losses": 0.13825824856758118, |
|
"epoch": 2.96, |
|
"grad_norm": 2.4504883804901993, |
|
"learning_rate": 3.0239435998430376e-09, |
|
"logits/chosen": -2.348167896270752, |
|
"logits/rejected": -2.233564615249634, |
|
"logps/chosen": -286.636962890625, |
|
"logps/rejected": -520.432861328125, |
|
"loss": 0.2494, |
|
"positive_losses": 3.874138593673706, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4494766294956207, |
|
"rewards/margins": 3.075880527496338, |
|
"rewards/margins_max": 4.244940280914307, |
|
"rewards/margins_min": 1.9068210124969482, |
|
"rewards/margins_std": 1.6533000469207764, |
|
"rewards/rejected": -2.62640380859375, |
|
"step": 1050 |
|
}, |
|
{ |
|
"dpo_losses": 0.10627589374780655, |
|
"epoch": 2.99, |
|
"grad_norm": 2.8525382266880834, |
|
"learning_rate": 3.3605396115826695e-10, |
|
"logits/chosen": -2.2836787700653076, |
|
"logits/rejected": -2.183605432510376, |
|
"logps/chosen": -289.5533447265625, |
|
"logps/rejected": -472.84375, |
|
"loss": 0.3088, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3630369305610657, |
|
"rewards/margins": 2.99712872505188, |
|
"rewards/margins_max": 3.9373364448547363, |
|
"rewards/margins_min": 2.0569205284118652, |
|
"rewards/margins_std": 1.3296549320220947, |
|
"rewards/rejected": -2.63409161567688, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 1065, |
|
"total_flos": 0.0, |
|
"train_loss": 0.36563416943303856, |
|
"train_runtime": 9271.2095, |
|
"train_samples_per_second": 1.837, |
|
"train_steps_per_second": 0.115 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1065, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|