|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9974099364257123, |
|
"eval_steps": 50, |
|
"global_step": 353, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"chosen_logps": -92.0396499633789, |
|
"chosen_rewards": 0.0, |
|
"epoch": 0.0028255238992229807, |
|
"grad_norm": 12.203563280219937, |
|
"learning_rate": 2.7777777777777774e-08, |
|
"log_diff_policy": 2.422942638397217, |
|
"logits": -1.2872235774993896, |
|
"logp_accuracy": 0.625, |
|
"loss": 0.6931, |
|
"objective": 0.6931471824645996, |
|
"ranking_simple": 0.625, |
|
"rejected_logps": -94.46258544921875, |
|
"rejected_rewards": 0.0, |
|
"reward_accuracy": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"chosen_logps": -94.69133758544922, |
|
"chosen_rewards": -0.00179989542812109, |
|
"epoch": 0.014127619496114905, |
|
"grad_norm": 13.769130010396626, |
|
"learning_rate": 1.3888888888888888e-07, |
|
"log_diff_policy": -0.9685585498809814, |
|
"logits": -1.223134994506836, |
|
"logp_accuracy": 0.4739583432674408, |
|
"loss": 0.6927, |
|
"objective": 0.6933824419975281, |
|
"ranking_simple": 0.4739583432674408, |
|
"rejected_logps": -93.7227783203125, |
|
"rejected_rewards": -0.0013557692291215062, |
|
"reward_accuracy": 0.3697916567325592, |
|
"step": 5 |
|
}, |
|
{ |
|
"chosen_logps": -95.4830551147461, |
|
"chosen_rewards": -0.02674178034067154, |
|
"epoch": 0.02825523899222981, |
|
"grad_norm": 13.285605890484408, |
|
"learning_rate": 2.7777777777777776e-07, |
|
"log_diff_policy": -1.0804779529571533, |
|
"logits": -1.239374041557312, |
|
"logp_accuracy": 0.4833333194255829, |
|
"loss": 0.691, |
|
"objective": 0.6885988712310791, |
|
"ranking_simple": 0.4833333194255829, |
|
"rejected_logps": -94.40258026123047, |
|
"rejected_rewards": -0.03924858197569847, |
|
"reward_accuracy": 0.5416666865348816, |
|
"step": 10 |
|
}, |
|
{ |
|
"chosen_logps": -95.61571502685547, |
|
"chosen_rewards": -0.051721397787332535, |
|
"epoch": 0.042382858488344714, |
|
"grad_norm": 12.319792066922028, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"log_diff_policy": -0.5638642907142639, |
|
"logits": -1.2099182605743408, |
|
"logp_accuracy": 0.5, |
|
"loss": 0.6872, |
|
"objective": 0.6906775236129761, |
|
"ranking_simple": 0.5, |
|
"rejected_logps": -95.0518569946289, |
|
"rejected_rewards": -0.06499442458152771, |
|
"reward_accuracy": 0.5375000238418579, |
|
"step": 15 |
|
}, |
|
{ |
|
"chosen_logps": -96.26466369628906, |
|
"chosen_rewards": 0.06014590337872505, |
|
"epoch": 0.05651047798445962, |
|
"grad_norm": 12.196449773003847, |
|
"learning_rate": 5.555555555555555e-07, |
|
"log_diff_policy": 0.15712101757526398, |
|
"logits": -1.3111374378204346, |
|
"logp_accuracy": 0.5166666507720947, |
|
"loss": 0.6904, |
|
"objective": 0.6839234828948975, |
|
"ranking_simple": 0.5166666507720947, |
|
"rejected_logps": -96.42178344726562, |
|
"rejected_rewards": 0.029283961281180382, |
|
"reward_accuracy": 0.5791666507720947, |
|
"step": 20 |
|
}, |
|
{ |
|
"chosen_logps": -89.93444061279297, |
|
"chosen_rewards": 0.19082187116146088, |
|
"epoch": 0.07063809748057452, |
|
"grad_norm": 11.658480489992849, |
|
"learning_rate": 6.944444444444444e-07, |
|
"log_diff_policy": 1.8696421384811401, |
|
"logits": -1.275890588760376, |
|
"logp_accuracy": 0.5333333611488342, |
|
"loss": 0.6882, |
|
"objective": 0.6797720789909363, |
|
"ranking_simple": 0.5333333611488342, |
|
"rejected_logps": -91.80408477783203, |
|
"rejected_rewards": 0.14768332242965698, |
|
"reward_accuracy": 0.5916666388511658, |
|
"step": 25 |
|
}, |
|
{ |
|
"chosen_logps": -91.81427764892578, |
|
"chosen_rewards": 0.14018863439559937, |
|
"epoch": 0.08476571697668943, |
|
"grad_norm": 15.037591193931753, |
|
"learning_rate": 8.333333333333333e-07, |
|
"log_diff_policy": 2.39349102973938, |
|
"logits": -1.212536334991455, |
|
"logp_accuracy": 0.5458333492279053, |
|
"loss": 0.6845, |
|
"objective": 0.6916062831878662, |
|
"ranking_simple": 0.5458333492279053, |
|
"rejected_logps": -94.20777130126953, |
|
"rejected_rewards": 0.11354698240756989, |
|
"reward_accuracy": 0.5458333492279053, |
|
"step": 30 |
|
}, |
|
{ |
|
"chosen_logps": -90.74636840820312, |
|
"chosen_rewards": 0.15737393498420715, |
|
"epoch": 0.09889333647280434, |
|
"grad_norm": 10.604933845962982, |
|
"learning_rate": 9.722222222222222e-07, |
|
"log_diff_policy": 1.6841083765029907, |
|
"logits": -1.174207329750061, |
|
"logp_accuracy": 0.512499988079071, |
|
"loss": 0.671, |
|
"objective": 0.6680408716201782, |
|
"ranking_simple": 0.512499988079071, |
|
"rejected_logps": -92.43048858642578, |
|
"rejected_rewards": 0.08380100876092911, |
|
"reward_accuracy": 0.612500011920929, |
|
"step": 35 |
|
}, |
|
{ |
|
"chosen_logps": -95.68059539794922, |
|
"chosen_rewards": 0.0697811022400856, |
|
"epoch": 0.11302095596891924, |
|
"grad_norm": 11.456756804174194, |
|
"learning_rate": 9.996071883688332e-07, |
|
"log_diff_policy": -0.9100133180618286, |
|
"logits": -1.1049649715423584, |
|
"logp_accuracy": 0.48750001192092896, |
|
"loss": 0.6785, |
|
"objective": 0.6945610642433167, |
|
"ranking_simple": 0.48750001192092896, |
|
"rejected_logps": -94.77057647705078, |
|
"rejected_rewards": 0.040437690913677216, |
|
"reward_accuracy": 0.5166666507720947, |
|
"step": 40 |
|
}, |
|
{ |
|
"chosen_logps": -93.884765625, |
|
"chosen_rewards": -0.09519442170858383, |
|
"epoch": 0.12714857546503414, |
|
"grad_norm": 12.792596919876136, |
|
"learning_rate": 9.980124488638773e-07, |
|
"log_diff_policy": 1.2741607427597046, |
|
"logits": -1.2097392082214355, |
|
"logp_accuracy": 0.512499988079071, |
|
"loss": 0.6877, |
|
"objective": 0.6832014918327332, |
|
"ranking_simple": 0.512499988079071, |
|
"rejected_logps": -95.158935546875, |
|
"rejected_rewards": -0.15457913279533386, |
|
"reward_accuracy": 0.5083333253860474, |
|
"step": 45 |
|
}, |
|
{ |
|
"chosen_logps": -96.72931671142578, |
|
"chosen_rewards": -0.19579552114009857, |
|
"epoch": 0.14127619496114904, |
|
"grad_norm": 11.519646507473169, |
|
"learning_rate": 9.95195142656885e-07, |
|
"log_diff_policy": 0.6534870862960815, |
|
"logits": -1.207014799118042, |
|
"logp_accuracy": 0.5208333134651184, |
|
"loss": 0.6657, |
|
"objective": 0.6546425223350525, |
|
"ranking_simple": 0.5208333134651184, |
|
"rejected_logps": -97.38280487060547, |
|
"rejected_rewards": -0.30787384510040283, |
|
"reward_accuracy": 0.5791666507720947, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.14127619496114904, |
|
"eval_chosen_logps": -96.49317932128906, |
|
"eval_chosen_rewards": -0.23376186192035675, |
|
"eval_log_diff_policy": 1.5223942995071411, |
|
"eval_logits": -1.105320930480957, |
|
"eval_logp_accuracy": 0.5193236470222473, |
|
"eval_loss": 0.6741412878036499, |
|
"eval_objective": 0.6712923049926758, |
|
"eval_ranking_simple": 0.5193236470222473, |
|
"eval_rejected_logps": -98.01555633544922, |
|
"eval_rejected_rewards": -0.3036104738712311, |
|
"eval_reward_accuracy": 0.5851449370384216, |
|
"eval_runtime": 591.2486, |
|
"eval_samples_per_second": 16.8, |
|
"eval_steps_per_second": 0.7, |
|
"step": 50 |
|
}, |
|
{ |
|
"chosen_logps": -95.2642593383789, |
|
"chosen_rewards": -0.2244856208562851, |
|
"epoch": 0.15540381445726395, |
|
"grad_norm": 11.816802501387212, |
|
"learning_rate": 9.91162185929904e-07, |
|
"log_diff_policy": 2.4890987873077393, |
|
"logits": -1.2382166385650635, |
|
"logp_accuracy": 0.5666666626930237, |
|
"loss": 0.6625, |
|
"objective": 0.6524732708930969, |
|
"ranking_simple": 0.5666666626930237, |
|
"rejected_logps": -97.75334930419922, |
|
"rejected_rewards": -0.3347455561161041, |
|
"reward_accuracy": 0.6541666388511658, |
|
"step": 55 |
|
}, |
|
{ |
|
"chosen_logps": -96.38868713378906, |
|
"chosen_rewards": -0.31856828927993774, |
|
"epoch": 0.16953143395337886, |
|
"grad_norm": 15.028016832871401, |
|
"learning_rate": 9.859234791555355e-07, |
|
"log_diff_policy": 2.37857985496521, |
|
"logits": -1.270559549331665, |
|
"logp_accuracy": 0.5041666626930237, |
|
"loss": 0.658, |
|
"objective": 0.6463251709938049, |
|
"ranking_simple": 0.5041666626930237, |
|
"rejected_logps": -98.76726531982422, |
|
"rejected_rewards": -0.4456353485584259, |
|
"reward_accuracy": 0.625, |
|
"step": 60 |
|
}, |
|
{ |
|
"chosen_logps": -96.91040802001953, |
|
"chosen_rewards": -0.38393592834472656, |
|
"epoch": 0.18365905344949376, |
|
"grad_norm": 12.842079906907882, |
|
"learning_rate": 9.794918827923456e-07, |
|
"log_diff_policy": 2.754225730895996, |
|
"logits": -1.2090051174163818, |
|
"logp_accuracy": 0.5333333611488342, |
|
"loss": 0.6552, |
|
"objective": 0.659140944480896, |
|
"ranking_simple": 0.5333333611488342, |
|
"rejected_logps": -99.66463470458984, |
|
"rejected_rewards": -0.5064182877540588, |
|
"reward_accuracy": 0.5874999761581421, |
|
"step": 65 |
|
}, |
|
{ |
|
"chosen_logps": -99.7697525024414, |
|
"chosen_rewards": -0.36345040798187256, |
|
"epoch": 0.19778667294560867, |
|
"grad_norm": 11.642459528097591, |
|
"learning_rate": 9.718831857138307e-07, |
|
"log_diff_policy": -0.3823479115962982, |
|
"logits": -1.2754501104354858, |
|
"logp_accuracy": 0.5166666507720947, |
|
"loss": 0.652, |
|
"objective": 0.6486606597900391, |
|
"ranking_simple": 0.5166666507720947, |
|
"rejected_logps": -99.38741302490234, |
|
"rejected_rewards": -0.5056981444358826, |
|
"reward_accuracy": 0.6541666388511658, |
|
"step": 70 |
|
}, |
|
{ |
|
"chosen_logps": -98.25231170654297, |
|
"chosen_rewards": -0.44365358352661133, |
|
"epoch": 0.21191429244172358, |
|
"grad_norm": 13.778053208316393, |
|
"learning_rate": 9.631160664484398e-07, |
|
"log_diff_policy": 2.420900821685791, |
|
"logits": -1.3257941007614136, |
|
"logp_accuracy": 0.5416666865348816, |
|
"loss": 0.645, |
|
"objective": 0.6380378603935242, |
|
"ranking_simple": 0.5416666865348816, |
|
"rejected_logps": -100.67321014404297, |
|
"rejected_rewards": -0.6107330918312073, |
|
"reward_accuracy": 0.6416666507720947, |
|
"step": 75 |
|
}, |
|
{ |
|
"chosen_logps": -102.11405181884766, |
|
"chosen_rewards": -0.5838820338249207, |
|
"epoch": 0.22604191193783849, |
|
"grad_norm": 11.54119422499371, |
|
"learning_rate": 9.532120473258074e-07, |
|
"log_diff_policy": 0.3980850577354431, |
|
"logits": -1.410662055015564, |
|
"logp_accuracy": 0.5291666388511658, |
|
"loss": 0.6505, |
|
"objective": 0.6602904796600342, |
|
"ranking_simple": 0.5291666388511658, |
|
"rejected_logps": -102.51213836669922, |
|
"rejected_rewards": -0.716762125492096, |
|
"reward_accuracy": 0.612500011920929, |
|
"step": 80 |
|
}, |
|
{ |
|
"chosen_logps": -99.7666015625, |
|
"chosen_rewards": -0.6009302735328674, |
|
"epoch": 0.2401695314339534, |
|
"grad_norm": 12.196843414681535, |
|
"learning_rate": 9.421954416417624e-07, |
|
"log_diff_policy": 3.4806392192840576, |
|
"logits": -1.403199315071106, |
|
"logp_accuracy": 0.550000011920929, |
|
"loss": 0.6356, |
|
"objective": 0.6589958071708679, |
|
"ranking_simple": 0.550000011920929, |
|
"rejected_logps": -103.24723815917969, |
|
"rejected_rewards": -0.7273377776145935, |
|
"reward_accuracy": 0.6041666865348816, |
|
"step": 85 |
|
}, |
|
{ |
|
"chosen_logps": -99.78125, |
|
"chosen_rewards": -0.6102154850959778, |
|
"epoch": 0.25429715093006827, |
|
"grad_norm": 11.884683628001579, |
|
"learning_rate": 9.300932939718157e-07, |
|
"log_diff_policy": 4.280076026916504, |
|
"logits": -1.3959789276123047, |
|
"logp_accuracy": 0.5708333253860474, |
|
"loss": 0.6393, |
|
"objective": 0.6565433144569397, |
|
"ranking_simple": 0.5708333253860474, |
|
"rejected_logps": -104.06131744384766, |
|
"rejected_rewards": -0.732826292514801, |
|
"reward_accuracy": 0.637499988079071, |
|
"step": 90 |
|
}, |
|
{ |
|
"chosen_logps": -100.35063171386719, |
|
"chosen_rewards": -0.5101503133773804, |
|
"epoch": 0.2684247704261832, |
|
"grad_norm": 11.580908746020523, |
|
"learning_rate": 9.169353137796533e-07, |
|
"log_diff_policy": 2.76478910446167, |
|
"logits": -1.453112006187439, |
|
"logp_accuracy": 0.5541666746139526, |
|
"loss": 0.6437, |
|
"objective": 0.6427834033966064, |
|
"ranking_simple": 0.5541666746139526, |
|
"rejected_logps": -103.11542510986328, |
|
"rejected_rewards": -0.6783679127693176, |
|
"reward_accuracy": 0.6499999761581421, |
|
"step": 95 |
|
}, |
|
{ |
|
"chosen_logps": -98.85354614257812, |
|
"chosen_rewards": -0.5268033146858215, |
|
"epoch": 0.2825523899222981, |
|
"grad_norm": 13.403248828603669, |
|
"learning_rate": 9.027538024836141e-07, |
|
"log_diff_policy": 3.670335531234741, |
|
"logits": -1.3224259614944458, |
|
"logp_accuracy": 0.5791666507720947, |
|
"loss": 0.6364, |
|
"objective": 0.6203222274780273, |
|
"ranking_simple": 0.5791666507720947, |
|
"rejected_logps": -102.5239028930664, |
|
"rejected_rewards": -0.7540122270584106, |
|
"reward_accuracy": 0.6333333253860474, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2825523899222981, |
|
"eval_chosen_logps": -100.81198120117188, |
|
"eval_chosen_rewards": -0.6656423211097717, |
|
"eval_log_diff_policy": 2.193570375442505, |
|
"eval_logits": -1.2277085781097412, |
|
"eval_logp_accuracy": 0.5404589176177979, |
|
"eval_loss": 0.6705255508422852, |
|
"eval_objective": 0.6646167635917664, |
|
"eval_ranking_simple": 0.5404589176177979, |
|
"eval_rejected_logps": -103.00554656982422, |
|
"eval_rejected_rewards": -0.8026086091995239, |
|
"eval_reward_accuracy": 0.5972222089767456, |
|
"eval_runtime": 581.9012, |
|
"eval_samples_per_second": 17.07, |
|
"eval_steps_per_second": 0.711, |
|
"step": 100 |
|
}, |
|
{ |
|
"chosen_logps": -102.4442367553711, |
|
"chosen_rewards": -0.6342382431030273, |
|
"epoch": 0.296680009418413, |
|
"grad_norm": 13.882342204535156, |
|
"learning_rate": 8.875835741602029e-07, |
|
"log_diff_policy": 1.7450906038284302, |
|
"logits": -1.2776763439178467, |
|
"logp_accuracy": 0.5375000238418579, |
|
"loss": 0.6375, |
|
"objective": 0.638464093208313, |
|
"ranking_simple": 0.5375000238418579, |
|
"rejected_logps": -104.18933868408203, |
|
"rejected_rewards": -0.8383311629295349, |
|
"reward_accuracy": 0.6291666626930237, |
|
"step": 105 |
|
}, |
|
{ |
|
"chosen_logps": -98.33605194091797, |
|
"chosen_rewards": -0.4787895083427429, |
|
"epoch": 0.3108076289145279, |
|
"grad_norm": 11.129672527849843, |
|
"learning_rate": 8.714618700792975e-07, |
|
"log_diff_policy": 2.9387338161468506, |
|
"logits": -1.260119915008545, |
|
"logp_accuracy": 0.5333333611488342, |
|
"loss": 0.639, |
|
"objective": 0.6362030506134033, |
|
"ranking_simple": 0.5333333611488342, |
|
"rejected_logps": -101.2747802734375, |
|
"rejected_rewards": -0.6714141964912415, |
|
"reward_accuracy": 0.6458333134651184, |
|
"step": 110 |
|
}, |
|
{ |
|
"chosen_logps": -98.392822265625, |
|
"chosen_rewards": -0.3023277521133423, |
|
"epoch": 0.3249352484106428, |
|
"grad_norm": 11.136874519373743, |
|
"learning_rate": 8.544282672808578e-07, |
|
"log_diff_policy": 1.8658004999160767, |
|
"logits": -1.2985230684280396, |
|
"logp_accuracy": 0.5416666865348816, |
|
"loss": 0.6397, |
|
"objective": 0.6552284359931946, |
|
"ranking_simple": 0.5416666865348816, |
|
"rejected_logps": -100.25862884521484, |
|
"rejected_rewards": -0.44404318928718567, |
|
"reward_accuracy": 0.637499988079071, |
|
"step": 115 |
|
}, |
|
{ |
|
"chosen_logps": -97.15535736083984, |
|
"chosen_rewards": -0.30868878960609436, |
|
"epoch": 0.3390628679067577, |
|
"grad_norm": 10.158587216555272, |
|
"learning_rate": 8.365245814175743e-07, |
|
"log_diff_policy": 1.9471272230148315, |
|
"logits": -1.3143467903137207, |
|
"logp_accuracy": 0.5166666507720947, |
|
"loss": 0.6249, |
|
"objective": 0.6263554096221924, |
|
"ranking_simple": 0.5166666507720947, |
|
"rejected_logps": -99.10248565673828, |
|
"rejected_rewards": -0.5047985911369324, |
|
"reward_accuracy": 0.675000011920929, |
|
"step": 120 |
|
}, |
|
{ |
|
"chosen_logps": -97.54443359375, |
|
"chosen_rewards": -0.41942229866981506, |
|
"epoch": 0.3531904874028726, |
|
"grad_norm": 11.091561717382723, |
|
"learning_rate": 8.17794764101962e-07, |
|
"log_diff_policy": 2.919205665588379, |
|
"logits": -1.3511691093444824, |
|
"logp_accuracy": 0.5625, |
|
"loss": 0.6224, |
|
"objective": 0.6153813004493713, |
|
"ranking_simple": 0.5625, |
|
"rejected_logps": -100.46363830566406, |
|
"rejected_rewards": -0.6644426584243774, |
|
"reward_accuracy": 0.6291666626930237, |
|
"step": 125 |
|
}, |
|
{ |
|
"chosen_logps": -98.49415588378906, |
|
"chosen_rewards": -0.6381217241287231, |
|
"epoch": 0.36731810689898753, |
|
"grad_norm": 13.19781497022718, |
|
"learning_rate": 7.982847950099055e-07, |
|
"log_diff_policy": 2.8969905376434326, |
|
"logits": -1.2903234958648682, |
|
"logp_accuracy": 0.5625, |
|
"loss": 0.6365, |
|
"objective": 0.6469713449478149, |
|
"ranking_simple": 0.5625, |
|
"rejected_logps": -101.39115142822266, |
|
"rejected_rewards": -0.8170153498649597, |
|
"reward_accuracy": 0.6291666626930237, |
|
"step": 130 |
|
}, |
|
{ |
|
"chosen_logps": -100.84342193603516, |
|
"chosen_rewards": -0.6817469000816345, |
|
"epoch": 0.38144572639510244, |
|
"grad_norm": 12.642228781148605, |
|
"learning_rate": 7.780425690055274e-07, |
|
"log_diff_policy": 2.6813039779663086, |
|
"logits": -1.3165868520736694, |
|
"logp_accuracy": 0.5166666507720947, |
|
"loss": 0.6271, |
|
"objective": 0.6345203518867493, |
|
"ranking_simple": 0.5166666507720947, |
|
"rejected_logps": -103.52472686767578, |
|
"rejected_rewards": -0.9179552793502808, |
|
"reward_accuracy": 0.5958333611488342, |
|
"step": 135 |
|
}, |
|
{ |
|
"chosen_logps": -100.93543243408203, |
|
"chosen_rewards": -0.5448787808418274, |
|
"epoch": 0.39557334589121734, |
|
"grad_norm": 11.726885643648867, |
|
"learning_rate": 7.571177785644766e-07, |
|
"log_diff_policy": -0.2563031017780304, |
|
"logits": -1.3719837665557861, |
|
"logp_accuracy": 0.5208333134651184, |
|
"loss": 0.6264, |
|
"objective": 0.6360562443733215, |
|
"ranking_simple": 0.5208333134651184, |
|
"rejected_logps": -100.67912292480469, |
|
"rejected_rewards": -0.7725273370742798, |
|
"reward_accuracy": 0.6666666865348816, |
|
"step": 140 |
|
}, |
|
{ |
|
"chosen_logps": -99.08710479736328, |
|
"chosen_rewards": -0.442624032497406, |
|
"epoch": 0.40970096538733225, |
|
"grad_norm": 12.322663872224792, |
|
"learning_rate": 7.35561791784275e-07, |
|
"log_diff_policy": 1.6613519191741943, |
|
"logits": -1.4259085655212402, |
|
"logp_accuracy": 0.5041666626930237, |
|
"loss": 0.6134, |
|
"objective": 0.6079509854316711, |
|
"ranking_simple": 0.5041666626930237, |
|
"rejected_logps": -100.74845123291016, |
|
"rejected_rewards": -0.7379883527755737, |
|
"reward_accuracy": 0.6708333492279053, |
|
"step": 145 |
|
}, |
|
{ |
|
"chosen_logps": -97.87836456298828, |
|
"chosen_rewards": -0.4350048005580902, |
|
"epoch": 0.42382858488344716, |
|
"grad_norm": 11.246546091497954, |
|
"learning_rate": 7.134275262811934e-07, |
|
"log_diff_policy": 0.15529422461986542, |
|
"logits": -1.3733505010604858, |
|
"logp_accuracy": 0.5083333253860474, |
|
"loss": 0.6244, |
|
"objective": 0.6462458968162537, |
|
"ranking_simple": 0.5083333253860474, |
|
"rejected_logps": -98.03366088867188, |
|
"rejected_rewards": -0.6196256279945374, |
|
"reward_accuracy": 0.6333333253860474, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.42382858488344716, |
|
"eval_chosen_logps": -98.61821746826172, |
|
"eval_chosen_rewards": -0.4462670087814331, |
|
"eval_log_diff_policy": 2.290201187133789, |
|
"eval_logits": -1.3178818225860596, |
|
"eval_logp_accuracy": 0.5368357300758362, |
|
"eval_loss": 0.6577388048171997, |
|
"eval_objective": 0.6551039814949036, |
|
"eval_ranking_simple": 0.5368357300758362, |
|
"eval_rejected_logps": -100.90841674804688, |
|
"eval_rejected_rewards": -0.5928963422775269, |
|
"eval_reward_accuracy": 0.6086956262588501, |
|
"eval_runtime": 578.9907, |
|
"eval_samples_per_second": 17.156, |
|
"eval_steps_per_second": 0.715, |
|
"step": 150 |
|
}, |
|
{ |
|
"chosen_logps": -96.16273498535156, |
|
"chosen_rewards": -0.3375628888607025, |
|
"epoch": 0.43795620437956206, |
|
"grad_norm": 10.921897162913947, |
|
"learning_rate": 6.907693192832262e-07, |
|
"log_diff_policy": 3.625213146209717, |
|
"logits": -1.4034603834152222, |
|
"logp_accuracy": 0.5458333492279053, |
|
"loss": 0.6199, |
|
"objective": 0.6075600981712341, |
|
"ranking_simple": 0.5458333492279053, |
|
"rejected_logps": -99.7879409790039, |
|
"rejected_rewards": -0.5962837934494019, |
|
"reward_accuracy": 0.6916666626930237, |
|
"step": 155 |
|
}, |
|
{ |
|
"chosen_logps": -95.9808349609375, |
|
"chosen_rewards": -0.23865890502929688, |
|
"epoch": 0.45208382387567697, |
|
"grad_norm": 10.607229520458072, |
|
"learning_rate": 6.676427942380741e-07, |
|
"log_diff_policy": 3.0014472007751465, |
|
"logits": -1.33468759059906, |
|
"logp_accuracy": 0.512499988079071, |
|
"loss": 0.6085, |
|
"objective": 0.6010857820510864, |
|
"ranking_simple": 0.512499988079071, |
|
"rejected_logps": -98.9822769165039, |
|
"rejected_rewards": -0.5223438739776611, |
|
"reward_accuracy": 0.6458333134651184, |
|
"step": 160 |
|
}, |
|
{ |
|
"chosen_logps": -99.7357406616211, |
|
"chosen_rewards": -0.28111231327056885, |
|
"epoch": 0.4662114433717919, |
|
"grad_norm": 13.093121089444578, |
|
"learning_rate": 6.441047242635946e-07, |
|
"log_diff_policy": 2.3945486545562744, |
|
"logits": -1.4235426187515259, |
|
"logp_accuracy": 0.5208333134651184, |
|
"loss": 0.608, |
|
"objective": 0.6093403100967407, |
|
"ranking_simple": 0.5208333134651184, |
|
"rejected_logps": -102.13028717041016, |
|
"rejected_rewards": -0.570986807346344, |
|
"reward_accuracy": 0.6666666865348816, |
|
"step": 165 |
|
}, |
|
{ |
|
"chosen_logps": -97.75370788574219, |
|
"chosen_rewards": -0.318727046251297, |
|
"epoch": 0.4803390628679068, |
|
"grad_norm": 11.412190377149692, |
|
"learning_rate": 6.20212892775939e-07, |
|
"log_diff_policy": 1.4041647911071777, |
|
"logits": -1.3291094303131104, |
|
"logp_accuracy": 0.5166666507720947, |
|
"loss": 0.6015, |
|
"objective": 0.6086380481719971, |
|
"ranking_simple": 0.5166666507720947, |
|
"rejected_logps": -99.15787506103516, |
|
"rejected_rewards": -0.5835815072059631, |
|
"reward_accuracy": 0.6791666746139526, |
|
"step": 170 |
|
}, |
|
{ |
|
"chosen_logps": -99.83616638183594, |
|
"chosen_rewards": -0.4011620283126831, |
|
"epoch": 0.49446668236402164, |
|
"grad_norm": 12.49488940662025, |
|
"learning_rate": 5.960259516375133e-07, |
|
"log_diff_policy": 4.319859027862549, |
|
"logits": -1.42414128780365, |
|
"logp_accuracy": 0.5791666507720947, |
|
"loss": 0.6082, |
|
"objective": 0.5974529385566711, |
|
"ranking_simple": 0.5791666507720947, |
|
"rejected_logps": -104.1560287475586, |
|
"rejected_rewards": -0.6947523951530457, |
|
"reward_accuracy": 0.6916666626930237, |
|
"step": 175 |
|
}, |
|
{ |
|
"chosen_logps": -99.31816864013672, |
|
"chosen_rewards": -0.44157731533050537, |
|
"epoch": 0.5085943018601365, |
|
"grad_norm": 14.016856666647364, |
|
"learning_rate": 5.716032771730007e-07, |
|
"log_diff_policy": 2.4290826320648193, |
|
"logits": -1.3892205953598022, |
|
"logp_accuracy": 0.5833333134651184, |
|
"loss": 0.6092, |
|
"objective": 0.59227454662323, |
|
"ranking_simple": 0.5833333134651184, |
|
"rejected_logps": -101.74726104736328, |
|
"rejected_rewards": -0.7656379342079163, |
|
"reward_accuracy": 0.7124999761581421, |
|
"step": 180 |
|
}, |
|
{ |
|
"chosen_logps": -99.5169906616211, |
|
"chosen_rewards": -0.39382320642471313, |
|
"epoch": 0.5227219213562515, |
|
"grad_norm": 12.18994561625345, |
|
"learning_rate": 5.470048244069055e-07, |
|
"log_diff_policy": 3.986778974533081, |
|
"logits": -1.364132285118103, |
|
"logp_accuracy": 0.5625, |
|
"loss": 0.6046, |
|
"objective": 0.5779610276222229, |
|
"ranking_simple": 0.5625, |
|
"rejected_logps": -103.50377655029297, |
|
"rejected_rewards": -0.7597634792327881, |
|
"reward_accuracy": 0.7250000238418579, |
|
"step": 185 |
|
}, |
|
{ |
|
"chosen_logps": -100.92544555664062, |
|
"chosen_rewards": -0.47165414690971375, |
|
"epoch": 0.5368495408523664, |
|
"grad_norm": 13.57415809299262, |
|
"learning_rate": 5.222909798804514e-07, |
|
"log_diff_policy": 1.5828216075897217, |
|
"logits": -1.4004924297332764, |
|
"logp_accuracy": 0.5583333373069763, |
|
"loss": 0.6109, |
|
"objective": 0.6085138916969299, |
|
"ranking_simple": 0.5583333373069763, |
|
"rejected_logps": -102.50827026367188, |
|
"rejected_rewards": -0.7680691480636597, |
|
"reward_accuracy": 0.6333333253860474, |
|
"step": 190 |
|
}, |
|
{ |
|
"chosen_logps": -98.55032348632812, |
|
"chosen_rewards": -0.42258918285369873, |
|
"epoch": 0.5509771603484813, |
|
"grad_norm": 12.118293622606005, |
|
"learning_rate": 4.97522413409155e-07, |
|
"log_diff_policy": 2.5970427989959717, |
|
"logits": -1.3773627281188965, |
|
"logp_accuracy": 0.5458333492279053, |
|
"loss": 0.5975, |
|
"objective": 0.578415036201477, |
|
"ranking_simple": 0.5458333492279053, |
|
"rejected_logps": -101.1473617553711, |
|
"rejected_rewards": -0.7796388864517212, |
|
"reward_accuracy": 0.7291666865348816, |
|
"step": 195 |
|
}, |
|
{ |
|
"chosen_logps": -99.49516296386719, |
|
"chosen_rewards": -0.38766908645629883, |
|
"epoch": 0.5651047798445962, |
|
"grad_norm": 11.832876951855871, |
|
"learning_rate": 4.7275992914498865e-07, |
|
"log_diff_policy": 0.977001428604126, |
|
"logits": -1.3331760168075562, |
|
"logp_accuracy": 0.5083333253860474, |
|
"loss": 0.5938, |
|
"objective": 0.6072806119918823, |
|
"ranking_simple": 0.5083333253860474, |
|
"rejected_logps": -100.47218322753906, |
|
"rejected_rewards": -0.6541637182235718, |
|
"reward_accuracy": 0.637499988079071, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.5651047798445962, |
|
"eval_chosen_logps": -99.37232208251953, |
|
"eval_chosen_rewards": -0.5216771960258484, |
|
"eval_log_diff_policy": 2.475198745727539, |
|
"eval_logits": -1.2857621908187866, |
|
"eval_logp_accuracy": 0.5362318754196167, |
|
"eval_loss": 0.6590211391448975, |
|
"eval_objective": 0.6558353304862976, |
|
"eval_ranking_simple": 0.5362318754196167, |
|
"eval_rejected_logps": -101.84752655029297, |
|
"eval_rejected_rewards": -0.6868062615394592, |
|
"eval_reward_accuracy": 0.6159420013427734, |
|
"eval_runtime": 585.2119, |
|
"eval_samples_per_second": 16.973, |
|
"eval_steps_per_second": 0.707, |
|
"step": 200 |
|
}, |
|
{ |
|
"chosen_logps": -96.24632263183594, |
|
"chosen_rewards": -0.3795066773891449, |
|
"epoch": 0.5792323993407111, |
|
"grad_norm": 11.462490179104472, |
|
"learning_rate": 4.4806431630876436e-07, |
|
"log_diff_policy": 4.63961124420166, |
|
"logits": -1.266701102256775, |
|
"logp_accuracy": 0.550000011920929, |
|
"loss": 0.6066, |
|
"objective": 0.6177704334259033, |
|
"ranking_simple": 0.550000011920929, |
|
"rejected_logps": -100.88591766357422, |
|
"rejected_rewards": -0.6309120655059814, |
|
"reward_accuracy": 0.637499988079071, |
|
"step": 205 |
|
}, |
|
{ |
|
"chosen_logps": -100.29016876220703, |
|
"chosen_rewards": -0.4374944269657135, |
|
"epoch": 0.593360018836826, |
|
"grad_norm": 11.468388905148808, |
|
"learning_rate": 4.234961999591705e-07, |
|
"log_diff_policy": 5.617704391479492, |
|
"logits": -1.4379286766052246, |
|
"logp_accuracy": 0.6083333492279053, |
|
"loss": 0.6018, |
|
"objective": 0.60161954164505, |
|
"ranking_simple": 0.6083333492279053, |
|
"rejected_logps": -105.90787506103516, |
|
"rejected_rewards": -0.7697920203208923, |
|
"reward_accuracy": 0.6583333611488342, |
|
"step": 210 |
|
}, |
|
{ |
|
"chosen_logps": -98.36739349365234, |
|
"chosen_rewards": -0.47102805972099304, |
|
"epoch": 0.607487638332941, |
|
"grad_norm": 11.544777522117554, |
|
"learning_rate": 3.9911589216480955e-07, |
|
"log_diff_policy": 1.489052176475525, |
|
"logits": -1.4814375638961792, |
|
"logp_accuracy": 0.5041666626930237, |
|
"loss": 0.6048, |
|
"objective": 0.6275675296783447, |
|
"ranking_simple": 0.5041666626930237, |
|
"rejected_logps": -99.8564453125, |
|
"rejected_rewards": -0.7114861011505127, |
|
"reward_accuracy": 0.675000011920929, |
|
"step": 215 |
|
}, |
|
{ |
|
"chosen_logps": -98.07539367675781, |
|
"chosen_rewards": -0.40747135877609253, |
|
"epoch": 0.6216152578290558, |
|
"grad_norm": 11.7772812850058, |
|
"learning_rate": 3.7498324394459245e-07, |
|
"log_diff_policy": 3.3520638942718506, |
|
"logits": -1.4648287296295166, |
|
"logp_accuracy": 0.550000011920929, |
|
"loss": 0.5986, |
|
"objective": 0.5826771855354309, |
|
"ranking_simple": 0.550000011920929, |
|
"rejected_logps": -101.42745971679688, |
|
"rejected_rewards": -0.748247504234314, |
|
"reward_accuracy": 0.6875, |
|
"step": 220 |
|
}, |
|
{ |
|
"chosen_logps": -99.81163787841797, |
|
"chosen_rewards": -0.42688456177711487, |
|
"epoch": 0.6357428773251708, |
|
"grad_norm": 11.823543045006824, |
|
"learning_rate": 3.511574983399599e-07, |
|
"log_diff_policy": 3.882382392883301, |
|
"logits": -1.356088399887085, |
|
"logp_accuracy": 0.5916666388511658, |
|
"loss": 0.5795, |
|
"objective": 0.5695621967315674, |
|
"ranking_simple": 0.5916666388511658, |
|
"rejected_logps": -103.69401550292969, |
|
"rejected_rewards": -0.8101120591163635, |
|
"reward_accuracy": 0.7041666507720947, |
|
"step": 225 |
|
}, |
|
{ |
|
"chosen_logps": -98.75402069091797, |
|
"chosen_rewards": -0.5289642214775085, |
|
"epoch": 0.6498704968212856, |
|
"grad_norm": 12.384550673986663, |
|
"learning_rate": 3.276971449796223e-07, |
|
"log_diff_policy": 3.189291477203369, |
|
"logits": -1.4445135593414307, |
|
"logp_accuracy": 0.550000011920929, |
|
"loss": 0.6056, |
|
"objective": 0.6145560145378113, |
|
"ranking_simple": 0.550000011920929, |
|
"rejected_logps": -101.94329071044922, |
|
"rejected_rewards": -0.8118120431900024, |
|
"reward_accuracy": 0.6625000238418579, |
|
"step": 230 |
|
}, |
|
{ |
|
"chosen_logps": -100.55665588378906, |
|
"chosen_rewards": -0.4955964684486389, |
|
"epoch": 0.6639981163174005, |
|
"grad_norm": 11.935195296010395, |
|
"learning_rate": 3.046597764938481e-07, |
|
"log_diff_policy": 5.328332424163818, |
|
"logits": -1.4324222803115845, |
|
"logp_accuracy": 0.6083333492279053, |
|
"loss": 0.5993, |
|
"objective": 0.5901253819465637, |
|
"ranking_simple": 0.6083333492279053, |
|
"rejected_logps": -105.88497924804688, |
|
"rejected_rewards": -0.821084201335907, |
|
"reward_accuracy": 0.6958333253860474, |
|
"step": 235 |
|
}, |
|
{ |
|
"chosen_logps": -98.99813079833984, |
|
"chosen_rewards": -0.4493686258792877, |
|
"epoch": 0.6781257358135154, |
|
"grad_norm": 14.08286875676345, |
|
"learning_rate": 2.8210194713078404e-07, |
|
"log_diff_policy": 2.7355611324310303, |
|
"logits": -1.379309892654419, |
|
"logp_accuracy": 0.5708333253860474, |
|
"loss": 0.6072, |
|
"objective": 0.6106851100921631, |
|
"ranking_simple": 0.5708333253860474, |
|
"rejected_logps": -101.73368072509766, |
|
"rejected_rewards": -0.7426342368125916, |
|
"reward_accuracy": 0.6708333492279053, |
|
"step": 240 |
|
}, |
|
{ |
|
"chosen_logps": -98.74683380126953, |
|
"chosen_rewards": -0.36279481649398804, |
|
"epoch": 0.6922533553096303, |
|
"grad_norm": 11.505971149373092, |
|
"learning_rate": 2.600790339218926e-07, |
|
"log_diff_policy": 4.0673909187316895, |
|
"logits": -1.4577367305755615, |
|
"logp_accuracy": 0.5625, |
|
"loss": 0.5975, |
|
"objective": 0.5751992464065552, |
|
"ranking_simple": 0.5625, |
|
"rejected_logps": -102.8142318725586, |
|
"rejected_rewards": -0.728181779384613, |
|
"reward_accuracy": 0.7250000238418579, |
|
"step": 245 |
|
}, |
|
{ |
|
"chosen_logps": -97.71234893798828, |
|
"chosen_rewards": -0.34638115763664246, |
|
"epoch": 0.7063809748057452, |
|
"grad_norm": 11.594437077167564, |
|
"learning_rate": 2.3864510073732915e-07, |
|
"log_diff_policy": 2.69929575920105, |
|
"logits": -1.3644834756851196, |
|
"logp_accuracy": 0.5458333492279053, |
|
"loss": 0.5876, |
|
"objective": 0.5611613988876343, |
|
"ranking_simple": 0.5458333492279053, |
|
"rejected_logps": -100.41165161132812, |
|
"rejected_rewards": -0.7614852786064148, |
|
"reward_accuracy": 0.7083333134651184, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.7063809748057452, |
|
"eval_chosen_logps": -99.82035827636719, |
|
"eval_chosen_rewards": -0.566480278968811, |
|
"eval_log_diff_policy": 2.69968581199646, |
|
"eval_logits": -1.3214609622955322, |
|
"eval_logp_accuracy": 0.5446860194206238, |
|
"eval_loss": 0.6542993187904358, |
|
"eval_objective": 0.6504490971565247, |
|
"eval_ranking_simple": 0.5446860194206238, |
|
"eval_rejected_logps": -102.5200424194336, |
|
"eval_rejected_rewards": -0.7540581226348877, |
|
"eval_reward_accuracy": 0.6171497702598572, |
|
"eval_runtime": 580.2016, |
|
"eval_samples_per_second": 17.12, |
|
"eval_steps_per_second": 0.714, |
|
"step": 250 |
|
}, |
|
{ |
|
"chosen_logps": -98.32154846191406, |
|
"chosen_rewards": -0.4018653333187103, |
|
"epoch": 0.7205085943018601, |
|
"grad_norm": 12.212780646567717, |
|
"learning_rate": 2.1785276556498678e-07, |
|
"log_diff_policy": 4.165632724761963, |
|
"logits": -1.4318310022354126, |
|
"logp_accuracy": 0.574999988079071, |
|
"loss": 0.5952, |
|
"objective": 0.6162487268447876, |
|
"ranking_simple": 0.574999988079071, |
|
"rejected_logps": -102.4871826171875, |
|
"rejected_rewards": -0.6852520108222961, |
|
"reward_accuracy": 0.6499999761581421, |
|
"step": 255 |
|
}, |
|
{ |
|
"chosen_logps": -98.31494140625, |
|
"chosen_rewards": -0.39665502309799194, |
|
"epoch": 0.7346362137979751, |
|
"grad_norm": 12.485927390030215, |
|
"learning_rate": 1.9775307133902806e-07, |
|
"log_diff_policy": 2.1429994106292725, |
|
"logits": -1.3612717390060425, |
|
"logp_accuracy": 0.5708333253860474, |
|
"loss": 0.605, |
|
"objective": 0.6075990796089172, |
|
"ranking_simple": 0.5708333253860474, |
|
"rejected_logps": -100.45793151855469, |
|
"rejected_rewards": -0.6959177851676941, |
|
"reward_accuracy": 0.6416666507720947, |
|
"step": 260 |
|
}, |
|
{ |
|
"chosen_logps": -96.93352508544922, |
|
"chosen_rewards": -0.4520092010498047, |
|
"epoch": 0.7487638332940899, |
|
"grad_norm": 12.390709218829844, |
|
"learning_rate": 1.783953606350005e-07, |
|
"log_diff_policy": 2.5331625938415527, |
|
"logits": -1.3640118837356567, |
|
"logp_accuracy": 0.5416666865348816, |
|
"loss": 0.5989, |
|
"objective": 0.6267301440238953, |
|
"ranking_simple": 0.5416666865348816, |
|
"rejected_logps": -99.46669006347656, |
|
"rejected_rewards": -0.7122800350189209, |
|
"reward_accuracy": 0.6416666507720947, |
|
"step": 265 |
|
}, |
|
{ |
|
"chosen_logps": -99.09416961669922, |
|
"chosen_rewards": -0.469031423330307, |
|
"epoch": 0.7628914527902049, |
|
"grad_norm": 13.216199579795669, |
|
"learning_rate": 1.5982715453915079e-07, |
|
"log_diff_policy": 3.8577356338500977, |
|
"logits": -1.4884754419326782, |
|
"logp_accuracy": 0.5874999761581421, |
|
"loss": 0.5841, |
|
"objective": 0.6081915497779846, |
|
"ranking_simple": 0.5874999761581421, |
|
"rejected_logps": -102.95191192626953, |
|
"rejected_rewards": -0.7843472957611084, |
|
"reward_accuracy": 0.6791666746139526, |
|
"step": 270 |
|
}, |
|
{ |
|
"chosen_logps": -100.04994201660156, |
|
"chosen_rewards": -0.4179188311100006, |
|
"epoch": 0.7770190722863197, |
|
"grad_norm": 11.947579077894389, |
|
"learning_rate": 1.4209403598929708e-07, |
|
"log_diff_policy": 3.1525373458862305, |
|
"logits": -1.424578309059143, |
|
"logp_accuracy": 0.5291666388511658, |
|
"loss": 0.5992, |
|
"objective": 0.6024218797683716, |
|
"ranking_simple": 0.5291666388511658, |
|
"rejected_logps": -103.2024917602539, |
|
"rejected_rewards": -0.7272025942802429, |
|
"reward_accuracy": 0.6708333492279053, |
|
"step": 275 |
|
}, |
|
{ |
|
"chosen_logps": -100.48685455322266, |
|
"chosen_rewards": -0.4961775243282318, |
|
"epoch": 0.7911466917824347, |
|
"grad_norm": 12.469165432741715, |
|
"learning_rate": 1.2523953787364722e-07, |
|
"log_diff_policy": 1.6572238206863403, |
|
"logits": -1.3310333490371704, |
|
"logp_accuracy": 0.5458333492279053, |
|
"loss": 0.588, |
|
"objective": 0.6101647615432739, |
|
"ranking_simple": 0.5458333492279053, |
|
"rejected_logps": -102.14408111572266, |
|
"rejected_rewards": -0.7782385349273682, |
|
"reward_accuracy": 0.6875, |
|
"step": 280 |
|
}, |
|
{ |
|
"chosen_logps": -98.08577728271484, |
|
"chosen_rewards": -0.48601558804512024, |
|
"epoch": 0.8052743112785495, |
|
"grad_norm": 12.31765364922954, |
|
"learning_rate": 1.0930503616226495e-07, |
|
"log_diff_policy": 3.3555397987365723, |
|
"logits": -1.4751113653182983, |
|
"logp_accuracy": 0.5916666388511658, |
|
"loss": 0.6025, |
|
"objective": 0.6169189214706421, |
|
"ranking_simple": 0.5916666388511658, |
|
"rejected_logps": -101.44131469726562, |
|
"rejected_rewards": -0.7556989789009094, |
|
"reward_accuracy": 0.6708333492279053, |
|
"step": 285 |
|
}, |
|
{ |
|
"chosen_logps": -100.12785339355469, |
|
"chosen_rewards": -0.5353098511695862, |
|
"epoch": 0.8194019307746645, |
|
"grad_norm": 12.570256325871375, |
|
"learning_rate": 9.432964833353946e-08, |
|
"log_diff_policy": 3.7209763526916504, |
|
"logits": -1.4481749534606934, |
|
"logp_accuracy": 0.574999988079071, |
|
"loss": 0.5859, |
|
"objective": 0.5787585973739624, |
|
"ranking_simple": 0.574999988079071, |
|
"rejected_logps": -103.84882354736328, |
|
"rejected_rewards": -0.8981534242630005, |
|
"reward_accuracy": 0.6916666626930237, |
|
"step": 290 |
|
}, |
|
{ |
|
"chosen_logps": -97.55231475830078, |
|
"chosen_rewards": -0.4495824873447418, |
|
"epoch": 0.8335295502707794, |
|
"grad_norm": 12.980085764300096, |
|
"learning_rate": 8.035013734500557e-08, |
|
"log_diff_policy": 6.113182544708252, |
|
"logits": -1.4768887758255005, |
|
"logp_accuracy": 0.6000000238418579, |
|
"loss": 0.5855, |
|
"objective": 0.5812606811523438, |
|
"ranking_simple": 0.6000000238418579, |
|
"rejected_logps": -103.66551208496094, |
|
"rejected_rewards": -0.8177840113639832, |
|
"reward_accuracy": 0.7041666507720947, |
|
"step": 295 |
|
}, |
|
{ |
|
"chosen_logps": -100.5435562133789, |
|
"chosen_rewards": -0.47136637568473816, |
|
"epoch": 0.8476571697668943, |
|
"grad_norm": 11.431502808997152, |
|
"learning_rate": 6.740082138425962e-08, |
|
"log_diff_policy": 4.680129051208496, |
|
"logits": -1.441601037979126, |
|
"logp_accuracy": 0.5791666507720947, |
|
"loss": 0.5705, |
|
"objective": 0.542003333568573, |
|
"ranking_simple": 0.5791666507720947, |
|
"rejected_logps": -105.22367095947266, |
|
"rejected_rewards": -0.9355214238166809, |
|
"reward_accuracy": 0.7416666746139526, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.8476571697668943, |
|
"eval_chosen_logps": -100.62620544433594, |
|
"eval_chosen_rewards": -0.6470655202865601, |
|
"eval_log_diff_policy": 2.7566163539886475, |
|
"eval_logits": -1.3272062540054321, |
|
"eval_logp_accuracy": 0.5446860194206238, |
|
"eval_loss": 0.6553571224212646, |
|
"eval_objective": 0.6504380106925964, |
|
"eval_ranking_simple": 0.5446860194206238, |
|
"eval_rejected_logps": -103.38282012939453, |
|
"eval_rejected_rewards": -0.8403363823890686, |
|
"eval_reward_accuracy": 0.6189613342285156, |
|
"eval_runtime": 577.7358, |
|
"eval_samples_per_second": 17.193, |
|
"eval_steps_per_second": 0.717, |
|
"step": 300 |
|
}, |
|
{ |
|
"chosen_logps": -99.48948669433594, |
|
"chosen_rewards": -0.5248011350631714, |
|
"epoch": 0.8617847892630092, |
|
"grad_norm": 13.252372937407342, |
|
"learning_rate": 5.551348962151964e-08, |
|
"log_diff_policy": 2.7193262577056885, |
|
"logits": -1.4033145904541016, |
|
"logp_accuracy": 0.5625, |
|
"loss": 0.6042, |
|
"objective": 0.617713987827301, |
|
"ranking_simple": 0.5625, |
|
"rejected_logps": -102.20880889892578, |
|
"rejected_rewards": -0.8076351284980774, |
|
"reward_accuracy": 0.6541666388511658, |
|
"step": 305 |
|
}, |
|
{ |
|
"chosen_logps": -98.88239288330078, |
|
"chosen_rewards": -0.5424375534057617, |
|
"epoch": 0.8759124087591241, |
|
"grad_norm": 11.513198778387665, |
|
"learning_rate": 4.471732417065144e-08, |
|
"log_diff_policy": 5.485601902008057, |
|
"logits": -1.4526277780532837, |
|
"logp_accuracy": 0.612500011920929, |
|
"loss": 0.5898, |
|
"objective": 0.5874204039573669, |
|
"ranking_simple": 0.612500011920929, |
|
"rejected_logps": -104.36799621582031, |
|
"rejected_rewards": -0.8886787295341492, |
|
"reward_accuracy": 0.6916666626930237, |
|
"step": 310 |
|
}, |
|
{ |
|
"chosen_logps": -95.83468627929688, |
|
"chosen_rewards": -0.3822983205318451, |
|
"epoch": 0.890040028255239, |
|
"grad_norm": 12.210262508024565, |
|
"learning_rate": 3.503882845023387e-08, |
|
"log_diff_policy": 4.727508068084717, |
|
"logits": -1.4003602266311646, |
|
"logp_accuracy": 0.6000000238418579, |
|
"loss": 0.584, |
|
"objective": 0.5880329608917236, |
|
"ranking_simple": 0.6000000238418579, |
|
"rejected_logps": -100.56217956542969, |
|
"rejected_rewards": -0.7324024438858032, |
|
"reward_accuracy": 0.6916666626930237, |
|
"step": 315 |
|
}, |
|
{ |
|
"chosen_logps": -100.13945770263672, |
|
"chosen_rewards": -0.40757814049720764, |
|
"epoch": 0.9041676477513539, |
|
"grad_norm": 11.629225422410343, |
|
"learning_rate": 2.65017621205339e-08, |
|
"log_diff_policy": 1.5231914520263672, |
|
"logits": -1.4287190437316895, |
|
"logp_accuracy": 0.5375000238418579, |
|
"loss": 0.5795, |
|
"objective": 0.5747238993644714, |
|
"ranking_simple": 0.5375000238418579, |
|
"rejected_logps": -101.66265106201172, |
|
"rejected_rewards": -0.7921539545059204, |
|
"reward_accuracy": 0.7124999761581421, |
|
"step": 320 |
|
}, |
|
{ |
|
"chosen_logps": -98.47615814208984, |
|
"chosen_rewards": -0.4354442059993744, |
|
"epoch": 0.9182952672474688, |
|
"grad_norm": 11.50196158969457, |
|
"learning_rate": 1.9127082756109138e-08, |
|
"log_diff_policy": 4.035438060760498, |
|
"logits": -1.4835073947906494, |
|
"logp_accuracy": 0.5916666388511658, |
|
"loss": 0.5836, |
|
"objective": 0.5947951078414917, |
|
"ranking_simple": 0.5916666388511658, |
|
"rejected_logps": -102.51158905029297, |
|
"rejected_rewards": -0.7917211055755615, |
|
"reward_accuracy": 0.6791666746139526, |
|
"step": 325 |
|
}, |
|
{ |
|
"chosen_logps": -98.91030883789062, |
|
"chosen_rewards": -0.5017069578170776, |
|
"epoch": 0.9324228867435838, |
|
"grad_norm": 11.552117625756214, |
|
"learning_rate": 1.293289439722961e-08, |
|
"log_diff_policy": 6.066530704498291, |
|
"logits": -1.376121163368225, |
|
"logp_accuracy": 0.6041666865348816, |
|
"loss": 0.5902, |
|
"objective": 0.6051034331321716, |
|
"ranking_simple": 0.6041666865348816, |
|
"rejected_logps": -104.97685241699219, |
|
"rejected_rewards": -0.7912607789039612, |
|
"reward_accuracy": 0.7041666507720947, |
|
"step": 330 |
|
}, |
|
{ |
|
"chosen_logps": -100.66015625, |
|
"chosen_rewards": -0.5485278367996216, |
|
"epoch": 0.9465505062396986, |
|
"grad_norm": 11.501179850402595, |
|
"learning_rate": 7.934403106416243e-09, |
|
"log_diff_policy": 3.832066774368286, |
|
"logits": -1.4517931938171387, |
|
"logp_accuracy": 0.5583333373069763, |
|
"loss": 0.5689, |
|
"objective": 0.5892359614372253, |
|
"ranking_simple": 0.5583333373069763, |
|
"rejected_logps": -104.49221801757812, |
|
"rejected_rewards": -0.8999612331390381, |
|
"reward_accuracy": 0.7041666507720947, |
|
"step": 335 |
|
}, |
|
{ |
|
"chosen_logps": -99.37032318115234, |
|
"chosen_rewards": -0.5224529504776001, |
|
"epoch": 0.9606781257358136, |
|
"grad_norm": 12.734967304317747, |
|
"learning_rate": 4.143879639202541e-09, |
|
"log_diff_policy": 1.4650262594223022, |
|
"logits": -1.4669277667999268, |
|
"logp_accuracy": 0.5375000238418579, |
|
"loss": 0.5857, |
|
"objective": 0.6081200838088989, |
|
"ranking_simple": 0.5375000238418579, |
|
"rejected_logps": -100.83534240722656, |
|
"rejected_rewards": -0.8256459832191467, |
|
"reward_accuracy": 0.637499988079071, |
|
"step": 340 |
|
}, |
|
{ |
|
"chosen_logps": -98.70734405517578, |
|
"chosen_rewards": -0.5224943161010742, |
|
"epoch": 0.9748057452319284, |
|
"grad_norm": 12.0257535848254, |
|
"learning_rate": 1.5706293207561893e-09, |
|
"log_diff_policy": 4.583549976348877, |
|
"logits": -1.3987572193145752, |
|
"logp_accuracy": 0.5708333253860474, |
|
"loss": 0.585, |
|
"objective": 0.5911051034927368, |
|
"ranking_simple": 0.5708333253860474, |
|
"rejected_logps": -103.2908935546875, |
|
"rejected_rewards": -0.8825021982192993, |
|
"reward_accuracy": 0.6875, |
|
"step": 345 |
|
}, |
|
{ |
|
"chosen_logps": -99.29621124267578, |
|
"chosen_rewards": -0.4423524737358093, |
|
"epoch": 0.9889333647280433, |
|
"grad_norm": 12.354645024534335, |
|
"learning_rate": 2.209692023126819e-10, |
|
"log_diff_policy": 2.835632801055908, |
|
"logits": -1.471450686454773, |
|
"logp_accuracy": 0.5, |
|
"loss": 0.5864, |
|
"objective": 0.5840578079223633, |
|
"ranking_simple": 0.5, |
|
"rejected_logps": -102.13184356689453, |
|
"rejected_rewards": -0.7969415187835693, |
|
"reward_accuracy": 0.75, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.9889333647280433, |
|
"eval_chosen_logps": -100.49518585205078, |
|
"eval_chosen_rewards": -0.6339634656906128, |
|
"eval_log_diff_policy": 2.7547223567962646, |
|
"eval_logits": -1.327553391456604, |
|
"eval_logp_accuracy": 0.54347825050354, |
|
"eval_loss": 0.6549956798553467, |
|
"eval_objective": 0.6503260731697083, |
|
"eval_ranking_simple": 0.54347825050354, |
|
"eval_rejected_logps": -103.24991607666016, |
|
"eval_rejected_rewards": -0.8270449638366699, |
|
"eval_reward_accuracy": 0.6183574795722961, |
|
"eval_runtime": 582.9627, |
|
"eval_samples_per_second": 17.039, |
|
"eval_steps_per_second": 0.71, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.9974099364257123, |
|
"step": 353, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6203088365263034, |
|
"train_runtime": 17039.3075, |
|
"train_samples_per_second": 5.981, |
|
"train_steps_per_second": 0.021 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 353, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|