|
{ |
|
"best_metric": 0.5869565010070801, |
|
"best_model_checkpoint": "./qwen2.5-0.5b/qwen2.5-0.5b-expo-DPO-ES-TRY/checkpoint-583", |
|
"epoch": 2.9976381672177608, |
|
"eval_steps": 53, |
|
"global_step": 1056, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"dpo_loss": 0.6931471824645996, |
|
"dpo_wo_beta": -0.6931471824645996, |
|
"epoch": 0.005668398677373642, |
|
"grad_norm": 13.433600669124935, |
|
"learning_rate": 9.433962264150944e-08, |
|
"logits": -1.3874311447143555, |
|
"logps": -88.43561553955078, |
|
"loss": 0.6931, |
|
"objective": 0.6931471824645996, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.6931471824645996, |
|
"step": 1 |
|
}, |
|
{ |
|
"dpo_loss": 0.693236768245697, |
|
"dpo_wo_beta": -0.6993356347084045, |
|
"epoch": 0.02834199338686821, |
|
"grad_norm": 13.640653628388394, |
|
"learning_rate": 4.716981132075472e-07, |
|
"logits": -1.4090652465820312, |
|
"logps": -84.34337615966797, |
|
"loss": 0.693, |
|
"objective": 0.693236768245697, |
|
"ranking_idealized": 0.6197916865348816, |
|
"ranking_idealized_expo": 0.546875, |
|
"ranking_simple": 0.546875, |
|
"regularize": 0.693236768245697, |
|
"step": 5 |
|
}, |
|
{ |
|
"dpo_loss": 0.6845630407333374, |
|
"dpo_wo_beta": -0.7111619710922241, |
|
"epoch": 0.05668398677373642, |
|
"grad_norm": 12.626074407134174, |
|
"learning_rate": 9.433962264150944e-07, |
|
"logits": -1.4784893989562988, |
|
"logps": -81.94055938720703, |
|
"loss": 0.6892, |
|
"objective": 0.6845630407333374, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 0.6845630407333374, |
|
"step": 10 |
|
}, |
|
{ |
|
"dpo_loss": 0.6825469136238098, |
|
"dpo_wo_beta": -0.8259204626083374, |
|
"epoch": 0.08502598016060463, |
|
"grad_norm": 12.374180595083178, |
|
"learning_rate": 1.4150943396226415e-06, |
|
"logits": -1.4932299852371216, |
|
"logps": -81.52880096435547, |
|
"loss": 0.6814, |
|
"objective": 0.6825469136238098, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 0.6825469136238098, |
|
"step": 15 |
|
}, |
|
{ |
|
"dpo_loss": 0.6950914263725281, |
|
"dpo_wo_beta": -1.2390469312667847, |
|
"epoch": 0.11336797354747284, |
|
"grad_norm": 14.839934392200913, |
|
"learning_rate": 1.8867924528301889e-06, |
|
"logits": -1.5371100902557373, |
|
"logps": -82.72624969482422, |
|
"loss": 0.6711, |
|
"objective": 0.6950914263725281, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 0.6950914263725281, |
|
"step": 20 |
|
}, |
|
{ |
|
"dpo_loss": 0.6556071043014526, |
|
"dpo_wo_beta": -1.110619068145752, |
|
"epoch": 0.14170996693434104, |
|
"grad_norm": 12.89805052529156, |
|
"learning_rate": 2.358490566037736e-06, |
|
"logits": -1.6399922370910645, |
|
"logps": -81.59695434570312, |
|
"loss": 0.6589, |
|
"objective": 0.6556071043014526, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 0.6556071043014526, |
|
"step": 25 |
|
}, |
|
{ |
|
"dpo_loss": 0.6518108248710632, |
|
"dpo_wo_beta": -1.2506839036941528, |
|
"epoch": 0.17005196032120926, |
|
"grad_norm": 12.64998937636519, |
|
"learning_rate": 2.830188679245283e-06, |
|
"logits": -1.6404598951339722, |
|
"logps": -83.20111846923828, |
|
"loss": 0.6451, |
|
"objective": 0.6518108248710632, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.6518108248710632, |
|
"step": 30 |
|
}, |
|
{ |
|
"dpo_loss": 0.6226770877838135, |
|
"dpo_wo_beta": -1.394917368888855, |
|
"epoch": 0.19839395370807747, |
|
"grad_norm": 13.760162421635227, |
|
"learning_rate": 3.30188679245283e-06, |
|
"logits": -1.6237396001815796, |
|
"logps": -87.80964660644531, |
|
"loss": 0.6189, |
|
"objective": 0.6226770877838135, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.6226770877838135, |
|
"step": 35 |
|
}, |
|
{ |
|
"dpo_loss": 0.5924390554428101, |
|
"dpo_wo_beta": -1.422450304031372, |
|
"epoch": 0.22673594709494568, |
|
"grad_norm": 16.810886476613117, |
|
"learning_rate": 3.7735849056603777e-06, |
|
"logits": -1.620682954788208, |
|
"logps": -91.93690490722656, |
|
"loss": 0.6076, |
|
"objective": 0.5924390554428101, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.5924390554428101, |
|
"step": 40 |
|
}, |
|
{ |
|
"dpo_loss": 0.573756217956543, |
|
"dpo_wo_beta": -1.3691534996032715, |
|
"epoch": 0.25507794048181387, |
|
"grad_norm": 13.798774501924722, |
|
"learning_rate": 4.245283018867925e-06, |
|
"logits": -1.7814558744430542, |
|
"logps": -92.24474334716797, |
|
"loss": 0.5989, |
|
"objective": 0.573756217956543, |
|
"ranking_idealized": 0.6208333373069763, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.573756217956543, |
|
"step": 45 |
|
}, |
|
{ |
|
"dpo_loss": 0.5726417899131775, |
|
"dpo_wo_beta": -1.3605374097824097, |
|
"epoch": 0.2834199338686821, |
|
"grad_norm": 12.568473894025988, |
|
"learning_rate": 4.716981132075472e-06, |
|
"logits": -1.808895468711853, |
|
"logps": -90.65751647949219, |
|
"loss": 0.5954, |
|
"objective": 0.5726417899131775, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.42500001192092896, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.5726417899131775, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.300425129900803, |
|
"eval_dpo_loss": 0.7112604975700378, |
|
"eval_dpo_wo_beta": -2.2659413814544678, |
|
"eval_logits": -1.892814040184021, |
|
"eval_logps": -101.36742401123047, |
|
"eval_loss": 0.6816489100456238, |
|
"eval_objective": 0.7112604975700378, |
|
"eval_ranking_idealized": 0.5888429880142212, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5237603187561035, |
|
"eval_regularize": 0.7112604975700378, |
|
"eval_runtime": 211.6587, |
|
"eval_samples_per_second": 27.355, |
|
"eval_steps_per_second": 1.143, |
|
"step": 53 |
|
}, |
|
{ |
|
"dpo_loss": 0.5827316045761108, |
|
"dpo_wo_beta": -1.6213361024856567, |
|
"epoch": 0.3117619272555503, |
|
"grad_norm": 14.442715913160086, |
|
"learning_rate": 4.999781286194085e-06, |
|
"logits": -1.8762638568878174, |
|
"logps": -93.41423797607422, |
|
"loss": 0.5721, |
|
"objective": 0.5827316045761108, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.5827316045761108, |
|
"step": 55 |
|
}, |
|
{ |
|
"dpo_loss": 0.5553872585296631, |
|
"dpo_wo_beta": -1.6468366384506226, |
|
"epoch": 0.3401039206424185, |
|
"grad_norm": 13.845514282811145, |
|
"learning_rate": 4.997321195347154e-06, |
|
"logits": -1.8914529085159302, |
|
"logps": -90.59642028808594, |
|
"loss": 0.5756, |
|
"objective": 0.5553872585296631, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.5553872585296631, |
|
"step": 60 |
|
}, |
|
{ |
|
"dpo_loss": 0.5302771329879761, |
|
"dpo_wo_beta": -1.3166770935058594, |
|
"epoch": 0.3684459140292867, |
|
"grad_norm": 10.846857687148022, |
|
"learning_rate": 4.992130320438411e-06, |
|
"logits": -1.8399535417556763, |
|
"logps": -86.60197448730469, |
|
"loss": 0.5586, |
|
"objective": 0.5302771329879761, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.6208333373069763, |
|
"regularize": 0.5302771329879761, |
|
"step": 65 |
|
}, |
|
{ |
|
"dpo_loss": 0.5711485743522644, |
|
"dpo_wo_beta": -1.7437169551849365, |
|
"epoch": 0.39678790741615494, |
|
"grad_norm": 13.787840238803502, |
|
"learning_rate": 4.984214337613357e-06, |
|
"logits": -1.8178967237472534, |
|
"logps": -91.10688781738281, |
|
"loss": 0.5701, |
|
"objective": 0.5711485743522644, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.5711485743522644, |
|
"step": 70 |
|
}, |
|
{ |
|
"dpo_loss": 0.523643434047699, |
|
"dpo_wo_beta": -1.669514536857605, |
|
"epoch": 0.42512990080302315, |
|
"grad_norm": 13.192298437287352, |
|
"learning_rate": 4.97358190288299e-06, |
|
"logits": -1.8182169198989868, |
|
"logps": -94.8000717163086, |
|
"loss": 0.5205, |
|
"objective": 0.523643434047699, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.6583333611488342, |
|
"regularize": 0.523643434047699, |
|
"step": 75 |
|
}, |
|
{ |
|
"dpo_loss": 0.51079261302948, |
|
"dpo_wo_beta": -1.7271808385849, |
|
"epoch": 0.45347189418989137, |
|
"grad_norm": 15.151373786996814, |
|
"learning_rate": 4.9602446426585845e-06, |
|
"logits": -1.8920824527740479, |
|
"logps": -93.58238220214844, |
|
"loss": 0.5285, |
|
"objective": 0.51079261302948, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.6458333134651184, |
|
"regularize": 0.51079261302948, |
|
"step": 80 |
|
}, |
|
{ |
|
"dpo_loss": 0.5066012144088745, |
|
"dpo_wo_beta": -1.5956443548202515, |
|
"epoch": 0.4818138875767596, |
|
"grad_norm": 12.328960275584794, |
|
"learning_rate": 4.944217141038379e-06, |
|
"logits": -1.8741406202316284, |
|
"logps": -87.06742858886719, |
|
"loss": 0.5202, |
|
"objective": 0.5066012144088745, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.6208333373069763, |
|
"regularize": 0.5066012144088745, |
|
"step": 85 |
|
}, |
|
{ |
|
"dpo_loss": 0.5358369946479797, |
|
"dpo_wo_beta": -1.9357556104660034, |
|
"epoch": 0.5101558809636277, |
|
"grad_norm": 12.694483590051824, |
|
"learning_rate": 4.925516923860083e-06, |
|
"logits": -1.7968534231185913, |
|
"logps": -86.77802276611328, |
|
"loss": 0.4858, |
|
"objective": 0.5358369946479797, |
|
"ranking_idealized": 0.5458333492279053, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.5358369946479797, |
|
"step": 90 |
|
}, |
|
{ |
|
"dpo_loss": 0.4783257842063904, |
|
"dpo_wo_beta": -1.9098786115646362, |
|
"epoch": 0.538497874350496, |
|
"grad_norm": 14.474706973531484, |
|
"learning_rate": 4.904164439536626e-06, |
|
"logits": -1.8568389415740967, |
|
"logps": -88.12813568115234, |
|
"loss": 0.4865, |
|
"objective": 0.4783257842063904, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.6916666626930237, |
|
"regularize": 0.4783257842063904, |
|
"step": 95 |
|
}, |
|
{ |
|
"dpo_loss": 0.4654810130596161, |
|
"dpo_wo_beta": -1.9254087209701538, |
|
"epoch": 0.5668398677373642, |
|
"grad_norm": 13.577084707122001, |
|
"learning_rate": 4.880183036696123e-06, |
|
"logits": -1.938937783241272, |
|
"logps": -92.29436492919922, |
|
"loss": 0.5016, |
|
"objective": 0.4654810130596161, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.6875, |
|
"regularize": 0.4654810130596161, |
|
"step": 100 |
|
}, |
|
{ |
|
"dpo_loss": 0.4374677240848541, |
|
"dpo_wo_beta": -1.4267934560775757, |
|
"epoch": 0.5951818611242324, |
|
"grad_norm": 11.14545328639218, |
|
"learning_rate": 4.853598938650487e-06, |
|
"logits": -1.8158982992172241, |
|
"logps": -90.21449279785156, |
|
"loss": 0.4618, |
|
"objective": 0.4374677240848541, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.6666666865348816, |
|
"regularize": 0.4374677240848541, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.600850259801606, |
|
"eval_dpo_loss": 0.6936022639274597, |
|
"eval_dpo_wo_beta": -2.462427854537964, |
|
"eval_logits": -1.9007418155670166, |
|
"eval_logps": -94.35714721679688, |
|
"eval_loss": 0.6912521123886108, |
|
"eval_objective": 0.6936022639274597, |
|
"eval_ranking_idealized": 0.5888429880142212, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5351239442825317, |
|
"eval_regularize": 0.6936022639274597, |
|
"eval_runtime": 210.2297, |
|
"eval_samples_per_second": 27.541, |
|
"eval_steps_per_second": 1.151, |
|
"step": 106 |
|
}, |
|
{ |
|
"dpo_loss": 0.47933149337768555, |
|
"dpo_wo_beta": -1.9683055877685547, |
|
"epoch": 0.6235238545111006, |
|
"grad_norm": 12.39392340166307, |
|
"learning_rate": 4.824441214720629e-06, |
|
"logits": -1.9334439039230347, |
|
"logps": -87.35523223876953, |
|
"loss": 0.4633, |
|
"objective": 0.47933149337768555, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.4541666805744171, |
|
"ranking_simple": 0.6625000238418579, |
|
"regularize": 0.47933149337768555, |
|
"step": 110 |
|
}, |
|
{ |
|
"dpo_loss": 0.4749464690685272, |
|
"dpo_wo_beta": -1.7375919818878174, |
|
"epoch": 0.6518658478979689, |
|
"grad_norm": 12.612865651893962, |
|
"learning_rate": 4.7927417484495756e-06, |
|
"logits": -1.9057692289352417, |
|
"logps": -87.68991088867188, |
|
"loss": 0.4712, |
|
"objective": 0.4749464690685272, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.6333333253860474, |
|
"regularize": 0.4749464690685272, |
|
"step": 115 |
|
}, |
|
{ |
|
"dpo_loss": 0.4848935306072235, |
|
"dpo_wo_beta": -1.9273093938827515, |
|
"epoch": 0.680207841284837, |
|
"grad_norm": 13.836239066838136, |
|
"learning_rate": 4.758535202738287e-06, |
|
"logits": -1.8775906562805176, |
|
"logps": -87.8878173828125, |
|
"loss": 0.4641, |
|
"objective": 0.4848935306072235, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.6625000238418579, |
|
"regularize": 0.4848935306072235, |
|
"step": 120 |
|
}, |
|
{ |
|
"dpo_loss": 0.4785127639770508, |
|
"dpo_wo_beta": -1.814666748046875, |
|
"epoch": 0.7085498346717053, |
|
"grad_norm": 12.105170057238437, |
|
"learning_rate": 4.721858981942284e-06, |
|
"logits": -1.8346068859100342, |
|
"logps": -86.40522766113281, |
|
"loss": 0.4801, |
|
"objective": 0.4785127639770508, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.6875, |
|
"regularize": 0.4785127639770508, |
|
"step": 125 |
|
}, |
|
{ |
|
"dpo_loss": 0.4548089802265167, |
|
"dpo_wo_beta": -1.4164987802505493, |
|
"epoch": 0.7368918280585735, |
|
"grad_norm": 11.895980627109102, |
|
"learning_rate": 4.682753190970533e-06, |
|
"logits": -1.9488608837127686, |
|
"logps": -79.42195129394531, |
|
"loss": 0.4538, |
|
"objective": 0.4548089802265167, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.44999998807907104, |
|
"ranking_simple": 0.6291666626930237, |
|
"regularize": 0.4548089802265167, |
|
"step": 130 |
|
}, |
|
{ |
|
"dpo_loss": 0.49760884046554565, |
|
"dpo_wo_beta": -1.994195818901062, |
|
"epoch": 0.7652338214454416, |
|
"grad_norm": 12.298776298341995, |
|
"learning_rate": 4.641260591431315e-06, |
|
"logits": -1.9813282489776611, |
|
"logps": -82.40634155273438, |
|
"loss": 0.4433, |
|
"objective": 0.49760884046554565, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.6583333611488342, |
|
"regularize": 0.49760884046554565, |
|
"step": 135 |
|
}, |
|
{ |
|
"dpo_loss": 0.41459351778030396, |
|
"dpo_wo_beta": -1.187635064125061, |
|
"epoch": 0.7935758148323099, |
|
"grad_norm": 12.618720178096575, |
|
"learning_rate": 4.597426554873037e-06, |
|
"logits": -1.97609281539917, |
|
"logps": -83.44467163085938, |
|
"loss": 0.4236, |
|
"objective": 0.41459351778030396, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.6791666746139526, |
|
"regularize": 0.41459351778030396, |
|
"step": 140 |
|
}, |
|
{ |
|
"dpo_loss": 0.4073801636695862, |
|
"dpo_wo_beta": -1.311059832572937, |
|
"epoch": 0.821917808219178, |
|
"grad_norm": 14.417917904409194, |
|
"learning_rate": 4.551299013171111e-06, |
|
"logits": -2.0718839168548584, |
|
"logps": -84.2674560546875, |
|
"loss": 0.4215, |
|
"objective": 0.4073801636695862, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.6916666626930237, |
|
"regularize": 0.4073801636695862, |
|
"step": 145 |
|
}, |
|
{ |
|
"dpo_loss": 0.4207518398761749, |
|
"dpo_wo_beta": -1.50857675075531, |
|
"epoch": 0.8502598016060463, |
|
"grad_norm": 11.543599868064442, |
|
"learning_rate": 4.502928406115152e-06, |
|
"logits": -2.0730583667755127, |
|
"logps": -82.68958282470703, |
|
"loss": 0.4276, |
|
"objective": 0.4207518398761749, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.7208333611488342, |
|
"regularize": 0.4207518398761749, |
|
"step": 150 |
|
}, |
|
{ |
|
"dpo_loss": 0.3847941756248474, |
|
"dpo_wo_beta": -1.4449684619903564, |
|
"epoch": 0.8786017949929145, |
|
"grad_norm": 12.08771803065001, |
|
"learning_rate": 4.452367626253805e-06, |
|
"logits": -2.0991933345794678, |
|
"logps": -85.211181640625, |
|
"loss": 0.3986, |
|
"objective": 0.3847941756248474, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.7250000238418579, |
|
"regularize": 0.3847941756248474, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.9012753897024091, |
|
"eval_dpo_loss": 0.7214789390563965, |
|
"eval_dpo_wo_beta": -3.1229145526885986, |
|
"eval_logits": -2.1450352668762207, |
|
"eval_logps": -95.60012817382812, |
|
"eval_loss": 0.7013870477676392, |
|
"eval_objective": 0.7214789390563965, |
|
"eval_ranking_idealized": 0.5888429880142212, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5351239442825317, |
|
"eval_regularize": 0.7214789390563965, |
|
"eval_runtime": 210.3593, |
|
"eval_samples_per_second": 27.524, |
|
"eval_steps_per_second": 1.15, |
|
"step": 159 |
|
}, |
|
{ |
|
"dpo_loss": 0.4162478744983673, |
|
"dpo_wo_beta": -1.6461573839187622, |
|
"epoch": 0.9069437883797827, |
|
"grad_norm": 12.82345397067452, |
|
"learning_rate": 4.399671961057523e-06, |
|
"logits": -2.0759384632110596, |
|
"logps": -89.25846862792969, |
|
"loss": 0.4236, |
|
"objective": 0.4162478744983673, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.699999988079071, |
|
"regularize": 0.4162478744983673, |
|
"step": 160 |
|
}, |
|
{ |
|
"dpo_loss": 0.41358453035354614, |
|
"dpo_wo_beta": -1.648630976676941, |
|
"epoch": 0.9352857817666509, |
|
"grad_norm": 12.860537676624453, |
|
"learning_rate": 4.3448990324625244e-06, |
|
"logits": -2.024477481842041, |
|
"logps": -88.03329467773438, |
|
"loss": 0.4026, |
|
"objective": 0.41358453035354614, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.7333333492279053, |
|
"regularize": 0.41358453035354614, |
|
"step": 165 |
|
}, |
|
{ |
|
"dpo_loss": 0.378000408411026, |
|
"dpo_wo_beta": -1.2966532707214355, |
|
"epoch": 0.9636277751535192, |
|
"grad_norm": 11.533711130228069, |
|
"learning_rate": 4.288108733862064e-06, |
|
"logits": -2.042527437210083, |
|
"logps": -90.26854705810547, |
|
"loss": 0.3925, |
|
"objective": 0.378000408411026, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.7166666388511658, |
|
"regularize": 0.378000408411026, |
|
"step": 170 |
|
}, |
|
{ |
|
"dpo_loss": 0.3764660954475403, |
|
"dpo_wo_beta": -1.3978971242904663, |
|
"epoch": 0.9919697685403873, |
|
"grad_norm": 12.165192869157089, |
|
"learning_rate": 4.229363164613874e-06, |
|
"logits": -2.0610477924346924, |
|
"logps": -89.8354721069336, |
|
"loss": 0.3793, |
|
"objective": 0.3764660954475403, |
|
"ranking_idealized": 0.6541666388511658, |
|
"ranking_idealized_expo": 0.574999988079071, |
|
"ranking_simple": 0.7916666865348816, |
|
"regularize": 0.3764660954475403, |
|
"step": 175 |
|
}, |
|
{ |
|
"dpo_loss": 0.27626773715019226, |
|
"dpo_wo_beta": -0.8504549860954285, |
|
"epoch": 1.0203117619272555, |
|
"grad_norm": 10.141692447282386, |
|
"learning_rate": 4.168726562135432e-06, |
|
"logits": -2.2514243125915527, |
|
"logps": -90.8476333618164, |
|
"loss": 0.2852, |
|
"objective": 0.27626773715019226, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.8083333373069763, |
|
"regularize": 0.27626773715019226, |
|
"step": 180 |
|
}, |
|
{ |
|
"dpo_loss": 0.23696589469909668, |
|
"dpo_wo_beta": -0.6947117447853088, |
|
"epoch": 1.0486537553141237, |
|
"grad_norm": 13.78702272812957, |
|
"learning_rate": 4.106265231661292e-06, |
|
"logits": -2.158977746963501, |
|
"logps": -95.00120544433594, |
|
"loss": 0.2429, |
|
"objective": 0.23696589469909668, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.8083333373069763, |
|
"regularize": 0.23696589469909668, |
|
"step": 185 |
|
}, |
|
{ |
|
"dpo_loss": 0.26388806104660034, |
|
"dpo_wo_beta": -0.9112051725387573, |
|
"epoch": 1.076995748700992, |
|
"grad_norm": 14.740228375586371, |
|
"learning_rate": 4.042047473739278e-06, |
|
"logits": -2.1533920764923096, |
|
"logps": -101.71949768066406, |
|
"loss": 0.2517, |
|
"objective": 0.26388806104660034, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5541666746139526, |
|
"ranking_simple": 0.8416666388511658, |
|
"regularize": 0.26388806104660034, |
|
"step": 190 |
|
}, |
|
{ |
|
"dpo_loss": 0.2244579941034317, |
|
"dpo_wo_beta": -0.6430780291557312, |
|
"epoch": 1.10533774208786, |
|
"grad_norm": 10.169064121599527, |
|
"learning_rate": 3.976143509544843e-06, |
|
"logits": -2.1589295864105225, |
|
"logps": -96.5248031616211, |
|
"loss": 0.2467, |
|
"objective": 0.2244579941034317, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.8083333373069763, |
|
"regularize": 0.2244579941034317, |
|
"step": 195 |
|
}, |
|
{ |
|
"dpo_loss": 0.24179764091968536, |
|
"dpo_wo_beta": -0.6332272291183472, |
|
"epoch": 1.1336797354747283, |
|
"grad_norm": 9.444774343787891, |
|
"learning_rate": 3.908625404095242e-06, |
|
"logits": -2.2753493785858154, |
|
"logps": -91.93312072753906, |
|
"loss": 0.2563, |
|
"objective": 0.24179764091968536, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.8458333611488342, |
|
"regularize": 0.24179764091968536, |
|
"step": 200 |
|
}, |
|
{ |
|
"dpo_loss": 0.25683078169822693, |
|
"dpo_wo_beta": -0.8531176447868347, |
|
"epoch": 1.1620217288615966, |
|
"grad_norm": 9.240319326762517, |
|
"learning_rate": 3.839566987447492e-06, |
|
"logits": -2.2432618141174316, |
|
"logps": -91.3159408569336, |
|
"loss": 0.2584, |
|
"objective": 0.25683078169822693, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.8166666626930237, |
|
"regularize": 0.25683078169822693, |
|
"step": 205 |
|
}, |
|
{ |
|
"dpo_loss": 0.24292893707752228, |
|
"dpo_wo_beta": -0.8205318450927734, |
|
"epoch": 1.1903637222484649, |
|
"grad_norm": 9.283856100785183, |
|
"learning_rate": 3.7690437739662928e-06, |
|
"logits": -2.2361652851104736, |
|
"logps": -90.6613998413086, |
|
"loss": 0.2551, |
|
"objective": 0.24292893707752228, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.800000011920929, |
|
"regularize": 0.24292893707752228, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.201700519603212, |
|
"eval_dpo_loss": 0.7525234222412109, |
|
"eval_dpo_wo_beta": -3.7749528884887695, |
|
"eval_logits": -2.267778158187866, |
|
"eval_logps": -98.14269256591797, |
|
"eval_loss": 0.7350714206695557, |
|
"eval_objective": 0.7525234222412109, |
|
"eval_ranking_idealized": 0.5888429880142212, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5371900796890259, |
|
"eval_regularize": 0.7525234222412109, |
|
"eval_runtime": 210.8898, |
|
"eval_samples_per_second": 27.455, |
|
"eval_steps_per_second": 1.148, |
|
"step": 212 |
|
}, |
|
{ |
|
"dpo_loss": 0.289533793926239, |
|
"dpo_wo_beta": -0.8810125589370728, |
|
"epoch": 1.2187057156353331, |
|
"grad_norm": 10.72372972136692, |
|
"learning_rate": 3.697132879750174e-06, |
|
"logits": -2.1757090091705322, |
|
"logps": -93.64250183105469, |
|
"loss": 0.2578, |
|
"objective": 0.289533793926239, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.7875000238418579, |
|
"regularize": 0.289533793926239, |
|
"step": 215 |
|
}, |
|
{ |
|
"dpo_loss": 0.25134381651878357, |
|
"dpo_wo_beta": -0.8703542947769165, |
|
"epoch": 1.2470477090222012, |
|
"grad_norm": 12.940604838816247, |
|
"learning_rate": 3.6239129383061764e-06, |
|
"logits": -2.121750593185425, |
|
"logps": -94.44015502929688, |
|
"loss": 0.2676, |
|
"objective": 0.25134381651878357, |
|
"ranking_idealized": 0.6541666388511658, |
|
"ranking_idealized_expo": 0.6041666865348816, |
|
"ranking_simple": 0.8208333253860474, |
|
"regularize": 0.25134381651878357, |
|
"step": 220 |
|
}, |
|
{ |
|
"dpo_loss": 0.23937886953353882, |
|
"dpo_wo_beta": -0.7396827936172485, |
|
"epoch": 1.2753897024090695, |
|
"grad_norm": 9.645711793319885, |
|
"learning_rate": 3.5494640145652647e-06, |
|
"logits": -2.0901684761047363, |
|
"logps": -94.10260772705078, |
|
"loss": 0.2637, |
|
"objective": 0.23937886953353882, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.8541666865348816, |
|
"regularize": 0.23937886953353882, |
|
"step": 225 |
|
}, |
|
{ |
|
"dpo_loss": 0.2818019688129425, |
|
"dpo_wo_beta": -1.1170729398727417, |
|
"epoch": 1.3037316957959377, |
|
"grad_norm": 8.80210598601974, |
|
"learning_rate": 3.4738675173325008e-06, |
|
"logits": -1.9860222339630127, |
|
"logps": -92.9978256225586, |
|
"loss": 0.2776, |
|
"objective": 0.2818019688129425, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.7749999761581421, |
|
"regularize": 0.2818019688129425, |
|
"step": 230 |
|
}, |
|
{ |
|
"dpo_loss": 0.22621506452560425, |
|
"dpo_wo_beta": -0.35843732953071594, |
|
"epoch": 1.3320736891828058, |
|
"grad_norm": 9.267612473930496, |
|
"learning_rate": 3.397206110267713e-06, |
|
"logits": -2.1131467819213867, |
|
"logps": -87.49403381347656, |
|
"loss": 0.2618, |
|
"objective": 0.22621506452560425, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.8333333134651184, |
|
"regularize": 0.22621506452560425, |
|
"step": 235 |
|
}, |
|
{ |
|
"dpo_loss": 0.23632274568080902, |
|
"dpo_wo_beta": -0.6697984933853149, |
|
"epoch": 1.360415682569674, |
|
"grad_norm": 10.68594080832048, |
|
"learning_rate": 3.3195636214939943e-06, |
|
"logits": -2.130047559738159, |
|
"logps": -91.7619857788086, |
|
"loss": 0.2584, |
|
"objective": 0.23632274568080902, |
|
"ranking_idealized": 0.5541666746139526, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.8125, |
|
"regularize": 0.23632274568080902, |
|
"step": 240 |
|
}, |
|
{ |
|
"dpo_loss": 0.2982023358345032, |
|
"dpo_wo_beta": -1.1124054193496704, |
|
"epoch": 1.3887576759565423, |
|
"grad_norm": 10.330360151122868, |
|
"learning_rate": 3.2410249519328848e-06, |
|
"logits": -2.1718757152557373, |
|
"logps": -93.45353698730469, |
|
"loss": 0.2692, |
|
"objective": 0.2982023358345032, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.8041666746139526, |
|
"regularize": 0.2982023358345032, |
|
"step": 245 |
|
}, |
|
{ |
|
"dpo_loss": 0.2403133064508438, |
|
"dpo_wo_beta": -0.7000442147254944, |
|
"epoch": 1.4170996693434104, |
|
"grad_norm": 13.026578288520353, |
|
"learning_rate": 3.1616759824664543e-06, |
|
"logits": -2.145325183868408, |
|
"logps": -94.18195343017578, |
|
"loss": 0.269, |
|
"objective": 0.2403133064508438, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.47083333134651184, |
|
"ranking_simple": 0.824999988079071, |
|
"regularize": 0.2403133064508438, |
|
"step": 250 |
|
}, |
|
{ |
|
"dpo_loss": 0.23977436125278473, |
|
"dpo_wo_beta": -0.5784927010536194, |
|
"epoch": 1.4454416627302786, |
|
"grad_norm": 10.959901566104394, |
|
"learning_rate": 3.081603480027826e-06, |
|
"logits": -2.108074426651001, |
|
"logps": -94.5383529663086, |
|
"loss": 0.2625, |
|
"objective": 0.23977436125278473, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.8374999761581421, |
|
"regularize": 0.23977436125278473, |
|
"step": 255 |
|
}, |
|
{ |
|
"dpo_loss": 0.25297579169273376, |
|
"dpo_wo_beta": -0.7996426820755005, |
|
"epoch": 1.473783656117147, |
|
"grad_norm": 9.578050078679867, |
|
"learning_rate": 3.0008950027228035e-06, |
|
"logits": -2.1828908920288086, |
|
"logps": -92.77781677246094, |
|
"loss": 0.232, |
|
"objective": 0.25297579169273376, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.8041666746139526, |
|
"regularize": 0.25297579169273376, |
|
"step": 260 |
|
}, |
|
{ |
|
"dpo_loss": 0.2830916941165924, |
|
"dpo_wo_beta": -1.124144434928894, |
|
"epoch": 1.5021256495040152, |
|
"grad_norm": 11.27765707111355, |
|
"learning_rate": 2.9196388040863695e-06, |
|
"logits": -2.1150081157684326, |
|
"logps": -95.04662322998047, |
|
"loss": 0.2623, |
|
"objective": 0.2830916941165924, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.8041666746139526, |
|
"regularize": 0.2830916941165924, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 1.5021256495040152, |
|
"eval_dpo_loss": 0.7739136815071106, |
|
"eval_dpo_wo_beta": -4.163427829742432, |
|
"eval_logits": -2.1478331089019775, |
|
"eval_logps": -100.8313217163086, |
|
"eval_loss": 0.7400166392326355, |
|
"eval_objective": 0.7739136815071106, |
|
"eval_ranking_idealized": 0.5888429880142212, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.53925621509552, |
|
"eval_regularize": 0.7739136815071106, |
|
"eval_runtime": 210.8657, |
|
"eval_samples_per_second": 27.458, |
|
"eval_steps_per_second": 1.148, |
|
"step": 265 |
|
}, |
|
{ |
|
"dpo_loss": 0.2930367887020111, |
|
"dpo_wo_beta": -1.3651045560836792, |
|
"epoch": 1.5304676428908834, |
|
"grad_norm": 11.715215816813723, |
|
"learning_rate": 2.8379237365787426e-06, |
|
"logits": -2.035703182220459, |
|
"logps": -97.7331771850586, |
|
"loss": 0.253, |
|
"objective": 0.2930367887020111, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.7916666865348816, |
|
"regularize": 0.2930367887020111, |
|
"step": 270 |
|
}, |
|
{ |
|
"dpo_loss": 0.24886849522590637, |
|
"dpo_wo_beta": -0.8069366216659546, |
|
"epoch": 1.5588096362777515, |
|
"grad_norm": 8.958944325794365, |
|
"learning_rate": 2.7558391544265127e-06, |
|
"logits": -1.9700883626937866, |
|
"logps": -97.53855895996094, |
|
"loss": 0.2491, |
|
"objective": 0.24886849522590637, |
|
"ranking_idealized": 0.6208333373069763, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.8125, |
|
"regularize": 0.24886849522590637, |
|
"step": 275 |
|
}, |
|
{ |
|
"dpo_loss": 0.22936613857746124, |
|
"dpo_wo_beta": -0.6120084524154663, |
|
"epoch": 1.5871516296646198, |
|
"grad_norm": 10.814739938498821, |
|
"learning_rate": 2.6734748159151104e-06, |
|
"logits": -1.9118597507476807, |
|
"logps": -98.06639099121094, |
|
"loss": 0.2491, |
|
"objective": 0.22936613857746124, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.8374999761581421, |
|
"regularize": 0.22936613857746124, |
|
"step": 280 |
|
}, |
|
{ |
|
"dpo_loss": 0.22401383519172668, |
|
"dpo_wo_beta": -0.5180224776268005, |
|
"epoch": 1.615493623051488, |
|
"grad_norm": 11.270657822712987, |
|
"learning_rate": 2.5909207852394363e-06, |
|
"logits": -1.9585484266281128, |
|
"logps": -100.70836639404297, |
|
"loss": 0.2348, |
|
"objective": 0.22401383519172668, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.8291666507720947, |
|
"regularize": 0.22401383519172668, |
|
"step": 285 |
|
}, |
|
{ |
|
"dpo_loss": 0.2646006941795349, |
|
"dpo_wo_beta": -0.7763135433197021, |
|
"epoch": 1.643835616438356, |
|
"grad_norm": 10.585292794409252, |
|
"learning_rate": 2.508267334019988e-06, |
|
"logits": -1.9566444158554077, |
|
"logps": -97.0122299194336, |
|
"loss": 0.2532, |
|
"objective": 0.2646006941795349, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.5625, |
|
"ranking_simple": 0.8041666746139526, |
|
"regularize": 0.2646006941795349, |
|
"step": 290 |
|
}, |
|
{ |
|
"dpo_loss": 0.23113909363746643, |
|
"dpo_wo_beta": -0.6497251987457275, |
|
"epoch": 1.6721776098252243, |
|
"grad_norm": 11.90240881956814, |
|
"learning_rate": 2.4256048425921693e-06, |
|
"logits": -1.8574607372283936, |
|
"logps": -94.91531372070312, |
|
"loss": 0.2476, |
|
"objective": 0.23113909363746643, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.8333333134651184, |
|
"regularize": 0.23113909363746643, |
|
"step": 295 |
|
}, |
|
{ |
|
"dpo_loss": 0.22116926312446594, |
|
"dpo_wo_beta": -0.6268281936645508, |
|
"epoch": 1.7005196032120926, |
|
"grad_norm": 11.745161783871675, |
|
"learning_rate": 2.3430237011767166e-06, |
|
"logits": -1.895004153251648, |
|
"logps": -97.79885864257812, |
|
"loss": 0.2266, |
|
"objective": 0.22116926312446594, |
|
"ranking_idealized": 0.6583333611488342, |
|
"ranking_idealized_expo": 0.574999988079071, |
|
"ranking_simple": 0.8333333134651184, |
|
"regularize": 0.22116926312446594, |
|
"step": 300 |
|
}, |
|
{ |
|
"dpo_loss": 0.24756571650505066, |
|
"dpo_wo_beta": -0.9131773114204407, |
|
"epoch": 1.7288615965989607, |
|
"grad_norm": 12.299641904512029, |
|
"learning_rate": 2.2606142110393248e-06, |
|
"logits": -1.8061485290527344, |
|
"logps": -96.69060516357422, |
|
"loss": 0.2379, |
|
"objective": 0.24756571650505066, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.8291666507720947, |
|
"regularize": 0.24756571650505066, |
|
"step": 305 |
|
}, |
|
{ |
|
"dpo_loss": 0.2321903556585312, |
|
"dpo_wo_beta": -0.6867564916610718, |
|
"epoch": 1.7572035899858292, |
|
"grad_norm": 13.489735935272718, |
|
"learning_rate": 2.1784664857475356e-06, |
|
"logits": -1.8388514518737793, |
|
"logps": -95.04447937011719, |
|
"loss": 0.2456, |
|
"objective": 0.2321903556585312, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.824999988079071, |
|
"regularize": 0.2321903556585312, |
|
"step": 310 |
|
}, |
|
{ |
|
"dpo_loss": 0.2901044189929962, |
|
"dpo_wo_beta": -1.1286156177520752, |
|
"epoch": 1.7855455833726972, |
|
"grad_norm": 10.887596324980125, |
|
"learning_rate": 2.096670352632873e-06, |
|
"logits": -1.75984525680542, |
|
"logps": -94.63612365722656, |
|
"loss": 0.2571, |
|
"objective": 0.2901044189929962, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.8083333373069763, |
|
"regularize": 0.2901044189929962, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 1.8025507794048181, |
|
"eval_dpo_loss": 0.7664583325386047, |
|
"eval_dpo_wo_beta": -4.09501838684082, |
|
"eval_logits": -1.9888346195220947, |
|
"eval_logps": -102.3712158203125, |
|
"eval_loss": 0.7400712966918945, |
|
"eval_objective": 0.7664583325386047, |
|
"eval_ranking_idealized": 0.5888429880142212, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.53925621509552, |
|
"eval_regularize": 0.7664583325386047, |
|
"eval_runtime": 210.274, |
|
"eval_samples_per_second": 27.535, |
|
"eval_steps_per_second": 1.151, |
|
"step": 318 |
|
}, |
|
{ |
|
"dpo_loss": 0.2219768464565277, |
|
"dpo_wo_beta": -0.47742757201194763, |
|
"epoch": 1.8138875767595655, |
|
"grad_norm": 11.029480506309918, |
|
"learning_rate": 2.01531525456598e-06, |
|
"logits": -1.9175788164138794, |
|
"logps": -99.74655151367188, |
|
"loss": 0.2404, |
|
"objective": 0.2219768464565277, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.824999988079071, |
|
"regularize": 0.2219768464565277, |
|
"step": 320 |
|
}, |
|
{ |
|
"dpo_loss": 0.24908211827278137, |
|
"dpo_wo_beta": -0.8014059066772461, |
|
"epoch": 1.8422295701464337, |
|
"grad_norm": 12.92850322071669, |
|
"learning_rate": 1.93449015215215e-06, |
|
"logits": -2.0084919929504395, |
|
"logps": -101.09780883789062, |
|
"loss": 0.2586, |
|
"objective": 0.24908211827278137, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.8333333134651184, |
|
"regularize": 0.24908211827278137, |
|
"step": 325 |
|
}, |
|
{ |
|
"dpo_loss": 0.1984507441520691, |
|
"dpo_wo_beta": -0.3766098618507385, |
|
"epoch": 1.8705715635333018, |
|
"grad_norm": 10.415606016359964, |
|
"learning_rate": 1.8542834264542091e-06, |
|
"logits": -1.851909875869751, |
|
"logps": -94.5366439819336, |
|
"loss": 0.2496, |
|
"objective": 0.1984507441520691, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.8291666507720947, |
|
"regularize": 0.1984507441520691, |
|
"step": 330 |
|
}, |
|
{ |
|
"dpo_loss": 0.26707762479782104, |
|
"dpo_wo_beta": -0.9339324831962585, |
|
"epoch": 1.89891355692017, |
|
"grad_norm": 10.078352873471246, |
|
"learning_rate": 1.7747827823491253e-06, |
|
"logits": -1.9827288389205933, |
|
"logps": -94.26249694824219, |
|
"loss": 0.2463, |
|
"objective": 0.26707762479782104, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.8125, |
|
"regularize": 0.26707762479782104, |
|
"step": 335 |
|
}, |
|
{ |
|
"dpo_loss": 0.2447831928730011, |
|
"dpo_wo_beta": -0.7387041449546814, |
|
"epoch": 1.9272555503070383, |
|
"grad_norm": 10.88136655004607, |
|
"learning_rate": 1.6960751526240122e-06, |
|
"logits": -1.9671465158462524, |
|
"logps": -98.63937377929688, |
|
"loss": 0.2399, |
|
"objective": 0.2447831928730011, |
|
"ranking_idealized": 0.6416666507720947, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.8583333492279053, |
|
"regularize": 0.2447831928730011, |
|
"step": 340 |
|
}, |
|
{ |
|
"dpo_loss": 0.2123527079820633, |
|
"dpo_wo_beta": -0.5544185638427734, |
|
"epoch": 1.9555975436939064, |
|
"grad_norm": 11.18260747105762, |
|
"learning_rate": 1.6182466029163974e-06, |
|
"logits": -1.9572845697402954, |
|
"logps": -100.18721008300781, |
|
"loss": 0.2211, |
|
"objective": 0.2123527079820633, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.8208333253860474, |
|
"regularize": 0.2123527079820633, |
|
"step": 345 |
|
}, |
|
{ |
|
"dpo_loss": 0.2570362389087677, |
|
"dpo_wo_beta": -0.7474013566970825, |
|
"epoch": 1.9839395370807746, |
|
"grad_norm": 11.061918116138507, |
|
"learning_rate": 1.541382237602721e-06, |
|
"logits": -1.8960832357406616, |
|
"logps": -101.65901947021484, |
|
"loss": 0.2316, |
|
"objective": 0.2570362389087677, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.7916666865348816, |
|
"regularize": 0.2570362389087677, |
|
"step": 350 |
|
}, |
|
{ |
|
"dpo_loss": 0.19961656630039215, |
|
"dpo_wo_beta": -0.5642960667610168, |
|
"epoch": 2.012281530467643, |
|
"grad_norm": 7.569515164252156, |
|
"learning_rate": 1.465566106737942e-06, |
|
"logits": -1.8380100727081299, |
|
"logps": -102.71571350097656, |
|
"loss": 0.2103, |
|
"objective": 0.19961656630039215, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.8374999761581421, |
|
"regularize": 0.19961656630039215, |
|
"step": 355 |
|
}, |
|
{ |
|
"dpo_loss": 0.11018560826778412, |
|
"dpo_wo_beta": -0.12253165245056152, |
|
"epoch": 2.040623523854511, |
|
"grad_norm": 6.632276986432463, |
|
"learning_rate": 1.3908811141480408e-06, |
|
"logits": -1.867693543434143, |
|
"logps": -103.06665802001953, |
|
"loss": 0.118, |
|
"objective": 0.11018560826778412, |
|
"ranking_idealized": 0.6625000238418579, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.9041666388511658, |
|
"regularize": 0.11018560826778412, |
|
"step": 360 |
|
}, |
|
{ |
|
"dpo_loss": 0.12077057361602783, |
|
"dpo_wo_beta": -0.197490856051445, |
|
"epoch": 2.0689655172413794, |
|
"grad_norm": 10.213186193965676, |
|
"learning_rate": 1.3174089267758983e-06, |
|
"logits": -1.8255099058151245, |
|
"logps": -110.3724136352539, |
|
"loss": 0.118, |
|
"objective": 0.12077057361602783, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.8666666746139526, |
|
"regularize": 0.12077057361602783, |
|
"step": 365 |
|
}, |
|
{ |
|
"dpo_loss": 0.1337815225124359, |
|
"dpo_wo_beta": -0.27523547410964966, |
|
"epoch": 2.0973075106282475, |
|
"grad_norm": 9.926730675582434, |
|
"learning_rate": 1.245229885379699e-06, |
|
"logits": -1.7588540315628052, |
|
"logps": -111.99506378173828, |
|
"loss": 0.1227, |
|
"objective": 0.1337815225124359, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.8916666507720947, |
|
"regularize": 0.1337815225124359, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.1029759093056213, |
|
"eval_dpo_loss": 0.9223728179931641, |
|
"eval_dpo_wo_beta": -6.4510064125061035, |
|
"eval_logits": -1.8644566535949707, |
|
"eval_logps": -122.00161743164062, |
|
"eval_loss": 0.8844180107116699, |
|
"eval_objective": 0.9223728179931641, |
|
"eval_ranking_idealized": 0.5888429880142212, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5423553586006165, |
|
"eval_regularize": 0.9223728179931641, |
|
"eval_runtime": 210.7356, |
|
"eval_samples_per_second": 27.475, |
|
"eval_steps_per_second": 1.148, |
|
"step": 371 |
|
}, |
|
{ |
|
"dpo_loss": 0.10664375871419907, |
|
"dpo_wo_beta": -0.2532973289489746, |
|
"epoch": 2.1256495040151155, |
|
"grad_norm": 9.740007111179482, |
|
"learning_rate": 1.1744229166814889e-06, |
|
"logits": -1.696647047996521, |
|
"logps": -118.39366149902344, |
|
"loss": 0.1103, |
|
"objective": 0.10664375871419907, |
|
"ranking_idealized": 0.675000011920929, |
|
"ranking_idealized_expo": 0.5916666388511658, |
|
"ranking_simple": 0.925000011920929, |
|
"regularize": 0.10664375871419907, |
|
"step": 375 |
|
}, |
|
{ |
|
"dpo_loss": 0.12854978442192078, |
|
"dpo_wo_beta": -0.27664583921432495, |
|
"epoch": 2.153991497401984, |
|
"grad_norm": 9.699256456859702, |
|
"learning_rate": 1.1050654470619602e-06, |
|
"logits": -1.700494647026062, |
|
"logps": -114.1063232421875, |
|
"loss": 0.1208, |
|
"objective": 0.12854978442192078, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.8999999761581421, |
|
"regularize": 0.12854978442192078, |
|
"step": 380 |
|
}, |
|
{ |
|
"dpo_loss": 0.10418140888214111, |
|
"dpo_wo_beta": -0.09889766573905945, |
|
"epoch": 2.182333490788852, |
|
"grad_norm": 9.620361843085416, |
|
"learning_rate": 1.0372333178958462e-06, |
|
"logits": -1.8633235692977905, |
|
"logps": -110.55794525146484, |
|
"loss": 0.1244, |
|
"objective": 0.10418140888214111, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.9125000238418579, |
|
"regularize": 0.10418140888214111, |
|
"step": 385 |
|
}, |
|
{ |
|
"dpo_loss": 0.12462247163057327, |
|
"dpo_wo_beta": -0.2658768594264984, |
|
"epoch": 2.21067548417572, |
|
"grad_norm": 11.000881222201947, |
|
"learning_rate": 9.710007026204896e-07, |
|
"logits": -1.7877620458602905, |
|
"logps": -112.08268737792969, |
|
"loss": 0.1204, |
|
"objective": 0.12462247163057327, |
|
"ranking_idealized": 0.5249999761581421, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.8833333253860474, |
|
"regularize": 0.12462247163057327, |
|
"step": 390 |
|
}, |
|
{ |
|
"dpo_loss": 0.11183874309062958, |
|
"dpo_wo_beta": -0.3540593981742859, |
|
"epoch": 2.2390174775625886, |
|
"grad_norm": 8.717110295390793, |
|
"learning_rate": 9.064400256282757e-07, |
|
"logits": -1.8010636568069458, |
|
"logps": -110.48490142822266, |
|
"loss": 0.1248, |
|
"objective": 0.11183874309062958, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.9041666388511658, |
|
"regularize": 0.11183874309062958, |
|
"step": 395 |
|
}, |
|
{ |
|
"dpo_loss": 0.12893003225326538, |
|
"dpo_wo_beta": -0.3680768311023712, |
|
"epoch": 2.2673594709494567, |
|
"grad_norm": 9.562073048936949, |
|
"learning_rate": 8.436218830716259e-07, |
|
"logits": -1.8909595012664795, |
|
"logps": -111.70219421386719, |
|
"loss": 0.1193, |
|
"objective": 0.12893003225326538, |
|
"ranking_idealized": 0.6625000238418579, |
|
"ranking_idealized_expo": 0.5708333253860474, |
|
"ranking_simple": 0.9166666865348816, |
|
"regularize": 0.12893003225326538, |
|
"step": 400 |
|
}, |
|
{ |
|
"dpo_loss": 0.13196416199207306, |
|
"dpo_wo_beta": -0.17852090299129486, |
|
"epoch": 2.295701464336325, |
|
"grad_norm": 9.166021194752298, |
|
"learning_rate": 7.826149656671386e-07, |
|
"logits": -1.9320632219314575, |
|
"logps": -108.1246566772461, |
|
"loss": 0.1267, |
|
"objective": 0.13196416199207306, |
|
"ranking_idealized": 0.6416666507720947, |
|
"ranking_idealized_expo": 0.5583333373069763, |
|
"ranking_simple": 0.8958333134651184, |
|
"regularize": 0.13196416199207306, |
|
"step": 405 |
|
}, |
|
{ |
|
"dpo_loss": 0.11071384698152542, |
|
"dpo_wo_beta": -0.1424117088317871, |
|
"epoch": 2.324043457723193, |
|
"grad_norm": 8.918983804471582, |
|
"learning_rate": 7.234859835833022e-07, |
|
"logits": -1.8304682970046997, |
|
"logps": -111.2301025390625, |
|
"loss": 0.112, |
|
"objective": 0.11071384698152542, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.9041666388511658, |
|
"regularize": 0.11071384698152542, |
|
"step": 410 |
|
}, |
|
{ |
|
"dpo_loss": 0.1223960742354393, |
|
"dpo_wo_beta": -0.1956464648246765, |
|
"epoch": 2.3523854511100613, |
|
"grad_norm": 9.386393866562546, |
|
"learning_rate": 6.662995934939007e-07, |
|
"logits": -1.8708041906356812, |
|
"logps": -111.06449890136719, |
|
"loss": 0.1155, |
|
"objective": 0.1223960742354393, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.9083333611488342, |
|
"regularize": 0.1223960742354393, |
|
"step": 415 |
|
}, |
|
{ |
|
"dpo_loss": 0.12930770218372345, |
|
"dpo_wo_beta": -0.21560731530189514, |
|
"epoch": 2.3807274444969297, |
|
"grad_norm": 11.0131183307354, |
|
"learning_rate": 6.111183278768956e-07, |
|
"logits": -1.860797643661499, |
|
"logps": -113.08780670166016, |
|
"loss": 0.133, |
|
"objective": 0.12930770218372345, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.9208333492279053, |
|
"regularize": 0.12930770218372345, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.403401039206424, |
|
"eval_dpo_loss": 0.8785684108734131, |
|
"eval_dpo_wo_beta": -5.887755870819092, |
|
"eval_logits": -2.0276894569396973, |
|
"eval_logps": -117.1216812133789, |
|
"eval_loss": 0.8447906374931335, |
|
"eval_objective": 0.8785684108734131, |
|
"eval_ranking_idealized": 0.5888429880142212, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5413222908973694, |
|
"eval_regularize": 0.8785684108734131, |
|
"eval_runtime": 209.8564, |
|
"eval_samples_per_second": 27.59, |
|
"eval_steps_per_second": 1.153, |
|
"step": 424 |
|
}, |
|
{ |
|
"dpo_loss": 0.117975153028965, |
|
"dpo_wo_beta": -0.1884605884552002, |
|
"epoch": 2.413793103448276, |
|
"grad_norm": 11.036168833651558, |
|
"learning_rate": 5.580025266360764e-07, |
|
"logits": -1.7822004556655884, |
|
"logps": -114.43038177490234, |
|
"loss": 0.1465, |
|
"objective": 0.117975153028965, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.5625, |
|
"ranking_simple": 0.9375, |
|
"regularize": 0.117975153028965, |
|
"step": 425 |
|
}, |
|
{ |
|
"dpo_loss": 0.1465020477771759, |
|
"dpo_wo_beta": -0.2595965266227722, |
|
"epoch": 2.442135096835144, |
|
"grad_norm": 10.595070818850646, |
|
"learning_rate": 5.070102711202606e-07, |
|
"logits": -1.8692681789398193, |
|
"logps": -110.2762680053711, |
|
"loss": 0.1276, |
|
"objective": 0.1465020477771759, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.8958333134651184, |
|
"regularize": 0.1465020477771759, |
|
"step": 430 |
|
}, |
|
{ |
|
"dpo_loss": 0.09775053709745407, |
|
"dpo_wo_beta": -0.12755917012691498, |
|
"epoch": 2.4704770902220123, |
|
"grad_norm": 9.393206692367766, |
|
"learning_rate": 4.581973206121948e-07, |
|
"logits": -1.8968538045883179, |
|
"logps": -112.28767395019531, |
|
"loss": 0.1175, |
|
"objective": 0.09775053709745407, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.9083333611488342, |
|
"regularize": 0.09775053709745407, |
|
"step": 435 |
|
}, |
|
{ |
|
"dpo_loss": 0.14228057861328125, |
|
"dpo_wo_beta": -0.3639788329601288, |
|
"epoch": 2.4988190836088804, |
|
"grad_norm": 8.020134663378592, |
|
"learning_rate": 4.116170513565942e-07, |
|
"logits": -1.8666160106658936, |
|
"logps": -109.18843078613281, |
|
"loss": 0.1167, |
|
"objective": 0.14228057861328125, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.875, |
|
"regularize": 0.14228057861328125, |
|
"step": 440 |
|
}, |
|
{ |
|
"dpo_loss": 0.13583588600158691, |
|
"dpo_wo_beta": -0.2074100226163864, |
|
"epoch": 2.527161076995749, |
|
"grad_norm": 9.224367796824264, |
|
"learning_rate": 3.6732039819400686e-07, |
|
"logits": -1.8071045875549316, |
|
"logps": -107.2675552368164, |
|
"loss": 0.1319, |
|
"objective": 0.13583588600158691, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.8791666626930237, |
|
"regularize": 0.13583588600158691, |
|
"step": 445 |
|
}, |
|
{ |
|
"dpo_loss": 0.17114870250225067, |
|
"dpo_wo_beta": -0.43270742893218994, |
|
"epoch": 2.555503070382617, |
|
"grad_norm": 11.265861710797749, |
|
"learning_rate": 3.253557988643072e-07, |
|
"logits": -1.9256045818328857, |
|
"logps": -111.20384216308594, |
|
"loss": 0.1288, |
|
"objective": 0.17114870250225067, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.862500011920929, |
|
"regularize": 0.17114870250225067, |
|
"step": 450 |
|
}, |
|
{ |
|
"dpo_loss": 0.10827689617872238, |
|
"dpo_wo_beta": -0.1751028150320053, |
|
"epoch": 2.583845063769485, |
|
"grad_norm": 9.605136286662574, |
|
"learning_rate": 2.8576914104074425e-07, |
|
"logits": -1.9289051294326782, |
|
"logps": -109.37706756591797, |
|
"loss": 0.1168, |
|
"objective": 0.10827689617872238, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.9125000238418579, |
|
"regularize": 0.10827689617872238, |
|
"step": 455 |
|
}, |
|
{ |
|
"dpo_loss": 0.11124877631664276, |
|
"dpo_wo_beta": -0.28054580092430115, |
|
"epoch": 2.6121870571563535, |
|
"grad_norm": 9.957466667064367, |
|
"learning_rate": 2.486037121524448e-07, |
|
"logits": -1.93342924118042, |
|
"logps": -113.2356948852539, |
|
"loss": 0.1169, |
|
"objective": 0.11124877631664276, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.9375, |
|
"regularize": 0.11124877631664276, |
|
"step": 460 |
|
}, |
|
{ |
|
"dpo_loss": 0.12714476883411407, |
|
"dpo_wo_beta": -0.22146105766296387, |
|
"epoch": 2.6405290505432215, |
|
"grad_norm": 10.04326854921629, |
|
"learning_rate": 2.13900152050239e-07, |
|
"logits": -1.8874350786209106, |
|
"logps": -108.94982147216797, |
|
"loss": 0.1189, |
|
"objective": 0.12714476883411407, |
|
"ranking_idealized": 0.5458333492279053, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.862500011920929, |
|
"regularize": 0.12714476883411407, |
|
"step": 465 |
|
}, |
|
{ |
|
"dpo_loss": 0.12151040881872177, |
|
"dpo_wo_beta": -0.26416900753974915, |
|
"epoch": 2.66887104393009, |
|
"grad_norm": 8.777820527737605, |
|
"learning_rate": 1.8169640856758652e-07, |
|
"logits": -1.9314534664154053, |
|
"logps": -112.75170135498047, |
|
"loss": 0.1254, |
|
"objective": 0.12151040881872177, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.5541666746139526, |
|
"ranking_simple": 0.9083333611488342, |
|
"regularize": 0.12151040881872177, |
|
"step": 470 |
|
}, |
|
{ |
|
"dpo_loss": 0.12749101221561432, |
|
"dpo_wo_beta": -0.2816121280193329, |
|
"epoch": 2.697213037316958, |
|
"grad_norm": 9.221778751171357, |
|
"learning_rate": 1.5202769602517514e-07, |
|
"logits": -1.8307260274887085, |
|
"logps": -109.39693450927734, |
|
"loss": 0.1211, |
|
"objective": 0.12749101221561432, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.8999999761581421, |
|
"regularize": 0.12749101221561432, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 2.708549834671705, |
|
"eval_dpo_loss": 0.8738968372344971, |
|
"eval_dpo_wo_beta": -5.815241813659668, |
|
"eval_logits": -2.0271613597869873, |
|
"eval_logps": -116.42301177978516, |
|
"eval_loss": 0.8371492624282837, |
|
"eval_objective": 0.8738968372344971, |
|
"eval_ranking_idealized": 0.5888429880142212, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5402892827987671, |
|
"eval_regularize": 0.8738968372344971, |
|
"eval_runtime": 211.9437, |
|
"eval_samples_per_second": 27.319, |
|
"eval_steps_per_second": 1.142, |
|
"step": 477 |
|
}, |
|
{ |
|
"dpo_loss": 0.13781045377254486, |
|
"dpo_wo_beta": -0.2485995590686798, |
|
"epoch": 2.725555030703826, |
|
"grad_norm": 10.971551462649595, |
|
"learning_rate": 1.2492645672457838e-07, |
|
"logits": -1.9437103271484375, |
|
"logps": -108.93817901611328, |
|
"loss": 0.1267, |
|
"objective": 0.13781045377254486, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.8999999761581421, |
|
"regularize": 0.13781045377254486, |
|
"step": 480 |
|
}, |
|
{ |
|
"dpo_loss": 0.11082082241773605, |
|
"dpo_wo_beta": -0.10876031965017319, |
|
"epoch": 2.753897024090694, |
|
"grad_norm": 10.884940640535042, |
|
"learning_rate": 1.004223254730749e-07, |
|
"logits": -1.7556992769241333, |
|
"logps": -114.1142807006836, |
|
"loss": 0.1222, |
|
"objective": 0.11082082241773605, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.9041666388511658, |
|
"regularize": 0.11082082241773605, |
|
"step": 485 |
|
}, |
|
{ |
|
"dpo_loss": 0.09154360741376877, |
|
"dpo_wo_beta": -0.05899694189429283, |
|
"epoch": 2.7822390174775626, |
|
"grad_norm": 11.015982469457516, |
|
"learning_rate": 7.854209717842231e-08, |
|
"logits": -1.8848822116851807, |
|
"logps": -110.15470886230469, |
|
"loss": 0.1058, |
|
"objective": 0.09154360741376877, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.47083333134651184, |
|
"ranking_simple": 0.9333333373069763, |
|
"regularize": 0.09154360741376877, |
|
"step": 490 |
|
}, |
|
{ |
|
"dpo_loss": 0.10964310169219971, |
|
"dpo_wo_beta": -0.07648710906505585, |
|
"epoch": 2.8105810108644307, |
|
"grad_norm": 10.079416267782939, |
|
"learning_rate": 5.930969754901844e-08, |
|
"logits": -1.8575230836868286, |
|
"logps": -108.52234649658203, |
|
"loss": 0.1192, |
|
"objective": 0.10964310169219971, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.8666666746139526, |
|
"regularize": 0.10964310169219971, |
|
"step": 495 |
|
}, |
|
{ |
|
"dpo_loss": 0.09479068219661713, |
|
"dpo_wo_beta": -0.03411731496453285, |
|
"epoch": 2.838923004251299, |
|
"grad_norm": 9.84080114767598, |
|
"learning_rate": 4.2746156931490756e-08, |
|
"logits": -1.8439643383026123, |
|
"logps": -109.77281188964844, |
|
"loss": 0.1213, |
|
"objective": 0.09479068219661713, |
|
"ranking_idealized": 0.6208333373069763, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.925000011920929, |
|
"regularize": 0.09479068219661713, |
|
"step": 500 |
|
}, |
|
{ |
|
"dpo_loss": 0.12725140154361725, |
|
"dpo_wo_beta": -0.18973813951015472, |
|
"epoch": 2.8672649976381672, |
|
"grad_norm": 9.973754192936779, |
|
"learning_rate": 2.8869587314321324e-08, |
|
"logits": -1.8574442863464355, |
|
"logps": -110.32710266113281, |
|
"loss": 0.132, |
|
"objective": 0.12725140154361725, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.8999999761581421, |
|
"regularize": 0.12725140154361725, |
|
"step": 505 |
|
}, |
|
{ |
|
"dpo_loss": 0.10469380766153336, |
|
"dpo_wo_beta": -0.1985001415014267, |
|
"epoch": 2.8956069910250353, |
|
"grad_norm": 8.936464383287202, |
|
"learning_rate": 1.7695162522652352e-08, |
|
"logits": -1.8629390001296997, |
|
"logps": -113.56767272949219, |
|
"loss": 0.1218, |
|
"objective": 0.10469380766153336, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.8916666507720947, |
|
"regularize": 0.10469380766153336, |
|
"step": 510 |
|
}, |
|
{ |
|
"dpo_loss": 0.11260154843330383, |
|
"dpo_wo_beta": -0.15691885352134705, |
|
"epoch": 2.9239489844119038, |
|
"grad_norm": 9.442300088571939, |
|
"learning_rate": 9.235101625932885e-09, |
|
"logits": -1.946829915046692, |
|
"logps": -108.54016876220703, |
|
"loss": 0.1258, |
|
"objective": 0.11260154843330383, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.925000011920929, |
|
"regularize": 0.11260154843330383, |
|
"step": 515 |
|
}, |
|
{ |
|
"dpo_loss": 0.12230218201875687, |
|
"dpo_wo_beta": -0.10489177703857422, |
|
"epoch": 2.952290977798772, |
|
"grad_norm": 9.279898048101137, |
|
"learning_rate": 3.4986555765434415e-09, |
|
"logits": -1.8482831716537476, |
|
"logps": -114.20655059814453, |
|
"loss": 0.1228, |
|
"objective": 0.12230218201875687, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.8708333373069763, |
|
"regularize": 0.12230218201875687, |
|
"step": 520 |
|
}, |
|
{ |
|
"dpo_loss": 0.13335375487804413, |
|
"dpo_wo_beta": -0.35261282324790955, |
|
"epoch": 2.9806329711856403, |
|
"grad_norm": 11.094809681697281, |
|
"learning_rate": 4.920970940180958e-10, |
|
"logits": -1.876869797706604, |
|
"logps": -111.03084564208984, |
|
"loss": 0.1235, |
|
"objective": 0.13335375487804413, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.8958333134651184, |
|
"regularize": 0.13335375487804413, |
|
"step": 525 |
|
}, |
|
{ |
|
"dpo_loss": 0.08052093535661697, |
|
"dpo_wo_beta": -0.004379949066787958, |
|
"epoch": 1.5044874822862542, |
|
"grad_norm": 9.015683090612498, |
|
"learning_rate": 2.9196388040863695e-06, |
|
"logits": -1.833287239074707, |
|
"logps": -114.10733795166016, |
|
"loss": 0.0858, |
|
"objective": 0.08052093535661697, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.9375, |
|
"regularize": 0.08052093535661697, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.5044874822862542, |
|
"eval_dpo_loss": 0.8753401041030884, |
|
"eval_dpo_wo_beta": -5.922874450683594, |
|
"eval_logits": -2.4529590606689453, |
|
"eval_logps": -118.25288391113281, |
|
"eval_loss": 0.8505071997642517, |
|
"eval_objective": 0.8753401041030884, |
|
"eval_ranking_idealized": 0.6045548915863037, |
|
"eval_ranking_idealized_expo": 0.5279502868652344, |
|
"eval_ranking_simple": 0.5683229565620422, |
|
"eval_regularize": 0.8753401041030884, |
|
"eval_runtime": 344.4459, |
|
"eval_samples_per_second": 16.81, |
|
"eval_steps_per_second": 1.402, |
|
"step": 530 |
|
}, |
|
{ |
|
"dpo_loss": 0.06306228041648865, |
|
"dpo_wo_beta": -0.08629266172647476, |
|
"epoch": 1.5186584789796882, |
|
"grad_norm": 12.634106279395441, |
|
"learning_rate": 2.8788330549198512e-06, |
|
"logits": -1.8371531963348389, |
|
"logps": -115.510009765625, |
|
"loss": 0.0766, |
|
"objective": 0.06306228041648865, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.9166666865348816, |
|
"regularize": 0.06306228041648865, |
|
"step": 535 |
|
}, |
|
{ |
|
"dpo_loss": 0.13102607429027557, |
|
"dpo_wo_beta": -0.41934680938720703, |
|
"epoch": 1.5328294756731222, |
|
"grad_norm": 14.583212522491863, |
|
"learning_rate": 2.8379237365787426e-06, |
|
"logits": -1.7649121284484863, |
|
"logps": -127.44564819335938, |
|
"loss": 0.0928, |
|
"objective": 0.13102607429027557, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.8999999761581421, |
|
"regularize": 0.13102607429027557, |
|
"step": 540 |
|
}, |
|
{ |
|
"dpo_loss": 0.12010473757982254, |
|
"dpo_wo_beta": -0.2892196476459503, |
|
"epoch": 1.5470004723665565, |
|
"grad_norm": 18.74636858022955, |
|
"learning_rate": 2.7969220332622004e-06, |
|
"logits": -1.7403244972229004, |
|
"logps": -126.89453125, |
|
"loss": 0.1114, |
|
"objective": 0.12010473757982254, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.925000011920929, |
|
"regularize": 0.12010473757982254, |
|
"step": 545 |
|
}, |
|
{ |
|
"dpo_loss": 0.05550822243094444, |
|
"dpo_wo_beta": -0.0006199590279720724, |
|
"epoch": 1.5611714690599907, |
|
"grad_norm": 16.65748534309405, |
|
"learning_rate": 2.7558391544265127e-06, |
|
"logits": -1.7434070110321045, |
|
"logps": -120.79520416259766, |
|
"loss": 0.0872, |
|
"objective": 0.05550822243094444, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.9750000238418579, |
|
"regularize": 0.05550822243094444, |
|
"step": 550 |
|
}, |
|
{ |
|
"dpo_loss": 0.12240471690893173, |
|
"dpo_wo_beta": -0.22643856704235077, |
|
"epoch": 1.5753424657534247, |
|
"grad_norm": 12.27653222683965, |
|
"learning_rate": 2.714686331720543e-06, |
|
"logits": -1.8163702487945557, |
|
"logps": -123.55280303955078, |
|
"loss": 0.103, |
|
"objective": 0.12240471690893173, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.875, |
|
"regularize": 0.12240471690893173, |
|
"step": 555 |
|
}, |
|
{ |
|
"dpo_loss": 0.1260891556739807, |
|
"dpo_wo_beta": -0.384922057390213, |
|
"epoch": 1.5895134624468588, |
|
"grad_norm": 14.054554594902122, |
|
"learning_rate": 2.6734748159151104e-06, |
|
"logits": -1.8347235918045044, |
|
"logps": -115.50324249267578, |
|
"loss": 0.1171, |
|
"objective": 0.1260891556739807, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.8916666507720947, |
|
"regularize": 0.1260891556739807, |
|
"step": 560 |
|
}, |
|
{ |
|
"dpo_loss": 0.11428937315940857, |
|
"dpo_wo_beta": -0.142462819814682, |
|
"epoch": 1.6036844591402928, |
|
"grad_norm": 8.888993907970832, |
|
"learning_rate": 2.632215873827142e-06, |
|
"logits": -1.9027162790298462, |
|
"logps": -117.14366912841797, |
|
"loss": 0.1142, |
|
"objective": 0.11428937315940857, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.4583333432674408, |
|
"ranking_simple": 0.8916666507720947, |
|
"regularize": 0.11428937315940857, |
|
"step": 565 |
|
}, |
|
{ |
|
"dpo_loss": 0.14770367741584778, |
|
"dpo_wo_beta": -0.5861695408821106, |
|
"epoch": 1.6178554558337268, |
|
"grad_norm": 9.396256315509223, |
|
"learning_rate": 2.5909207852394363e-06, |
|
"logits": -2.088587760925293, |
|
"logps": -113.1082992553711, |
|
"loss": 0.1098, |
|
"objective": 0.14770367741584778, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.9083333611488342, |
|
"regularize": 0.14770367741584778, |
|
"step": 570 |
|
}, |
|
{ |
|
"dpo_loss": 0.1413314789533615, |
|
"dpo_wo_beta": -0.3796103298664093, |
|
"epoch": 1.632026452527161, |
|
"grad_norm": 16.325704474226345, |
|
"learning_rate": 2.5496008398168844e-06, |
|
"logits": -1.9472541809082031, |
|
"logps": -117.12679290771484, |
|
"loss": 0.1404, |
|
"objective": 0.1413314789533615, |
|
"ranking_idealized": 0.6583333611488342, |
|
"ranking_idealized_expo": 0.6166666746139526, |
|
"ranking_simple": 0.9166666865348816, |
|
"regularize": 0.1413314789533615, |
|
"step": 575 |
|
}, |
|
{ |
|
"dpo_loss": 0.1715042144060135, |
|
"dpo_wo_beta": -0.48858124017715454, |
|
"epoch": 1.6461974492205953, |
|
"grad_norm": 11.97898567349363, |
|
"learning_rate": 2.508267334019988e-06, |
|
"logits": -1.935112476348877, |
|
"logps": -107.23208618164062, |
|
"loss": 0.1274, |
|
"objective": 0.1715042144060135, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.8999999761581421, |
|
"regularize": 0.1715042144060135, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.6547000472366555, |
|
"eval_dpo_loss": 0.8264312148094177, |
|
"eval_dpo_wo_beta": -5.2847466468811035, |
|
"eval_logits": -2.4379913806915283, |
|
"eval_logps": -119.59071350097656, |
|
"eval_loss": 0.808626651763916, |
|
"eval_objective": 0.8264312148094177, |
|
"eval_ranking_idealized": 0.6045548915863037, |
|
"eval_ranking_idealized_expo": 0.5279502868652344, |
|
"eval_ranking_simple": 0.5869565010070801, |
|
"eval_regularize": 0.8264312148094177, |
|
"eval_runtime": 355.6593, |
|
"eval_samples_per_second": 16.28, |
|
"eval_steps_per_second": 1.358, |
|
"step": 583 |
|
}, |
|
{ |
|
"dpo_loss": 0.1341490000486374, |
|
"dpo_wo_beta": -0.5180007815361023, |
|
"epoch": 1.6603684459140293, |
|
"grad_norm": 15.27561747473412, |
|
"learning_rate": 2.46693156801652e-06, |
|
"logits": -1.8945667743682861, |
|
"logps": -110.8204116821289, |
|
"loss": 0.1227, |
|
"objective": 0.1341490000486374, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.9083333611488342, |
|
"regularize": 0.1341490000486374, |
|
"step": 585 |
|
}, |
|
{ |
|
"dpo_loss": 0.13649246096611023, |
|
"dpo_wo_beta": -0.5407892465591431, |
|
"epoch": 1.6745394426074633, |
|
"grad_norm": 8.066407586221112, |
|
"learning_rate": 2.4256048425921693e-06, |
|
"logits": -1.8402847051620483, |
|
"logps": -117.5975112915039, |
|
"loss": 0.1263, |
|
"objective": 0.13649246096611023, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.9083333611488342, |
|
"regularize": 0.13649246096611023, |
|
"step": 590 |
|
}, |
|
{ |
|
"dpo_loss": 0.14796380698680878, |
|
"dpo_wo_beta": -0.5909832119941711, |
|
"epoch": 1.6887104393008974, |
|
"grad_norm": 17.4246056389601, |
|
"learning_rate": 2.384298456061023e-06, |
|
"logits": -1.8368481397628784, |
|
"logps": -117.76559448242188, |
|
"loss": 0.1544, |
|
"objective": 0.14796380698680878, |
|
"ranking_idealized": 0.675000011920929, |
|
"ranking_idealized_expo": 0.5916666388511658, |
|
"ranking_simple": 0.8916666507720947, |
|
"regularize": 0.14796380698680878, |
|
"step": 595 |
|
}, |
|
{ |
|
"dpo_loss": 0.15043622255325317, |
|
"dpo_wo_beta": -0.6607655882835388, |
|
"epoch": 1.7028814359943316, |
|
"grad_norm": 18.447927668502214, |
|
"learning_rate": 2.3430237011767166e-06, |
|
"logits": -1.839497447013855, |
|
"logps": -112.93590545654297, |
|
"loss": 0.15, |
|
"objective": 0.15043622255325317, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.875, |
|
"regularize": 0.15043622255325317, |
|
"step": 600 |
|
}, |
|
{ |
|
"dpo_loss": 0.11465544998645782, |
|
"dpo_wo_beta": -0.34068503975868225, |
|
"epoch": 1.7170524326877659, |
|
"grad_norm": 12.561303443006217, |
|
"learning_rate": 2.30179186204511e-06, |
|
"logits": -1.7405670881271362, |
|
"logps": -112.9049072265625, |
|
"loss": 0.1278, |
|
"objective": 0.11465544998645782, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.8999999761581421, |
|
"regularize": 0.11465544998645782, |
|
"step": 605 |
|
}, |
|
{ |
|
"dpo_loss": 0.1477740854024887, |
|
"dpo_wo_beta": -0.4079127609729767, |
|
"epoch": 1.7312234293811999, |
|
"grad_norm": 17.631548474702864, |
|
"learning_rate": 2.2606142110393248e-06, |
|
"logits": -1.7865701913833618, |
|
"logps": -109.32853698730469, |
|
"loss": 0.1331, |
|
"objective": 0.1477740854024887, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.875, |
|
"regularize": 0.1477740854024887, |
|
"step": 610 |
|
}, |
|
{ |
|
"dpo_loss": 0.18279190361499786, |
|
"dpo_wo_beta": -0.8598226308822632, |
|
"epoch": 1.745394426074634, |
|
"grad_norm": 10.88386046876842, |
|
"learning_rate": 2.2195020057179897e-06, |
|
"logits": -1.7821184396743774, |
|
"logps": -111.59290313720703, |
|
"loss": 0.136, |
|
"objective": 0.18279190361499786, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.875, |
|
"regularize": 0.18279190361499786, |
|
"step": 615 |
|
}, |
|
{ |
|
"dpo_loss": 0.1030309647321701, |
|
"dpo_wo_beta": -0.05606275424361229, |
|
"epoch": 1.759565422768068, |
|
"grad_norm": 12.949368950505932, |
|
"learning_rate": 2.1784664857475356e-06, |
|
"logits": -1.780458688735962, |
|
"logps": -108.14535522460938, |
|
"loss": 0.1303, |
|
"objective": 0.1030309647321701, |
|
"ranking_idealized": 0.6416666507720947, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.9666666388511658, |
|
"regularize": 0.1030309647321701, |
|
"step": 620 |
|
}, |
|
{ |
|
"dpo_loss": 0.158640518784523, |
|
"dpo_wo_beta": -0.4262932240962982, |
|
"epoch": 1.7737364194615022, |
|
"grad_norm": 15.36682532028932, |
|
"learning_rate": 2.1375188698293855e-06, |
|
"logits": -1.657003402709961, |
|
"logps": -110.36738586425781, |
|
"loss": 0.161, |
|
"objective": 0.158640518784523, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.9083333611488342, |
|
"regularize": 0.158640518784523, |
|
"step": 625 |
|
}, |
|
{ |
|
"dpo_loss": 0.21892648935317993, |
|
"dpo_wo_beta": -0.9036411046981812, |
|
"epoch": 1.7879074161549362, |
|
"grad_norm": 14.959909753098716, |
|
"learning_rate": 2.096670352632873e-06, |
|
"logits": -1.7339377403259277, |
|
"logps": -107.9156723022461, |
|
"loss": 0.1673, |
|
"objective": 0.21892648935317993, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.7833333611488342, |
|
"regularize": 0.21892648935317993, |
|
"step": 630 |
|
}, |
|
{ |
|
"dpo_loss": 0.14205454289913177, |
|
"dpo_wo_beta": -0.47540512681007385, |
|
"epoch": 1.8020784128483704, |
|
"grad_norm": 22.25702396524442, |
|
"learning_rate": 2.0559321017347286e-06, |
|
"logits": -2.000821352005005, |
|
"logps": -119.52375793457031, |
|
"loss": 0.1614, |
|
"objective": 0.14205454289913177, |
|
"ranking_idealized": 0.675000011920929, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.8999999761581421, |
|
"regularize": 0.14205454289913177, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 1.8049126121870571, |
|
"eval_dpo_loss": 0.8243346214294434, |
|
"eval_dpo_wo_beta": -5.281310081481934, |
|
"eval_logits": -2.4850430488586426, |
|
"eval_logps": -117.85846710205078, |
|
"eval_loss": 0.8209081292152405, |
|
"eval_objective": 0.8243346214294434, |
|
"eval_ranking_idealized": 0.6045548915863037, |
|
"eval_ranking_idealized_expo": 0.5279502868652344, |
|
"eval_ranking_simple": 0.5817805528640747, |
|
"eval_regularize": 0.8243346214294434, |
|
"eval_runtime": 351.818, |
|
"eval_samples_per_second": 16.457, |
|
"eval_steps_per_second": 1.373, |
|
"step": 636 |
|
}, |
|
{ |
|
"dpo_loss": 0.13119691610336304, |
|
"dpo_wo_beta": -0.36706313490867615, |
|
"epoch": 1.8162494095418045, |
|
"grad_norm": 20.631621209893076, |
|
"learning_rate": 2.01531525456598e-06, |
|
"logits": -1.8479942083358765, |
|
"logps": -111.6270523071289, |
|
"loss": 0.1417, |
|
"objective": 0.13119691610336304, |
|
"ranking_idealized": 0.5249999761581421, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.8583333492279053, |
|
"regularize": 0.13119691610336304, |
|
"step": 640 |
|
}, |
|
{ |
|
"dpo_loss": 0.1571781039237976, |
|
"dpo_wo_beta": -0.5080724954605103, |
|
"epoch": 1.8304204062352385, |
|
"grad_norm": 20.508347622723388, |
|
"learning_rate": 1.974830915367086e-06, |
|
"logits": -1.9629262685775757, |
|
"logps": -122.33467102050781, |
|
"loss": 0.1732, |
|
"objective": 0.1571781039237976, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.875, |
|
"regularize": 0.1571781039237976, |
|
"step": 645 |
|
}, |
|
{ |
|
"dpo_loss": 0.18711452186107635, |
|
"dpo_wo_beta": -0.5334885120391846, |
|
"epoch": 1.8445914029286725, |
|
"grad_norm": 17.78377983289135, |
|
"learning_rate": 1.93449015215215e-06, |
|
"logits": -1.960700511932373, |
|
"logps": -116.45585632324219, |
|
"loss": 0.1873, |
|
"objective": 0.18711452186107635, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.875, |
|
"regularize": 0.18711452186107635, |
|
"step": 650 |
|
}, |
|
{ |
|
"dpo_loss": 0.14089132845401764, |
|
"dpo_wo_beta": -0.257717490196228, |
|
"epoch": 1.8587623996221068, |
|
"grad_norm": 17.423684978791957, |
|
"learning_rate": 1.8943039936830347e-06, |
|
"logits": -1.7539128065109253, |
|
"logps": -105.96385192871094, |
|
"loss": 0.1703, |
|
"objective": 0.14089132845401764, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.8416666388511658, |
|
"regularize": 0.14089132845401764, |
|
"step": 655 |
|
}, |
|
{ |
|
"dpo_loss": 0.12644439935684204, |
|
"dpo_wo_beta": -0.18994450569152832, |
|
"epoch": 1.872933396315541, |
|
"grad_norm": 14.114136095930363, |
|
"learning_rate": 1.8542834264542091e-06, |
|
"logits": -1.8740805387496948, |
|
"logps": -112.58109283447266, |
|
"loss": 0.1679, |
|
"objective": 0.12644439935684204, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.8833333253860474, |
|
"regularize": 0.12644439935684204, |
|
"step": 660 |
|
}, |
|
{ |
|
"dpo_loss": 0.1596754938364029, |
|
"dpo_wo_beta": -0.5118387341499329, |
|
"epoch": 1.887104393008975, |
|
"grad_norm": 17.02657500840854, |
|
"learning_rate": 1.814439391689151e-06, |
|
"logits": -2.012057065963745, |
|
"logps": -106.6546401977539, |
|
"loss": 0.1748, |
|
"objective": 0.1596754938364029, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.8583333492279053, |
|
"regularize": 0.1596754938364029, |
|
"step": 665 |
|
}, |
|
{ |
|
"dpo_loss": 0.1905670315027237, |
|
"dpo_wo_beta": -0.7486369609832764, |
|
"epoch": 1.901275389702409, |
|
"grad_norm": 14.652072743525036, |
|
"learning_rate": 1.7747827823491253e-06, |
|
"logits": -1.7807596921920776, |
|
"logps": -107.37975311279297, |
|
"loss": 0.1695, |
|
"objective": 0.1905670315027237, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.8666666746139526, |
|
"regularize": 0.1905670315027237, |
|
"step": 670 |
|
}, |
|
{ |
|
"dpo_loss": 0.14669080078601837, |
|
"dpo_wo_beta": -0.3554477095603943, |
|
"epoch": 1.915446386395843, |
|
"grad_norm": 15.076428386388748, |
|
"learning_rate": 1.7353244401551566e-06, |
|
"logits": -1.8374218940734863, |
|
"logps": -107.03987884521484, |
|
"loss": 0.1555, |
|
"objective": 0.14669080078601837, |
|
"ranking_idealized": 0.6416666507720947, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.8999999761581421, |
|
"regularize": 0.14669080078601837, |
|
"step": 675 |
|
}, |
|
{ |
|
"dpo_loss": 0.15277433395385742, |
|
"dpo_wo_beta": -0.5579003095626831, |
|
"epoch": 1.9296173830892773, |
|
"grad_norm": 14.063016020977683, |
|
"learning_rate": 1.6960751526240122e-06, |
|
"logits": -1.9377697706222534, |
|
"logps": -118.40213775634766, |
|
"loss": 0.1487, |
|
"objective": 0.15277433395385742, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.8833333253860474, |
|
"regularize": 0.15277433395385742, |
|
"step": 680 |
|
}, |
|
{ |
|
"dpo_loss": 0.1468452513217926, |
|
"dpo_wo_beta": -0.34248843789100647, |
|
"epoch": 1.9437883797827114, |
|
"grad_norm": 20.037932580562952, |
|
"learning_rate": 1.6570456501189996e-06, |
|
"logits": -1.822561502456665, |
|
"logps": -116.04502868652344, |
|
"loss": 0.1616, |
|
"objective": 0.1468452513217926, |
|
"ranking_idealized": 0.5083333253860474, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.8999999761581421, |
|
"regularize": 0.1468452513217926, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 1.9551251771374587, |
|
"eval_dpo_loss": 0.857575535774231, |
|
"eval_dpo_wo_beta": -5.723405361175537, |
|
"eval_logits": -2.465576410293579, |
|
"eval_logps": -119.32209777832031, |
|
"eval_loss": 0.8382942080497742, |
|
"eval_objective": 0.857575535774231, |
|
"eval_ranking_idealized": 0.6045548915863037, |
|
"eval_ranking_idealized_expo": 0.5279502868652344, |
|
"eval_ranking_simple": 0.5797101259231567, |
|
"eval_regularize": 0.857575535774231, |
|
"eval_runtime": 359.3448, |
|
"eval_samples_per_second": 16.113, |
|
"eval_steps_per_second": 1.344, |
|
"step": 689 |
|
}, |
|
{ |
|
"dpo_loss": 0.11434569954872131, |
|
"dpo_wo_beta": -0.045675624161958694, |
|
"epoch": 1.9579593764761456, |
|
"grad_norm": 13.437704226981893, |
|
"learning_rate": 1.6182466029163974e-06, |
|
"logits": -1.8100759983062744, |
|
"logps": -115.22444915771484, |
|
"loss": 0.1309, |
|
"objective": 0.11434569954872131, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.574999988079071, |
|
"ranking_simple": 0.8999999761581421, |
|
"regularize": 0.11434569954872131, |
|
"step": 690 |
|
}, |
|
{ |
|
"dpo_loss": 0.19163179397583008, |
|
"dpo_wo_beta": -0.46952199935913086, |
|
"epoch": 1.9721303731695796, |
|
"grad_norm": 20.21550526964092, |
|
"learning_rate": 1.5796886182883053e-06, |
|
"logits": -1.829925775527954, |
|
"logps": -114.886962890625, |
|
"loss": 0.1753, |
|
"objective": 0.19163179397583008, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.4416666626930237, |
|
"ranking_simple": 0.8166666626930237, |
|
"regularize": 0.19163179397583008, |
|
"step": 695 |
|
}, |
|
{ |
|
"dpo_loss": 0.18629121780395508, |
|
"dpo_wo_beta": -0.7477880120277405, |
|
"epoch": 1.9863013698630136, |
|
"grad_norm": 14.017066914132515, |
|
"learning_rate": 1.541382237602721e-06, |
|
"logits": -1.7919304370880127, |
|
"logps": -115.88526916503906, |
|
"loss": 0.176, |
|
"objective": 0.18629121780395508, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.8833333253860474, |
|
"regularize": 0.18629121780395508, |
|
"step": 700 |
|
}, |
|
{ |
|
"dpo_loss": 0.20123900473117828, |
|
"dpo_wo_beta": -0.6124710440635681, |
|
"epoch": 2.0004723665564477, |
|
"grad_norm": 21.887245138739928, |
|
"learning_rate": 1.5033379334416376e-06, |
|
"logits": -1.786551594734192, |
|
"logps": -114.37857055664062, |
|
"loss": 0.2015, |
|
"objective": 0.20123900473117828, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.8333333134651184, |
|
"regularize": 0.20123900473117828, |
|
"step": 705 |
|
}, |
|
{ |
|
"dpo_loss": 0.08176574856042862, |
|
"dpo_wo_beta": -0.0886942520737648, |
|
"epoch": 2.0146433632498817, |
|
"grad_norm": 12.07978325252889, |
|
"learning_rate": 1.465566106737942e-06, |
|
"logits": -1.8880345821380615, |
|
"logps": -114.7125244140625, |
|
"loss": 0.1005, |
|
"objective": 0.08176574856042862, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.925000011920929, |
|
"regularize": 0.08176574856042862, |
|
"step": 710 |
|
}, |
|
{ |
|
"dpo_loss": 0.07800193130970001, |
|
"dpo_wo_beta": -0.004813884384930134, |
|
"epoch": 2.028814359943316, |
|
"grad_norm": 10.892350166600437, |
|
"learning_rate": 1.4280770839319073e-06, |
|
"logits": -1.8223975896835327, |
|
"logps": -109.677001953125, |
|
"loss": 0.1064, |
|
"objective": 0.07800193130970001, |
|
"ranking_idealized": 0.6916666626930237, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.949999988079071, |
|
"regularize": 0.07800193130970001, |
|
"step": 715 |
|
}, |
|
{ |
|
"dpo_loss": 0.07249681651592255, |
|
"dpo_wo_beta": -0.03800208121538162, |
|
"epoch": 2.04298535663675, |
|
"grad_norm": 10.585775059236884, |
|
"learning_rate": 1.3908811141480408e-06, |
|
"logits": -1.7804607152938843, |
|
"logps": -119.00810241699219, |
|
"loss": 0.1011, |
|
"objective": 0.07249681651592255, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.949999988079071, |
|
"regularize": 0.07249681651592255, |
|
"step": 720 |
|
}, |
|
{ |
|
"dpo_loss": 0.10758433490991592, |
|
"dpo_wo_beta": -0.4225389361381531, |
|
"epoch": 2.057156353330184, |
|
"grad_norm": 16.634608237356296, |
|
"learning_rate": 1.353988366393083e-06, |
|
"logits": -1.8837405443191528, |
|
"logps": -122.06383514404297, |
|
"loss": 0.1068, |
|
"objective": 0.10758433490991592, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.5583333373069763, |
|
"ranking_simple": 0.925000011920929, |
|
"regularize": 0.10758433490991592, |
|
"step": 725 |
|
}, |
|
{ |
|
"dpo_loss": 0.16440601646900177, |
|
"dpo_wo_beta": -0.5969924926757812, |
|
"epoch": 2.0713273500236182, |
|
"grad_norm": 12.618281047842604, |
|
"learning_rate": 1.3174089267758983e-06, |
|
"logits": -1.7312003374099731, |
|
"logps": -118.56900024414062, |
|
"loss": 0.107, |
|
"objective": 0.16440601646900177, |
|
"ranking_idealized": 0.5166666507720947, |
|
"ranking_idealized_expo": 0.4333333373069763, |
|
"ranking_simple": 0.8583333492279053, |
|
"regularize": 0.16440601646900177, |
|
"step": 730 |
|
}, |
|
{ |
|
"dpo_loss": 0.15623989701271057, |
|
"dpo_wo_beta": -0.9556598663330078, |
|
"epoch": 2.0854983467170523, |
|
"grad_norm": 16.05614648021729, |
|
"learning_rate": 1.2811527957500344e-06, |
|
"logits": -1.6055046319961548, |
|
"logps": -127.75144958496094, |
|
"loss": 0.1095, |
|
"objective": 0.15623989701271057, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.949999988079071, |
|
"regularize": 0.15623989701271057, |
|
"step": 735 |
|
}, |
|
{ |
|
"dpo_loss": 0.11614324897527695, |
|
"dpo_wo_beta": -0.22688980400562286, |
|
"epoch": 2.0996693434104867, |
|
"grad_norm": 14.923960342956232, |
|
"learning_rate": 1.245229885379699e-06, |
|
"logits": -1.688416838645935, |
|
"logps": -123.08829498291016, |
|
"loss": 0.1063, |
|
"objective": 0.11614324897527695, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.949999988079071, |
|
"regularize": 0.11614324897527695, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 2.10533774208786, |
|
"eval_dpo_loss": 0.9823706746101379, |
|
"eval_dpo_wo_beta": -7.331023216247559, |
|
"eval_logits": -2.2711641788482666, |
|
"eval_logps": -133.36373901367188, |
|
"eval_loss": 0.9485942721366882, |
|
"eval_objective": 0.9823706746101379, |
|
"eval_ranking_idealized": 0.6045548915863037, |
|
"eval_ranking_idealized_expo": 0.5279502868652344, |
|
"eval_ranking_simple": 0.5517598390579224, |
|
"eval_regularize": 0.9823706746101379, |
|
"eval_runtime": 364.9494, |
|
"eval_samples_per_second": 15.865, |
|
"eval_steps_per_second": 1.323, |
|
"step": 742 |
|
}, |
|
{ |
|
"dpo_loss": 0.11820446699857712, |
|
"dpo_wo_beta": -0.5452965497970581, |
|
"epoch": 2.1138403401039207, |
|
"grad_norm": 14.405167306826879, |
|
"learning_rate": 1.2096500166298992e-06, |
|
"logits": -1.4971224069595337, |
|
"logps": -132.55892944335938, |
|
"loss": 0.1031, |
|
"objective": 0.11820446699857712, |
|
"ranking_idealized": 0.6916666626930237, |
|
"ranking_idealized_expo": 0.6166666746139526, |
|
"ranking_simple": 0.925000011920929, |
|
"regularize": 0.11820446699857712, |
|
"step": 745 |
|
}, |
|
{ |
|
"dpo_loss": 0.08092837035655975, |
|
"dpo_wo_beta": -0.21483030915260315, |
|
"epoch": 2.1280113367973548, |
|
"grad_norm": 12.035987152428243, |
|
"learning_rate": 1.1744229166814889e-06, |
|
"logits": -1.698511004447937, |
|
"logps": -129.5937042236328, |
|
"loss": 0.0957, |
|
"objective": 0.08092837035655975, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.574999988079071, |
|
"ranking_simple": 0.9583333134651184, |
|
"regularize": 0.08092837035655975, |
|
"step": 750 |
|
}, |
|
{ |
|
"dpo_loss": 0.09838299453258514, |
|
"dpo_wo_beta": -0.330010324716568, |
|
"epoch": 2.142182333490789, |
|
"grad_norm": 15.638085348810199, |
|
"learning_rate": 1.1395582162718524e-06, |
|
"logits": -1.6223360300064087, |
|
"logps": -128.86538696289062, |
|
"loss": 0.1147, |
|
"objective": 0.09838299453258514, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.925000011920929, |
|
"regularize": 0.09838299453258514, |
|
"step": 755 |
|
}, |
|
{ |
|
"dpo_loss": 0.10984232276678085, |
|
"dpo_wo_beta": -0.2521561086177826, |
|
"epoch": 2.156353330184223, |
|
"grad_norm": 18.280317761955644, |
|
"learning_rate": 1.1050654470619602e-06, |
|
"logits": -1.6547772884368896, |
|
"logps": -118.33650970458984, |
|
"loss": 0.1127, |
|
"objective": 0.10984232276678085, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.9166666865348816, |
|
"regularize": 0.10984232276678085, |
|
"step": 760 |
|
}, |
|
{ |
|
"dpo_loss": 0.11440528929233551, |
|
"dpo_wo_beta": -0.2931906580924988, |
|
"epoch": 2.170524326877657, |
|
"grad_norm": 12.536707104746414, |
|
"learning_rate": 1.0709540390305061e-06, |
|
"logits": -1.692717432975769, |
|
"logps": -118.69541931152344, |
|
"loss": 0.1215, |
|
"objective": 0.11440528929233551, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.9333333373069763, |
|
"regularize": 0.11440528929233551, |
|
"step": 765 |
|
}, |
|
{ |
|
"dpo_loss": 0.06226298585534096, |
|
"dpo_wo_beta": -0.023008961230516434, |
|
"epoch": 2.1846953235710913, |
|
"grad_norm": 12.278836680753214, |
|
"learning_rate": 1.0372333178958462e-06, |
|
"logits": -1.8234201669692993, |
|
"logps": -122.00631713867188, |
|
"loss": 0.1046, |
|
"objective": 0.06226298585534096, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.9583333134651184, |
|
"regularize": 0.06226298585534096, |
|
"step": 770 |
|
}, |
|
{ |
|
"dpo_loss": 0.13250760734081268, |
|
"dpo_wo_beta": -0.465701699256897, |
|
"epoch": 2.1988663202645253, |
|
"grad_norm": 18.99094104963921, |
|
"learning_rate": 1.0039125025664392e-06, |
|
"logits": -1.7803070545196533, |
|
"logps": -124.71762084960938, |
|
"loss": 0.1111, |
|
"objective": 0.13250760734081268, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.875, |
|
"regularize": 0.13250760734081268, |
|
"step": 775 |
|
}, |
|
{ |
|
"dpo_loss": 0.13606421649456024, |
|
"dpo_wo_beta": -0.6162300705909729, |
|
"epoch": 2.2130373169579594, |
|
"grad_norm": 16.36095800479351, |
|
"learning_rate": 9.710007026204896e-07, |
|
"logits": -1.7376734018325806, |
|
"logps": -122.50430297851562, |
|
"loss": 0.1045, |
|
"objective": 0.13606421649456024, |
|
"ranking_idealized": 0.5, |
|
"ranking_idealized_expo": 0.4333333373069763, |
|
"ranking_simple": 0.8999999761581421, |
|
"regularize": 0.13606421649456024, |
|
"step": 780 |
|
}, |
|
{ |
|
"dpo_loss": 0.11954029649496078, |
|
"dpo_wo_beta": -0.5017859935760498, |
|
"epoch": 2.2272083136513934, |
|
"grad_norm": 14.897356026126795, |
|
"learning_rate": 9.385069158154805e-07, |
|
"logits": -1.7108873128890991, |
|
"logps": -119.73731994628906, |
|
"loss": 0.1223, |
|
"objective": 0.11954029649496078, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.4583333432674408, |
|
"ranking_simple": 0.8916666507720947, |
|
"regularize": 0.11954029649496078, |
|
"step": 785 |
|
}, |
|
{ |
|
"dpo_loss": 0.0877259224653244, |
|
"dpo_wo_beta": -0.3332770764827728, |
|
"epoch": 2.2413793103448274, |
|
"grad_norm": 14.08354106572375, |
|
"learning_rate": 9.064400256282757e-07, |
|
"logits": -1.7486475706100464, |
|
"logps": -122.89460754394531, |
|
"loss": 0.1063, |
|
"objective": 0.0877259224653244, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.949999988079071, |
|
"regularize": 0.0877259224653244, |
|
"step": 790 |
|
}, |
|
{ |
|
"dpo_loss": 0.09928978979587555, |
|
"dpo_wo_beta": -0.10460276901721954, |
|
"epoch": 2.255550307038262, |
|
"grad_norm": 15.557463813809257, |
|
"learning_rate": 8.74808798826467e-07, |
|
"logits": -1.8421998023986816, |
|
"logps": -120.40747833251953, |
|
"loss": 0.1017, |
|
"objective": 0.09928978979587555, |
|
"ranking_idealized": 0.699999988079071, |
|
"ranking_idealized_expo": 0.6000000238418579, |
|
"ranking_simple": 0.9416666626930237, |
|
"regularize": 0.09928978979587555, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 2.255550307038262, |
|
"eval_dpo_loss": 0.8903655409812927, |
|
"eval_dpo_wo_beta": -6.205545902252197, |
|
"eval_logits": -2.4489927291870117, |
|
"eval_logps": -123.57449340820312, |
|
"eval_loss": 0.8711386919021606, |
|
"eval_objective": 0.8903655409812927, |
|
"eval_ranking_idealized": 0.6045548915863037, |
|
"eval_ranking_idealized_expo": 0.5279502868652344, |
|
"eval_ranking_simple": 0.5683229565620422, |
|
"eval_regularize": 0.8903655409812927, |
|
"eval_runtime": 347.0103, |
|
"eval_samples_per_second": 16.685, |
|
"eval_steps_per_second": 1.392, |
|
"step": 795 |
|
}, |
|
{ |
|
"dpo_loss": 0.1340600550174713, |
|
"dpo_wo_beta": -0.5811701416969299, |
|
"epoch": 2.269721303731696, |
|
"grad_norm": 13.668309173603514, |
|
"learning_rate": 8.436218830716259e-07, |
|
"logits": -1.8454309701919556, |
|
"logps": -120.35511779785156, |
|
"loss": 0.1264, |
|
"objective": 0.1340600550174713, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.925000011920929, |
|
"regularize": 0.1340600550174713, |
|
"step": 800 |
|
}, |
|
{ |
|
"dpo_loss": 0.10816308110952377, |
|
"dpo_wo_beta": -0.1633211225271225, |
|
"epoch": 2.28389230042513, |
|
"grad_norm": 13.957508503091352, |
|
"learning_rate": 8.1288780455512e-07, |
|
"logits": -1.8811193704605103, |
|
"logps": -119.9056625366211, |
|
"loss": 0.1133, |
|
"objective": 0.10816308110952377, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.9166666865348816, |
|
"regularize": 0.10816308110952377, |
|
"step": 805 |
|
}, |
|
{ |
|
"dpo_loss": 0.1715787798166275, |
|
"dpo_wo_beta": -0.7547404766082764, |
|
"epoch": 2.298063297118564, |
|
"grad_norm": 12.765274097098457, |
|
"learning_rate": 7.826149656671386e-07, |
|
"logits": -1.9726245403289795, |
|
"logps": -114.58699798583984, |
|
"loss": 0.1146, |
|
"objective": 0.1715787798166275, |
|
"ranking_idealized": 0.6416666507720947, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.9166666865348816, |
|
"regularize": 0.1715787798166275, |
|
"step": 810 |
|
}, |
|
{ |
|
"dpo_loss": 0.09882104396820068, |
|
"dpo_wo_beta": -0.18077202141284943, |
|
"epoch": 2.312234293811998, |
|
"grad_norm": 11.818365173405272, |
|
"learning_rate": 7.528116426995605e-07, |
|
"logits": -1.7834192514419556, |
|
"logps": -118.5341796875, |
|
"loss": 0.0843, |
|
"objective": 0.09882104396820068, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.4333333373069763, |
|
"ranking_simple": 0.8916666507720947, |
|
"regularize": 0.09882104396820068, |
|
"step": 815 |
|
}, |
|
{ |
|
"dpo_loss": 0.1070082038640976, |
|
"dpo_wo_beta": -0.44529280066490173, |
|
"epoch": 2.3264052905054324, |
|
"grad_norm": 13.918285230217345, |
|
"learning_rate": 7.234859835833022e-07, |
|
"logits": -1.8069401979446411, |
|
"logps": -123.11463928222656, |
|
"loss": 0.1153, |
|
"objective": 0.1070082038640976, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.5583333373069763, |
|
"ranking_simple": 0.949999988079071, |
|
"regularize": 0.1070082038640976, |
|
"step": 820 |
|
}, |
|
{ |
|
"dpo_loss": 0.14794430136680603, |
|
"dpo_wo_beta": -0.5907248258590698, |
|
"epoch": 2.3405762871988665, |
|
"grad_norm": 14.849580151366643, |
|
"learning_rate": 6.94646005660749e-07, |
|
"logits": -1.808493971824646, |
|
"logps": -116.64714050292969, |
|
"loss": 0.1107, |
|
"objective": 0.14794430136680603, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5583333373069763, |
|
"ranking_simple": 0.9083333611488342, |
|
"regularize": 0.14794430136680603, |
|
"step": 825 |
|
}, |
|
{ |
|
"dpo_loss": 0.08306514471769333, |
|
"dpo_wo_beta": -0.10000230371952057, |
|
"epoch": 2.3547472838923005, |
|
"grad_norm": 12.857101782602014, |
|
"learning_rate": 6.662995934939007e-07, |
|
"logits": -1.7857582569122314, |
|
"logps": -123.92733764648438, |
|
"loss": 0.1063, |
|
"objective": 0.08306514471769333, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.9583333134651184, |
|
"regularize": 0.08306514471769333, |
|
"step": 830 |
|
}, |
|
{ |
|
"dpo_loss": 0.11827471107244492, |
|
"dpo_wo_beta": -0.40130358934402466, |
|
"epoch": 2.3689182805857345, |
|
"grad_norm": 13.825829745811577, |
|
"learning_rate": 6.384544967088063e-07, |
|
"logits": -1.8356945514678955, |
|
"logps": -122.48320770263672, |
|
"loss": 0.124, |
|
"objective": 0.11827471107244492, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.4583333432674408, |
|
"ranking_simple": 0.9333333373069763, |
|
"regularize": 0.11827471107244492, |
|
"step": 835 |
|
}, |
|
{ |
|
"dpo_loss": 0.14169135689735413, |
|
"dpo_wo_beta": -0.3359481692314148, |
|
"epoch": 2.3830892772791685, |
|
"grad_norm": 18.243146391325514, |
|
"learning_rate": 6.111183278768956e-07, |
|
"logits": -1.8658840656280518, |
|
"logps": -123.42705535888672, |
|
"loss": 0.1321, |
|
"objective": 0.14169135689735413, |
|
"ranking_idealized": 0.5, |
|
"ranking_idealized_expo": 0.44999998807907104, |
|
"ranking_simple": 0.9083333611488342, |
|
"regularize": 0.14169135689735413, |
|
"step": 840 |
|
}, |
|
{ |
|
"dpo_loss": 0.09381429105997086, |
|
"dpo_wo_beta": -0.14171645045280457, |
|
"epoch": 2.3972602739726026, |
|
"grad_norm": 24.500088347031785, |
|
"learning_rate": 5.842985604337769e-07, |
|
"logits": -1.7731019258499146, |
|
"logps": -125.81861877441406, |
|
"loss": 0.1225, |
|
"objective": 0.09381429105997086, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.8999999761581421, |
|
"regularize": 0.09381429105997086, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 2.4057628719886632, |
|
"eval_dpo_loss": 0.9035148620605469, |
|
"eval_dpo_wo_beta": -6.352902889251709, |
|
"eval_logits": -2.4742591381073, |
|
"eval_logps": -124.53355407714844, |
|
"eval_loss": 0.882164478302002, |
|
"eval_objective": 0.9035148620605469, |
|
"eval_ranking_idealized": 0.6045548915863037, |
|
"eval_ranking_idealized_expo": 0.5279502868652344, |
|
"eval_ranking_simple": 0.5569358468055725, |
|
"eval_regularize": 0.9035148620605469, |
|
"eval_runtime": 366.478, |
|
"eval_samples_per_second": 15.799, |
|
"eval_steps_per_second": 1.318, |
|
"step": 848 |
|
}, |
|
{ |
|
"dpo_loss": 0.10245585441589355, |
|
"dpo_wo_beta": -0.2030431628227234, |
|
"epoch": 2.413793103448276, |
|
"grad_norm": 22.363955547911008, |
|
"learning_rate": 5.580025266360764e-07, |
|
"logits": -1.7464776039123535, |
|
"logps": -122.80999755859375, |
|
"loss": 0.1449, |
|
"objective": 0.10245585441589355, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.5625, |
|
"ranking_simple": 0.9583333134651184, |
|
"regularize": 0.10245585441589355, |
|
"step": 850 |
|
}, |
|
{ |
|
"dpo_loss": 0.14077231287956238, |
|
"dpo_wo_beta": -0.15207929909229279, |
|
"epoch": 2.42796410014171, |
|
"grad_norm": 14.848409925173506, |
|
"learning_rate": 5.322374155568688e-07, |
|
"logits": -1.8929237127304077, |
|
"logps": -115.12696838378906, |
|
"loss": 0.1124, |
|
"objective": 0.14077231287956238, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.9166666865348816, |
|
"regularize": 0.14077231287956238, |
|
"step": 855 |
|
}, |
|
{ |
|
"dpo_loss": 0.1414971649646759, |
|
"dpo_wo_beta": -0.5974557995796204, |
|
"epoch": 2.442135096835144, |
|
"grad_norm": 11.710235822935351, |
|
"learning_rate": 5.070102711202606e-07, |
|
"logits": -1.7974507808685303, |
|
"logps": -121.47347259521484, |
|
"loss": 0.1083, |
|
"objective": 0.1414971649646759, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5583333373069763, |
|
"ranking_simple": 0.9333333373069763, |
|
"regularize": 0.1414971649646759, |
|
"step": 860 |
|
}, |
|
{ |
|
"dpo_loss": 0.09845638275146484, |
|
"dpo_wo_beta": -0.1998511403799057, |
|
"epoch": 2.4563060935285783, |
|
"grad_norm": 15.827446562598988, |
|
"learning_rate": 4.823279901756498e-07, |
|
"logits": -1.816353440284729, |
|
"logps": -122.7919692993164, |
|
"loss": 0.1063, |
|
"objective": 0.09845638275146484, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.9083333611488342, |
|
"regularize": 0.09845638275146484, |
|
"step": 865 |
|
}, |
|
{ |
|
"dpo_loss": 0.11614971607923508, |
|
"dpo_wo_beta": -0.34934201836586, |
|
"epoch": 2.4704770902220123, |
|
"grad_norm": 15.618833316654502, |
|
"learning_rate": 4.581973206121948e-07, |
|
"logits": -1.9400283098220825, |
|
"logps": -119.73554992675781, |
|
"loss": 0.1153, |
|
"objective": 0.11614971607923508, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.925000011920929, |
|
"regularize": 0.11614971607923508, |
|
"step": 870 |
|
}, |
|
{ |
|
"dpo_loss": 0.2065262645483017, |
|
"dpo_wo_beta": -1.1751881837844849, |
|
"epoch": 2.4846480869154464, |
|
"grad_norm": 17.223610041291963, |
|
"learning_rate": 4.3462485951401126e-07, |
|
"logits": -1.7437902688980103, |
|
"logps": -120.61251831054688, |
|
"loss": 0.1238, |
|
"objective": 0.2065262645483017, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.574999988079071, |
|
"ranking_simple": 0.8666666746139526, |
|
"regularize": 0.2065262645483017, |
|
"step": 875 |
|
}, |
|
{ |
|
"dpo_loss": 0.13995474576950073, |
|
"dpo_wo_beta": -0.27328214049339294, |
|
"epoch": 2.4988190836088804, |
|
"grad_norm": 11.937037662011573, |
|
"learning_rate": 4.116170513565942e-07, |
|
"logits": -1.9172199964523315, |
|
"logps": -115.04023742675781, |
|
"loss": 0.1093, |
|
"objective": 0.13995474576950073, |
|
"ranking_idealized": 0.5249999761581421, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.8999999761581421, |
|
"regularize": 0.13995474576950073, |
|
"step": 880 |
|
}, |
|
{ |
|
"dpo_loss": 0.13740381598472595, |
|
"dpo_wo_beta": -0.38727322220802307, |
|
"epoch": 2.5129900803023144, |
|
"grad_norm": 16.35266837824131, |
|
"learning_rate": 3.891801862449629e-07, |
|
"logits": -1.8533929586410522, |
|
"logps": -115.91497039794922, |
|
"loss": 0.1385, |
|
"objective": 0.13740381598472595, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.8916666507720947, |
|
"regularize": 0.13740381598472595, |
|
"step": 885 |
|
}, |
|
{ |
|
"dpo_loss": 0.11444827914237976, |
|
"dpo_wo_beta": -0.31205496191978455, |
|
"epoch": 2.527161076995749, |
|
"grad_norm": 12.141683375579714, |
|
"learning_rate": 3.6732039819400686e-07, |
|
"logits": -1.6747931241989136, |
|
"logps": -116.25071716308594, |
|
"loss": 0.1173, |
|
"objective": 0.11444827914237976, |
|
"ranking_idealized": 0.5166666507720947, |
|
"ranking_idealized_expo": 0.4416666626930237, |
|
"ranking_simple": 0.9083333611488342, |
|
"regularize": 0.11444827914237976, |
|
"step": 890 |
|
}, |
|
{ |
|
"dpo_loss": 0.21558310091495514, |
|
"dpo_wo_beta": -0.7720097303390503, |
|
"epoch": 2.541332073689183, |
|
"grad_norm": 17.5258932616616, |
|
"learning_rate": 3.46043663451511e-07, |
|
"logits": -1.938331127166748, |
|
"logps": -121.76246643066406, |
|
"loss": 0.1324, |
|
"objective": 0.21558310091495514, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.8333333134651184, |
|
"regularize": 0.21558310091495514, |
|
"step": 895 |
|
}, |
|
{ |
|
"dpo_loss": 0.1236240565776825, |
|
"dpo_wo_beta": -0.21698738634586334, |
|
"epoch": 2.555503070382617, |
|
"grad_norm": 16.69855766001795, |
|
"learning_rate": 3.253557988643072e-07, |
|
"logits": -1.8755207061767578, |
|
"logps": -119.14775848388672, |
|
"loss": 0.1157, |
|
"objective": 0.1236240565776825, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.9083333611488342, |
|
"regularize": 0.1236240565776825, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.558337269721304, |
|
"eval_dpo_loss": 0.894111156463623, |
|
"eval_dpo_wo_beta": -6.213596343994141, |
|
"eval_logits": -2.4885809421539307, |
|
"eval_logps": -124.45829010009766, |
|
"eval_loss": 0.8717960715293884, |
|
"eval_objective": 0.894111156463623, |
|
"eval_ranking_idealized": 0.6045548915863037, |
|
"eval_ranking_idealized_expo": 0.5279502868652344, |
|
"eval_ranking_simple": 0.5621117949485779, |
|
"eval_regularize": 0.894111156463623, |
|
"eval_runtime": 342.6574, |
|
"eval_samples_per_second": 16.897, |
|
"eval_steps_per_second": 1.41, |
|
"step": 901 |
|
}, |
|
{ |
|
"dpo_loss": 0.09204068034887314, |
|
"dpo_wo_beta": -0.17730669677257538, |
|
"epoch": 2.569674067076051, |
|
"grad_norm": 14.161016786369684, |
|
"learning_rate": 3.052624602880064e-07, |
|
"logits": -1.8424724340438843, |
|
"logps": -117.90782928466797, |
|
"loss": 0.0986, |
|
"objective": 0.09204068034887314, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.925000011920929, |
|
"regularize": 0.09204068034887314, |
|
"step": 905 |
|
}, |
|
{ |
|
"dpo_loss": 0.11361932754516602, |
|
"dpo_wo_beta": -0.38244467973709106, |
|
"epoch": 2.583845063769485, |
|
"grad_norm": 12.421803606538058, |
|
"learning_rate": 2.8576914104074425e-07, |
|
"logits": -2.0089211463928223, |
|
"logps": -116.39904022216797, |
|
"loss": 0.1122, |
|
"objective": 0.11361932754516602, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.4583333432674408, |
|
"ranking_simple": 0.9166666865348816, |
|
"regularize": 0.11361932754516602, |
|
"step": 910 |
|
}, |
|
{ |
|
"dpo_loss": 0.13471105694770813, |
|
"dpo_wo_beta": -0.5014829039573669, |
|
"epoch": 2.5980160604629194, |
|
"grad_norm": 15.12060209950672, |
|
"learning_rate": 2.6688117040136463e-07, |
|
"logits": -1.981037974357605, |
|
"logps": -121.86930084228516, |
|
"loss": 0.1153, |
|
"objective": 0.13471105694770813, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.949999988079071, |
|
"regularize": 0.13471105694770813, |
|
"step": 915 |
|
}, |
|
{ |
|
"dpo_loss": 0.08700807392597198, |
|
"dpo_wo_beta": -0.1876840889453888, |
|
"epoch": 2.6121870571563535, |
|
"grad_norm": 17.811975214160903, |
|
"learning_rate": 2.486037121524448e-07, |
|
"logits": -1.898934245109558, |
|
"logps": -120.13009643554688, |
|
"loss": 0.114, |
|
"objective": 0.08700807392597198, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.9416666626930237, |
|
"regularize": 0.08700807392597198, |
|
"step": 920 |
|
}, |
|
{ |
|
"dpo_loss": 0.0918925479054451, |
|
"dpo_wo_beta": -0.14420188963413239, |
|
"epoch": 2.6263580538497875, |
|
"grad_norm": 16.6284598820075, |
|
"learning_rate": 2.3094176316856982e-07, |
|
"logits": -1.8268101215362549, |
|
"logps": -121.2860107421875, |
|
"loss": 0.1064, |
|
"objective": 0.0918925479054451, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.9083333611488342, |
|
"regularize": 0.0918925479054451, |
|
"step": 925 |
|
}, |
|
{ |
|
"dpo_loss": 0.15886452794075012, |
|
"dpo_wo_beta": -0.7213179469108582, |
|
"epoch": 2.6405290505432215, |
|
"grad_norm": 15.32952011074324, |
|
"learning_rate": 2.13900152050239e-07, |
|
"logits": -1.9606980085372925, |
|
"logps": -110.05363464355469, |
|
"loss": 0.1179, |
|
"objective": 0.15886452794075012, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.8999999761581421, |
|
"regularize": 0.15886452794075012, |
|
"step": 930 |
|
}, |
|
{ |
|
"dpo_loss": 0.13784056901931763, |
|
"dpo_wo_beta": -0.5006576776504517, |
|
"epoch": 2.6547000472366555, |
|
"grad_norm": 14.658353796887713, |
|
"learning_rate": 1.9748353780377234e-07, |
|
"logits": -1.9395031929016113, |
|
"logps": -119.9269790649414, |
|
"loss": 0.1254, |
|
"objective": 0.13784056901931763, |
|
"ranking_idealized": 0.6583333611488342, |
|
"ranking_idealized_expo": 0.6000000238418579, |
|
"ranking_simple": 0.8916666507720947, |
|
"regularize": 0.13784056901931763, |
|
"step": 935 |
|
}, |
|
{ |
|
"dpo_loss": 0.12213913351297379, |
|
"dpo_wo_beta": -0.43640393018722534, |
|
"epoch": 2.66887104393009, |
|
"grad_norm": 14.844912954939305, |
|
"learning_rate": 1.8169640856758652e-07, |
|
"logits": -1.952646017074585, |
|
"logps": -121.75474548339844, |
|
"loss": 0.1117, |
|
"objective": 0.12213913351297379, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.9166666865348816, |
|
"regularize": 0.12213913351297379, |
|
"step": 940 |
|
}, |
|
{ |
|
"dpo_loss": 0.1136295348405838, |
|
"dpo_wo_beta": -0.17752434313297272, |
|
"epoch": 2.6830420406235236, |
|
"grad_norm": 13.220834082246482, |
|
"learning_rate": 1.6654308038518057e-07, |
|
"logits": -1.7970060110092163, |
|
"logps": -117.90103149414062, |
|
"loss": 0.1054, |
|
"objective": 0.1136295348405838, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.9083333611488342, |
|
"regularize": 0.1136295348405838, |
|
"step": 945 |
|
}, |
|
{ |
|
"dpo_loss": 0.16729401051998138, |
|
"dpo_wo_beta": -0.7473469972610474, |
|
"epoch": 2.697213037316958, |
|
"grad_norm": 14.369145851061829, |
|
"learning_rate": 1.5202769602517514e-07, |
|
"logits": -1.8816020488739014, |
|
"logps": -115.14582061767578, |
|
"loss": 0.1387, |
|
"objective": 0.16729401051998138, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.8999999761581421, |
|
"regularize": 0.16729401051998138, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.708549834671705, |
|
"eval_dpo_loss": 0.8891981840133667, |
|
"eval_dpo_wo_beta": -6.16098690032959, |
|
"eval_logits": -2.508572816848755, |
|
"eval_logps": -123.41434478759766, |
|
"eval_loss": 0.8687644004821777, |
|
"eval_objective": 0.8891981840133667, |
|
"eval_ranking_idealized": 0.6045548915863037, |
|
"eval_ranking_idealized_expo": 0.5279502868652344, |
|
"eval_ranking_simple": 0.5579710006713867, |
|
"eval_regularize": 0.8891981840133667, |
|
"eval_runtime": 379.9179, |
|
"eval_samples_per_second": 15.24, |
|
"eval_steps_per_second": 1.271, |
|
"step": 954 |
|
}, |
|
{ |
|
"dpo_loss": 0.16232462227344513, |
|
"dpo_wo_beta": -0.5988053679466248, |
|
"epoch": 2.711384034010392, |
|
"grad_norm": 18.96223814657741, |
|
"learning_rate": 1.381542238487188e-07, |
|
"logits": -1.8838648796081543, |
|
"logps": -119.06771087646484, |
|
"loss": 0.1298, |
|
"objective": 0.16232462227344513, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.8999999761581421, |
|
"regularize": 0.16232462227344513, |
|
"step": 955 |
|
}, |
|
{ |
|
"dpo_loss": 0.14441066980361938, |
|
"dpo_wo_beta": -0.4333815276622772, |
|
"epoch": 2.725555030703826, |
|
"grad_norm": 13.716940458471806, |
|
"learning_rate": 1.2492645672457838e-07, |
|
"logits": -2.032045364379883, |
|
"logps": -113.05756378173828, |
|
"loss": 0.1189, |
|
"objective": 0.14441066980361938, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.8999999761581421, |
|
"regularize": 0.14441066980361938, |
|
"step": 960 |
|
}, |
|
{ |
|
"dpo_loss": 0.0981813296675682, |
|
"dpo_wo_beta": -0.19802381098270416, |
|
"epoch": 2.73972602739726, |
|
"grad_norm": 15.207158850260772, |
|
"learning_rate": 1.1234801099220787e-07, |
|
"logits": -1.7988998889923096, |
|
"logps": -122.7618408203125, |
|
"loss": 0.1111, |
|
"objective": 0.0981813296675682, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.9333333373069763, |
|
"regularize": 0.0981813296675682, |
|
"step": 965 |
|
}, |
|
{ |
|
"dpo_loss": 0.1318301260471344, |
|
"dpo_wo_beta": -0.2933843731880188, |
|
"epoch": 2.753897024090694, |
|
"grad_norm": 19.758130540339153, |
|
"learning_rate": 1.004223254730749e-07, |
|
"logits": -1.7169368267059326, |
|
"logps": -120.43925476074219, |
|
"loss": 0.1278, |
|
"objective": 0.1318301260471344, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.8916666507720947, |
|
"regularize": 0.1318301260471344, |
|
"step": 970 |
|
}, |
|
{ |
|
"dpo_loss": 0.058168552815914154, |
|
"dpo_wo_beta": -8.394511678488925e-05, |
|
"epoch": 2.7680680207841286, |
|
"grad_norm": 12.662905511630496, |
|
"learning_rate": 8.915266053052374e-08, |
|
"logits": -1.902711033821106, |
|
"logps": -116.11229705810547, |
|
"loss": 0.0999, |
|
"objective": 0.058168552815914154, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.925000011920929, |
|
"regularize": 0.058168552815914154, |
|
"step": 975 |
|
}, |
|
{ |
|
"dpo_loss": 0.10343047231435776, |
|
"dpo_wo_beta": -0.17137210071086884, |
|
"epoch": 2.7822390174775626, |
|
"grad_norm": 18.09899373584344, |
|
"learning_rate": 7.854209717842231e-08, |
|
"logits": -1.8915067911148071, |
|
"logps": -118.23885345458984, |
|
"loss": 0.1108, |
|
"objective": 0.10343047231435776, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.4416666626930237, |
|
"ranking_simple": 0.949999988079071, |
|
"regularize": 0.10343047231435776, |
|
"step": 980 |
|
}, |
|
{ |
|
"dpo_loss": 0.11380515992641449, |
|
"dpo_wo_beta": -0.2935677468776703, |
|
"epoch": 2.7964100141709967, |
|
"grad_norm": 15.185634936609162, |
|
"learning_rate": 6.859353623884569e-08, |
|
"logits": -1.851272463798523, |
|
"logps": -114.0352783203125, |
|
"loss": 0.1005, |
|
"objective": 0.11380515992641449, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.8833333253860474, |
|
"regularize": 0.11380515992641449, |
|
"step": 985 |
|
}, |
|
{ |
|
"dpo_loss": 0.10726428776979446, |
|
"dpo_wo_beta": -0.24069656431674957, |
|
"epoch": 2.8105810108644307, |
|
"grad_norm": 16.660819641239655, |
|
"learning_rate": 5.930969754901844e-08, |
|
"logits": -1.8860033750534058, |
|
"logps": -116.69475555419922, |
|
"loss": 0.1285, |
|
"objective": 0.10726428776979446, |
|
"ranking_idealized": 0.49166667461395264, |
|
"ranking_idealized_expo": 0.4333333373069763, |
|
"ranking_simple": 0.8833333253860474, |
|
"regularize": 0.10726428776979446, |
|
"step": 990 |
|
}, |
|
{ |
|
"dpo_loss": 0.12048947066068649, |
|
"dpo_wo_beta": -0.4086553454399109, |
|
"epoch": 2.8247520075578647, |
|
"grad_norm": 18.459518556479477, |
|
"learning_rate": 5.069311921774039e-08, |
|
"logits": -1.8980218172073364, |
|
"logps": -120.89018249511719, |
|
"loss": 0.1282, |
|
"objective": 0.12048947066068649, |
|
"ranking_idealized": 0.6416666507720947, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.925000011920929, |
|
"regularize": 0.12048947066068649, |
|
"step": 995 |
|
}, |
|
{ |
|
"dpo_loss": 0.08448319137096405, |
|
"dpo_wo_beta": -0.17254652082920074, |
|
"epoch": 2.838923004251299, |
|
"grad_norm": 13.073827319036173, |
|
"learning_rate": 4.2746156931490756e-08, |
|
"logits": -1.8106515407562256, |
|
"logps": -113.39408874511719, |
|
"loss": 0.1237, |
|
"objective": 0.08448319137096405, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.9333333373069763, |
|
"regularize": 0.08448319137096405, |
|
"step": 1000 |
|
}, |
|
{ |
|
"dpo_loss": 0.12625885009765625, |
|
"dpo_wo_beta": -0.28751522302627563, |
|
"epoch": 2.853094000944733, |
|
"grad_norm": 11.352338165734487, |
|
"learning_rate": 3.547098331040916e-08, |
|
"logits": -1.8715885877609253, |
|
"logps": -115.85346984863281, |
|
"loss": 0.1219, |
|
"objective": 0.12625885009765625, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.8833333253860474, |
|
"regularize": 0.12625885009765625, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 2.8587623996221065, |
|
"eval_dpo_loss": 0.8881875872612, |
|
"eval_dpo_wo_beta": -6.153732776641846, |
|
"eval_logits": -2.5127134323120117, |
|
"eval_logps": -123.14542388916016, |
|
"eval_loss": 0.868183434009552, |
|
"eval_objective": 0.8881875872612, |
|
"eval_ranking_idealized": 0.6045548915863037, |
|
"eval_ranking_idealized_expo": 0.5279502868652344, |
|
"eval_ranking_simple": 0.5600414276123047, |
|
"eval_regularize": 0.8881875872612, |
|
"eval_runtime": 375.0227, |
|
"eval_samples_per_second": 15.439, |
|
"eval_steps_per_second": 1.288, |
|
"step": 1007 |
|
}, |
|
{ |
|
"dpo_loss": 0.12578138709068298, |
|
"dpo_wo_beta": -0.4055772125720978, |
|
"epoch": 2.8672649976381672, |
|
"grad_norm": 13.654143333766829, |
|
"learning_rate": 2.8869587314321324e-08, |
|
"logits": -1.871021032333374, |
|
"logps": -118.54988861083984, |
|
"loss": 0.1301, |
|
"objective": 0.12578138709068298, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.9166666865348816, |
|
"regularize": 0.12578138709068298, |
|
"step": 1010 |
|
}, |
|
{ |
|
"dpo_loss": 0.11141829192638397, |
|
"dpo_wo_beta": -0.452913373708725, |
|
"epoch": 2.8814359943316012, |
|
"grad_norm": 14.00967768494451, |
|
"learning_rate": 2.2943773698977935e-08, |
|
"logits": -1.8538991212844849, |
|
"logps": -119.40221405029297, |
|
"loss": 0.1157, |
|
"objective": 0.11141829192638397, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.9416666626930237, |
|
"regularize": 0.11141829192638397, |
|
"step": 1015 |
|
}, |
|
{ |
|
"dpo_loss": 0.10787668824195862, |
|
"dpo_wo_beta": -0.35029396414756775, |
|
"epoch": 2.8956069910250353, |
|
"grad_norm": 12.590503217808422, |
|
"learning_rate": 1.7695162522652352e-08, |
|
"logits": -1.9000986814498901, |
|
"logps": -122.90519714355469, |
|
"loss": 0.1076, |
|
"objective": 0.10787668824195862, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.8999999761581421, |
|
"regularize": 0.10787668824195862, |
|
"step": 1020 |
|
}, |
|
{ |
|
"dpo_loss": 0.11394120752811432, |
|
"dpo_wo_beta": -0.36261746287345886, |
|
"epoch": 2.9097779877184697, |
|
"grad_norm": 10.606906345600125, |
|
"learning_rate": 1.3125188703233815e-08, |
|
"logits": -1.8986002206802368, |
|
"logps": -116.403564453125, |
|
"loss": 0.1089, |
|
"objective": 0.11394120752811432, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.44999998807907104, |
|
"ranking_simple": 0.9333333373069763, |
|
"regularize": 0.11394120752811432, |
|
"step": 1025 |
|
}, |
|
{ |
|
"dpo_loss": 0.0906638652086258, |
|
"dpo_wo_beta": -0.1376449316740036, |
|
"epoch": 2.9239489844119038, |
|
"grad_norm": 12.750602018189479, |
|
"learning_rate": 9.235101625932885e-09, |
|
"logits": -2.033400058746338, |
|
"logps": -113.65220642089844, |
|
"loss": 0.1197, |
|
"objective": 0.0906638652086258, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.925000011920929, |
|
"regularize": 0.0906638652086258, |
|
"step": 1030 |
|
}, |
|
{ |
|
"dpo_loss": 0.10265343636274338, |
|
"dpo_wo_beta": -0.1061137467622757, |
|
"epoch": 2.938119981105338, |
|
"grad_norm": 15.296605965797069, |
|
"learning_rate": 6.025964801714412e-09, |
|
"logits": -1.8468897342681885, |
|
"logps": -119.85134887695312, |
|
"loss": 0.1049, |
|
"objective": 0.10265343636274338, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.8916666507720947, |
|
"regularize": 0.10265343636274338, |
|
"step": 1035 |
|
}, |
|
{ |
|
"dpo_loss": 0.09396873414516449, |
|
"dpo_wo_beta": -0.1912200003862381, |
|
"epoch": 2.952290977798772, |
|
"grad_norm": 15.90972962002085, |
|
"learning_rate": 3.4986555765434415e-09, |
|
"logits": -1.8800926208496094, |
|
"logps": -122.51961517333984, |
|
"loss": 0.0994, |
|
"objective": 0.09396873414516449, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.5583333373069763, |
|
"ranking_simple": 0.9083333611488342, |
|
"regularize": 0.09396873414516449, |
|
"step": 1040 |
|
}, |
|
{ |
|
"dpo_loss": 0.14692950248718262, |
|
"dpo_wo_beta": -0.6586350798606873, |
|
"epoch": 2.966461974492206, |
|
"grad_norm": 8.0573279067109, |
|
"learning_rate": 1.6538648915270794e-09, |
|
"logits": -1.8756026029586792, |
|
"logps": -119.65303039550781, |
|
"loss": 0.1082, |
|
"objective": 0.14692950248718262, |
|
"ranking_idealized": 0.6416666507720947, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.9166666865348816, |
|
"regularize": 0.14692950248718262, |
|
"step": 1045 |
|
}, |
|
{ |
|
"dpo_loss": 0.07982174307107925, |
|
"dpo_wo_beta": -0.0584401935338974, |
|
"epoch": 2.9806329711856403, |
|
"grad_norm": 9.88886611903832, |
|
"learning_rate": 4.920970940180958e-10, |
|
"logits": -1.912126898765564, |
|
"logps": -116.61032104492188, |
|
"loss": 0.0891, |
|
"objective": 0.07982174307107925, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.925000011920929, |
|
"regularize": 0.07982174307107925, |
|
"step": 1050 |
|
}, |
|
{ |
|
"dpo_loss": 0.07663024961948395, |
|
"dpo_wo_beta": -0.015355088748037815, |
|
"epoch": 2.9948039678790743, |
|
"grad_norm": 13.025122561532541, |
|
"learning_rate": 1.3669799732163314e-11, |
|
"logits": -1.775391697883606, |
|
"logps": -116.977294921875, |
|
"loss": 0.0869, |
|
"objective": 0.07663024961948395, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.8999999761581421, |
|
"regularize": 0.07663024961948395, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 2.9976381672177608, |
|
"step": 1056, |
|
"total_flos": 0.0, |
|
"train_loss": 0.022545777056648425, |
|
"train_runtime": 4386.5835, |
|
"train_samples_per_second": 34.744, |
|
"train_steps_per_second": 0.241 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 1056, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 53, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|