|
{ |
|
"best_metric": 0.5423553586006165, |
|
"best_model_checkpoint": "./qwen2.5-0.5b/qwen2.5-0.5b-expo-DPO-ES-TRY/checkpoint-371", |
|
"epoch": 2.9976381672177608, |
|
"eval_steps": 53, |
|
"global_step": 528, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"dpo_loss": 0.6931471824645996, |
|
"dpo_wo_beta": -0.6931471824645996, |
|
"epoch": 0.005668398677373642, |
|
"grad_norm": 13.433600669124935, |
|
"learning_rate": 9.433962264150944e-08, |
|
"logits": -1.3874311447143555, |
|
"logps": -88.43561553955078, |
|
"loss": 0.6931, |
|
"objective": 0.6931471824645996, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.6931471824645996, |
|
"step": 1 |
|
}, |
|
{ |
|
"dpo_loss": 0.693236768245697, |
|
"dpo_wo_beta": -0.6993356347084045, |
|
"epoch": 0.02834199338686821, |
|
"grad_norm": 13.640653628388394, |
|
"learning_rate": 4.716981132075472e-07, |
|
"logits": -1.4090652465820312, |
|
"logps": -84.34337615966797, |
|
"loss": 0.693, |
|
"objective": 0.693236768245697, |
|
"ranking_idealized": 0.6197916865348816, |
|
"ranking_idealized_expo": 0.546875, |
|
"ranking_simple": 0.546875, |
|
"regularize": 0.693236768245697, |
|
"step": 5 |
|
}, |
|
{ |
|
"dpo_loss": 0.6845630407333374, |
|
"dpo_wo_beta": -0.7111619710922241, |
|
"epoch": 0.05668398677373642, |
|
"grad_norm": 12.626074407134174, |
|
"learning_rate": 9.433962264150944e-07, |
|
"logits": -1.4784893989562988, |
|
"logps": -81.94055938720703, |
|
"loss": 0.6892, |
|
"objective": 0.6845630407333374, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 0.6845630407333374, |
|
"step": 10 |
|
}, |
|
{ |
|
"dpo_loss": 0.6825469136238098, |
|
"dpo_wo_beta": -0.8259204626083374, |
|
"epoch": 0.08502598016060463, |
|
"grad_norm": 12.374180595083178, |
|
"learning_rate": 1.4150943396226415e-06, |
|
"logits": -1.4932299852371216, |
|
"logps": -81.52880096435547, |
|
"loss": 0.6814, |
|
"objective": 0.6825469136238098, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 0.6825469136238098, |
|
"step": 15 |
|
}, |
|
{ |
|
"dpo_loss": 0.6950914263725281, |
|
"dpo_wo_beta": -1.2390469312667847, |
|
"epoch": 0.11336797354747284, |
|
"grad_norm": 14.839934392200913, |
|
"learning_rate": 1.8867924528301889e-06, |
|
"logits": -1.5371100902557373, |
|
"logps": -82.72624969482422, |
|
"loss": 0.6711, |
|
"objective": 0.6950914263725281, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 0.6950914263725281, |
|
"step": 20 |
|
}, |
|
{ |
|
"dpo_loss": 0.6556071043014526, |
|
"dpo_wo_beta": -1.110619068145752, |
|
"epoch": 0.14170996693434104, |
|
"grad_norm": 12.89805052529156, |
|
"learning_rate": 2.358490566037736e-06, |
|
"logits": -1.6399922370910645, |
|
"logps": -81.59695434570312, |
|
"loss": 0.6589, |
|
"objective": 0.6556071043014526, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 0.6556071043014526, |
|
"step": 25 |
|
}, |
|
{ |
|
"dpo_loss": 0.6518108248710632, |
|
"dpo_wo_beta": -1.2506839036941528, |
|
"epoch": 0.17005196032120926, |
|
"grad_norm": 12.64998937636519, |
|
"learning_rate": 2.830188679245283e-06, |
|
"logits": -1.6404598951339722, |
|
"logps": -83.20111846923828, |
|
"loss": 0.6451, |
|
"objective": 0.6518108248710632, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.6518108248710632, |
|
"step": 30 |
|
}, |
|
{ |
|
"dpo_loss": 0.6226770877838135, |
|
"dpo_wo_beta": -1.394917368888855, |
|
"epoch": 0.19839395370807747, |
|
"grad_norm": 13.760162421635227, |
|
"learning_rate": 3.30188679245283e-06, |
|
"logits": -1.6237396001815796, |
|
"logps": -87.80964660644531, |
|
"loss": 0.6189, |
|
"objective": 0.6226770877838135, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.6226770877838135, |
|
"step": 35 |
|
}, |
|
{ |
|
"dpo_loss": 0.5924390554428101, |
|
"dpo_wo_beta": -1.422450304031372, |
|
"epoch": 0.22673594709494568, |
|
"grad_norm": 16.810886476613117, |
|
"learning_rate": 3.7735849056603777e-06, |
|
"logits": -1.620682954788208, |
|
"logps": -91.93690490722656, |
|
"loss": 0.6076, |
|
"objective": 0.5924390554428101, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.5924390554428101, |
|
"step": 40 |
|
}, |
|
{ |
|
"dpo_loss": 0.573756217956543, |
|
"dpo_wo_beta": -1.3691534996032715, |
|
"epoch": 0.25507794048181387, |
|
"grad_norm": 13.798774501924722, |
|
"learning_rate": 4.245283018867925e-06, |
|
"logits": -1.7814558744430542, |
|
"logps": -92.24474334716797, |
|
"loss": 0.5989, |
|
"objective": 0.573756217956543, |
|
"ranking_idealized": 0.6208333373069763, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.573756217956543, |
|
"step": 45 |
|
}, |
|
{ |
|
"dpo_loss": 0.5726417899131775, |
|
"dpo_wo_beta": -1.3605374097824097, |
|
"epoch": 0.2834199338686821, |
|
"grad_norm": 12.568473894025988, |
|
"learning_rate": 4.716981132075472e-06, |
|
"logits": -1.808895468711853, |
|
"logps": -90.65751647949219, |
|
"loss": 0.5954, |
|
"objective": 0.5726417899131775, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.42500001192092896, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.5726417899131775, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.300425129900803, |
|
"eval_dpo_loss": 0.7112604975700378, |
|
"eval_dpo_wo_beta": -2.2659413814544678, |
|
"eval_logits": -1.892814040184021, |
|
"eval_logps": -101.36742401123047, |
|
"eval_loss": 0.6816489100456238, |
|
"eval_objective": 0.7112604975700378, |
|
"eval_ranking_idealized": 0.5888429880142212, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5237603187561035, |
|
"eval_regularize": 0.7112604975700378, |
|
"eval_runtime": 211.6587, |
|
"eval_samples_per_second": 27.355, |
|
"eval_steps_per_second": 1.143, |
|
"step": 53 |
|
}, |
|
{ |
|
"dpo_loss": 0.5827316045761108, |
|
"dpo_wo_beta": -1.6213361024856567, |
|
"epoch": 0.3117619272555503, |
|
"grad_norm": 14.442715913160086, |
|
"learning_rate": 4.999781286194085e-06, |
|
"logits": -1.8762638568878174, |
|
"logps": -93.41423797607422, |
|
"loss": 0.5721, |
|
"objective": 0.5827316045761108, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.5827316045761108, |
|
"step": 55 |
|
}, |
|
{ |
|
"dpo_loss": 0.5553872585296631, |
|
"dpo_wo_beta": -1.6468366384506226, |
|
"epoch": 0.3401039206424185, |
|
"grad_norm": 13.845514282811145, |
|
"learning_rate": 4.997321195347154e-06, |
|
"logits": -1.8914529085159302, |
|
"logps": -90.59642028808594, |
|
"loss": 0.5756, |
|
"objective": 0.5553872585296631, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.5553872585296631, |
|
"step": 60 |
|
}, |
|
{ |
|
"dpo_loss": 0.5302771329879761, |
|
"dpo_wo_beta": -1.3166770935058594, |
|
"epoch": 0.3684459140292867, |
|
"grad_norm": 10.846857687148022, |
|
"learning_rate": 4.992130320438411e-06, |
|
"logits": -1.8399535417556763, |
|
"logps": -86.60197448730469, |
|
"loss": 0.5586, |
|
"objective": 0.5302771329879761, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.6208333373069763, |
|
"regularize": 0.5302771329879761, |
|
"step": 65 |
|
}, |
|
{ |
|
"dpo_loss": 0.5711485743522644, |
|
"dpo_wo_beta": -1.7437169551849365, |
|
"epoch": 0.39678790741615494, |
|
"grad_norm": 13.787840238803502, |
|
"learning_rate": 4.984214337613357e-06, |
|
"logits": -1.8178967237472534, |
|
"logps": -91.10688781738281, |
|
"loss": 0.5701, |
|
"objective": 0.5711485743522644, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.5711485743522644, |
|
"step": 70 |
|
}, |
|
{ |
|
"dpo_loss": 0.523643434047699, |
|
"dpo_wo_beta": -1.669514536857605, |
|
"epoch": 0.42512990080302315, |
|
"grad_norm": 13.192298437287352, |
|
"learning_rate": 4.97358190288299e-06, |
|
"logits": -1.8182169198989868, |
|
"logps": -94.8000717163086, |
|
"loss": 0.5205, |
|
"objective": 0.523643434047699, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.6583333611488342, |
|
"regularize": 0.523643434047699, |
|
"step": 75 |
|
}, |
|
{ |
|
"dpo_loss": 0.51079261302948, |
|
"dpo_wo_beta": -1.7271808385849, |
|
"epoch": 0.45347189418989137, |
|
"grad_norm": 15.151373786996814, |
|
"learning_rate": 4.9602446426585845e-06, |
|
"logits": -1.8920824527740479, |
|
"logps": -93.58238220214844, |
|
"loss": 0.5285, |
|
"objective": 0.51079261302948, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.6458333134651184, |
|
"regularize": 0.51079261302948, |
|
"step": 80 |
|
}, |
|
{ |
|
"dpo_loss": 0.5066012144088745, |
|
"dpo_wo_beta": -1.5956443548202515, |
|
"epoch": 0.4818138875767596, |
|
"grad_norm": 12.328960275584794, |
|
"learning_rate": 4.944217141038379e-06, |
|
"logits": -1.8741406202316284, |
|
"logps": -87.06742858886719, |
|
"loss": 0.5202, |
|
"objective": 0.5066012144088745, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.6208333373069763, |
|
"regularize": 0.5066012144088745, |
|
"step": 85 |
|
}, |
|
{ |
|
"dpo_loss": 0.5358369946479797, |
|
"dpo_wo_beta": -1.9357556104660034, |
|
"epoch": 0.5101558809636277, |
|
"grad_norm": 12.694483590051824, |
|
"learning_rate": 4.925516923860083e-06, |
|
"logits": -1.7968534231185913, |
|
"logps": -86.77802276611328, |
|
"loss": 0.4858, |
|
"objective": 0.5358369946479797, |
|
"ranking_idealized": 0.5458333492279053, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.5358369946479797, |
|
"step": 90 |
|
}, |
|
{ |
|
"dpo_loss": 0.4783257842063904, |
|
"dpo_wo_beta": -1.9098786115646362, |
|
"epoch": 0.538497874350496, |
|
"grad_norm": 14.474706973531484, |
|
"learning_rate": 4.904164439536626e-06, |
|
"logits": -1.8568389415740967, |
|
"logps": -88.12813568115234, |
|
"loss": 0.4865, |
|
"objective": 0.4783257842063904, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.6916666626930237, |
|
"regularize": 0.4783257842063904, |
|
"step": 95 |
|
}, |
|
{ |
|
"dpo_loss": 0.4654810130596161, |
|
"dpo_wo_beta": -1.9254087209701538, |
|
"epoch": 0.5668398677373642, |
|
"grad_norm": 13.577084707122001, |
|
"learning_rate": 4.880183036696123e-06, |
|
"logits": -1.938937783241272, |
|
"logps": -92.29436492919922, |
|
"loss": 0.5016, |
|
"objective": 0.4654810130596161, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.6875, |
|
"regularize": 0.4654810130596161, |
|
"step": 100 |
|
}, |
|
{ |
|
"dpo_loss": 0.4374677240848541, |
|
"dpo_wo_beta": -1.4267934560775757, |
|
"epoch": 0.5951818611242324, |
|
"grad_norm": 11.14545328639218, |
|
"learning_rate": 4.853598938650487e-06, |
|
"logits": -1.8158982992172241, |
|
"logps": -90.21449279785156, |
|
"loss": 0.4618, |
|
"objective": 0.4374677240848541, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.6666666865348816, |
|
"regularize": 0.4374677240848541, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.600850259801606, |
|
"eval_dpo_loss": 0.6936022639274597, |
|
"eval_dpo_wo_beta": -2.462427854537964, |
|
"eval_logits": -1.9007418155670166, |
|
"eval_logps": -94.35714721679688, |
|
"eval_loss": 0.6912521123886108, |
|
"eval_objective": 0.6936022639274597, |
|
"eval_ranking_idealized": 0.5888429880142212, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5351239442825317, |
|
"eval_regularize": 0.6936022639274597, |
|
"eval_runtime": 210.2297, |
|
"eval_samples_per_second": 27.541, |
|
"eval_steps_per_second": 1.151, |
|
"step": 106 |
|
}, |
|
{ |
|
"dpo_loss": 0.47933149337768555, |
|
"dpo_wo_beta": -1.9683055877685547, |
|
"epoch": 0.6235238545111006, |
|
"grad_norm": 12.39392340166307, |
|
"learning_rate": 4.824441214720629e-06, |
|
"logits": -1.9334439039230347, |
|
"logps": -87.35523223876953, |
|
"loss": 0.4633, |
|
"objective": 0.47933149337768555, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.4541666805744171, |
|
"ranking_simple": 0.6625000238418579, |
|
"regularize": 0.47933149337768555, |
|
"step": 110 |
|
}, |
|
{ |
|
"dpo_loss": 0.4749464690685272, |
|
"dpo_wo_beta": -1.7375919818878174, |
|
"epoch": 0.6518658478979689, |
|
"grad_norm": 12.612865651893962, |
|
"learning_rate": 4.7927417484495756e-06, |
|
"logits": -1.9057692289352417, |
|
"logps": -87.68991088867188, |
|
"loss": 0.4712, |
|
"objective": 0.4749464690685272, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.6333333253860474, |
|
"regularize": 0.4749464690685272, |
|
"step": 115 |
|
}, |
|
{ |
|
"dpo_loss": 0.4848935306072235, |
|
"dpo_wo_beta": -1.9273093938827515, |
|
"epoch": 0.680207841284837, |
|
"grad_norm": 13.836239066838136, |
|
"learning_rate": 4.758535202738287e-06, |
|
"logits": -1.8775906562805176, |
|
"logps": -87.8878173828125, |
|
"loss": 0.4641, |
|
"objective": 0.4848935306072235, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.6625000238418579, |
|
"regularize": 0.4848935306072235, |
|
"step": 120 |
|
}, |
|
{ |
|
"dpo_loss": 0.4785127639770508, |
|
"dpo_wo_beta": -1.814666748046875, |
|
"epoch": 0.7085498346717053, |
|
"grad_norm": 12.105170057238437, |
|
"learning_rate": 4.721858981942284e-06, |
|
"logits": -1.8346068859100342, |
|
"logps": -86.40522766113281, |
|
"loss": 0.4801, |
|
"objective": 0.4785127639770508, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.6875, |
|
"regularize": 0.4785127639770508, |
|
"step": 125 |
|
}, |
|
{ |
|
"dpo_loss": 0.4548089802265167, |
|
"dpo_wo_beta": -1.4164987802505493, |
|
"epoch": 0.7368918280585735, |
|
"grad_norm": 11.895980627109102, |
|
"learning_rate": 4.682753190970533e-06, |
|
"logits": -1.9488608837127686, |
|
"logps": -79.42195129394531, |
|
"loss": 0.4538, |
|
"objective": 0.4548089802265167, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.44999998807907104, |
|
"ranking_simple": 0.6291666626930237, |
|
"regularize": 0.4548089802265167, |
|
"step": 130 |
|
}, |
|
{ |
|
"dpo_loss": 0.49760884046554565, |
|
"dpo_wo_beta": -1.994195818901062, |
|
"epoch": 0.7652338214454416, |
|
"grad_norm": 12.298776298341995, |
|
"learning_rate": 4.641260591431315e-06, |
|
"logits": -1.9813282489776611, |
|
"logps": -82.40634155273438, |
|
"loss": 0.4433, |
|
"objective": 0.49760884046554565, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.6583333611488342, |
|
"regularize": 0.49760884046554565, |
|
"step": 135 |
|
}, |
|
{ |
|
"dpo_loss": 0.41459351778030396, |
|
"dpo_wo_beta": -1.187635064125061, |
|
"epoch": 0.7935758148323099, |
|
"grad_norm": 12.618720178096575, |
|
"learning_rate": 4.597426554873037e-06, |
|
"logits": -1.97609281539917, |
|
"logps": -83.44467163085938, |
|
"loss": 0.4236, |
|
"objective": 0.41459351778030396, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.6791666746139526, |
|
"regularize": 0.41459351778030396, |
|
"step": 140 |
|
}, |
|
{ |
|
"dpo_loss": 0.4073801636695862, |
|
"dpo_wo_beta": -1.311059832572937, |
|
"epoch": 0.821917808219178, |
|
"grad_norm": 14.417917904409194, |
|
"learning_rate": 4.551299013171111e-06, |
|
"logits": -2.0718839168548584, |
|
"logps": -84.2674560546875, |
|
"loss": 0.4215, |
|
"objective": 0.4073801636695862, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.6916666626930237, |
|
"regularize": 0.4073801636695862, |
|
"step": 145 |
|
}, |
|
{ |
|
"dpo_loss": 0.4207518398761749, |
|
"dpo_wo_beta": -1.50857675075531, |
|
"epoch": 0.8502598016060463, |
|
"grad_norm": 11.543599868064442, |
|
"learning_rate": 4.502928406115152e-06, |
|
"logits": -2.0730583667755127, |
|
"logps": -82.68958282470703, |
|
"loss": 0.4276, |
|
"objective": 0.4207518398761749, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.7208333611488342, |
|
"regularize": 0.4207518398761749, |
|
"step": 150 |
|
}, |
|
{ |
|
"dpo_loss": 0.3847941756248474, |
|
"dpo_wo_beta": -1.4449684619903564, |
|
"epoch": 0.8786017949929145, |
|
"grad_norm": 12.08771803065001, |
|
"learning_rate": 4.452367626253805e-06, |
|
"logits": -2.0991933345794678, |
|
"logps": -85.211181640625, |
|
"loss": 0.3986, |
|
"objective": 0.3847941756248474, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.7250000238418579, |
|
"regularize": 0.3847941756248474, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.9012753897024091, |
|
"eval_dpo_loss": 0.7214789390563965, |
|
"eval_dpo_wo_beta": -3.1229145526885986, |
|
"eval_logits": -2.1450352668762207, |
|
"eval_logps": -95.60012817382812, |
|
"eval_loss": 0.7013870477676392, |
|
"eval_objective": 0.7214789390563965, |
|
"eval_ranking_idealized": 0.5888429880142212, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5351239442825317, |
|
"eval_regularize": 0.7214789390563965, |
|
"eval_runtime": 210.3593, |
|
"eval_samples_per_second": 27.524, |
|
"eval_steps_per_second": 1.15, |
|
"step": 159 |
|
}, |
|
{ |
|
"dpo_loss": 0.4162478744983673, |
|
"dpo_wo_beta": -1.6461573839187622, |
|
"epoch": 0.9069437883797827, |
|
"grad_norm": 12.82345397067452, |
|
"learning_rate": 4.399671961057523e-06, |
|
"logits": -2.0759384632110596, |
|
"logps": -89.25846862792969, |
|
"loss": 0.4236, |
|
"objective": 0.4162478744983673, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.699999988079071, |
|
"regularize": 0.4162478744983673, |
|
"step": 160 |
|
}, |
|
{ |
|
"dpo_loss": 0.41358453035354614, |
|
"dpo_wo_beta": -1.648630976676941, |
|
"epoch": 0.9352857817666509, |
|
"grad_norm": 12.860537676624453, |
|
"learning_rate": 4.3448990324625244e-06, |
|
"logits": -2.024477481842041, |
|
"logps": -88.03329467773438, |
|
"loss": 0.4026, |
|
"objective": 0.41358453035354614, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.7333333492279053, |
|
"regularize": 0.41358453035354614, |
|
"step": 165 |
|
}, |
|
{ |
|
"dpo_loss": 0.378000408411026, |
|
"dpo_wo_beta": -1.2966532707214355, |
|
"epoch": 0.9636277751535192, |
|
"grad_norm": 11.533711130228069, |
|
"learning_rate": 4.288108733862064e-06, |
|
"logits": -2.042527437210083, |
|
"logps": -90.26854705810547, |
|
"loss": 0.3925, |
|
"objective": 0.378000408411026, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.7166666388511658, |
|
"regularize": 0.378000408411026, |
|
"step": 170 |
|
}, |
|
{ |
|
"dpo_loss": 0.3764660954475403, |
|
"dpo_wo_beta": -1.3978971242904663, |
|
"epoch": 0.9919697685403873, |
|
"grad_norm": 12.165192869157089, |
|
"learning_rate": 4.229363164613874e-06, |
|
"logits": -2.0610477924346924, |
|
"logps": -89.8354721069336, |
|
"loss": 0.3793, |
|
"objective": 0.3764660954475403, |
|
"ranking_idealized": 0.6541666388511658, |
|
"ranking_idealized_expo": 0.574999988079071, |
|
"ranking_simple": 0.7916666865348816, |
|
"regularize": 0.3764660954475403, |
|
"step": 175 |
|
}, |
|
{ |
|
"dpo_loss": 0.27626773715019226, |
|
"dpo_wo_beta": -0.8504549860954285, |
|
"epoch": 1.0203117619272555, |
|
"grad_norm": 10.141692447282386, |
|
"learning_rate": 4.168726562135432e-06, |
|
"logits": -2.2514243125915527, |
|
"logps": -90.8476333618164, |
|
"loss": 0.2852, |
|
"objective": 0.27626773715019226, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.8083333373069763, |
|
"regularize": 0.27626773715019226, |
|
"step": 180 |
|
}, |
|
{ |
|
"dpo_loss": 0.23696589469909668, |
|
"dpo_wo_beta": -0.6947117447853088, |
|
"epoch": 1.0486537553141237, |
|
"grad_norm": 13.78702272812957, |
|
"learning_rate": 4.106265231661292e-06, |
|
"logits": -2.158977746963501, |
|
"logps": -95.00120544433594, |
|
"loss": 0.2429, |
|
"objective": 0.23696589469909668, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.8083333373069763, |
|
"regularize": 0.23696589469909668, |
|
"step": 185 |
|
}, |
|
{ |
|
"dpo_loss": 0.26388806104660034, |
|
"dpo_wo_beta": -0.9112051725387573, |
|
"epoch": 1.076995748700992, |
|
"grad_norm": 14.740228375586371, |
|
"learning_rate": 4.042047473739278e-06, |
|
"logits": -2.1533920764923096, |
|
"logps": -101.71949768066406, |
|
"loss": 0.2517, |
|
"objective": 0.26388806104660034, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5541666746139526, |
|
"ranking_simple": 0.8416666388511658, |
|
"regularize": 0.26388806104660034, |
|
"step": 190 |
|
}, |
|
{ |
|
"dpo_loss": 0.2244579941034317, |
|
"dpo_wo_beta": -0.6430780291557312, |
|
"epoch": 1.10533774208786, |
|
"grad_norm": 10.169064121599527, |
|
"learning_rate": 3.976143509544843e-06, |
|
"logits": -2.1589295864105225, |
|
"logps": -96.5248031616211, |
|
"loss": 0.2467, |
|
"objective": 0.2244579941034317, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.8083333373069763, |
|
"regularize": 0.2244579941034317, |
|
"step": 195 |
|
}, |
|
{ |
|
"dpo_loss": 0.24179764091968536, |
|
"dpo_wo_beta": -0.6332272291183472, |
|
"epoch": 1.1336797354747283, |
|
"grad_norm": 9.444774343787891, |
|
"learning_rate": 3.908625404095242e-06, |
|
"logits": -2.2753493785858154, |
|
"logps": -91.93312072753906, |
|
"loss": 0.2563, |
|
"objective": 0.24179764091968536, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.8458333611488342, |
|
"regularize": 0.24179764091968536, |
|
"step": 200 |
|
}, |
|
{ |
|
"dpo_loss": 0.25683078169822693, |
|
"dpo_wo_beta": -0.8531176447868347, |
|
"epoch": 1.1620217288615966, |
|
"grad_norm": 9.240319326762517, |
|
"learning_rate": 3.839566987447492e-06, |
|
"logits": -2.2432618141174316, |
|
"logps": -91.3159408569336, |
|
"loss": 0.2584, |
|
"objective": 0.25683078169822693, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.8166666626930237, |
|
"regularize": 0.25683078169822693, |
|
"step": 205 |
|
}, |
|
{ |
|
"dpo_loss": 0.24292893707752228, |
|
"dpo_wo_beta": -0.8205318450927734, |
|
"epoch": 1.1903637222484649, |
|
"grad_norm": 9.283856100785183, |
|
"learning_rate": 3.7690437739662928e-06, |
|
"logits": -2.2361652851104736, |
|
"logps": -90.6613998413086, |
|
"loss": 0.2551, |
|
"objective": 0.24292893707752228, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.800000011920929, |
|
"regularize": 0.24292893707752228, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.201700519603212, |
|
"eval_dpo_loss": 0.7525234222412109, |
|
"eval_dpo_wo_beta": -3.7749528884887695, |
|
"eval_logits": -2.267778158187866, |
|
"eval_logps": -98.14269256591797, |
|
"eval_loss": 0.7350714206695557, |
|
"eval_objective": 0.7525234222412109, |
|
"eval_ranking_idealized": 0.5888429880142212, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5371900796890259, |
|
"eval_regularize": 0.7525234222412109, |
|
"eval_runtime": 210.8898, |
|
"eval_samples_per_second": 27.455, |
|
"eval_steps_per_second": 1.148, |
|
"step": 212 |
|
}, |
|
{ |
|
"dpo_loss": 0.289533793926239, |
|
"dpo_wo_beta": -0.8810125589370728, |
|
"epoch": 1.2187057156353331, |
|
"grad_norm": 10.72372972136692, |
|
"learning_rate": 3.697132879750174e-06, |
|
"logits": -2.1757090091705322, |
|
"logps": -93.64250183105469, |
|
"loss": 0.2578, |
|
"objective": 0.289533793926239, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.7875000238418579, |
|
"regularize": 0.289533793926239, |
|
"step": 215 |
|
}, |
|
{ |
|
"dpo_loss": 0.25134381651878357, |
|
"dpo_wo_beta": -0.8703542947769165, |
|
"epoch": 1.2470477090222012, |
|
"grad_norm": 12.940604838816247, |
|
"learning_rate": 3.6239129383061764e-06, |
|
"logits": -2.121750593185425, |
|
"logps": -94.44015502929688, |
|
"loss": 0.2676, |
|
"objective": 0.25134381651878357, |
|
"ranking_idealized": 0.6541666388511658, |
|
"ranking_idealized_expo": 0.6041666865348816, |
|
"ranking_simple": 0.8208333253860474, |
|
"regularize": 0.25134381651878357, |
|
"step": 220 |
|
}, |
|
{ |
|
"dpo_loss": 0.23937886953353882, |
|
"dpo_wo_beta": -0.7396827936172485, |
|
"epoch": 1.2753897024090695, |
|
"grad_norm": 9.645711793319885, |
|
"learning_rate": 3.5494640145652647e-06, |
|
"logits": -2.0901684761047363, |
|
"logps": -94.10260772705078, |
|
"loss": 0.2637, |
|
"objective": 0.23937886953353882, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.8541666865348816, |
|
"regularize": 0.23937886953353882, |
|
"step": 225 |
|
}, |
|
{ |
|
"dpo_loss": 0.2818019688129425, |
|
"dpo_wo_beta": -1.1170729398727417, |
|
"epoch": 1.3037316957959377, |
|
"grad_norm": 8.80210598601974, |
|
"learning_rate": 3.4738675173325008e-06, |
|
"logits": -1.9860222339630127, |
|
"logps": -92.9978256225586, |
|
"loss": 0.2776, |
|
"objective": 0.2818019688129425, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.7749999761581421, |
|
"regularize": 0.2818019688129425, |
|
"step": 230 |
|
}, |
|
{ |
|
"dpo_loss": 0.22621506452560425, |
|
"dpo_wo_beta": -0.35843732953071594, |
|
"epoch": 1.3320736891828058, |
|
"grad_norm": 9.267612473930496, |
|
"learning_rate": 3.397206110267713e-06, |
|
"logits": -2.1131467819213867, |
|
"logps": -87.49403381347656, |
|
"loss": 0.2618, |
|
"objective": 0.22621506452560425, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.8333333134651184, |
|
"regularize": 0.22621506452560425, |
|
"step": 235 |
|
}, |
|
{ |
|
"dpo_loss": 0.23632274568080902, |
|
"dpo_wo_beta": -0.6697984933853149, |
|
"epoch": 1.360415682569674, |
|
"grad_norm": 10.68594080832048, |
|
"learning_rate": 3.3195636214939943e-06, |
|
"logits": -2.130047559738159, |
|
"logps": -91.7619857788086, |
|
"loss": 0.2584, |
|
"objective": 0.23632274568080902, |
|
"ranking_idealized": 0.5541666746139526, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.8125, |
|
"regularize": 0.23632274568080902, |
|
"step": 240 |
|
}, |
|
{ |
|
"dpo_loss": 0.2982023358345032, |
|
"dpo_wo_beta": -1.1124054193496704, |
|
"epoch": 1.3887576759565423, |
|
"grad_norm": 10.330360151122868, |
|
"learning_rate": 3.2410249519328848e-06, |
|
"logits": -2.1718757152557373, |
|
"logps": -93.45353698730469, |
|
"loss": 0.2692, |
|
"objective": 0.2982023358345032, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.8041666746139526, |
|
"regularize": 0.2982023358345032, |
|
"step": 245 |
|
}, |
|
{ |
|
"dpo_loss": 0.2403133064508438, |
|
"dpo_wo_beta": -0.7000442147254944, |
|
"epoch": 1.4170996693434104, |
|
"grad_norm": 13.026578288520353, |
|
"learning_rate": 3.1616759824664543e-06, |
|
"logits": -2.145325183868408, |
|
"logps": -94.18195343017578, |
|
"loss": 0.269, |
|
"objective": 0.2403133064508438, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.47083333134651184, |
|
"ranking_simple": 0.824999988079071, |
|
"regularize": 0.2403133064508438, |
|
"step": 250 |
|
}, |
|
{ |
|
"dpo_loss": 0.23977436125278473, |
|
"dpo_wo_beta": -0.5784927010536194, |
|
"epoch": 1.4454416627302786, |
|
"grad_norm": 10.959901566104394, |
|
"learning_rate": 3.081603480027826e-06, |
|
"logits": -2.108074426651001, |
|
"logps": -94.5383529663086, |
|
"loss": 0.2625, |
|
"objective": 0.23977436125278473, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.8374999761581421, |
|
"regularize": 0.23977436125278473, |
|
"step": 255 |
|
}, |
|
{ |
|
"dpo_loss": 0.25297579169273376, |
|
"dpo_wo_beta": -0.7996426820755005, |
|
"epoch": 1.473783656117147, |
|
"grad_norm": 9.578050078679867, |
|
"learning_rate": 3.0008950027228035e-06, |
|
"logits": -2.1828908920288086, |
|
"logps": -92.77781677246094, |
|
"loss": 0.232, |
|
"objective": 0.25297579169273376, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.8041666746139526, |
|
"regularize": 0.25297579169273376, |
|
"step": 260 |
|
}, |
|
{ |
|
"dpo_loss": 0.2830916941165924, |
|
"dpo_wo_beta": -1.124144434928894, |
|
"epoch": 1.5021256495040152, |
|
"grad_norm": 11.27765707111355, |
|
"learning_rate": 2.9196388040863695e-06, |
|
"logits": -2.1150081157684326, |
|
"logps": -95.04662322998047, |
|
"loss": 0.2623, |
|
"objective": 0.2830916941165924, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.8041666746139526, |
|
"regularize": 0.2830916941165924, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 1.5021256495040152, |
|
"eval_dpo_loss": 0.7739136815071106, |
|
"eval_dpo_wo_beta": -4.163427829742432, |
|
"eval_logits": -2.1478331089019775, |
|
"eval_logps": -100.8313217163086, |
|
"eval_loss": 0.7400166392326355, |
|
"eval_objective": 0.7739136815071106, |
|
"eval_ranking_idealized": 0.5888429880142212, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.53925621509552, |
|
"eval_regularize": 0.7739136815071106, |
|
"eval_runtime": 210.8657, |
|
"eval_samples_per_second": 27.458, |
|
"eval_steps_per_second": 1.148, |
|
"step": 265 |
|
}, |
|
{ |
|
"dpo_loss": 0.2930367887020111, |
|
"dpo_wo_beta": -1.3651045560836792, |
|
"epoch": 1.5304676428908834, |
|
"grad_norm": 11.715215816813723, |
|
"learning_rate": 2.8379237365787426e-06, |
|
"logits": -2.035703182220459, |
|
"logps": -97.7331771850586, |
|
"loss": 0.253, |
|
"objective": 0.2930367887020111, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.7916666865348816, |
|
"regularize": 0.2930367887020111, |
|
"step": 270 |
|
}, |
|
{ |
|
"dpo_loss": 0.24886849522590637, |
|
"dpo_wo_beta": -0.8069366216659546, |
|
"epoch": 1.5588096362777515, |
|
"grad_norm": 8.958944325794365, |
|
"learning_rate": 2.7558391544265127e-06, |
|
"logits": -1.9700883626937866, |
|
"logps": -97.53855895996094, |
|
"loss": 0.2491, |
|
"objective": 0.24886849522590637, |
|
"ranking_idealized": 0.6208333373069763, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.8125, |
|
"regularize": 0.24886849522590637, |
|
"step": 275 |
|
}, |
|
{ |
|
"dpo_loss": 0.22936613857746124, |
|
"dpo_wo_beta": -0.6120084524154663, |
|
"epoch": 1.5871516296646198, |
|
"grad_norm": 10.814739938498821, |
|
"learning_rate": 2.6734748159151104e-06, |
|
"logits": -1.9118597507476807, |
|
"logps": -98.06639099121094, |
|
"loss": 0.2491, |
|
"objective": 0.22936613857746124, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.8374999761581421, |
|
"regularize": 0.22936613857746124, |
|
"step": 280 |
|
}, |
|
{ |
|
"dpo_loss": 0.22401383519172668, |
|
"dpo_wo_beta": -0.5180224776268005, |
|
"epoch": 1.615493623051488, |
|
"grad_norm": 11.270657822712987, |
|
"learning_rate": 2.5909207852394363e-06, |
|
"logits": -1.9585484266281128, |
|
"logps": -100.70836639404297, |
|
"loss": 0.2348, |
|
"objective": 0.22401383519172668, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.8291666507720947, |
|
"regularize": 0.22401383519172668, |
|
"step": 285 |
|
}, |
|
{ |
|
"dpo_loss": 0.2646006941795349, |
|
"dpo_wo_beta": -0.7763135433197021, |
|
"epoch": 1.643835616438356, |
|
"grad_norm": 10.585292794409252, |
|
"learning_rate": 2.508267334019988e-06, |
|
"logits": -1.9566444158554077, |
|
"logps": -97.0122299194336, |
|
"loss": 0.2532, |
|
"objective": 0.2646006941795349, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.5625, |
|
"ranking_simple": 0.8041666746139526, |
|
"regularize": 0.2646006941795349, |
|
"step": 290 |
|
}, |
|
{ |
|
"dpo_loss": 0.23113909363746643, |
|
"dpo_wo_beta": -0.6497251987457275, |
|
"epoch": 1.6721776098252243, |
|
"grad_norm": 11.90240881956814, |
|
"learning_rate": 2.4256048425921693e-06, |
|
"logits": -1.8574607372283936, |
|
"logps": -94.91531372070312, |
|
"loss": 0.2476, |
|
"objective": 0.23113909363746643, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.8333333134651184, |
|
"regularize": 0.23113909363746643, |
|
"step": 295 |
|
}, |
|
{ |
|
"dpo_loss": 0.22116926312446594, |
|
"dpo_wo_beta": -0.6268281936645508, |
|
"epoch": 1.7005196032120926, |
|
"grad_norm": 11.745161783871675, |
|
"learning_rate": 2.3430237011767166e-06, |
|
"logits": -1.895004153251648, |
|
"logps": -97.79885864257812, |
|
"loss": 0.2266, |
|
"objective": 0.22116926312446594, |
|
"ranking_idealized": 0.6583333611488342, |
|
"ranking_idealized_expo": 0.574999988079071, |
|
"ranking_simple": 0.8333333134651184, |
|
"regularize": 0.22116926312446594, |
|
"step": 300 |
|
}, |
|
{ |
|
"dpo_loss": 0.24756571650505066, |
|
"dpo_wo_beta": -0.9131773114204407, |
|
"epoch": 1.7288615965989607, |
|
"grad_norm": 12.299641904512029, |
|
"learning_rate": 2.2606142110393248e-06, |
|
"logits": -1.8061485290527344, |
|
"logps": -96.69060516357422, |
|
"loss": 0.2379, |
|
"objective": 0.24756571650505066, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.8291666507720947, |
|
"regularize": 0.24756571650505066, |
|
"step": 305 |
|
}, |
|
{ |
|
"dpo_loss": 0.2321903556585312, |
|
"dpo_wo_beta": -0.6867564916610718, |
|
"epoch": 1.7572035899858292, |
|
"grad_norm": 13.489735935272718, |
|
"learning_rate": 2.1784664857475356e-06, |
|
"logits": -1.8388514518737793, |
|
"logps": -95.04447937011719, |
|
"loss": 0.2456, |
|
"objective": 0.2321903556585312, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.824999988079071, |
|
"regularize": 0.2321903556585312, |
|
"step": 310 |
|
}, |
|
{ |
|
"dpo_loss": 0.2901044189929962, |
|
"dpo_wo_beta": -1.1286156177520752, |
|
"epoch": 1.7855455833726972, |
|
"grad_norm": 10.887596324980125, |
|
"learning_rate": 2.096670352632873e-06, |
|
"logits": -1.75984525680542, |
|
"logps": -94.63612365722656, |
|
"loss": 0.2571, |
|
"objective": 0.2901044189929962, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.8083333373069763, |
|
"regularize": 0.2901044189929962, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 1.8025507794048181, |
|
"eval_dpo_loss": 0.7664583325386047, |
|
"eval_dpo_wo_beta": -4.09501838684082, |
|
"eval_logits": -1.9888346195220947, |
|
"eval_logps": -102.3712158203125, |
|
"eval_loss": 0.7400712966918945, |
|
"eval_objective": 0.7664583325386047, |
|
"eval_ranking_idealized": 0.5888429880142212, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.53925621509552, |
|
"eval_regularize": 0.7664583325386047, |
|
"eval_runtime": 210.274, |
|
"eval_samples_per_second": 27.535, |
|
"eval_steps_per_second": 1.151, |
|
"step": 318 |
|
}, |
|
{ |
|
"dpo_loss": 0.2219768464565277, |
|
"dpo_wo_beta": -0.47742757201194763, |
|
"epoch": 1.8138875767595655, |
|
"grad_norm": 11.029480506309918, |
|
"learning_rate": 2.01531525456598e-06, |
|
"logits": -1.9175788164138794, |
|
"logps": -99.74655151367188, |
|
"loss": 0.2404, |
|
"objective": 0.2219768464565277, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.824999988079071, |
|
"regularize": 0.2219768464565277, |
|
"step": 320 |
|
}, |
|
{ |
|
"dpo_loss": 0.24908211827278137, |
|
"dpo_wo_beta": -0.8014059066772461, |
|
"epoch": 1.8422295701464337, |
|
"grad_norm": 12.92850322071669, |
|
"learning_rate": 1.93449015215215e-06, |
|
"logits": -2.0084919929504395, |
|
"logps": -101.09780883789062, |
|
"loss": 0.2586, |
|
"objective": 0.24908211827278137, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.8333333134651184, |
|
"regularize": 0.24908211827278137, |
|
"step": 325 |
|
}, |
|
{ |
|
"dpo_loss": 0.1984507441520691, |
|
"dpo_wo_beta": -0.3766098618507385, |
|
"epoch": 1.8705715635333018, |
|
"grad_norm": 10.415606016359964, |
|
"learning_rate": 1.8542834264542091e-06, |
|
"logits": -1.851909875869751, |
|
"logps": -94.5366439819336, |
|
"loss": 0.2496, |
|
"objective": 0.1984507441520691, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.8291666507720947, |
|
"regularize": 0.1984507441520691, |
|
"step": 330 |
|
}, |
|
{ |
|
"dpo_loss": 0.26707762479782104, |
|
"dpo_wo_beta": -0.9339324831962585, |
|
"epoch": 1.89891355692017, |
|
"grad_norm": 10.078352873471246, |
|
"learning_rate": 1.7747827823491253e-06, |
|
"logits": -1.9827288389205933, |
|
"logps": -94.26249694824219, |
|
"loss": 0.2463, |
|
"objective": 0.26707762479782104, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.8125, |
|
"regularize": 0.26707762479782104, |
|
"step": 335 |
|
}, |
|
{ |
|
"dpo_loss": 0.2447831928730011, |
|
"dpo_wo_beta": -0.7387041449546814, |
|
"epoch": 1.9272555503070383, |
|
"grad_norm": 10.88136655004607, |
|
"learning_rate": 1.6960751526240122e-06, |
|
"logits": -1.9671465158462524, |
|
"logps": -98.63937377929688, |
|
"loss": 0.2399, |
|
"objective": 0.2447831928730011, |
|
"ranking_idealized": 0.6416666507720947, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.8583333492279053, |
|
"regularize": 0.2447831928730011, |
|
"step": 340 |
|
}, |
|
{ |
|
"dpo_loss": 0.2123527079820633, |
|
"dpo_wo_beta": -0.5544185638427734, |
|
"epoch": 1.9555975436939064, |
|
"grad_norm": 11.18260747105762, |
|
"learning_rate": 1.6182466029163974e-06, |
|
"logits": -1.9572845697402954, |
|
"logps": -100.18721008300781, |
|
"loss": 0.2211, |
|
"objective": 0.2123527079820633, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.8208333253860474, |
|
"regularize": 0.2123527079820633, |
|
"step": 345 |
|
}, |
|
{ |
|
"dpo_loss": 0.2570362389087677, |
|
"dpo_wo_beta": -0.7474013566970825, |
|
"epoch": 1.9839395370807746, |
|
"grad_norm": 11.061918116138507, |
|
"learning_rate": 1.541382237602721e-06, |
|
"logits": -1.8960832357406616, |
|
"logps": -101.65901947021484, |
|
"loss": 0.2316, |
|
"objective": 0.2570362389087677, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.7916666865348816, |
|
"regularize": 0.2570362389087677, |
|
"step": 350 |
|
}, |
|
{ |
|
"dpo_loss": 0.19961656630039215, |
|
"dpo_wo_beta": -0.5642960667610168, |
|
"epoch": 2.012281530467643, |
|
"grad_norm": 7.569515164252156, |
|
"learning_rate": 1.465566106737942e-06, |
|
"logits": -1.8380100727081299, |
|
"logps": -102.71571350097656, |
|
"loss": 0.2103, |
|
"objective": 0.19961656630039215, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.8374999761581421, |
|
"regularize": 0.19961656630039215, |
|
"step": 355 |
|
}, |
|
{ |
|
"dpo_loss": 0.11018560826778412, |
|
"dpo_wo_beta": -0.12253165245056152, |
|
"epoch": 2.040623523854511, |
|
"grad_norm": 6.632276986432463, |
|
"learning_rate": 1.3908811141480408e-06, |
|
"logits": -1.867693543434143, |
|
"logps": -103.06665802001953, |
|
"loss": 0.118, |
|
"objective": 0.11018560826778412, |
|
"ranking_idealized": 0.6625000238418579, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.9041666388511658, |
|
"regularize": 0.11018560826778412, |
|
"step": 360 |
|
}, |
|
{ |
|
"dpo_loss": 0.12077057361602783, |
|
"dpo_wo_beta": -0.197490856051445, |
|
"epoch": 2.0689655172413794, |
|
"grad_norm": 10.213186193965676, |
|
"learning_rate": 1.3174089267758983e-06, |
|
"logits": -1.8255099058151245, |
|
"logps": -110.3724136352539, |
|
"loss": 0.118, |
|
"objective": 0.12077057361602783, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.8666666746139526, |
|
"regularize": 0.12077057361602783, |
|
"step": 365 |
|
}, |
|
{ |
|
"dpo_loss": 0.1337815225124359, |
|
"dpo_wo_beta": -0.27523547410964966, |
|
"epoch": 2.0973075106282475, |
|
"grad_norm": 9.926730675582434, |
|
"learning_rate": 1.245229885379699e-06, |
|
"logits": -1.7588540315628052, |
|
"logps": -111.99506378173828, |
|
"loss": 0.1227, |
|
"objective": 0.1337815225124359, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.8916666507720947, |
|
"regularize": 0.1337815225124359, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.1029759093056213, |
|
"eval_dpo_loss": 0.9223728179931641, |
|
"eval_dpo_wo_beta": -6.4510064125061035, |
|
"eval_logits": -1.8644566535949707, |
|
"eval_logps": -122.00161743164062, |
|
"eval_loss": 0.8844180107116699, |
|
"eval_objective": 0.9223728179931641, |
|
"eval_ranking_idealized": 0.5888429880142212, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5423553586006165, |
|
"eval_regularize": 0.9223728179931641, |
|
"eval_runtime": 210.7356, |
|
"eval_samples_per_second": 27.475, |
|
"eval_steps_per_second": 1.148, |
|
"step": 371 |
|
}, |
|
{ |
|
"dpo_loss": 0.10664375871419907, |
|
"dpo_wo_beta": -0.2532973289489746, |
|
"epoch": 2.1256495040151155, |
|
"grad_norm": 9.740007111179482, |
|
"learning_rate": 1.1744229166814889e-06, |
|
"logits": -1.696647047996521, |
|
"logps": -118.39366149902344, |
|
"loss": 0.1103, |
|
"objective": 0.10664375871419907, |
|
"ranking_idealized": 0.675000011920929, |
|
"ranking_idealized_expo": 0.5916666388511658, |
|
"ranking_simple": 0.925000011920929, |
|
"regularize": 0.10664375871419907, |
|
"step": 375 |
|
}, |
|
{ |
|
"dpo_loss": 0.12854978442192078, |
|
"dpo_wo_beta": -0.27664583921432495, |
|
"epoch": 2.153991497401984, |
|
"grad_norm": 9.699256456859702, |
|
"learning_rate": 1.1050654470619602e-06, |
|
"logits": -1.700494647026062, |
|
"logps": -114.1063232421875, |
|
"loss": 0.1208, |
|
"objective": 0.12854978442192078, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.8999999761581421, |
|
"regularize": 0.12854978442192078, |
|
"step": 380 |
|
}, |
|
{ |
|
"dpo_loss": 0.10418140888214111, |
|
"dpo_wo_beta": -0.09889766573905945, |
|
"epoch": 2.182333490788852, |
|
"grad_norm": 9.620361843085416, |
|
"learning_rate": 1.0372333178958462e-06, |
|
"logits": -1.8633235692977905, |
|
"logps": -110.55794525146484, |
|
"loss": 0.1244, |
|
"objective": 0.10418140888214111, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.9125000238418579, |
|
"regularize": 0.10418140888214111, |
|
"step": 385 |
|
}, |
|
{ |
|
"dpo_loss": 0.12462247163057327, |
|
"dpo_wo_beta": -0.2658768594264984, |
|
"epoch": 2.21067548417572, |
|
"grad_norm": 11.000881222201947, |
|
"learning_rate": 9.710007026204896e-07, |
|
"logits": -1.7877620458602905, |
|
"logps": -112.08268737792969, |
|
"loss": 0.1204, |
|
"objective": 0.12462247163057327, |
|
"ranking_idealized": 0.5249999761581421, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.8833333253860474, |
|
"regularize": 0.12462247163057327, |
|
"step": 390 |
|
}, |
|
{ |
|
"dpo_loss": 0.11183874309062958, |
|
"dpo_wo_beta": -0.3540593981742859, |
|
"epoch": 2.2390174775625886, |
|
"grad_norm": 8.717110295390793, |
|
"learning_rate": 9.064400256282757e-07, |
|
"logits": -1.8010636568069458, |
|
"logps": -110.48490142822266, |
|
"loss": 0.1248, |
|
"objective": 0.11183874309062958, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.9041666388511658, |
|
"regularize": 0.11183874309062958, |
|
"step": 395 |
|
}, |
|
{ |
|
"dpo_loss": 0.12893003225326538, |
|
"dpo_wo_beta": -0.3680768311023712, |
|
"epoch": 2.2673594709494567, |
|
"grad_norm": 9.562073048936949, |
|
"learning_rate": 8.436218830716259e-07, |
|
"logits": -1.8909595012664795, |
|
"logps": -111.70219421386719, |
|
"loss": 0.1193, |
|
"objective": 0.12893003225326538, |
|
"ranking_idealized": 0.6625000238418579, |
|
"ranking_idealized_expo": 0.5708333253860474, |
|
"ranking_simple": 0.9166666865348816, |
|
"regularize": 0.12893003225326538, |
|
"step": 400 |
|
}, |
|
{ |
|
"dpo_loss": 0.13196416199207306, |
|
"dpo_wo_beta": -0.17852090299129486, |
|
"epoch": 2.295701464336325, |
|
"grad_norm": 9.166021194752298, |
|
"learning_rate": 7.826149656671386e-07, |
|
"logits": -1.9320632219314575, |
|
"logps": -108.1246566772461, |
|
"loss": 0.1267, |
|
"objective": 0.13196416199207306, |
|
"ranking_idealized": 0.6416666507720947, |
|
"ranking_idealized_expo": 0.5583333373069763, |
|
"ranking_simple": 0.8958333134651184, |
|
"regularize": 0.13196416199207306, |
|
"step": 405 |
|
}, |
|
{ |
|
"dpo_loss": 0.11071384698152542, |
|
"dpo_wo_beta": -0.1424117088317871, |
|
"epoch": 2.324043457723193, |
|
"grad_norm": 8.918983804471582, |
|
"learning_rate": 7.234859835833022e-07, |
|
"logits": -1.8304682970046997, |
|
"logps": -111.2301025390625, |
|
"loss": 0.112, |
|
"objective": 0.11071384698152542, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.9041666388511658, |
|
"regularize": 0.11071384698152542, |
|
"step": 410 |
|
}, |
|
{ |
|
"dpo_loss": 0.1223960742354393, |
|
"dpo_wo_beta": -0.1956464648246765, |
|
"epoch": 2.3523854511100613, |
|
"grad_norm": 9.386393866562546, |
|
"learning_rate": 6.662995934939007e-07, |
|
"logits": -1.8708041906356812, |
|
"logps": -111.06449890136719, |
|
"loss": 0.1155, |
|
"objective": 0.1223960742354393, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.9083333611488342, |
|
"regularize": 0.1223960742354393, |
|
"step": 415 |
|
}, |
|
{ |
|
"dpo_loss": 0.12930770218372345, |
|
"dpo_wo_beta": -0.21560731530189514, |
|
"epoch": 2.3807274444969297, |
|
"grad_norm": 11.0131183307354, |
|
"learning_rate": 6.111183278768956e-07, |
|
"logits": -1.860797643661499, |
|
"logps": -113.08780670166016, |
|
"loss": 0.133, |
|
"objective": 0.12930770218372345, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.9208333492279053, |
|
"regularize": 0.12930770218372345, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.403401039206424, |
|
"eval_dpo_loss": 0.8785684108734131, |
|
"eval_dpo_wo_beta": -5.887755870819092, |
|
"eval_logits": -2.0276894569396973, |
|
"eval_logps": -117.1216812133789, |
|
"eval_loss": 0.8447906374931335, |
|
"eval_objective": 0.8785684108734131, |
|
"eval_ranking_idealized": 0.5888429880142212, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5413222908973694, |
|
"eval_regularize": 0.8785684108734131, |
|
"eval_runtime": 209.8564, |
|
"eval_samples_per_second": 27.59, |
|
"eval_steps_per_second": 1.153, |
|
"step": 424 |
|
}, |
|
{ |
|
"dpo_loss": 0.117975153028965, |
|
"dpo_wo_beta": -0.1884605884552002, |
|
"epoch": 2.413793103448276, |
|
"grad_norm": 11.036168833651558, |
|
"learning_rate": 5.580025266360764e-07, |
|
"logits": -1.7822004556655884, |
|
"logps": -114.43038177490234, |
|
"loss": 0.1465, |
|
"objective": 0.117975153028965, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.5625, |
|
"ranking_simple": 0.9375, |
|
"regularize": 0.117975153028965, |
|
"step": 425 |
|
}, |
|
{ |
|
"dpo_loss": 0.1465020477771759, |
|
"dpo_wo_beta": -0.2595965266227722, |
|
"epoch": 2.442135096835144, |
|
"grad_norm": 10.595070818850646, |
|
"learning_rate": 5.070102711202606e-07, |
|
"logits": -1.8692681789398193, |
|
"logps": -110.2762680053711, |
|
"loss": 0.1276, |
|
"objective": 0.1465020477771759, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.8958333134651184, |
|
"regularize": 0.1465020477771759, |
|
"step": 430 |
|
}, |
|
{ |
|
"dpo_loss": 0.09775053709745407, |
|
"dpo_wo_beta": -0.12755917012691498, |
|
"epoch": 2.4704770902220123, |
|
"grad_norm": 9.393206692367766, |
|
"learning_rate": 4.581973206121948e-07, |
|
"logits": -1.8968538045883179, |
|
"logps": -112.28767395019531, |
|
"loss": 0.1175, |
|
"objective": 0.09775053709745407, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.9083333611488342, |
|
"regularize": 0.09775053709745407, |
|
"step": 435 |
|
}, |
|
{ |
|
"dpo_loss": 0.14228057861328125, |
|
"dpo_wo_beta": -0.3639788329601288, |
|
"epoch": 2.4988190836088804, |
|
"grad_norm": 8.020134663378592, |
|
"learning_rate": 4.116170513565942e-07, |
|
"logits": -1.8666160106658936, |
|
"logps": -109.18843078613281, |
|
"loss": 0.1167, |
|
"objective": 0.14228057861328125, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.875, |
|
"regularize": 0.14228057861328125, |
|
"step": 440 |
|
}, |
|
{ |
|
"dpo_loss": 0.13583588600158691, |
|
"dpo_wo_beta": -0.2074100226163864, |
|
"epoch": 2.527161076995749, |
|
"grad_norm": 9.224367796824264, |
|
"learning_rate": 3.6732039819400686e-07, |
|
"logits": -1.8071045875549316, |
|
"logps": -107.2675552368164, |
|
"loss": 0.1319, |
|
"objective": 0.13583588600158691, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.8791666626930237, |
|
"regularize": 0.13583588600158691, |
|
"step": 445 |
|
}, |
|
{ |
|
"dpo_loss": 0.17114870250225067, |
|
"dpo_wo_beta": -0.43270742893218994, |
|
"epoch": 2.555503070382617, |
|
"grad_norm": 11.265861710797749, |
|
"learning_rate": 3.253557988643072e-07, |
|
"logits": -1.9256045818328857, |
|
"logps": -111.20384216308594, |
|
"loss": 0.1288, |
|
"objective": 0.17114870250225067, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.862500011920929, |
|
"regularize": 0.17114870250225067, |
|
"step": 450 |
|
}, |
|
{ |
|
"dpo_loss": 0.10827689617872238, |
|
"dpo_wo_beta": -0.1751028150320053, |
|
"epoch": 2.583845063769485, |
|
"grad_norm": 9.605136286662574, |
|
"learning_rate": 2.8576914104074425e-07, |
|
"logits": -1.9289051294326782, |
|
"logps": -109.37706756591797, |
|
"loss": 0.1168, |
|
"objective": 0.10827689617872238, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.9125000238418579, |
|
"regularize": 0.10827689617872238, |
|
"step": 455 |
|
}, |
|
{ |
|
"dpo_loss": 0.11124877631664276, |
|
"dpo_wo_beta": -0.28054580092430115, |
|
"epoch": 2.6121870571563535, |
|
"grad_norm": 9.957466667064367, |
|
"learning_rate": 2.486037121524448e-07, |
|
"logits": -1.93342924118042, |
|
"logps": -113.2356948852539, |
|
"loss": 0.1169, |
|
"objective": 0.11124877631664276, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.9375, |
|
"regularize": 0.11124877631664276, |
|
"step": 460 |
|
}, |
|
{ |
|
"dpo_loss": 0.12714476883411407, |
|
"dpo_wo_beta": -0.22146105766296387, |
|
"epoch": 2.6405290505432215, |
|
"grad_norm": 10.04326854921629, |
|
"learning_rate": 2.13900152050239e-07, |
|
"logits": -1.8874350786209106, |
|
"logps": -108.94982147216797, |
|
"loss": 0.1189, |
|
"objective": 0.12714476883411407, |
|
"ranking_idealized": 0.5458333492279053, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.862500011920929, |
|
"regularize": 0.12714476883411407, |
|
"step": 465 |
|
}, |
|
{ |
|
"dpo_loss": 0.12151040881872177, |
|
"dpo_wo_beta": -0.26416900753974915, |
|
"epoch": 2.66887104393009, |
|
"grad_norm": 8.777820527737605, |
|
"learning_rate": 1.8169640856758652e-07, |
|
"logits": -1.9314534664154053, |
|
"logps": -112.75170135498047, |
|
"loss": 0.1254, |
|
"objective": 0.12151040881872177, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.5541666746139526, |
|
"ranking_simple": 0.9083333611488342, |
|
"regularize": 0.12151040881872177, |
|
"step": 470 |
|
}, |
|
{ |
|
"dpo_loss": 0.12749101221561432, |
|
"dpo_wo_beta": -0.2816121280193329, |
|
"epoch": 2.697213037316958, |
|
"grad_norm": 9.221778751171357, |
|
"learning_rate": 1.5202769602517514e-07, |
|
"logits": -1.8307260274887085, |
|
"logps": -109.39693450927734, |
|
"loss": 0.1211, |
|
"objective": 0.12749101221561432, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.8999999761581421, |
|
"regularize": 0.12749101221561432, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 2.708549834671705, |
|
"eval_dpo_loss": 0.8738968372344971, |
|
"eval_dpo_wo_beta": -5.815241813659668, |
|
"eval_logits": -2.0271613597869873, |
|
"eval_logps": -116.42301177978516, |
|
"eval_loss": 0.8371492624282837, |
|
"eval_objective": 0.8738968372344971, |
|
"eval_ranking_idealized": 0.5888429880142212, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5402892827987671, |
|
"eval_regularize": 0.8738968372344971, |
|
"eval_runtime": 211.9437, |
|
"eval_samples_per_second": 27.319, |
|
"eval_steps_per_second": 1.142, |
|
"step": 477 |
|
}, |
|
{ |
|
"dpo_loss": 0.13781045377254486, |
|
"dpo_wo_beta": -0.2485995590686798, |
|
"epoch": 2.725555030703826, |
|
"grad_norm": 10.971551462649595, |
|
"learning_rate": 1.2492645672457838e-07, |
|
"logits": -1.9437103271484375, |
|
"logps": -108.93817901611328, |
|
"loss": 0.1267, |
|
"objective": 0.13781045377254486, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.8999999761581421, |
|
"regularize": 0.13781045377254486, |
|
"step": 480 |
|
}, |
|
{ |
|
"dpo_loss": 0.11082082241773605, |
|
"dpo_wo_beta": -0.10876031965017319, |
|
"epoch": 2.753897024090694, |
|
"grad_norm": 10.884940640535042, |
|
"learning_rate": 1.004223254730749e-07, |
|
"logits": -1.7556992769241333, |
|
"logps": -114.1142807006836, |
|
"loss": 0.1222, |
|
"objective": 0.11082082241773605, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.9041666388511658, |
|
"regularize": 0.11082082241773605, |
|
"step": 485 |
|
}, |
|
{ |
|
"dpo_loss": 0.09154360741376877, |
|
"dpo_wo_beta": -0.05899694189429283, |
|
"epoch": 2.7822390174775626, |
|
"grad_norm": 11.015982469457516, |
|
"learning_rate": 7.854209717842231e-08, |
|
"logits": -1.8848822116851807, |
|
"logps": -110.15470886230469, |
|
"loss": 0.1058, |
|
"objective": 0.09154360741376877, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.47083333134651184, |
|
"ranking_simple": 0.9333333373069763, |
|
"regularize": 0.09154360741376877, |
|
"step": 490 |
|
}, |
|
{ |
|
"dpo_loss": 0.10964310169219971, |
|
"dpo_wo_beta": -0.07648710906505585, |
|
"epoch": 2.8105810108644307, |
|
"grad_norm": 10.079416267782939, |
|
"learning_rate": 5.930969754901844e-08, |
|
"logits": -1.8575230836868286, |
|
"logps": -108.52234649658203, |
|
"loss": 0.1192, |
|
"objective": 0.10964310169219971, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.8666666746139526, |
|
"regularize": 0.10964310169219971, |
|
"step": 495 |
|
}, |
|
{ |
|
"dpo_loss": 0.09479068219661713, |
|
"dpo_wo_beta": -0.03411731496453285, |
|
"epoch": 2.838923004251299, |
|
"grad_norm": 9.84080114767598, |
|
"learning_rate": 4.2746156931490756e-08, |
|
"logits": -1.8439643383026123, |
|
"logps": -109.77281188964844, |
|
"loss": 0.1213, |
|
"objective": 0.09479068219661713, |
|
"ranking_idealized": 0.6208333373069763, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.925000011920929, |
|
"regularize": 0.09479068219661713, |
|
"step": 500 |
|
}, |
|
{ |
|
"dpo_loss": 0.12725140154361725, |
|
"dpo_wo_beta": -0.18973813951015472, |
|
"epoch": 2.8672649976381672, |
|
"grad_norm": 9.973754192936779, |
|
"learning_rate": 2.8869587314321324e-08, |
|
"logits": -1.8574442863464355, |
|
"logps": -110.32710266113281, |
|
"loss": 0.132, |
|
"objective": 0.12725140154361725, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.8999999761581421, |
|
"regularize": 0.12725140154361725, |
|
"step": 505 |
|
}, |
|
{ |
|
"dpo_loss": 0.10469380766153336, |
|
"dpo_wo_beta": -0.1985001415014267, |
|
"epoch": 2.8956069910250353, |
|
"grad_norm": 8.936464383287202, |
|
"learning_rate": 1.7695162522652352e-08, |
|
"logits": -1.8629390001296997, |
|
"logps": -113.56767272949219, |
|
"loss": 0.1218, |
|
"objective": 0.10469380766153336, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.8916666507720947, |
|
"regularize": 0.10469380766153336, |
|
"step": 510 |
|
}, |
|
{ |
|
"dpo_loss": 0.11260154843330383, |
|
"dpo_wo_beta": -0.15691885352134705, |
|
"epoch": 2.9239489844119038, |
|
"grad_norm": 9.442300088571939, |
|
"learning_rate": 9.235101625932885e-09, |
|
"logits": -1.946829915046692, |
|
"logps": -108.54016876220703, |
|
"loss": 0.1258, |
|
"objective": 0.11260154843330383, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.925000011920929, |
|
"regularize": 0.11260154843330383, |
|
"step": 515 |
|
}, |
|
{ |
|
"dpo_loss": 0.12230218201875687, |
|
"dpo_wo_beta": -0.10489177703857422, |
|
"epoch": 2.952290977798772, |
|
"grad_norm": 9.279898048101137, |
|
"learning_rate": 3.4986555765434415e-09, |
|
"logits": -1.8482831716537476, |
|
"logps": -114.20655059814453, |
|
"loss": 0.1228, |
|
"objective": 0.12230218201875687, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.8708333373069763, |
|
"regularize": 0.12230218201875687, |
|
"step": 520 |
|
}, |
|
{ |
|
"dpo_loss": 0.13335375487804413, |
|
"dpo_wo_beta": -0.35261282324790955, |
|
"epoch": 2.9806329711856403, |
|
"grad_norm": 11.094809681697281, |
|
"learning_rate": 4.920970940180958e-10, |
|
"logits": -1.876869797706604, |
|
"logps": -111.03084564208984, |
|
"loss": 0.1235, |
|
"objective": 0.13335375487804413, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.8958333134651184, |
|
"regularize": 0.13335375487804413, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 2.9976381672177608, |
|
"step": 528, |
|
"total_flos": 0.0, |
|
"train_loss": 0.023984534440167023, |
|
"train_runtime": 3310.3799, |
|
"train_samples_per_second": 46.039, |
|
"train_steps_per_second": 0.159 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 528, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 53, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|