|
{ |
|
"best_metric": 14.005528450012207, |
|
"best_model_checkpoint": "./qwen2.5-0.5b/qwen2.5-0.5b-expo-L1EXPO-ES-0.1/checkpoint-750", |
|
"epoch": 3.5474728389230044, |
|
"eval_steps": 50, |
|
"global_step": 1250, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"dpo_loss": 0.6931471824645996, |
|
"epoch": 0.002834199338686821, |
|
"grad_norm": 36.885068816813735, |
|
"learning_rate": 2.840909090909091e-08, |
|
"logits": -1.359458565711975, |
|
"logps": -84.69721221923828, |
|
"loss": 0.0051, |
|
"objective": 0.0046141319908201694, |
|
"ranking_idealized": 0.6458333134651184, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.0046141319908201694, |
|
"step": 1, |
|
"wo_beta": 14.840873718261719 |
|
}, |
|
{ |
|
"dpo_loss": 0.6924303770065308, |
|
"epoch": 0.14170996693434104, |
|
"grad_norm": 33.742835742644765, |
|
"learning_rate": 1.4204545454545458e-06, |
|
"logits": -1.4566550254821777, |
|
"logps": -84.23489379882812, |
|
"loss": 0.0448, |
|
"objective": 0.04280169680714607, |
|
"ranking_idealized": 0.608418345451355, |
|
"ranking_idealized_expo": 0.5229591727256775, |
|
"ranking_simple": 0.5221088528633118, |
|
"regularize": 0.04280169680714607, |
|
"step": 50, |
|
"wo_beta": 15.654285430908203 |
|
}, |
|
{ |
|
"epoch": 0.14170996693434104, |
|
"eval_dpo_loss": 0.6935604810714722, |
|
"eval_logits": -1.4298962354660034, |
|
"eval_logps": -90.38883209228516, |
|
"eval_loss": 0.062233828008174896, |
|
"eval_objective": 0.062142737209796906, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.5243270993232727, |
|
"eval_regularize": 0.062142737209796906, |
|
"eval_runtime": 308.7283, |
|
"eval_samples_per_second": 18.754, |
|
"eval_steps_per_second": 1.564, |
|
"eval_wo_beta": 16.076759338378906, |
|
"step": 50 |
|
}, |
|
{ |
|
"dpo_loss": 0.7021370530128479, |
|
"epoch": 0.2834199338686821, |
|
"grad_norm": 31.729424259376046, |
|
"learning_rate": 2.8409090909090916e-06, |
|
"logits": -1.3946272134780884, |
|
"logps": -82.87468719482422, |
|
"loss": 0.1716, |
|
"objective": 0.17463459074497223, |
|
"ranking_idealized": 0.6016666889190674, |
|
"ranking_idealized_expo": 0.5141666531562805, |
|
"ranking_simple": 0.51541668176651, |
|
"regularize": 0.17463459074497223, |
|
"step": 100, |
|
"wo_beta": 15.276419639587402 |
|
}, |
|
{ |
|
"epoch": 0.2834199338686821, |
|
"eval_dpo_loss": 0.6981683969497681, |
|
"eval_logits": -1.3597227334976196, |
|
"eval_logps": -88.76753234863281, |
|
"eval_loss": 0.1556352823972702, |
|
"eval_objective": 0.15588510036468506, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.5274327397346497, |
|
"eval_regularize": 0.15588510036468506, |
|
"eval_runtime": 308.3116, |
|
"eval_samples_per_second": 18.78, |
|
"eval_steps_per_second": 1.567, |
|
"eval_wo_beta": 15.943617820739746, |
|
"step": 100 |
|
}, |
|
{ |
|
"dpo_loss": 0.7128496170043945, |
|
"epoch": 0.42512990080302315, |
|
"grad_norm": 24.11205441144116, |
|
"learning_rate": 4.2613636363636365e-06, |
|
"logits": -1.251375675201416, |
|
"logps": -79.16511535644531, |
|
"loss": 0.2858, |
|
"objective": 0.2846659719944, |
|
"ranking_idealized": 0.6066666841506958, |
|
"ranking_idealized_expo": 0.5287500023841858, |
|
"ranking_simple": 0.5274999737739563, |
|
"regularize": 0.2846659719944, |
|
"step": 150, |
|
"wo_beta": 15.22218132019043 |
|
}, |
|
{ |
|
"epoch": 0.42512990080302315, |
|
"eval_dpo_loss": 0.718317449092865, |
|
"eval_logits": -1.2545664310455322, |
|
"eval_logps": -79.50674438476562, |
|
"eval_loss": 0.2911944091320038, |
|
"eval_objective": 0.2922578752040863, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.522774338722229, |
|
"eval_regularize": 0.2922578752040863, |
|
"eval_runtime": 308.3872, |
|
"eval_samples_per_second": 18.775, |
|
"eval_steps_per_second": 1.566, |
|
"eval_wo_beta": 15.05699634552002, |
|
"step": 150 |
|
}, |
|
{ |
|
"dpo_loss": 0.7332326173782349, |
|
"epoch": 0.5668398677373642, |
|
"grad_norm": 19.83023586651878, |
|
"learning_rate": 4.997168347957521e-06, |
|
"logits": -1.015448808670044, |
|
"logps": -76.77922058105469, |
|
"loss": 0.3544, |
|
"objective": 0.3552749752998352, |
|
"ranking_idealized": 0.5924999713897705, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5020833611488342, |
|
"regularize": 0.3552749752998352, |
|
"step": 200, |
|
"wo_beta": 15.356170654296875 |
|
}, |
|
{ |
|
"epoch": 0.5668398677373642, |
|
"eval_dpo_loss": 0.7308588027954102, |
|
"eval_logits": -0.8432308435440063, |
|
"eval_logps": -83.84849548339844, |
|
"eval_loss": 0.3898463547229767, |
|
"eval_objective": 0.3890216052532196, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.522774338722229, |
|
"eval_regularize": 0.3890216052532196, |
|
"eval_runtime": 307.8908, |
|
"eval_samples_per_second": 18.805, |
|
"eval_steps_per_second": 1.569, |
|
"eval_wo_beta": 14.712230682373047, |
|
"step": 200 |
|
}, |
|
{ |
|
"dpo_loss": 0.7182620763778687, |
|
"epoch": 0.7085498346717053, |
|
"grad_norm": 19.421703617823624, |
|
"learning_rate": 4.973122855144066e-06, |
|
"logits": -0.7580794095993042, |
|
"logps": -78.16065216064453, |
|
"loss": 0.375, |
|
"objective": 0.37717047333717346, |
|
"ranking_idealized": 0.5991666913032532, |
|
"ranking_idealized_expo": 0.5170833468437195, |
|
"ranking_simple": 0.5195833444595337, |
|
"regularize": 0.37717047333717346, |
|
"step": 250, |
|
"wo_beta": 15.653904914855957 |
|
}, |
|
{ |
|
"epoch": 0.7085498346717053, |
|
"eval_dpo_loss": 0.735299289226532, |
|
"eval_logits": -0.673379123210907, |
|
"eval_logps": -81.28996276855469, |
|
"eval_loss": 0.4397831857204437, |
|
"eval_objective": 0.43750080466270447, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.5243270993232727, |
|
"eval_regularize": 0.43750080466270447, |
|
"eval_runtime": 351.635, |
|
"eval_samples_per_second": 16.466, |
|
"eval_steps_per_second": 1.374, |
|
"eval_wo_beta": 14.472906112670898, |
|
"step": 250 |
|
}, |
|
{ |
|
"dpo_loss": 0.72139972448349, |
|
"epoch": 0.8502598016060463, |
|
"grad_norm": 19.146332148877114, |
|
"learning_rate": 4.924776641419513e-06, |
|
"logits": -0.564231812953949, |
|
"logps": -79.54463195800781, |
|
"loss": 0.3592, |
|
"objective": 0.3580860495567322, |
|
"ranking_idealized": 0.5799999833106995, |
|
"ranking_idealized_expo": 0.4970833361148834, |
|
"ranking_simple": 0.5024999976158142, |
|
"regularize": 0.3580860495567322, |
|
"step": 300, |
|
"wo_beta": 15.114410400390625 |
|
}, |
|
{ |
|
"epoch": 0.8502598016060463, |
|
"eval_dpo_loss": 0.734784722328186, |
|
"eval_logits": -0.5500932335853577, |
|
"eval_logps": -84.41443634033203, |
|
"eval_loss": 0.442239373922348, |
|
"eval_objective": 0.43877851963043213, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.5232919454574585, |
|
"eval_regularize": 0.43877851963043213, |
|
"eval_runtime": 404.6503, |
|
"eval_samples_per_second": 14.309, |
|
"eval_steps_per_second": 1.194, |
|
"eval_wo_beta": 14.44029712677002, |
|
"step": 300 |
|
}, |
|
{ |
|
"dpo_loss": 0.717363178730011, |
|
"epoch": 0.9919697685403873, |
|
"grad_norm": 17.11167089420277, |
|
"learning_rate": 4.8526047530778175e-06, |
|
"logits": -0.5000432133674622, |
|
"logps": -79.52243041992188, |
|
"loss": 0.3351, |
|
"objective": 0.33388689160346985, |
|
"ranking_idealized": 0.60916668176651, |
|
"ranking_idealized_expo": 0.5270833373069763, |
|
"ranking_simple": 0.5262500047683716, |
|
"regularize": 0.33388689160346985, |
|
"step": 350, |
|
"wo_beta": 15.228816986083984 |
|
}, |
|
{ |
|
"epoch": 0.9919697685403873, |
|
"eval_dpo_loss": 0.735403299331665, |
|
"eval_logits": -0.5360206961631775, |
|
"eval_logps": -82.93754577636719, |
|
"eval_loss": 0.4675760865211487, |
|
"eval_objective": 0.4601868689060211, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.5341615080833435, |
|
"eval_regularize": 0.4601868689060211, |
|
"eval_runtime": 308.044, |
|
"eval_samples_per_second": 18.796, |
|
"eval_steps_per_second": 1.568, |
|
"eval_wo_beta": 14.272198677062988, |
|
"step": 350 |
|
}, |
|
{ |
|
"dpo_loss": 0.7120790481567383, |
|
"epoch": 1.1336797354747283, |
|
"grad_norm": 17.95944897166815, |
|
"learning_rate": 4.757316345716554e-06, |
|
"logits": -0.5945844054222107, |
|
"logps": -77.92745208740234, |
|
"loss": 0.3056, |
|
"objective": 0.3030960261821747, |
|
"ranking_idealized": 0.6087499856948853, |
|
"ranking_idealized_expo": 0.5337499976158142, |
|
"ranking_simple": 0.5316666960716248, |
|
"regularize": 0.3030960261821747, |
|
"step": 400, |
|
"wo_beta": 15.400132179260254 |
|
}, |
|
{ |
|
"epoch": 1.1336797354747283, |
|
"eval_dpo_loss": 0.7469586730003357, |
|
"eval_logits": -0.5685753226280212, |
|
"eval_logps": -80.56059265136719, |
|
"eval_loss": 0.48422977328300476, |
|
"eval_objective": 0.48037421703338623, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.5253623127937317, |
|
"eval_regularize": 0.48037421703338623, |
|
"eval_runtime": 314.7441, |
|
"eval_samples_per_second": 18.396, |
|
"eval_steps_per_second": 1.535, |
|
"eval_wo_beta": 14.281224250793457, |
|
"step": 400 |
|
}, |
|
{ |
|
"dpo_loss": 0.7077716588973999, |
|
"epoch": 1.2753897024090695, |
|
"grad_norm": 15.643717424703908, |
|
"learning_rate": 4.639847716126855e-06, |
|
"logits": -0.5214452743530273, |
|
"logps": -78.54476928710938, |
|
"loss": 0.2932, |
|
"objective": 0.29641959071159363, |
|
"ranking_idealized": 0.5975000262260437, |
|
"ranking_idealized_expo": 0.5199999809265137, |
|
"ranking_simple": 0.5199999809265137, |
|
"regularize": 0.29641959071159363, |
|
"step": 450, |
|
"wo_beta": 15.791983604431152 |
|
}, |
|
{ |
|
"epoch": 1.2753897024090695, |
|
"eval_dpo_loss": 0.7439451217651367, |
|
"eval_logits": -0.5565418004989624, |
|
"eval_logps": -83.62307739257812, |
|
"eval_loss": 0.48051390051841736, |
|
"eval_objective": 0.47548484802246094, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.5279502868652344, |
|
"eval_regularize": 0.47548484802246094, |
|
"eval_runtime": 308.6602, |
|
"eval_samples_per_second": 18.758, |
|
"eval_steps_per_second": 1.565, |
|
"eval_wo_beta": 14.464012145996094, |
|
"step": 450 |
|
}, |
|
{ |
|
"dpo_loss": 0.7124494314193726, |
|
"epoch": 1.4170996693434104, |
|
"grad_norm": 15.324248073271528, |
|
"learning_rate": 4.501353102310901e-06, |
|
"logits": -0.5437880754470825, |
|
"logps": -78.15093994140625, |
|
"loss": 0.2864, |
|
"objective": 0.2836955189704895, |
|
"ranking_idealized": 0.57833331823349, |
|
"ranking_idealized_expo": 0.4983333349227905, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.2836955189704895, |
|
"step": 500, |
|
"wo_beta": 15.279319763183594 |
|
}, |
|
{ |
|
"epoch": 1.4170996693434104, |
|
"eval_dpo_loss": 0.7510210275650024, |
|
"eval_logits": -0.6556914448738098, |
|
"eval_logps": -82.91778564453125, |
|
"eval_loss": 0.4964132010936737, |
|
"eval_objective": 0.4971453845500946, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.5274327397346497, |
|
"eval_regularize": 0.4971453845500946, |
|
"eval_runtime": 307.59, |
|
"eval_samples_per_second": 18.824, |
|
"eval_steps_per_second": 1.57, |
|
"eval_wo_beta": 14.282269477844238, |
|
"step": 500 |
|
}, |
|
{ |
|
"dpo_loss": 0.7097735404968262, |
|
"epoch": 1.5588096362777515, |
|
"grad_norm": 15.632233049837359, |
|
"learning_rate": 4.34319334202531e-06, |
|
"logits": -0.5551173686981201, |
|
"logps": -78.2597885131836, |
|
"loss": 0.2635, |
|
"objective": 0.264424592256546, |
|
"ranking_idealized": 0.5945833325386047, |
|
"ranking_idealized_expo": 0.5116666555404663, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 0.264424592256546, |
|
"step": 550, |
|
"wo_beta": 15.108202934265137 |
|
}, |
|
{ |
|
"epoch": 1.5588096362777515, |
|
"eval_dpo_loss": 0.7502700686454773, |
|
"eval_logits": -0.6183538436889648, |
|
"eval_logps": -81.16139221191406, |
|
"eval_loss": 0.502347469329834, |
|
"eval_objective": 0.5043270587921143, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.522774338722229, |
|
"eval_regularize": 0.5043270587921143, |
|
"eval_runtime": 307.9389, |
|
"eval_samples_per_second": 18.802, |
|
"eval_steps_per_second": 1.568, |
|
"eval_wo_beta": 14.063152313232422, |
|
"step": 550 |
|
}, |
|
{ |
|
"dpo_loss": 0.708111047744751, |
|
"epoch": 1.7005196032120926, |
|
"grad_norm": 15.258212996967583, |
|
"learning_rate": 4.16692250129073e-06, |
|
"logits": -0.48317351937294006, |
|
"logps": -79.20243835449219, |
|
"loss": 0.2561, |
|
"objective": 0.2518368065357208, |
|
"ranking_idealized": 0.6004166603088379, |
|
"ranking_idealized_expo": 0.51583331823349, |
|
"ranking_simple": 0.5074999928474426, |
|
"regularize": 0.2518368065357208, |
|
"step": 600, |
|
"wo_beta": 15.102338790893555 |
|
}, |
|
{ |
|
"epoch": 1.7005196032120926, |
|
"eval_dpo_loss": 0.7486839294433594, |
|
"eval_logits": -0.5805073380470276, |
|
"eval_logps": -84.7038803100586, |
|
"eval_loss": 0.49804311990737915, |
|
"eval_objective": 0.49642521142959595, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.5232919454574585, |
|
"eval_regularize": 0.49642521142959595, |
|
"eval_runtime": 308.8037, |
|
"eval_samples_per_second": 18.75, |
|
"eval_steps_per_second": 1.564, |
|
"eval_wo_beta": 14.335240364074707, |
|
"step": 600 |
|
}, |
|
{ |
|
"dpo_loss": 0.7057015299797058, |
|
"epoch": 1.8422295701464337, |
|
"grad_norm": 15.137988457806076, |
|
"learning_rate": 3.974272604254906e-06, |
|
"logits": -0.44393202662467957, |
|
"logps": -80.69384002685547, |
|
"loss": 0.2448, |
|
"objective": 0.24766255915164948, |
|
"ranking_idealized": 0.6058333516120911, |
|
"ranking_idealized_expo": 0.5295833349227905, |
|
"ranking_simple": 0.5350000262260437, |
|
"regularize": 0.24766255915164948, |
|
"step": 650, |
|
"wo_beta": 15.657441139221191 |
|
}, |
|
{ |
|
"epoch": 1.8422295701464337, |
|
"eval_dpo_loss": 0.750303328037262, |
|
"eval_logits": -0.42741408944129944, |
|
"eval_logps": -83.46288299560547, |
|
"eval_loss": 0.5170512795448303, |
|
"eval_objective": 0.5191380381584167, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.5232919454574585, |
|
"eval_regularize": 0.5191380381584167, |
|
"eval_runtime": 311.9724, |
|
"eval_samples_per_second": 18.559, |
|
"eval_steps_per_second": 1.548, |
|
"eval_wo_beta": 14.215264320373535, |
|
"step": 650 |
|
}, |
|
{ |
|
"dpo_loss": 0.7065611481666565, |
|
"epoch": 1.9839395370807746, |
|
"grad_norm": 16.215629133943352, |
|
"learning_rate": 3.767136614452458e-06, |
|
"logits": -0.43040552735328674, |
|
"logps": -79.25086212158203, |
|
"loss": 0.2235, |
|
"objective": 0.22704358398914337, |
|
"ranking_idealized": 0.5954166650772095, |
|
"ranking_idealized_expo": 0.5129166841506958, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 0.22704358398914337, |
|
"step": 700, |
|
"wo_beta": 15.360807418823242 |
|
}, |
|
{ |
|
"epoch": 1.9839395370807746, |
|
"eval_dpo_loss": 0.7482582330703735, |
|
"eval_logits": -0.5056679844856262, |
|
"eval_logps": -81.71964263916016, |
|
"eval_loss": 0.49625101685523987, |
|
"eval_objective": 0.4948585331439972, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.5232919454574585, |
|
"eval_regularize": 0.4948585331439972, |
|
"eval_runtime": 312.5183, |
|
"eval_samples_per_second": 18.527, |
|
"eval_steps_per_second": 1.546, |
|
"eval_wo_beta": 14.20261001586914, |
|
"step": 700 |
|
}, |
|
{ |
|
"dpo_loss": 0.7034626007080078, |
|
"epoch": 2.1256495040151155, |
|
"grad_norm": 14.439374560790109, |
|
"learning_rate": 3.547549834686222e-06, |
|
"logits": -0.4137415587902069, |
|
"logps": -78.55575561523438, |
|
"loss": 0.21, |
|
"objective": 0.20508375763893127, |
|
"ranking_idealized": 0.6066666841506958, |
|
"ranking_idealized_expo": 0.5133333206176758, |
|
"ranking_simple": 0.5099999904632568, |
|
"regularize": 0.20508375763893127, |
|
"step": 750, |
|
"wo_beta": 15.407984733581543 |
|
}, |
|
{ |
|
"epoch": 2.1256495040151155, |
|
"eval_dpo_loss": 0.7511767148971558, |
|
"eval_logits": -0.47565215826034546, |
|
"eval_logps": -82.51917266845703, |
|
"eval_loss": 0.5233561396598816, |
|
"eval_objective": 0.5224636197090149, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.5253623127937317, |
|
"eval_regularize": 0.5224636197090149, |
|
"eval_runtime": 307.6951, |
|
"eval_samples_per_second": 18.817, |
|
"eval_steps_per_second": 1.57, |
|
"eval_wo_beta": 14.005528450012207, |
|
"step": 750 |
|
}, |
|
{ |
|
"dpo_loss": 0.6996860504150391, |
|
"epoch": 2.2673594709494567, |
|
"grad_norm": 14.763670008384054, |
|
"learning_rate": 3.3176699082935546e-06, |
|
"logits": -0.44839462637901306, |
|
"logps": -79.29713439941406, |
|
"loss": 0.1988, |
|
"objective": 0.19862671196460724, |
|
"ranking_idealized": 0.5941666960716248, |
|
"ranking_idealized_expo": 0.5129166841506958, |
|
"ranking_simple": 0.5116666555404663, |
|
"regularize": 0.19862671196460724, |
|
"step": 800, |
|
"wo_beta": 14.99027156829834 |
|
}, |
|
{ |
|
"epoch": 2.2673594709494567, |
|
"eval_dpo_loss": 0.7496048808097839, |
|
"eval_logits": -0.5577788949012756, |
|
"eval_logps": -81.05644226074219, |
|
"eval_loss": 0.5139943361282349, |
|
"eval_objective": 0.511448085308075, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.5295031070709229, |
|
"eval_regularize": 0.511448085308075, |
|
"eval_runtime": 308.0865, |
|
"eval_samples_per_second": 18.793, |
|
"eval_steps_per_second": 1.568, |
|
"eval_wo_beta": 14.102994918823242, |
|
"step": 800 |
|
}, |
|
{ |
|
"dpo_loss": 0.7008050680160522, |
|
"epoch": 2.409069437883798, |
|
"grad_norm": 14.833532828768124, |
|
"learning_rate": 3.0797556183036582e-06, |
|
"logits": -0.4541783630847931, |
|
"logps": -78.78826141357422, |
|
"loss": 0.1845, |
|
"objective": 0.18498587608337402, |
|
"ranking_idealized": 0.5979166626930237, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5095833539962769, |
|
"regularize": 0.18498587608337402, |
|
"step": 850, |
|
"wo_beta": 15.201929092407227 |
|
}, |
|
{ |
|
"epoch": 2.409069437883798, |
|
"eval_dpo_loss": 0.7516361474990845, |
|
"eval_logits": -0.512949526309967, |
|
"eval_logps": -82.63258361816406, |
|
"eval_loss": 0.520516574382782, |
|
"eval_objective": 0.518623948097229, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.5310559272766113, |
|
"eval_regularize": 0.518623948097229, |
|
"eval_runtime": 309.5699, |
|
"eval_samples_per_second": 18.703, |
|
"eval_steps_per_second": 1.56, |
|
"eval_wo_beta": 14.151838302612305, |
|
"step": 850 |
|
}, |
|
{ |
|
"dpo_loss": 0.7016371488571167, |
|
"epoch": 2.550779404818139, |
|
"grad_norm": 16.31271001796413, |
|
"learning_rate": 2.8361446928038298e-06, |
|
"logits": -0.42467382550239563, |
|
"logps": -79.11641693115234, |
|
"loss": 0.1741, |
|
"objective": 0.1748921126127243, |
|
"ranking_idealized": 0.5887500047683716, |
|
"ranking_idealized_expo": 0.518750011920929, |
|
"ranking_simple": 0.51583331823349, |
|
"regularize": 0.1748921126127243, |
|
"step": 900, |
|
"wo_beta": 15.531429290771484 |
|
}, |
|
{ |
|
"epoch": 2.550779404818139, |
|
"eval_dpo_loss": 0.7507295608520508, |
|
"eval_logits": -0.4789924621582031, |
|
"eval_logps": -82.98091125488281, |
|
"eval_loss": 0.5132278800010681, |
|
"eval_objective": 0.5117725729942322, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.523809552192688, |
|
"eval_regularize": 0.5117725729942322, |
|
"eval_runtime": 311.1948, |
|
"eval_samples_per_second": 18.606, |
|
"eval_steps_per_second": 1.552, |
|
"eval_wo_beta": 14.245877265930176, |
|
"step": 900 |
|
}, |
|
{ |
|
"dpo_loss": 0.6993770003318787, |
|
"epoch": 2.69248937175248, |
|
"grad_norm": 14.7867391875567, |
|
"learning_rate": 2.5892308345974517e-06, |
|
"logits": -0.4542914927005768, |
|
"logps": -79.9416732788086, |
|
"loss": 0.1659, |
|
"objective": 0.16475924849510193, |
|
"ranking_idealized": 0.5975000262260437, |
|
"ranking_idealized_expo": 0.5087500214576721, |
|
"ranking_simple": 0.5141666531562805, |
|
"regularize": 0.16475924849510193, |
|
"step": 950, |
|
"wo_beta": 15.521940231323242 |
|
}, |
|
{ |
|
"epoch": 2.69248937175248, |
|
"eval_dpo_loss": 0.7500060796737671, |
|
"eval_logits": -0.4840329587459564, |
|
"eval_logps": -83.8330307006836, |
|
"eval_loss": 0.5188658237457275, |
|
"eval_objective": 0.5192957520484924, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.523809552192688, |
|
"eval_regularize": 0.5192957520484924, |
|
"eval_runtime": 310.1876, |
|
"eval_samples_per_second": 18.666, |
|
"eval_steps_per_second": 1.557, |
|
"eval_wo_beta": 14.302889823913574, |
|
"step": 950 |
|
}, |
|
{ |
|
"dpo_loss": 0.7024207711219788, |
|
"epoch": 2.8341993386868207, |
|
"grad_norm": 14.42838893249385, |
|
"learning_rate": 2.341440200858589e-06, |
|
"logits": -0.4285065233707428, |
|
"logps": -78.83317565917969, |
|
"loss": 0.1539, |
|
"objective": 0.15508733689785004, |
|
"ranking_idealized": 0.6020833253860474, |
|
"ranking_idealized_expo": 0.5104166865348816, |
|
"ranking_simple": 0.5091666579246521, |
|
"regularize": 0.15508733689785004, |
|
"step": 1000, |
|
"wo_beta": 15.06278133392334 |
|
}, |
|
{ |
|
"epoch": 2.8341993386868207, |
|
"eval_dpo_loss": 0.7498777508735657, |
|
"eval_logits": -0.46712055802345276, |
|
"eval_logps": -82.88314056396484, |
|
"eval_loss": 0.5136557817459106, |
|
"eval_objective": 0.5127004981040955, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.5269151329994202, |
|
"eval_regularize": 0.5127004981040955, |
|
"eval_runtime": 308.4784, |
|
"eval_samples_per_second": 18.77, |
|
"eval_steps_per_second": 1.566, |
|
"eval_wo_beta": 14.192536354064941, |
|
"step": 1000 |
|
}, |
|
{ |
|
"dpo_loss": 0.6952893733978271, |
|
"epoch": 2.9806329711856403, |
|
"grad_norm": 14.77922239882152, |
|
"learning_rate": 2.0952075638923656e-06, |
|
"logits": -0.4941651523113251, |
|
"logps": -79.95951080322266, |
|
"loss": 0.1445, |
|
"objective": 0.14514465630054474, |
|
"ranking_idealized": 0.6079166531562805, |
|
"ranking_idealized_expo": 0.5183333158493042, |
|
"ranking_simple": 0.5220833420753479, |
|
"regularize": 0.14514465630054474, |
|
"step": 1050, |
|
"wo_beta": 15.50859546661377 |
|
}, |
|
{ |
|
"epoch": 2.9806329711856403, |
|
"eval_dpo_loss": 0.7478482127189636, |
|
"eval_logits": -0.5530552864074707, |
|
"eval_logps": -83.16773986816406, |
|
"eval_loss": 0.5116256475448608, |
|
"eval_objective": 0.511193573474884, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.5248447060585022, |
|
"eval_regularize": 0.511193573474884, |
|
"eval_runtime": 310.4184, |
|
"eval_samples_per_second": 18.652, |
|
"eval_steps_per_second": 1.556, |
|
"eval_wo_beta": 14.214123725891113, |
|
"step": 1050 |
|
}, |
|
{ |
|
"dpo_loss": 0.6958988308906555, |
|
"epoch": 3.122342938119981, |
|
"grad_norm": 14.047766487106658, |
|
"learning_rate": 1.852952387243698e-06, |
|
"logits": -0.4758566915988922, |
|
"logps": -80.04509735107422, |
|
"loss": 0.1261, |
|
"objective": 0.12395481020212173, |
|
"ranking_idealized": 0.6070833206176758, |
|
"ranking_idealized_expo": 0.5304166674613953, |
|
"ranking_simple": 0.5270833373069763, |
|
"regularize": 0.12395481020212173, |
|
"step": 1100, |
|
"wo_beta": 15.6008939743042 |
|
}, |
|
{ |
|
"epoch": 3.122342938119981, |
|
"eval_dpo_loss": 0.7515185475349426, |
|
"eval_logits": -0.5487966537475586, |
|
"eval_logps": -83.59542846679688, |
|
"eval_loss": 0.515699565410614, |
|
"eval_objective": 0.5165062546730042, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.5232919454574585, |
|
"eval_regularize": 0.5165062546730042, |
|
"eval_runtime": 309.3286, |
|
"eval_samples_per_second": 18.718, |
|
"eval_steps_per_second": 1.561, |
|
"eval_wo_beta": 14.178275108337402, |
|
"step": 1100 |
|
}, |
|
{ |
|
"dpo_loss": 0.6931909322738647, |
|
"epoch": 3.264052905054322, |
|
"grad_norm": 14.963308385384513, |
|
"learning_rate": 1.617055052228768e-06, |
|
"logits": -0.4729629456996918, |
|
"logps": -79.84102630615234, |
|
"loss": 0.1146, |
|
"objective": 0.11260777711868286, |
|
"ranking_idealized": 0.5950000286102295, |
|
"ranking_idealized_expo": 0.5095833539962769, |
|
"ranking_simple": 0.5149999856948853, |
|
"regularize": 0.11260777711868286, |
|
"step": 1150, |
|
"wo_beta": 15.298945426940918 |
|
}, |
|
{ |
|
"epoch": 3.264052905054322, |
|
"eval_dpo_loss": 0.7487252354621887, |
|
"eval_logits": -0.5372445583343506, |
|
"eval_logps": -83.42646789550781, |
|
"eval_loss": 0.5174793601036072, |
|
"eval_objective": 0.5160741806030273, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.5263975262641907, |
|
"eval_regularize": 0.5160741806030273, |
|
"eval_runtime": 308.2096, |
|
"eval_samples_per_second": 18.786, |
|
"eval_steps_per_second": 1.567, |
|
"eval_wo_beta": 14.195608139038086, |
|
"step": 1150 |
|
}, |
|
{ |
|
"dpo_loss": 0.6932617425918579, |
|
"epoch": 3.4057628719886632, |
|
"grad_norm": 14.929601093854455, |
|
"learning_rate": 1.3898334684855647e-06, |
|
"logits": -0.4731375575065613, |
|
"logps": -80.88990783691406, |
|
"loss": 0.1076, |
|
"objective": 0.10945354402065277, |
|
"ranking_idealized": 0.5933333039283752, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5070833563804626, |
|
"regularize": 0.10945354402065277, |
|
"step": 1200, |
|
"wo_beta": 15.574357032775879 |
|
}, |
|
{ |
|
"epoch": 3.4057628719886632, |
|
"eval_dpo_loss": 0.7491946816444397, |
|
"eval_logits": -0.4946048855781555, |
|
"eval_logps": -83.99122619628906, |
|
"eval_loss": 0.5169116258621216, |
|
"eval_objective": 0.5159533619880676, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.5274327397346497, |
|
"eval_regularize": 0.5159533619880676, |
|
"eval_runtime": 309.8199, |
|
"eval_samples_per_second": 18.688, |
|
"eval_steps_per_second": 1.559, |
|
"eval_wo_beta": 14.124122619628906, |
|
"step": 1200 |
|
}, |
|
{ |
|
"dpo_loss": 0.6921232342720032, |
|
"epoch": 3.5474728389230044, |
|
"grad_norm": 14.184398569090499, |
|
"learning_rate": 1.1735202983664803e-06, |
|
"logits": -0.45394301414489746, |
|
"logps": -80.82902526855469, |
|
"loss": 0.0981, |
|
"objective": 0.09646416455507278, |
|
"ranking_idealized": 0.5962499976158142, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.51541668176651, |
|
"regularize": 0.09646416455507278, |
|
"step": 1250, |
|
"wo_beta": 15.384990692138672 |
|
}, |
|
{ |
|
"epoch": 3.5474728389230044, |
|
"eval_dpo_loss": 0.750022828578949, |
|
"eval_logits": -0.5087407231330872, |
|
"eval_logps": -83.37907409667969, |
|
"eval_loss": 0.5174669623374939, |
|
"eval_objective": 0.5184855461120605, |
|
"eval_ranking_idealized": 0.6030020713806152, |
|
"eval_ranking_idealized_expo": 0.5222567319869995, |
|
"eval_ranking_simple": 0.5310559272766113, |
|
"eval_regularize": 0.5184855461120605, |
|
"eval_runtime": 307.8564, |
|
"eval_samples_per_second": 18.807, |
|
"eval_steps_per_second": 1.569, |
|
"eval_wo_beta": 14.215774536132812, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 3.5474728389230044, |
|
"step": 1250, |
|
"total_flos": 0.0, |
|
"train_loss": 0.02363297004699707, |
|
"train_runtime": 6884.3268, |
|
"train_samples_per_second": 36.897, |
|
"train_steps_per_second": 0.256 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 1760, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|