{ "best_metric": 14.005528450012207, "best_model_checkpoint": "./qwen2.5-0.5b/qwen2.5-0.5b-expo-L1EXPO-ES-0.1/checkpoint-750", "epoch": 3.5474728389230044, "eval_steps": 50, "global_step": 1250, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "dpo_loss": 0.6931471824645996, "epoch": 0.002834199338686821, "grad_norm": 36.885068816813735, "learning_rate": 2.840909090909091e-08, "logits": -1.359458565711975, "logps": -84.69721221923828, "loss": 0.0051, "objective": 0.0046141319908201694, "ranking_idealized": 0.6458333134651184, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.5833333134651184, "regularize": 0.0046141319908201694, "step": 1, "wo_beta": 14.840873718261719 }, { "dpo_loss": 0.6924303770065308, "epoch": 0.14170996693434104, "grad_norm": 33.742835742644765, "learning_rate": 1.4204545454545458e-06, "logits": -1.4566550254821777, "logps": -84.23489379882812, "loss": 0.0448, "objective": 0.04280169680714607, "ranking_idealized": 0.608418345451355, "ranking_idealized_expo": 0.5229591727256775, "ranking_simple": 0.5221088528633118, "regularize": 0.04280169680714607, "step": 50, "wo_beta": 15.654285430908203 }, { "epoch": 0.14170996693434104, "eval_dpo_loss": 0.6935604810714722, "eval_logits": -1.4298962354660034, "eval_logps": -90.38883209228516, "eval_loss": 0.062233828008174896, "eval_objective": 0.062142737209796906, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5243270993232727, "eval_regularize": 0.062142737209796906, "eval_runtime": 308.7283, "eval_samples_per_second": 18.754, "eval_steps_per_second": 1.564, "eval_wo_beta": 16.076759338378906, "step": 50 }, { "dpo_loss": 0.7021370530128479, "epoch": 0.2834199338686821, "grad_norm": 31.729424259376046, "learning_rate": 2.8409090909090916e-06, "logits": -1.3946272134780884, "logps": -82.87468719482422, "loss": 0.1716, "objective": 0.17463459074497223, "ranking_idealized": 0.6016666889190674, "ranking_idealized_expo": 0.5141666531562805, "ranking_simple": 0.51541668176651, "regularize": 0.17463459074497223, "step": 100, "wo_beta": 15.276419639587402 }, { "epoch": 0.2834199338686821, "eval_dpo_loss": 0.6981683969497681, "eval_logits": -1.3597227334976196, "eval_logps": -88.76753234863281, "eval_loss": 0.1556352823972702, "eval_objective": 0.15588510036468506, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5274327397346497, "eval_regularize": 0.15588510036468506, "eval_runtime": 308.3116, "eval_samples_per_second": 18.78, "eval_steps_per_second": 1.567, "eval_wo_beta": 15.943617820739746, "step": 100 }, { "dpo_loss": 0.7128496170043945, "epoch": 0.42512990080302315, "grad_norm": 24.11205441144116, "learning_rate": 4.2613636363636365e-06, "logits": -1.251375675201416, "logps": -79.16511535644531, "loss": 0.2858, "objective": 0.2846659719944, "ranking_idealized": 0.6066666841506958, "ranking_idealized_expo": 0.5287500023841858, "ranking_simple": 0.5274999737739563, "regularize": 0.2846659719944, "step": 150, "wo_beta": 15.22218132019043 }, { "epoch": 0.42512990080302315, "eval_dpo_loss": 0.718317449092865, "eval_logits": -1.2545664310455322, "eval_logps": -79.50674438476562, "eval_loss": 0.2911944091320038, "eval_objective": 0.2922578752040863, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.522774338722229, "eval_regularize": 0.2922578752040863, "eval_runtime": 308.3872, "eval_samples_per_second": 18.775, "eval_steps_per_second": 1.566, "eval_wo_beta": 15.05699634552002, "step": 150 }, { "dpo_loss": 0.7332326173782349, "epoch": 0.5668398677373642, "grad_norm": 19.83023586651878, "learning_rate": 4.997168347957521e-06, "logits": -1.015448808670044, "logps": -76.77922058105469, "loss": 0.3544, "objective": 0.3552749752998352, "ranking_idealized": 0.5924999713897705, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5020833611488342, "regularize": 0.3552749752998352, "step": 200, "wo_beta": 15.356170654296875 }, { "epoch": 0.5668398677373642, "eval_dpo_loss": 0.7308588027954102, "eval_logits": -0.8432308435440063, "eval_logps": -83.84849548339844, "eval_loss": 0.3898463547229767, "eval_objective": 0.3890216052532196, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.522774338722229, "eval_regularize": 0.3890216052532196, "eval_runtime": 307.8908, "eval_samples_per_second": 18.805, "eval_steps_per_second": 1.569, "eval_wo_beta": 14.712230682373047, "step": 200 }, { "dpo_loss": 0.7182620763778687, "epoch": 0.7085498346717053, "grad_norm": 19.421703617823624, "learning_rate": 4.973122855144066e-06, "logits": -0.7580794095993042, "logps": -78.16065216064453, "loss": 0.375, "objective": 0.37717047333717346, "ranking_idealized": 0.5991666913032532, "ranking_idealized_expo": 0.5170833468437195, "ranking_simple": 0.5195833444595337, "regularize": 0.37717047333717346, "step": 250, "wo_beta": 15.653904914855957 }, { "epoch": 0.7085498346717053, "eval_dpo_loss": 0.735299289226532, "eval_logits": -0.673379123210907, "eval_logps": -81.28996276855469, "eval_loss": 0.4397831857204437, "eval_objective": 0.43750080466270447, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5243270993232727, "eval_regularize": 0.43750080466270447, "eval_runtime": 351.635, "eval_samples_per_second": 16.466, "eval_steps_per_second": 1.374, "eval_wo_beta": 14.472906112670898, "step": 250 }, { "dpo_loss": 0.72139972448349, "epoch": 0.8502598016060463, "grad_norm": 19.146332148877114, "learning_rate": 4.924776641419513e-06, "logits": -0.564231812953949, "logps": -79.54463195800781, "loss": 0.3592, "objective": 0.3580860495567322, "ranking_idealized": 0.5799999833106995, "ranking_idealized_expo": 0.4970833361148834, "ranking_simple": 0.5024999976158142, "regularize": 0.3580860495567322, "step": 300, "wo_beta": 15.114410400390625 }, { "epoch": 0.8502598016060463, "eval_dpo_loss": 0.734784722328186, "eval_logits": -0.5500932335853577, "eval_logps": -84.41443634033203, "eval_loss": 0.442239373922348, "eval_objective": 0.43877851963043213, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5232919454574585, "eval_regularize": 0.43877851963043213, "eval_runtime": 404.6503, "eval_samples_per_second": 14.309, "eval_steps_per_second": 1.194, "eval_wo_beta": 14.44029712677002, "step": 300 }, { "dpo_loss": 0.717363178730011, "epoch": 0.9919697685403873, "grad_norm": 17.11167089420277, "learning_rate": 4.8526047530778175e-06, "logits": -0.5000432133674622, "logps": -79.52243041992188, "loss": 0.3351, "objective": 0.33388689160346985, "ranking_idealized": 0.60916668176651, "ranking_idealized_expo": 0.5270833373069763, "ranking_simple": 0.5262500047683716, "regularize": 0.33388689160346985, "step": 350, "wo_beta": 15.228816986083984 }, { "epoch": 0.9919697685403873, "eval_dpo_loss": 0.735403299331665, "eval_logits": -0.5360206961631775, "eval_logps": -82.93754577636719, "eval_loss": 0.4675760865211487, "eval_objective": 0.4601868689060211, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5341615080833435, "eval_regularize": 0.4601868689060211, "eval_runtime": 308.044, "eval_samples_per_second": 18.796, "eval_steps_per_second": 1.568, "eval_wo_beta": 14.272198677062988, "step": 350 }, { "dpo_loss": 0.7120790481567383, "epoch": 1.1336797354747283, "grad_norm": 17.95944897166815, "learning_rate": 4.757316345716554e-06, "logits": -0.5945844054222107, "logps": -77.92745208740234, "loss": 0.3056, "objective": 0.3030960261821747, "ranking_idealized": 0.6087499856948853, "ranking_idealized_expo": 0.5337499976158142, "ranking_simple": 0.5316666960716248, "regularize": 0.3030960261821747, "step": 400, "wo_beta": 15.400132179260254 }, { "epoch": 1.1336797354747283, "eval_dpo_loss": 0.7469586730003357, "eval_logits": -0.5685753226280212, "eval_logps": -80.56059265136719, "eval_loss": 0.48422977328300476, "eval_objective": 0.48037421703338623, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5253623127937317, "eval_regularize": 0.48037421703338623, "eval_runtime": 314.7441, "eval_samples_per_second": 18.396, "eval_steps_per_second": 1.535, "eval_wo_beta": 14.281224250793457, "step": 400 }, { "dpo_loss": 0.7077716588973999, "epoch": 1.2753897024090695, "grad_norm": 15.643717424703908, "learning_rate": 4.639847716126855e-06, "logits": -0.5214452743530273, "logps": -78.54476928710938, "loss": 0.2932, "objective": 0.29641959071159363, "ranking_idealized": 0.5975000262260437, "ranking_idealized_expo": 0.5199999809265137, "ranking_simple": 0.5199999809265137, "regularize": 0.29641959071159363, "step": 450, "wo_beta": 15.791983604431152 }, { "epoch": 1.2753897024090695, "eval_dpo_loss": 0.7439451217651367, "eval_logits": -0.5565418004989624, "eval_logps": -83.62307739257812, "eval_loss": 0.48051390051841736, "eval_objective": 0.47548484802246094, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5279502868652344, "eval_regularize": 0.47548484802246094, "eval_runtime": 308.6602, "eval_samples_per_second": 18.758, "eval_steps_per_second": 1.565, "eval_wo_beta": 14.464012145996094, "step": 450 }, { "dpo_loss": 0.7124494314193726, "epoch": 1.4170996693434104, "grad_norm": 15.324248073271528, "learning_rate": 4.501353102310901e-06, "logits": -0.5437880754470825, "logps": -78.15093994140625, "loss": 0.2864, "objective": 0.2836955189704895, "ranking_idealized": 0.57833331823349, "ranking_idealized_expo": 0.4983333349227905, "ranking_simple": 0.5, "regularize": 0.2836955189704895, "step": 500, "wo_beta": 15.279319763183594 }, { "epoch": 1.4170996693434104, "eval_dpo_loss": 0.7510210275650024, "eval_logits": -0.6556914448738098, "eval_logps": -82.91778564453125, "eval_loss": 0.4964132010936737, "eval_objective": 0.4971453845500946, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5274327397346497, "eval_regularize": 0.4971453845500946, "eval_runtime": 307.59, "eval_samples_per_second": 18.824, "eval_steps_per_second": 1.57, "eval_wo_beta": 14.282269477844238, "step": 500 }, { "dpo_loss": 0.7097735404968262, "epoch": 1.5588096362777515, "grad_norm": 15.632233049837359, "learning_rate": 4.34319334202531e-06, "logits": -0.5551173686981201, "logps": -78.2597885131836, "loss": 0.2635, "objective": 0.264424592256546, "ranking_idealized": 0.5945833325386047, "ranking_idealized_expo": 0.5116666555404663, "ranking_simple": 0.512499988079071, "regularize": 0.264424592256546, "step": 550, "wo_beta": 15.108202934265137 }, { "epoch": 1.5588096362777515, "eval_dpo_loss": 0.7502700686454773, "eval_logits": -0.6183538436889648, "eval_logps": -81.16139221191406, "eval_loss": 0.502347469329834, "eval_objective": 0.5043270587921143, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.522774338722229, "eval_regularize": 0.5043270587921143, "eval_runtime": 307.9389, "eval_samples_per_second": 18.802, "eval_steps_per_second": 1.568, "eval_wo_beta": 14.063152313232422, "step": 550 }, { "dpo_loss": 0.708111047744751, "epoch": 1.7005196032120926, "grad_norm": 15.258212996967583, "learning_rate": 4.16692250129073e-06, "logits": -0.48317351937294006, "logps": -79.20243835449219, "loss": 0.2561, "objective": 0.2518368065357208, "ranking_idealized": 0.6004166603088379, "ranking_idealized_expo": 0.51583331823349, "ranking_simple": 0.5074999928474426, "regularize": 0.2518368065357208, "step": 600, "wo_beta": 15.102338790893555 }, { "epoch": 1.7005196032120926, "eval_dpo_loss": 0.7486839294433594, "eval_logits": -0.5805073380470276, "eval_logps": -84.7038803100586, "eval_loss": 0.49804311990737915, "eval_objective": 0.49642521142959595, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5232919454574585, "eval_regularize": 0.49642521142959595, "eval_runtime": 308.8037, "eval_samples_per_second": 18.75, "eval_steps_per_second": 1.564, "eval_wo_beta": 14.335240364074707, "step": 600 }, { "dpo_loss": 0.7057015299797058, "epoch": 1.8422295701464337, "grad_norm": 15.137988457806076, "learning_rate": 3.974272604254906e-06, "logits": -0.44393202662467957, "logps": -80.69384002685547, "loss": 0.2448, "objective": 0.24766255915164948, "ranking_idealized": 0.6058333516120911, "ranking_idealized_expo": 0.5295833349227905, "ranking_simple": 0.5350000262260437, "regularize": 0.24766255915164948, "step": 650, "wo_beta": 15.657441139221191 }, { "epoch": 1.8422295701464337, "eval_dpo_loss": 0.750303328037262, "eval_logits": -0.42741408944129944, "eval_logps": -83.46288299560547, "eval_loss": 0.5170512795448303, "eval_objective": 0.5191380381584167, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5232919454574585, "eval_regularize": 0.5191380381584167, "eval_runtime": 311.9724, "eval_samples_per_second": 18.559, "eval_steps_per_second": 1.548, "eval_wo_beta": 14.215264320373535, "step": 650 }, { "dpo_loss": 0.7065611481666565, "epoch": 1.9839395370807746, "grad_norm": 16.215629133943352, "learning_rate": 3.767136614452458e-06, "logits": -0.43040552735328674, "logps": -79.25086212158203, "loss": 0.2235, "objective": 0.22704358398914337, "ranking_idealized": 0.5954166650772095, "ranking_idealized_expo": 0.5129166841506958, "ranking_simple": 0.5083333253860474, "regularize": 0.22704358398914337, "step": 700, "wo_beta": 15.360807418823242 }, { "epoch": 1.9839395370807746, "eval_dpo_loss": 0.7482582330703735, "eval_logits": -0.5056679844856262, "eval_logps": -81.71964263916016, "eval_loss": 0.49625101685523987, "eval_objective": 0.4948585331439972, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5232919454574585, "eval_regularize": 0.4948585331439972, "eval_runtime": 312.5183, "eval_samples_per_second": 18.527, "eval_steps_per_second": 1.546, "eval_wo_beta": 14.20261001586914, "step": 700 }, { "dpo_loss": 0.7034626007080078, "epoch": 2.1256495040151155, "grad_norm": 14.439374560790109, "learning_rate": 3.547549834686222e-06, "logits": -0.4137415587902069, "logps": -78.55575561523438, "loss": 0.21, "objective": 0.20508375763893127, "ranking_idealized": 0.6066666841506958, "ranking_idealized_expo": 0.5133333206176758, "ranking_simple": 0.5099999904632568, "regularize": 0.20508375763893127, "step": 750, "wo_beta": 15.407984733581543 }, { "epoch": 2.1256495040151155, "eval_dpo_loss": 0.7511767148971558, "eval_logits": -0.47565215826034546, "eval_logps": -82.51917266845703, "eval_loss": 0.5233561396598816, "eval_objective": 0.5224636197090149, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5253623127937317, "eval_regularize": 0.5224636197090149, "eval_runtime": 307.6951, "eval_samples_per_second": 18.817, "eval_steps_per_second": 1.57, "eval_wo_beta": 14.005528450012207, "step": 750 }, { "dpo_loss": 0.6996860504150391, "epoch": 2.2673594709494567, "grad_norm": 14.763670008384054, "learning_rate": 3.3176699082935546e-06, "logits": -0.44839462637901306, "logps": -79.29713439941406, "loss": 0.1988, "objective": 0.19862671196460724, "ranking_idealized": 0.5941666960716248, "ranking_idealized_expo": 0.5129166841506958, "ranking_simple": 0.5116666555404663, "regularize": 0.19862671196460724, "step": 800, "wo_beta": 14.99027156829834 }, { "epoch": 2.2673594709494567, "eval_dpo_loss": 0.7496048808097839, "eval_logits": -0.5577788949012756, "eval_logps": -81.05644226074219, "eval_loss": 0.5139943361282349, "eval_objective": 0.511448085308075, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5295031070709229, "eval_regularize": 0.511448085308075, "eval_runtime": 308.0865, "eval_samples_per_second": 18.793, "eval_steps_per_second": 1.568, "eval_wo_beta": 14.102994918823242, "step": 800 }, { "dpo_loss": 0.7008050680160522, "epoch": 2.409069437883798, "grad_norm": 14.833532828768124, "learning_rate": 3.0797556183036582e-06, "logits": -0.4541783630847931, "logps": -78.78826141357422, "loss": 0.1845, "objective": 0.18498587608337402, "ranking_idealized": 0.5979166626930237, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5095833539962769, "regularize": 0.18498587608337402, "step": 850, "wo_beta": 15.201929092407227 }, { "epoch": 2.409069437883798, "eval_dpo_loss": 0.7516361474990845, "eval_logits": -0.512949526309967, "eval_logps": -82.63258361816406, "eval_loss": 0.520516574382782, "eval_objective": 0.518623948097229, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5310559272766113, "eval_regularize": 0.518623948097229, "eval_runtime": 309.5699, "eval_samples_per_second": 18.703, "eval_steps_per_second": 1.56, "eval_wo_beta": 14.151838302612305, "step": 850 }, { "dpo_loss": 0.7016371488571167, "epoch": 2.550779404818139, "grad_norm": 16.31271001796413, "learning_rate": 2.8361446928038298e-06, "logits": -0.42467382550239563, "logps": -79.11641693115234, "loss": 0.1741, "objective": 0.1748921126127243, "ranking_idealized": 0.5887500047683716, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.51583331823349, "regularize": 0.1748921126127243, "step": 900, "wo_beta": 15.531429290771484 }, { "epoch": 2.550779404818139, "eval_dpo_loss": 0.7507295608520508, "eval_logits": -0.4789924621582031, "eval_logps": -82.98091125488281, "eval_loss": 0.5132278800010681, "eval_objective": 0.5117725729942322, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.523809552192688, "eval_regularize": 0.5117725729942322, "eval_runtime": 311.1948, "eval_samples_per_second": 18.606, "eval_steps_per_second": 1.552, "eval_wo_beta": 14.245877265930176, "step": 900 }, { "dpo_loss": 0.6993770003318787, "epoch": 2.69248937175248, "grad_norm": 14.7867391875567, "learning_rate": 2.5892308345974517e-06, "logits": -0.4542914927005768, "logps": -79.9416732788086, "loss": 0.1659, "objective": 0.16475924849510193, "ranking_idealized": 0.5975000262260437, "ranking_idealized_expo": 0.5087500214576721, "ranking_simple": 0.5141666531562805, "regularize": 0.16475924849510193, "step": 950, "wo_beta": 15.521940231323242 }, { "epoch": 2.69248937175248, "eval_dpo_loss": 0.7500060796737671, "eval_logits": -0.4840329587459564, "eval_logps": -83.8330307006836, "eval_loss": 0.5188658237457275, "eval_objective": 0.5192957520484924, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.523809552192688, "eval_regularize": 0.5192957520484924, "eval_runtime": 310.1876, "eval_samples_per_second": 18.666, "eval_steps_per_second": 1.557, "eval_wo_beta": 14.302889823913574, "step": 950 }, { "dpo_loss": 0.7024207711219788, "epoch": 2.8341993386868207, "grad_norm": 14.42838893249385, "learning_rate": 2.341440200858589e-06, "logits": -0.4285065233707428, "logps": -78.83317565917969, "loss": 0.1539, "objective": 0.15508733689785004, "ranking_idealized": 0.6020833253860474, "ranking_idealized_expo": 0.5104166865348816, "ranking_simple": 0.5091666579246521, "regularize": 0.15508733689785004, "step": 1000, "wo_beta": 15.06278133392334 }, { "epoch": 2.8341993386868207, "eval_dpo_loss": 0.7498777508735657, "eval_logits": -0.46712055802345276, "eval_logps": -82.88314056396484, "eval_loss": 0.5136557817459106, "eval_objective": 0.5127004981040955, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5269151329994202, "eval_regularize": 0.5127004981040955, "eval_runtime": 308.4784, "eval_samples_per_second": 18.77, "eval_steps_per_second": 1.566, "eval_wo_beta": 14.192536354064941, "step": 1000 }, { "dpo_loss": 0.6952893733978271, "epoch": 2.9806329711856403, "grad_norm": 14.77922239882152, "learning_rate": 2.0952075638923656e-06, "logits": -0.4941651523113251, "logps": -79.95951080322266, "loss": 0.1445, "objective": 0.14514465630054474, "ranking_idealized": 0.6079166531562805, "ranking_idealized_expo": 0.5183333158493042, "ranking_simple": 0.5220833420753479, "regularize": 0.14514465630054474, "step": 1050, "wo_beta": 15.50859546661377 }, { "epoch": 2.9806329711856403, "eval_dpo_loss": 0.7478482127189636, "eval_logits": -0.5530552864074707, "eval_logps": -83.16773986816406, "eval_loss": 0.5116256475448608, "eval_objective": 0.511193573474884, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5248447060585022, "eval_regularize": 0.511193573474884, "eval_runtime": 310.4184, "eval_samples_per_second": 18.652, "eval_steps_per_second": 1.556, "eval_wo_beta": 14.214123725891113, "step": 1050 }, { "dpo_loss": 0.6958988308906555, "epoch": 3.122342938119981, "grad_norm": 14.047766487106658, "learning_rate": 1.852952387243698e-06, "logits": -0.4758566915988922, "logps": -80.04509735107422, "loss": 0.1261, "objective": 0.12395481020212173, "ranking_idealized": 0.6070833206176758, "ranking_idealized_expo": 0.5304166674613953, "ranking_simple": 0.5270833373069763, "regularize": 0.12395481020212173, "step": 1100, "wo_beta": 15.6008939743042 }, { "epoch": 3.122342938119981, "eval_dpo_loss": 0.7515185475349426, "eval_logits": -0.5487966537475586, "eval_logps": -83.59542846679688, "eval_loss": 0.515699565410614, "eval_objective": 0.5165062546730042, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5232919454574585, "eval_regularize": 0.5165062546730042, "eval_runtime": 309.3286, "eval_samples_per_second": 18.718, "eval_steps_per_second": 1.561, "eval_wo_beta": 14.178275108337402, "step": 1100 }, { "dpo_loss": 0.6931909322738647, "epoch": 3.264052905054322, "grad_norm": 14.963308385384513, "learning_rate": 1.617055052228768e-06, "logits": -0.4729629456996918, "logps": -79.84102630615234, "loss": 0.1146, "objective": 0.11260777711868286, "ranking_idealized": 0.5950000286102295, "ranking_idealized_expo": 0.5095833539962769, "ranking_simple": 0.5149999856948853, "regularize": 0.11260777711868286, "step": 1150, "wo_beta": 15.298945426940918 }, { "epoch": 3.264052905054322, "eval_dpo_loss": 0.7487252354621887, "eval_logits": -0.5372445583343506, "eval_logps": -83.42646789550781, "eval_loss": 0.5174793601036072, "eval_objective": 0.5160741806030273, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5263975262641907, "eval_regularize": 0.5160741806030273, "eval_runtime": 308.2096, "eval_samples_per_second": 18.786, "eval_steps_per_second": 1.567, "eval_wo_beta": 14.195608139038086, "step": 1150 }, { "dpo_loss": 0.6932617425918579, "epoch": 3.4057628719886632, "grad_norm": 14.929601093854455, "learning_rate": 1.3898334684855647e-06, "logits": -0.4731375575065613, "logps": -80.88990783691406, "loss": 0.1076, "objective": 0.10945354402065277, "ranking_idealized": 0.5933333039283752, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.5070833563804626, "regularize": 0.10945354402065277, "step": 1200, "wo_beta": 15.574357032775879 }, { "epoch": 3.4057628719886632, "eval_dpo_loss": 0.7491946816444397, "eval_logits": -0.4946048855781555, "eval_logps": -83.99122619628906, "eval_loss": 0.5169116258621216, "eval_objective": 0.5159533619880676, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5274327397346497, "eval_regularize": 0.5159533619880676, "eval_runtime": 309.8199, "eval_samples_per_second": 18.688, "eval_steps_per_second": 1.559, "eval_wo_beta": 14.124122619628906, "step": 1200 }, { "dpo_loss": 0.6921232342720032, "epoch": 3.5474728389230044, "grad_norm": 14.184398569090499, "learning_rate": 1.1735202983664803e-06, "logits": -0.45394301414489746, "logps": -80.82902526855469, "loss": 0.0981, "objective": 0.09646416455507278, "ranking_idealized": 0.5962499976158142, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.51541668176651, "regularize": 0.09646416455507278, "step": 1250, "wo_beta": 15.384990692138672 }, { "epoch": 3.5474728389230044, "eval_dpo_loss": 0.750022828578949, "eval_logits": -0.5087407231330872, "eval_logps": -83.37907409667969, "eval_loss": 0.5174669623374939, "eval_objective": 0.5184855461120605, "eval_ranking_idealized": 0.6030020713806152, "eval_ranking_idealized_expo": 0.5222567319869995, "eval_ranking_simple": 0.5310559272766113, "eval_regularize": 0.5184855461120605, "eval_runtime": 307.8564, "eval_samples_per_second": 18.807, "eval_steps_per_second": 1.569, "eval_wo_beta": 14.215774536132812, "step": 1250 }, { "epoch": 3.5474728389230044, "step": 1250, "total_flos": 0.0, "train_loss": 0.02363297004699707, "train_runtime": 6884.3268, "train_samples_per_second": 36.897, "train_steps_per_second": 0.256 } ], "logging_steps": 50, "max_steps": 1760, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }