|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 4.988190836088805, |
|
"eval_steps": 50, |
|
"global_step": 880, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"dpo_loss": 0.6931471824645996, |
|
"epoch": 0.005668398677373642, |
|
"grad_norm": 134135.90281322593, |
|
"learning_rate": 5.681818181818182e-08, |
|
"logits": -1.3147305250167847, |
|
"logps": -88.0877456665039, |
|
"loss": 0.4113, |
|
"objective": 0.41588976979255676, |
|
"ranking_idealized": 0.5208333134651184, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.41588976979255676, |
|
"step": 1 |
|
}, |
|
{ |
|
"dpo_loss": 13.11236572265625, |
|
"epoch": 0.02834199338686821, |
|
"grad_norm": 235831.3417258827, |
|
"learning_rate": 2.840909090909091e-07, |
|
"logits": -1.3680161237716675, |
|
"logps": -84.41597747802734, |
|
"loss": 26.5575, |
|
"objective": 27.22771453857422, |
|
"ranking_idealized": 0.546875, |
|
"ranking_idealized_expo": 0.546875, |
|
"ranking_simple": 0.546875, |
|
"regularize": 27.22771453857422, |
|
"step": 5 |
|
}, |
|
{ |
|
"dpo_loss": 32.3316535949707, |
|
"epoch": 0.05668398677373642, |
|
"grad_norm": 273612.5381400241, |
|
"learning_rate": 5.681818181818182e-07, |
|
"logits": -1.44873046875, |
|
"logps": -83.52092742919922, |
|
"loss": 73.6592, |
|
"objective": 67.45610809326172, |
|
"ranking_idealized": 0.5083333253860474, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 67.45610809326172, |
|
"step": 10 |
|
}, |
|
{ |
|
"dpo_loss": 62.25831604003906, |
|
"epoch": 0.08502598016060463, |
|
"grad_norm": 199991.8454482197, |
|
"learning_rate": 8.522727272727273e-07, |
|
"logits": -1.4123358726501465, |
|
"logps": -83.7873306274414, |
|
"loss": 136.6551, |
|
"objective": 129.5699005126953, |
|
"ranking_idealized": 0.5041666626930237, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 129.5699005126953, |
|
"step": 15 |
|
}, |
|
{ |
|
"dpo_loss": 97.5913314819336, |
|
"epoch": 0.11336797354747284, |
|
"grad_norm": 137050.47037487474, |
|
"learning_rate": 1.1363636363636364e-06, |
|
"logits": -1.39812433719635, |
|
"logps": -84.45435333251953, |
|
"loss": 198.982, |
|
"objective": 196.08615112304688, |
|
"ranking_idealized": 0.5375000238418579, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 196.08615112304688, |
|
"step": 20 |
|
}, |
|
{ |
|
"dpo_loss": 134.1880340576172, |
|
"epoch": 0.14170996693434104, |
|
"grad_norm": 128505.67292666303, |
|
"learning_rate": 1.4204545454545458e-06, |
|
"logits": -1.4363852739334106, |
|
"logps": -84.79105377197266, |
|
"loss": 276.7151, |
|
"objective": 282.50897216796875, |
|
"ranking_idealized": 0.4958333373069763, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5041666626930237, |
|
"regularize": 282.50897216796875, |
|
"step": 25 |
|
}, |
|
{ |
|
"dpo_loss": 166.0544891357422, |
|
"epoch": 0.17005196032120926, |
|
"grad_norm": 181462.71968306202, |
|
"learning_rate": 1.7045454545454546e-06, |
|
"logits": -1.4044525623321533, |
|
"logps": -85.5672378540039, |
|
"loss": 371.4895, |
|
"objective": 362.3011169433594, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 362.3011169433594, |
|
"step": 30 |
|
}, |
|
{ |
|
"dpo_loss": 249.85475158691406, |
|
"epoch": 0.19839395370807747, |
|
"grad_norm": 131607.94522082573, |
|
"learning_rate": 1.9886363636363638e-06, |
|
"logits": -1.3713446855545044, |
|
"logps": -82.86891174316406, |
|
"loss": 459.5799, |
|
"objective": 475.0990905761719, |
|
"ranking_idealized": 0.5041666626930237, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5, |
|
"regularize": 475.0990905761719, |
|
"step": 35 |
|
}, |
|
{ |
|
"dpo_loss": 300.4722900390625, |
|
"epoch": 0.22673594709494568, |
|
"grad_norm": 134716.80909593205, |
|
"learning_rate": 2.2727272727272728e-06, |
|
"logits": -1.349226713180542, |
|
"logps": -83.15601348876953, |
|
"loss": 533.5176, |
|
"objective": 542.9343872070312, |
|
"ranking_idealized": 0.5208333134651184, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 542.9343872070312, |
|
"step": 40 |
|
}, |
|
{ |
|
"dpo_loss": 362.15899658203125, |
|
"epoch": 0.25507794048181387, |
|
"grad_norm": 117630.82464120697, |
|
"learning_rate": 2.556818181818182e-06, |
|
"logits": -1.4267643690109253, |
|
"logps": -84.82117462158203, |
|
"loss": 670.75, |
|
"objective": 727.4680786132812, |
|
"ranking_idealized": 0.5375000238418579, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 727.4680786132812, |
|
"step": 45 |
|
}, |
|
{ |
|
"dpo_loss": 410.1810302734375, |
|
"epoch": 0.2834199338686821, |
|
"grad_norm": 108815.79636709657, |
|
"learning_rate": 2.8409090909090916e-06, |
|
"logits": -1.2878683805465698, |
|
"logps": -84.17786407470703, |
|
"loss": 737.2396, |
|
"objective": 742.7522583007812, |
|
"ranking_idealized": 0.42500001192092896, |
|
"ranking_idealized_expo": 0.42500001192092896, |
|
"ranking_simple": 0.4375, |
|
"regularize": 742.7522583007812, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.2834199338686821, |
|
"eval_dpo_loss": 221.7373504638672, |
|
"eval_logits": -1.3124604225158691, |
|
"eval_logps": -92.02384185791016, |
|
"eval_loss": 421.9188232421875, |
|
"eval_objective": 428.8151550292969, |
|
"eval_ranking_idealized": 0.5092975497245789, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5092975497245789, |
|
"eval_regularize": 428.8151550292969, |
|
"eval_runtime": 260.7367, |
|
"eval_samples_per_second": 22.206, |
|
"eval_steps_per_second": 0.928, |
|
"step": 50 |
|
}, |
|
{ |
|
"dpo_loss": 504.61846923828125, |
|
"epoch": 0.3117619272555503, |
|
"grad_norm": 119543.6777826292, |
|
"learning_rate": 3.125e-06, |
|
"logits": -1.3338755369186401, |
|
"logps": -85.74323272705078, |
|
"loss": 876.1629, |
|
"objective": 957.7632446289062, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 957.7632446289062, |
|
"step": 55 |
|
}, |
|
{ |
|
"dpo_loss": 526.1536254882812, |
|
"epoch": 0.3401039206424185, |
|
"grad_norm": 107179.68101966614, |
|
"learning_rate": 3.409090909090909e-06, |
|
"logits": -1.3320441246032715, |
|
"logps": -82.8685531616211, |
|
"loss": 989.766, |
|
"objective": 928.43359375, |
|
"ranking_idealized": 0.512499988079071, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 928.43359375, |
|
"step": 60 |
|
}, |
|
{ |
|
"dpo_loss": 544.1323852539062, |
|
"epoch": 0.3684459140292867, |
|
"grad_norm": 108350.88761754551, |
|
"learning_rate": 3.6931818181818186e-06, |
|
"logits": -1.2518764734268188, |
|
"logps": -79.81038665771484, |
|
"loss": 1084.4579, |
|
"objective": 1179.59228515625, |
|
"ranking_idealized": 0.5083333253860474, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.48750001192092896, |
|
"regularize": 1179.59228515625, |
|
"step": 65 |
|
}, |
|
{ |
|
"dpo_loss": 636.8450927734375, |
|
"epoch": 0.39678790741615494, |
|
"grad_norm": 136538.92716952198, |
|
"learning_rate": 3.9772727272727275e-06, |
|
"logits": -1.2635269165039062, |
|
"logps": -79.40995788574219, |
|
"loss": 1189.2933, |
|
"objective": 1093.83642578125, |
|
"ranking_idealized": 0.5166666507720947, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.4958333373069763, |
|
"regularize": 1093.83642578125, |
|
"step": 70 |
|
}, |
|
{ |
|
"dpo_loss": 679.4451904296875, |
|
"epoch": 0.42512990080302315, |
|
"grad_norm": 97239.20388663356, |
|
"learning_rate": 4.2613636363636365e-06, |
|
"logits": -1.2477833032608032, |
|
"logps": -78.69037628173828, |
|
"loss": 1162.864, |
|
"objective": 1142.31640625, |
|
"ranking_idealized": 0.5458333492279053, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 1142.31640625, |
|
"step": 75 |
|
}, |
|
{ |
|
"dpo_loss": 752.3662719726562, |
|
"epoch": 0.45347189418989137, |
|
"grad_norm": 87641.25402861548, |
|
"learning_rate": 4.5454545454545455e-06, |
|
"logits": -1.2982757091522217, |
|
"logps": -78.63927459716797, |
|
"loss": 1282.2214, |
|
"objective": 1388.2518310546875, |
|
"ranking_idealized": 0.5166666507720947, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 1388.2518310546875, |
|
"step": 80 |
|
}, |
|
{ |
|
"dpo_loss": 713.9522705078125, |
|
"epoch": 0.4818138875767596, |
|
"grad_norm": 88675.6017495157, |
|
"learning_rate": 4.829545454545455e-06, |
|
"logits": -1.225476622581482, |
|
"logps": -76.3520278930664, |
|
"loss": 1352.1976, |
|
"objective": 1389.2098388671875, |
|
"ranking_idealized": 0.5249999761581421, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 1389.2098388671875, |
|
"step": 85 |
|
}, |
|
{ |
|
"dpo_loss": 791.7650756835938, |
|
"epoch": 0.5101558809636277, |
|
"grad_norm": 89967.15997141204, |
|
"learning_rate": 4.999921328558333e-06, |
|
"logits": -1.026414155960083, |
|
"logps": -77.92498016357422, |
|
"loss": 1440.1646, |
|
"objective": 1395.8138427734375, |
|
"ranking_idealized": 0.4791666567325592, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.44583332538604736, |
|
"regularize": 1395.8138427734375, |
|
"step": 90 |
|
}, |
|
{ |
|
"dpo_loss": 791.7685546875, |
|
"epoch": 0.538497874350496, |
|
"grad_norm": 91320.19274895087, |
|
"learning_rate": 4.999036331701828e-06, |
|
"logits": -1.0597717761993408, |
|
"logps": -76.71431732177734, |
|
"loss": 1517.9578, |
|
"objective": 1621.507080078125, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 1621.507080078125, |
|
"step": 95 |
|
}, |
|
{ |
|
"dpo_loss": 673.1317749023438, |
|
"epoch": 0.5668398677373642, |
|
"grad_norm": 76415.34463234508, |
|
"learning_rate": 4.997168347957521e-06, |
|
"logits": -1.0299068689346313, |
|
"logps": -79.222412109375, |
|
"loss": 1559.0492, |
|
"objective": 1632.038330078125, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 1632.038330078125, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.5668398677373642, |
|
"eval_dpo_loss": 733.09912109375, |
|
"eval_logits": -1.0226072072982788, |
|
"eval_logps": -82.79534149169922, |
|
"eval_loss": 1500.8583984375, |
|
"eval_objective": 1492.8040771484375, |
|
"eval_ranking_idealized": 0.5092975497245789, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5072314143180847, |
|
"eval_regularize": 1492.8040771484375, |
|
"eval_runtime": 260.8895, |
|
"eval_samples_per_second": 22.193, |
|
"eval_steps_per_second": 0.928, |
|
"step": 100 |
|
}, |
|
{ |
|
"dpo_loss": 821.6388549804688, |
|
"epoch": 0.5951818611242324, |
|
"grad_norm": 78646.58325790247, |
|
"learning_rate": 4.994318112090048e-06, |
|
"logits": -0.8470949530601501, |
|
"logps": -78.14350891113281, |
|
"loss": 1588.0706, |
|
"objective": 1580.76416015625, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 1580.76416015625, |
|
"step": 105 |
|
}, |
|
{ |
|
"dpo_loss": 813.548583984375, |
|
"epoch": 0.6235238545111006, |
|
"grad_norm": 80078.74787055497, |
|
"learning_rate": 4.990486745229364e-06, |
|
"logits": -0.9708471298217773, |
|
"logps": -76.51264953613281, |
|
"loss": 1625.5101, |
|
"objective": 1670.44873046875, |
|
"ranking_idealized": 0.44999998807907104, |
|
"ranking_idealized_expo": 0.44999998807907104, |
|
"ranking_simple": 0.4416666626930237, |
|
"regularize": 1670.44873046875, |
|
"step": 110 |
|
}, |
|
{ |
|
"dpo_loss": 967.8834838867188, |
|
"epoch": 0.6518658478979689, |
|
"grad_norm": 81186.63407260107, |
|
"learning_rate": 4.985675754429744e-06, |
|
"logits": -0.9930670261383057, |
|
"logps": -75.4658432006836, |
|
"loss": 1605.4639, |
|
"objective": 1732.50439453125, |
|
"ranking_idealized": 0.4833333194255829, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.4749999940395355, |
|
"regularize": 1732.50439453125, |
|
"step": 115 |
|
}, |
|
{ |
|
"dpo_loss": 781.5300903320312, |
|
"epoch": 0.680207841284837, |
|
"grad_norm": 73919.01988932796, |
|
"learning_rate": 4.9798870320769884e-06, |
|
"logits": -0.9557650685310364, |
|
"logps": -76.91903686523438, |
|
"loss": 1498.1906, |
|
"objective": 1478.94482421875, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 1478.94482421875, |
|
"step": 120 |
|
}, |
|
{ |
|
"dpo_loss": 951.0021362304688, |
|
"epoch": 0.7085498346717053, |
|
"grad_norm": 72729.38636733989, |
|
"learning_rate": 4.973122855144066e-06, |
|
"logits": -0.9417923092842102, |
|
"logps": -75.4057846069336, |
|
"loss": 1555.9701, |
|
"objective": 1580.7845458984375, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 1580.7845458984375, |
|
"step": 125 |
|
}, |
|
{ |
|
"dpo_loss": 830.5226440429688, |
|
"epoch": 0.7368918280585735, |
|
"grad_norm": 76215.9858570087, |
|
"learning_rate": 4.965385884295467e-06, |
|
"logits": -1.0607830286026, |
|
"logps": -75.56150817871094, |
|
"loss": 1535.5706, |
|
"objective": 1588.941650390625, |
|
"ranking_idealized": 0.44999998807907104, |
|
"ranking_idealized_expo": 0.44999998807907104, |
|
"ranking_simple": 0.4749999940395355, |
|
"regularize": 1588.941650390625, |
|
"step": 130 |
|
}, |
|
{ |
|
"dpo_loss": 714.103271484375, |
|
"epoch": 0.7652338214454416, |
|
"grad_norm": 72632.27368489993, |
|
"learning_rate": 4.956679162840646e-06, |
|
"logits": -0.9477968215942383, |
|
"logps": -78.04027557373047, |
|
"loss": 1558.5909, |
|
"objective": 1568.9949951171875, |
|
"ranking_idealized": 0.5291666388511658, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 1568.9949951171875, |
|
"step": 135 |
|
}, |
|
{ |
|
"dpo_loss": 747.6431884765625, |
|
"epoch": 0.7935758148323099, |
|
"grad_norm": 70212.75101987133, |
|
"learning_rate": 4.947006115536947e-06, |
|
"logits": -0.7389863729476929, |
|
"logps": -78.2565689086914, |
|
"loss": 1619.474, |
|
"objective": 1611.41845703125, |
|
"ranking_idealized": 0.5041666626930237, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 1611.41845703125, |
|
"step": 140 |
|
}, |
|
{ |
|
"dpo_loss": 722.9232788085938, |
|
"epoch": 0.821917808219178, |
|
"grad_norm": 68650.23095680359, |
|
"learning_rate": 4.9363705472424825e-06, |
|
"logits": -0.7260185480117798, |
|
"logps": -78.47148132324219, |
|
"loss": 1489.2668, |
|
"objective": 1463.8128662109375, |
|
"ranking_idealized": 0.4833333194255829, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.4749999940395355, |
|
"regularize": 1463.8128662109375, |
|
"step": 145 |
|
}, |
|
{ |
|
"dpo_loss": 799.6663818359375, |
|
"epoch": 0.8502598016060463, |
|
"grad_norm": 70266.02356268882, |
|
"learning_rate": 4.924776641419513e-06, |
|
"logits": -0.7246825695037842, |
|
"logps": -77.73950958251953, |
|
"loss": 1544.2886, |
|
"objective": 1557.9066162109375, |
|
"ranking_idealized": 0.5375000238418579, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 1557.9066162109375, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.8502598016060463, |
|
"eval_dpo_loss": 936.6275634765625, |
|
"eval_logits": -0.8486141562461853, |
|
"eval_logps": -83.39346313476562, |
|
"eval_loss": 1796.87939453125, |
|
"eval_objective": 1837.2159423828125, |
|
"eval_ranking_idealized": 0.5092975497245789, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.49896693229675293, |
|
"eval_regularize": 1837.2159423828125, |
|
"eval_runtime": 259.8051, |
|
"eval_samples_per_second": 22.286, |
|
"eval_steps_per_second": 0.931, |
|
"step": 150 |
|
}, |
|
{ |
|
"dpo_loss": 733.1316528320312, |
|
"epoch": 0.8786017949929145, |
|
"grad_norm": 70225.42078139378, |
|
"learning_rate": 4.9122289584888926e-06, |
|
"logits": -0.8346138596534729, |
|
"logps": -76.66129302978516, |
|
"loss": 1527.9433, |
|
"objective": 1578.56982421875, |
|
"ranking_idealized": 0.512499988079071, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 1578.56982421875, |
|
"step": 155 |
|
}, |
|
{ |
|
"dpo_loss": 784.9417724609375, |
|
"epoch": 0.9069437883797827, |
|
"grad_norm": 74170.24944845453, |
|
"learning_rate": 4.8987324340362445e-06, |
|
"logits": -0.8054352402687073, |
|
"logps": -76.35670471191406, |
|
"loss": 1490.3339, |
|
"objective": 1511.6434326171875, |
|
"ranking_idealized": 0.4958333373069763, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.49166667461395264, |
|
"regularize": 1511.6434326171875, |
|
"step": 160 |
|
}, |
|
{ |
|
"dpo_loss": 794.072998046875, |
|
"epoch": 0.9352857817666509, |
|
"grad_norm": 72453.13771553633, |
|
"learning_rate": 4.884292376870567e-06, |
|
"logits": -0.7948398590087891, |
|
"logps": -77.8802490234375, |
|
"loss": 1440.4338, |
|
"objective": 1524.854248046875, |
|
"ranking_idealized": 0.5208333134651184, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 1524.854248046875, |
|
"step": 165 |
|
}, |
|
{ |
|
"dpo_loss": 855.9963989257812, |
|
"epoch": 0.9636277751535192, |
|
"grad_norm": 72841.32203749125, |
|
"learning_rate": 4.868914466936038e-06, |
|
"logits": -0.7872663140296936, |
|
"logps": -78.33912658691406, |
|
"loss": 1525.1104, |
|
"objective": 1553.3717041015625, |
|
"ranking_idealized": 0.5083333253860474, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 1553.3717041015625, |
|
"step": 170 |
|
}, |
|
{ |
|
"dpo_loss": 774.2244262695312, |
|
"epoch": 0.9919697685403873, |
|
"grad_norm": 70428.90742393199, |
|
"learning_rate": 4.8526047530778175e-06, |
|
"logits": -0.7365032434463501, |
|
"logps": -77.8334732055664, |
|
"loss": 1490.3975, |
|
"objective": 1444.81201171875, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.574999988079071, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 1444.81201171875, |
|
"step": 175 |
|
}, |
|
{ |
|
"dpo_loss": 819.1224975585938, |
|
"epoch": 1.0203117619272555, |
|
"grad_norm": 68862.05104644371, |
|
"learning_rate": 4.835369650662767e-06, |
|
"logits": -0.7541021108627319, |
|
"logps": -79.26795959472656, |
|
"loss": 1489.1468, |
|
"objective": 1596.475830078125, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 1596.475830078125, |
|
"step": 180 |
|
}, |
|
{ |
|
"dpo_loss": 750.1921997070312, |
|
"epoch": 1.0486537553141237, |
|
"grad_norm": 68584.6258680744, |
|
"learning_rate": 4.817215939055984e-06, |
|
"logits": -0.6492658853530884, |
|
"logps": -76.89971160888672, |
|
"loss": 1358.0238, |
|
"objective": 1361.669921875, |
|
"ranking_idealized": 0.4833333194255829, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.47083333134651184, |
|
"regularize": 1361.669921875, |
|
"step": 185 |
|
}, |
|
{ |
|
"dpo_loss": 857.0948486328125, |
|
"epoch": 1.076995748700992, |
|
"grad_norm": 66212.13495810713, |
|
"learning_rate": 4.798150758954164e-06, |
|
"logits": -0.7034427523612976, |
|
"logps": -77.8251953125, |
|
"loss": 1414.8312, |
|
"objective": 1527.893310546875, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.5625, |
|
"regularize": 1527.893310546875, |
|
"step": 190 |
|
}, |
|
{ |
|
"dpo_loss": 726.6162719726562, |
|
"epoch": 1.10533774208786, |
|
"grad_norm": 69617.2929524415, |
|
"learning_rate": 4.778181609576832e-06, |
|
"logits": -0.7142274379730225, |
|
"logps": -76.12057495117188, |
|
"loss": 1356.5107, |
|
"objective": 1371.98046875, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 1371.98046875, |
|
"step": 195 |
|
}, |
|
{ |
|
"dpo_loss": 723.1171875, |
|
"epoch": 1.1336797354747283, |
|
"grad_norm": 68913.9279715263, |
|
"learning_rate": 4.757316345716554e-06, |
|
"logits": -0.7645713686943054, |
|
"logps": -76.01897430419922, |
|
"loss": 1387.1779, |
|
"objective": 1441.4329833984375, |
|
"ranking_idealized": 0.5083333253860474, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5, |
|
"regularize": 1441.4329833984375, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.1336797354747283, |
|
"eval_dpo_loss": 1018.1060180664062, |
|
"eval_logits": -0.801040768623352, |
|
"eval_logps": -81.00394439697266, |
|
"eval_loss": 1946.64453125, |
|
"eval_objective": 1988.5870361328125, |
|
"eval_ranking_idealized": 0.5092975497245789, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5010330677032471, |
|
"eval_regularize": 1988.5870361328125, |
|
"eval_runtime": 261.3337, |
|
"eval_samples_per_second": 22.156, |
|
"eval_steps_per_second": 0.926, |
|
"step": 200 |
|
}, |
|
{ |
|
"dpo_loss": 628.9264526367188, |
|
"epoch": 1.1620217288615966, |
|
"grad_norm": 63479.38452604848, |
|
"learning_rate": 4.735563174649278e-06, |
|
"logits": -0.7396198511123657, |
|
"logps": -76.6708755493164, |
|
"loss": 1375.7494, |
|
"objective": 1379.516845703125, |
|
"ranking_idealized": 0.48750001192092896, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 1379.516845703125, |
|
"step": 205 |
|
}, |
|
{ |
|
"dpo_loss": 673.1471557617188, |
|
"epoch": 1.1903637222484649, |
|
"grad_norm": 64539.915055366386, |
|
"learning_rate": 4.7129306529060415e-06, |
|
"logits": -0.721174955368042, |
|
"logps": -77.625732421875, |
|
"loss": 1383.7545, |
|
"objective": 1384.804443359375, |
|
"ranking_idealized": 0.4958333373069763, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 1384.804443359375, |
|
"step": 210 |
|
}, |
|
{ |
|
"dpo_loss": 747.7578125, |
|
"epoch": 1.2187057156353331, |
|
"grad_norm": 62227.15430087378, |
|
"learning_rate": 4.68942768290728e-06, |
|
"logits": -0.6516799926757812, |
|
"logps": -77.21683502197266, |
|
"loss": 1315.6651, |
|
"objective": 1423.86669921875, |
|
"ranking_idealized": 0.4833333194255829, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.4749999940395355, |
|
"regularize": 1423.86669921875, |
|
"step": 215 |
|
}, |
|
{ |
|
"dpo_loss": 679.1942749023438, |
|
"epoch": 1.2470477090222012, |
|
"grad_norm": 74804.77662673105, |
|
"learning_rate": 4.665063509461098e-06, |
|
"logits": -0.5166414976119995, |
|
"logps": -77.51664733886719, |
|
"loss": 1282.1909, |
|
"objective": 1365.6038818359375, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.6000000238418579, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 1365.6038818359375, |
|
"step": 220 |
|
}, |
|
{ |
|
"dpo_loss": 573.4988403320312, |
|
"epoch": 1.2753897024090695, |
|
"grad_norm": 68013.96758028683, |
|
"learning_rate": 4.639847716126855e-06, |
|
"logits": -0.6740496754646301, |
|
"logps": -78.64627075195312, |
|
"loss": 1277.235, |
|
"objective": 1197.0047607421875, |
|
"ranking_idealized": 0.5291666388511658, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 1197.0047607421875, |
|
"step": 225 |
|
}, |
|
{ |
|
"dpo_loss": 539.927978515625, |
|
"epoch": 1.3037316957959377, |
|
"grad_norm": 63535.71644157263, |
|
"learning_rate": 4.613790221445511e-06, |
|
"logits": -0.649699866771698, |
|
"logps": -77.72300720214844, |
|
"loss": 1240.6593, |
|
"objective": 1214.3502197265625, |
|
"ranking_idealized": 0.5083333253860474, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 1214.3502197265625, |
|
"step": 230 |
|
}, |
|
{ |
|
"dpo_loss": 614.890380859375, |
|
"epoch": 1.3320736891828058, |
|
"grad_norm": 63907.397620999705, |
|
"learning_rate": 4.586901275038201e-06, |
|
"logits": -0.7215268611907959, |
|
"logps": -75.93678283691406, |
|
"loss": 1294.443, |
|
"objective": 1246.745849609375, |
|
"ranking_idealized": 0.4833333194255829, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.5041666626930237, |
|
"regularize": 1246.745849609375, |
|
"step": 235 |
|
}, |
|
{ |
|
"dpo_loss": 674.669189453125, |
|
"epoch": 1.360415682569674, |
|
"grad_norm": 58847.80898438854, |
|
"learning_rate": 4.559191453574582e-06, |
|
"logits": -0.6891920566558838, |
|
"logps": -76.02350616455078, |
|
"loss": 1277.4793, |
|
"objective": 1229.0069580078125, |
|
"ranking_idealized": 0.4791666567325592, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.4791666567325592, |
|
"regularize": 1229.0069580078125, |
|
"step": 240 |
|
}, |
|
{ |
|
"dpo_loss": 682.5062866210938, |
|
"epoch": 1.3887576759565423, |
|
"grad_norm": 61032.25600688672, |
|
"learning_rate": 4.530671656612544e-06, |
|
"logits": -0.7168589234352112, |
|
"logps": -76.41120910644531, |
|
"loss": 1283.5139, |
|
"objective": 1295.427490234375, |
|
"ranking_idealized": 0.5166666507720947, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 1295.427490234375, |
|
"step": 245 |
|
}, |
|
{ |
|
"dpo_loss": 529.6734008789062, |
|
"epoch": 1.4170996693434104, |
|
"grad_norm": 64433.112029992735, |
|
"learning_rate": 4.501353102310901e-06, |
|
"logits": -0.7715837955474854, |
|
"logps": -74.85303497314453, |
|
"loss": 1257.5858, |
|
"objective": 1227.3660888671875, |
|
"ranking_idealized": 0.47083333134651184, |
|
"ranking_idealized_expo": 0.47083333134651184, |
|
"ranking_simple": 0.4791666567325592, |
|
"regularize": 1227.3660888671875, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.4170996693434104, |
|
"eval_dpo_loss": 1059.197265625, |
|
"eval_logits": -0.8937114477157593, |
|
"eval_logps": -79.56282043457031, |
|
"eval_loss": 2056.783447265625, |
|
"eval_objective": 2078.739990234375, |
|
"eval_ranking_idealized": 0.5092975497245789, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5030992031097412, |
|
"eval_regularize": 2078.739990234375, |
|
"eval_runtime": 260.3832, |
|
"eval_samples_per_second": 22.236, |
|
"eval_steps_per_second": 0.929, |
|
"step": 250 |
|
}, |
|
{ |
|
"dpo_loss": 606.7435913085938, |
|
"epoch": 1.4454416627302786, |
|
"grad_norm": 60772.290582903894, |
|
"learning_rate": 4.4712473230167775e-06, |
|
"logits": -0.7840810418128967, |
|
"logps": -76.13771057128906, |
|
"loss": 1227.542, |
|
"objective": 1233.7110595703125, |
|
"ranking_idealized": 0.5458333492279053, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 1233.7110595703125, |
|
"step": 255 |
|
}, |
|
{ |
|
"dpo_loss": 675.6203002929688, |
|
"epoch": 1.473783656117147, |
|
"grad_norm": 61075.39269116474, |
|
"learning_rate": 4.440366160729393e-06, |
|
"logits": -0.7470805644989014, |
|
"logps": -77.35476684570312, |
|
"loss": 1195.1969, |
|
"objective": 1320.491455078125, |
|
"ranking_idealized": 0.48750001192092896, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.5, |
|
"regularize": 1320.491455078125, |
|
"step": 260 |
|
}, |
|
{ |
|
"dpo_loss": 581.8402099609375, |
|
"epoch": 1.5021256495040152, |
|
"grad_norm": 63860.91658867988, |
|
"learning_rate": 4.4087217624420595e-06, |
|
"logits": -0.7068949937820435, |
|
"logps": -75.44217681884766, |
|
"loss": 1146.6147, |
|
"objective": 1054.690673828125, |
|
"ranking_idealized": 0.5041666626930237, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.4958333373069763, |
|
"regularize": 1054.690673828125, |
|
"step": 265 |
|
}, |
|
{ |
|
"dpo_loss": 614.632080078125, |
|
"epoch": 1.5304676428908834, |
|
"grad_norm": 60024.06688450634, |
|
"learning_rate": 4.376326575364206e-06, |
|
"logits": -0.7246025204658508, |
|
"logps": -75.80039978027344, |
|
"loss": 1165.9641, |
|
"objective": 1283.4388427734375, |
|
"ranking_idealized": 0.5041666626930237, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 1283.4388427734375, |
|
"step": 270 |
|
}, |
|
{ |
|
"dpo_loss": 554.4395751953125, |
|
"epoch": 1.5588096362777515, |
|
"grad_norm": 60695.49838031304, |
|
"learning_rate": 4.34319334202531e-06, |
|
"logits": -0.7526471018791199, |
|
"logps": -74.80017852783203, |
|
"loss": 1087.4088, |
|
"objective": 1118.8092041015625, |
|
"ranking_idealized": 0.5249999761581421, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 1118.8092041015625, |
|
"step": 275 |
|
}, |
|
{ |
|
"dpo_loss": 546.6119995117188, |
|
"epoch": 1.5871516296646198, |
|
"grad_norm": 60429.09566571946, |
|
"learning_rate": 4.309335095262675e-06, |
|
"logits": -0.673143208026886, |
|
"logps": -72.87140655517578, |
|
"loss": 1104.0904, |
|
"objective": 1092.2620849609375, |
|
"ranking_idealized": 0.5041666626930237, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.4958333373069763, |
|
"regularize": 1092.2620849609375, |
|
"step": 280 |
|
}, |
|
{ |
|
"dpo_loss": 584.6941528320312, |
|
"epoch": 1.615493623051488, |
|
"grad_norm": 61625.27955622327, |
|
"learning_rate": 4.274765153095008e-06, |
|
"logits": -0.687822163105011, |
|
"logps": -74.89695739746094, |
|
"loss": 1180.4073, |
|
"objective": 1178.606689453125, |
|
"ranking_idealized": 0.5166666507720947, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 1178.606689453125, |
|
"step": 285 |
|
}, |
|
{ |
|
"dpo_loss": 505.9402770996094, |
|
"epoch": 1.643835616438356, |
|
"grad_norm": 57020.12431127765, |
|
"learning_rate": 4.239497113483819e-06, |
|
"logits": -0.6894593834877014, |
|
"logps": -73.80435943603516, |
|
"loss": 1135.5879, |
|
"objective": 1057.634765625, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.5625, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 1057.634765625, |
|
"step": 290 |
|
}, |
|
{ |
|
"dpo_loss": 584.1075439453125, |
|
"epoch": 1.6721776098252243, |
|
"grad_norm": 57611.18104908386, |
|
"learning_rate": 4.203544848984729e-06, |
|
"logits": -0.6624549031257629, |
|
"logps": -72.43326568603516, |
|
"loss": 1093.2101, |
|
"objective": 1054.095458984375, |
|
"ranking_idealized": 0.4791666567325592, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.49166667461395264, |
|
"regularize": 1054.095458984375, |
|
"step": 295 |
|
}, |
|
{ |
|
"dpo_loss": 577.9215698242188, |
|
"epoch": 1.7005196032120926, |
|
"grad_norm": 64211.25163104252, |
|
"learning_rate": 4.16692250129073e-06, |
|
"logits": -0.675134003162384, |
|
"logps": -74.83804321289062, |
|
"loss": 1062.9078, |
|
"objective": 1094.4080810546875, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.574999988079071, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 1094.4080810546875, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.7005196032120926, |
|
"eval_dpo_loss": 1115.767822265625, |
|
"eval_logits": -0.7208537459373474, |
|
"eval_logps": -79.72731018066406, |
|
"eval_loss": 2170.694580078125, |
|
"eval_objective": 2202.780517578125, |
|
"eval_ranking_idealized": 0.5092975497245789, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5030992031097412, |
|
"eval_regularize": 2202.780517578125, |
|
"eval_runtime": 260.4557, |
|
"eval_samples_per_second": 22.23, |
|
"eval_steps_per_second": 0.929, |
|
"step": 300 |
|
}, |
|
{ |
|
"dpo_loss": 499.92578125, |
|
"epoch": 1.7288615965989607, |
|
"grad_norm": 59997.71496488459, |
|
"learning_rate": 4.129644475669617e-06, |
|
"logits": -0.6386987566947937, |
|
"logps": -76.04856872558594, |
|
"loss": 1068.0816, |
|
"objective": 1115.4102783203125, |
|
"ranking_idealized": 0.5041666626930237, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 1115.4102783203125, |
|
"step": 305 |
|
}, |
|
{ |
|
"dpo_loss": 489.96710205078125, |
|
"epoch": 1.7572035899858292, |
|
"grad_norm": 64953.71255296531, |
|
"learning_rate": 4.091725435297721e-06, |
|
"logits": -0.6985257267951965, |
|
"logps": -73.54243469238281, |
|
"loss": 1091.0647, |
|
"objective": 1090.1666259765625, |
|
"ranking_idealized": 0.5208333134651184, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 1090.1666259765625, |
|
"step": 310 |
|
}, |
|
{ |
|
"dpo_loss": 550.898681640625, |
|
"epoch": 1.7855455833726972, |
|
"grad_norm": 60495.13771709575, |
|
"learning_rate": 4.053180295492203e-06, |
|
"logits": -0.6273626685142517, |
|
"logps": -75.3206558227539, |
|
"loss": 1086.04, |
|
"objective": 1067.1929931640625, |
|
"ranking_idealized": 0.5083333253860474, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 1067.1929931640625, |
|
"step": 315 |
|
}, |
|
{ |
|
"dpo_loss": 710.1861572265625, |
|
"epoch": 1.8138875767595655, |
|
"grad_norm": 62915.79210205411, |
|
"learning_rate": 4.014024217844167e-06, |
|
"logits": -0.6406947374343872, |
|
"logps": -76.75931549072266, |
|
"loss": 1107.8994, |
|
"objective": 1157.3203125, |
|
"ranking_idealized": 0.5041666626930237, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 1157.3203125, |
|
"step": 320 |
|
}, |
|
{ |
|
"dpo_loss": 698.9453125, |
|
"epoch": 1.8422295701464337, |
|
"grad_norm": 66764.29984408473, |
|
"learning_rate": 3.974272604254906e-06, |
|
"logits": -0.6487582921981812, |
|
"logps": -78.7199935913086, |
|
"loss": 1102.0305, |
|
"objective": 1170.1241455078125, |
|
"ranking_idealized": 0.5166666507720947, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 1170.1241455078125, |
|
"step": 325 |
|
}, |
|
{ |
|
"dpo_loss": 565.6978759765625, |
|
"epoch": 1.8705715635333018, |
|
"grad_norm": 61211.42685047196, |
|
"learning_rate": 3.933941090877615e-06, |
|
"logits": -0.547908365726471, |
|
"logps": -75.97573852539062, |
|
"loss": 1043.7004, |
|
"objective": 1092.4935302734375, |
|
"ranking_idealized": 0.47083333134651184, |
|
"ranking_idealized_expo": 0.47083333134651184, |
|
"ranking_simple": 0.4541666805744171, |
|
"regularize": 1092.4935302734375, |
|
"step": 330 |
|
}, |
|
{ |
|
"dpo_loss": 442.16241455078125, |
|
"epoch": 1.89891355692017, |
|
"grad_norm": 59387.646813099236, |
|
"learning_rate": 3.893045541966975e-06, |
|
"logits": -0.6546408534049988, |
|
"logps": -75.68651580810547, |
|
"loss": 1035.5035, |
|
"objective": 1078.9815673828125, |
|
"ranking_idealized": 0.4958333373069763, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 1078.9815673828125, |
|
"step": 335 |
|
}, |
|
{ |
|
"dpo_loss": 504.0133361816406, |
|
"epoch": 1.9272555503070383, |
|
"grad_norm": 65019.32677125802, |
|
"learning_rate": 3.8516020436389945e-06, |
|
"logits": -0.611504852771759, |
|
"logps": -79.22772979736328, |
|
"loss": 1007.5374, |
|
"objective": 1054.186767578125, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 1054.186767578125, |
|
"step": 340 |
|
}, |
|
{ |
|
"dpo_loss": 512.2960815429688, |
|
"epoch": 1.9555975436939064, |
|
"grad_norm": 62729.616628268755, |
|
"learning_rate": 3.8096268975436045e-06, |
|
"logits": -0.6399423480033875, |
|
"logps": -77.9858627319336, |
|
"loss": 990.7236, |
|
"objective": 1057.446533203125, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 1057.446533203125, |
|
"step": 345 |
|
}, |
|
{ |
|
"dpo_loss": 415.81988525390625, |
|
"epoch": 1.9839395370807746, |
|
"grad_norm": 63823.03198972447, |
|
"learning_rate": 3.767136614452458e-06, |
|
"logits": -0.5935099124908447, |
|
"logps": -79.65575408935547, |
|
"loss": 1015.0369, |
|
"objective": 949.8970336914062, |
|
"ranking_idealized": 0.48750001192092896, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.47083333134651184, |
|
"regularize": 949.8970336914062, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.9839395370807746, |
|
"eval_dpo_loss": 1156.4827880859375, |
|
"eval_logits": -0.673879086971283, |
|
"eval_logps": -83.59508514404297, |
|
"eval_loss": 2227.17138671875, |
|
"eval_objective": 2262.3740234375, |
|
"eval_ranking_idealized": 0.5092975497245789, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5123966932296753, |
|
"eval_regularize": 2262.3740234375, |
|
"eval_runtime": 260.1877, |
|
"eval_samples_per_second": 22.253, |
|
"eval_steps_per_second": 0.93, |
|
"step": 350 |
|
}, |
|
{ |
|
"dpo_loss": 531.9288940429688, |
|
"epoch": 2.012281530467643, |
|
"grad_norm": 59707.22724571865, |
|
"learning_rate": 3.724147907764478e-06, |
|
"logits": -0.5535954236984253, |
|
"logps": -79.02854919433594, |
|
"loss": 973.0249, |
|
"objective": 1016.3368530273438, |
|
"ranking_idealized": 0.5083333253860474, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 1016.3368530273438, |
|
"step": 355 |
|
}, |
|
{ |
|
"dpo_loss": 468.62908935546875, |
|
"epoch": 2.040623523854511, |
|
"grad_norm": 57289.87868431793, |
|
"learning_rate": 3.6806776869317074e-06, |
|
"logits": -0.5899576544761658, |
|
"logps": -77.1968002319336, |
|
"loss": 955.7221, |
|
"objective": 970.0728759765625, |
|
"ranking_idealized": 0.5291666388511658, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 970.0728759765625, |
|
"step": 360 |
|
}, |
|
{ |
|
"dpo_loss": 416.2799072265625, |
|
"epoch": 2.0689655172413794, |
|
"grad_norm": 62539.981653631265, |
|
"learning_rate": 3.6367430508080283e-06, |
|
"logits": -0.6331656575202942, |
|
"logps": -80.54222106933594, |
|
"loss": 919.5296, |
|
"objective": 905.774169921875, |
|
"ranking_idealized": 0.5041666626930237, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 905.774169921875, |
|
"step": 365 |
|
}, |
|
{ |
|
"dpo_loss": 487.22967529296875, |
|
"epoch": 2.0973075106282475, |
|
"grad_norm": 57687.52155159706, |
|
"learning_rate": 3.5923612809233987e-06, |
|
"logits": -0.5682953000068665, |
|
"logps": -77.53422546386719, |
|
"loss": 867.4084, |
|
"objective": 892.9639892578125, |
|
"ranking_idealized": 0.4833333194255829, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.49166667461395264, |
|
"regularize": 892.9639892578125, |
|
"step": 370 |
|
}, |
|
{ |
|
"dpo_loss": 387.1273193359375, |
|
"epoch": 2.1256495040151155, |
|
"grad_norm": 58841.64643600781, |
|
"learning_rate": 3.547549834686222e-06, |
|
"logits": -0.5675191879272461, |
|
"logps": -79.55015563964844, |
|
"loss": 896.5979, |
|
"objective": 880.7286376953125, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5874999761581421, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 880.7286376953125, |
|
"step": 375 |
|
}, |
|
{ |
|
"dpo_loss": 408.3785095214844, |
|
"epoch": 2.153991497401984, |
|
"grad_norm": 58397.78972496958, |
|
"learning_rate": 3.5023263385165346e-06, |
|
"logits": -0.5271536707878113, |
|
"logps": -78.26580047607422, |
|
"loss": 892.9402, |
|
"objective": 895.1597900390625, |
|
"ranking_idealized": 0.5291666388511658, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 895.1597900390625, |
|
"step": 380 |
|
}, |
|
{ |
|
"dpo_loss": 378.6316833496094, |
|
"epoch": 2.182333490788852, |
|
"grad_norm": 57845.20228394267, |
|
"learning_rate": 3.4567085809127247e-06, |
|
"logits": -0.5461028218269348, |
|
"logps": -81.12430572509766, |
|
"loss": 854.0151, |
|
"objective": 875.6107177734375, |
|
"ranking_idealized": 0.5, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 875.6107177734375, |
|
"step": 385 |
|
}, |
|
{ |
|
"dpo_loss": 431.4305114746094, |
|
"epoch": 2.21067548417572, |
|
"grad_norm": 58847.02033627762, |
|
"learning_rate": 3.410714505454486e-06, |
|
"logits": -0.5168668627738953, |
|
"logps": -79.27171325683594, |
|
"loss": 854.3453, |
|
"objective": 838.873779296875, |
|
"ranking_idealized": 0.4833333194255829, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.47083333134651184, |
|
"regularize": 838.873779296875, |
|
"step": 390 |
|
}, |
|
{ |
|
"dpo_loss": 492.15777587890625, |
|
"epoch": 2.2390174775625886, |
|
"grad_norm": 58173.621993324545, |
|
"learning_rate": 3.364362203744777e-06, |
|
"logits": -0.5718950629234314, |
|
"logps": -79.84632110595703, |
|
"loss": 891.3756, |
|
"objective": 879.3973388671875, |
|
"ranking_idealized": 0.5166666507720947, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 879.3973388671875, |
|
"step": 395 |
|
}, |
|
{ |
|
"dpo_loss": 420.82257080078125, |
|
"epoch": 2.2673594709494567, |
|
"grad_norm": 56625.7986223953, |
|
"learning_rate": 3.3176699082935546e-06, |
|
"logits": -0.6329799294471741, |
|
"logps": -81.30545806884766, |
|
"loss": 849.8354, |
|
"objective": 825.5804443359375, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.574999988079071, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 825.5804443359375, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.2673594709494567, |
|
"eval_dpo_loss": 1124.7908935546875, |
|
"eval_logits": -0.6953743100166321, |
|
"eval_logps": -83.39958190917969, |
|
"eval_loss": 2210.667236328125, |
|
"eval_objective": 2238.018798828125, |
|
"eval_ranking_idealized": 0.5092975497245789, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5154958963394165, |
|
"eval_regularize": 2238.018798828125, |
|
"eval_runtime": 260.8763, |
|
"eval_samples_per_second": 22.194, |
|
"eval_steps_per_second": 0.928, |
|
"step": 400 |
|
}, |
|
{ |
|
"dpo_loss": 455.4064636230469, |
|
"epoch": 2.295701464336325, |
|
"grad_norm": 59678.323118492524, |
|
"learning_rate": 3.2706559853460818e-06, |
|
"logits": -0.6739230751991272, |
|
"logps": -79.11216735839844, |
|
"loss": 848.5012, |
|
"objective": 833.8142700195312, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.5583333373069763, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 833.8142700195312, |
|
"step": 405 |
|
}, |
|
{ |
|
"dpo_loss": 401.0616455078125, |
|
"epoch": 2.324043457723193, |
|
"grad_norm": 65764.3857558314, |
|
"learning_rate": 3.2233389276586325e-06, |
|
"logits": -0.596556544303894, |
|
"logps": -78.09807586669922, |
|
"loss": 830.7495, |
|
"objective": 797.6709594726562, |
|
"ranking_idealized": 0.4749999940395355, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.4749999940395355, |
|
"regularize": 797.6709594726562, |
|
"step": 410 |
|
}, |
|
{ |
|
"dpo_loss": 449.0657653808594, |
|
"epoch": 2.3523854511100613, |
|
"grad_norm": 60576.92516292515, |
|
"learning_rate": 3.1757373472244324e-06, |
|
"logits": -0.6169402003288269, |
|
"logps": -77.96237182617188, |
|
"loss": 830.681, |
|
"objective": 843.7548217773438, |
|
"ranking_idealized": 0.5291666388511658, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 843.7548217773438, |
|
"step": 415 |
|
}, |
|
{ |
|
"dpo_loss": 407.09564208984375, |
|
"epoch": 2.3807274444969297, |
|
"grad_norm": 59864.322780894625, |
|
"learning_rate": 3.127869967952698e-06, |
|
"logits": -0.597426176071167, |
|
"logps": -78.8098373413086, |
|
"loss": 802.8715, |
|
"objective": 764.6652221679688, |
|
"ranking_idealized": 0.47083333134651184, |
|
"ranking_idealized_expo": 0.47083333134651184, |
|
"ranking_simple": 0.4583333432674408, |
|
"regularize": 764.6652221679688, |
|
"step": 420 |
|
}, |
|
{ |
|
"dpo_loss": 370.85565185546875, |
|
"epoch": 2.409069437883798, |
|
"grad_norm": 59177.15286602958, |
|
"learning_rate": 3.0797556183036582e-06, |
|
"logits": -0.6467216610908508, |
|
"logps": -77.39534759521484, |
|
"loss": 813.4841, |
|
"objective": 817.958251953125, |
|
"ranking_idealized": 0.4958333373069763, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 817.958251953125, |
|
"step": 425 |
|
}, |
|
{ |
|
"dpo_loss": 354.9871826171875, |
|
"epoch": 2.4374114312706663, |
|
"grad_norm": 59260.79608193281, |
|
"learning_rate": 3.0314132238824416e-06, |
|
"logits": -0.6649688482284546, |
|
"logps": -77.8988037109375, |
|
"loss": 765.4834, |
|
"objective": 690.4028930664062, |
|
"ranking_idealized": 0.5291666388511658, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 690.4028930664062, |
|
"step": 430 |
|
}, |
|
{ |
|
"dpo_loss": 338.4743347167969, |
|
"epoch": 2.4657534246575343, |
|
"grad_norm": 55351.556007813095, |
|
"learning_rate": 2.9828617999947647e-06, |
|
"logits": -0.6696223616600037, |
|
"logps": -77.22901916503906, |
|
"loss": 774.5823, |
|
"objective": 722.385009765625, |
|
"ranking_idealized": 0.5083333253860474, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 722.385009765625, |
|
"step": 435 |
|
}, |
|
{ |
|
"dpo_loss": 500.3384704589844, |
|
"epoch": 2.4940954180444024, |
|
"grad_norm": 61581.25711417744, |
|
"learning_rate": 2.9341204441673267e-06, |
|
"logits": -0.6167859435081482, |
|
"logps": -76.51101684570312, |
|
"loss": 785.1207, |
|
"objective": 844.1969604492188, |
|
"ranking_idealized": 0.5375000238418579, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 844.1969604492188, |
|
"step": 440 |
|
}, |
|
{ |
|
"dpo_loss": 445.2440185546875, |
|
"epoch": 2.5224374114312704, |
|
"grad_norm": 59474.235147403386, |
|
"learning_rate": 2.8852083286358647e-06, |
|
"logits": -0.6244394779205322, |
|
"logps": -74.75468444824219, |
|
"loss": 735.9972, |
|
"objective": 698.7183227539062, |
|
"ranking_idealized": 0.5166666507720947, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.4958333373069763, |
|
"regularize": 698.7183227539062, |
|
"step": 445 |
|
}, |
|
{ |
|
"dpo_loss": 383.2066650390625, |
|
"epoch": 2.550779404818139, |
|
"grad_norm": 63593.63871540052, |
|
"learning_rate": 2.8361446928038298e-06, |
|
"logits": -0.6291276216506958, |
|
"logps": -76.71273040771484, |
|
"loss": 749.1392, |
|
"objective": 726.7490234375, |
|
"ranking_idealized": 0.5, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.49166667461395264, |
|
"regularize": 726.7490234375, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.550779404818139, |
|
"eval_dpo_loss": 1157.103515625, |
|
"eval_logits": -0.6204014420509338, |
|
"eval_logps": -80.84976196289062, |
|
"eval_loss": 2232.329833984375, |
|
"eval_objective": 2283.406982421875, |
|
"eval_ranking_idealized": 0.5092975497245789, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5134297609329224, |
|
"eval_regularize": 2283.406982421875, |
|
"eval_runtime": 260.1477, |
|
"eval_samples_per_second": 22.257, |
|
"eval_steps_per_second": 0.93, |
|
"step": 450 |
|
}, |
|
{ |
|
"dpo_loss": 370.9847106933594, |
|
"epoch": 2.579121398205007, |
|
"grad_norm": 58175.776810504, |
|
"learning_rate": 2.7869488356746344e-06, |
|
"logits": -0.5754743814468384, |
|
"logps": -77.64400482177734, |
|
"loss": 764.5896, |
|
"objective": 809.1742553710938, |
|
"ranking_idealized": 0.5, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 809.1742553710938, |
|
"step": 455 |
|
}, |
|
{ |
|
"dpo_loss": 425.83538818359375, |
|
"epoch": 2.6074633915918755, |
|
"grad_norm": 56477.01767094451, |
|
"learning_rate": 2.7376401082604563e-06, |
|
"logits": -0.5983397364616394, |
|
"logps": -77.55906677246094, |
|
"loss": 744.7457, |
|
"objective": 810.4805908203125, |
|
"ranking_idealized": 0.512499988079071, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 810.4805908203125, |
|
"step": 460 |
|
}, |
|
{ |
|
"dpo_loss": 349.92388916015625, |
|
"epoch": 2.6358053849787435, |
|
"grad_norm": 58522.655824615445, |
|
"learning_rate": 2.6882379059705953e-06, |
|
"logits": -0.5105490684509277, |
|
"logps": -77.51533508300781, |
|
"loss": 699.5353, |
|
"objective": 748.3710327148438, |
|
"ranking_idealized": 0.4958333373069763, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 748.3710327148438, |
|
"step": 465 |
|
}, |
|
{ |
|
"dpo_loss": 312.98828125, |
|
"epoch": 2.6641473783656116, |
|
"grad_norm": 58397.15682461157, |
|
"learning_rate": 2.6387616609823506e-06, |
|
"logits": -0.5410483479499817, |
|
"logps": -76.95088195800781, |
|
"loss": 705.2115, |
|
"objective": 725.9006958007812, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 725.9006958007812, |
|
"step": 470 |
|
}, |
|
{ |
|
"dpo_loss": 282.96087646484375, |
|
"epoch": 2.69248937175248, |
|
"grad_norm": 60283.35669741736, |
|
"learning_rate": 2.5892308345974517e-06, |
|
"logits": -0.5889594554901123, |
|
"logps": -76.49653625488281, |
|
"loss": 705.286, |
|
"objective": 678.2340698242188, |
|
"ranking_idealized": 0.4958333373069763, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.49166667461395264, |
|
"regularize": 678.2340698242188, |
|
"step": 475 |
|
}, |
|
{ |
|
"dpo_loss": 387.2418518066406, |
|
"epoch": 2.720831365139348, |
|
"grad_norm": 58143.27861764097, |
|
"learning_rate": 2.53966490958702e-06, |
|
"logits": -0.6299055814743042, |
|
"logps": -76.01927185058594, |
|
"loss": 694.6192, |
|
"objective": 725.1319580078125, |
|
"ranking_idealized": 0.5, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5, |
|
"regularize": 725.1319580078125, |
|
"step": 480 |
|
}, |
|
{ |
|
"dpo_loss": 366.7168884277344, |
|
"epoch": 2.7491733585262166, |
|
"grad_norm": 57418.84427310581, |
|
"learning_rate": 2.490083382528097e-06, |
|
"logits": -0.5646944642066956, |
|
"logps": -78.08667755126953, |
|
"loss": 658.8851, |
|
"objective": 672.9464721679688, |
|
"ranking_idealized": 0.5208333134651184, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 672.9464721679688, |
|
"step": 485 |
|
}, |
|
{ |
|
"dpo_loss": 299.7682800292969, |
|
"epoch": 2.7775153519130846, |
|
"grad_norm": 57617.14262224855, |
|
"learning_rate": 2.440505756134732e-06, |
|
"logits": -0.5018786787986755, |
|
"logps": -76.6084976196289, |
|
"loss": 710.2159, |
|
"objective": 683.9637451171875, |
|
"ranking_idealized": 0.47083333134651184, |
|
"ranking_idealized_expo": 0.47083333134651184, |
|
"ranking_simple": 0.47083333134651184, |
|
"regularize": 683.9637451171875, |
|
"step": 490 |
|
}, |
|
{ |
|
"dpo_loss": 372.53076171875, |
|
"epoch": 2.8058573452999527, |
|
"grad_norm": 57177.66074623478, |
|
"learning_rate": 2.3909515315866606e-06, |
|
"logits": -0.543052077293396, |
|
"logps": -74.97837829589844, |
|
"loss": 666.0089, |
|
"objective": 707.5836181640625, |
|
"ranking_idealized": 0.4791666567325592, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.4625000059604645, |
|
"regularize": 707.5836181640625, |
|
"step": 495 |
|
}, |
|
{ |
|
"dpo_loss": 402.10662841796875, |
|
"epoch": 2.8341993386868207, |
|
"grad_norm": 57068.622360461784, |
|
"learning_rate": 2.341440200858589e-06, |
|
"logits": -0.5944234132766724, |
|
"logps": -74.34591674804688, |
|
"loss": 663.6063, |
|
"objective": 650.8515625, |
|
"ranking_idealized": 0.5208333134651184, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 650.8515625, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.8341993386868207, |
|
"eval_dpo_loss": 1152.4822998046875, |
|
"eval_logits": -0.6463432908058167, |
|
"eval_logps": -81.1036148071289, |
|
"eval_loss": 2235.325439453125, |
|
"eval_objective": 2277.773681640625, |
|
"eval_ranking_idealized": 0.5092975497245789, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5082644820213318, |
|
"eval_regularize": 2277.773681640625, |
|
"eval_runtime": 262.0167, |
|
"eval_samples_per_second": 22.098, |
|
"eval_steps_per_second": 0.924, |
|
"step": 500 |
|
}, |
|
{ |
|
"dpo_loss": 277.9737854003906, |
|
"epoch": 2.862541332073689, |
|
"grad_norm": 57899.221094649656, |
|
"learning_rate": 2.2919912390530945e-06, |
|
"logits": -0.5871773362159729, |
|
"logps": -76.18864440917969, |
|
"loss": 671.9541, |
|
"objective": 656.1866455078125, |
|
"ranking_idealized": 0.5083333253860474, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 656.1866455078125, |
|
"step": 505 |
|
}, |
|
{ |
|
"dpo_loss": 308.4158935546875, |
|
"epoch": 2.8908833254605573, |
|
"grad_norm": 61442.9981374047, |
|
"learning_rate": 2.242624096740164e-06, |
|
"logits": -0.6056591272354126, |
|
"logps": -76.62039947509766, |
|
"loss": 651.3555, |
|
"objective": 650.9279174804688, |
|
"ranking_idealized": 0.5249999761581421, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 650.9279174804688, |
|
"step": 510 |
|
}, |
|
{ |
|
"dpo_loss": 333.2612609863281, |
|
"epoch": 2.9192253188474258, |
|
"grad_norm": 65234.87039662293, |
|
"learning_rate": 2.193358192306384e-06, |
|
"logits": -0.677362322807312, |
|
"logps": -76.61473846435547, |
|
"loss": 644.7175, |
|
"objective": 633.654296875, |
|
"ranking_idealized": 0.4791666567325592, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.4791666567325592, |
|
"regularize": 633.654296875, |
|
"step": 515 |
|
}, |
|
{ |
|
"dpo_loss": 290.90093994140625, |
|
"epoch": 2.947567312234294, |
|
"grad_norm": 62487.68797546038, |
|
"learning_rate": 2.1442129043167877e-06, |
|
"logits": -0.6403795480728149, |
|
"logps": -78.69046020507812, |
|
"loss": 617.916, |
|
"objective": 615.7841186523438, |
|
"ranking_idealized": 0.5083333253860474, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5041666626930237, |
|
"regularize": 615.7841186523438, |
|
"step": 520 |
|
}, |
|
{ |
|
"dpo_loss": 285.1398010253906, |
|
"epoch": 2.975909305621162, |
|
"grad_norm": 57603.254740385855, |
|
"learning_rate": 2.0952075638923656e-06, |
|
"logits": -0.631269633769989, |
|
"logps": -77.06267547607422, |
|
"loss": 600.514, |
|
"objective": 611.5770874023438, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 611.5770874023438, |
|
"step": 525 |
|
}, |
|
{ |
|
"dpo_loss": 257.36700439453125, |
|
"epoch": 3.0042512990080303, |
|
"grad_norm": 58200.63176299685, |
|
"learning_rate": 2.046361447106244e-06, |
|
"logits": -0.58321613073349, |
|
"logps": -76.48860168457031, |
|
"loss": 561.4095, |
|
"objective": 531.5440063476562, |
|
"ranking_idealized": 0.512499988079071, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 531.5440063476562, |
|
"step": 530 |
|
}, |
|
{ |
|
"dpo_loss": 268.19036865234375, |
|
"epoch": 3.0325932923948984, |
|
"grad_norm": 56509.28719745372, |
|
"learning_rate": 1.997693767401503e-06, |
|
"logits": -0.5703064203262329, |
|
"logps": -78.15198516845703, |
|
"loss": 537.4105, |
|
"objective": 540.3373413085938, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 540.3373413085938, |
|
"step": 535 |
|
}, |
|
{ |
|
"dpo_loss": 260.3810119628906, |
|
"epoch": 3.0609352857817664, |
|
"grad_norm": 56574.095934264005, |
|
"learning_rate": 1.9492236680336486e-06, |
|
"logits": -0.6156631112098694, |
|
"logps": -76.84761810302734, |
|
"loss": 528.5064, |
|
"objective": 518.1077880859375, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 518.1077880859375, |
|
"step": 540 |
|
}, |
|
{ |
|
"dpo_loss": 294.6708679199219, |
|
"epoch": 3.089277279168635, |
|
"grad_norm": 57329.00701868531, |
|
"learning_rate": 1.9009702145406728e-06, |
|
"logits": -0.6262565851211548, |
|
"logps": -77.80619812011719, |
|
"loss": 534.8658, |
|
"objective": 547.5757446289062, |
|
"ranking_idealized": 0.49166667461395264, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.4958333373069763, |
|
"regularize": 547.5757446289062, |
|
"step": 545 |
|
}, |
|
{ |
|
"dpo_loss": 304.8855285644531, |
|
"epoch": 3.117619272555503, |
|
"grad_norm": 58804.14825774506, |
|
"learning_rate": 1.852952387243698e-06, |
|
"logits": -0.566185712814331, |
|
"logps": -78.45851135253906, |
|
"loss": 547.2687, |
|
"objective": 564.5450439453125, |
|
"ranking_idealized": 0.5375000238418579, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 564.5450439453125, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 3.117619272555503, |
|
"eval_dpo_loss": 1133.89697265625, |
|
"eval_logits": -0.662289023399353, |
|
"eval_logps": -81.35185241699219, |
|
"eval_loss": 2247.691650390625, |
|
"eval_objective": 2265.5048828125, |
|
"eval_ranking_idealized": 0.5092975497245789, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5144628286361694, |
|
"eval_regularize": 2265.5048828125, |
|
"eval_runtime": 259.4387, |
|
"eval_samples_per_second": 22.317, |
|
"eval_steps_per_second": 0.933, |
|
"step": 550 |
|
}, |
|
{ |
|
"dpo_loss": 281.4118957519531, |
|
"epoch": 3.1459612659423715, |
|
"grad_norm": 59417.51403592292, |
|
"learning_rate": 1.8051890737811395e-06, |
|
"logits": -0.5252026915550232, |
|
"logps": -78.36255645751953, |
|
"loss": 532.1873, |
|
"objective": 550.6338500976562, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 550.6338500976562, |
|
"step": 555 |
|
}, |
|
{ |
|
"dpo_loss": 236.99884033203125, |
|
"epoch": 3.1743032593292395, |
|
"grad_norm": 59018.08983892326, |
|
"learning_rate": 1.7576990616793139e-06, |
|
"logits": -0.5617191195487976, |
|
"logps": -75.77594757080078, |
|
"loss": 505.1167, |
|
"objective": 438.2338562011719, |
|
"ranking_idealized": 0.5041666626930237, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5, |
|
"regularize": 438.2338562011719, |
|
"step": 560 |
|
}, |
|
{ |
|
"dpo_loss": 246.71607971191406, |
|
"epoch": 3.2026452527161076, |
|
"grad_norm": 58636.618971388525, |
|
"learning_rate": 1.7105010309624381e-06, |
|
"logits": -0.5584675669670105, |
|
"logps": -77.66993713378906, |
|
"loss": 493.3785, |
|
"objective": 472.7521667480469, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 472.7521667480469, |
|
"step": 565 |
|
}, |
|
{ |
|
"dpo_loss": 265.2983703613281, |
|
"epoch": 3.230987246102976, |
|
"grad_norm": 55898.06189925703, |
|
"learning_rate": 1.6636135468049122e-06, |
|
"logits": -0.5392119288444519, |
|
"logps": -77.28318786621094, |
|
"loss": 497.3006, |
|
"objective": 500.74493408203125, |
|
"ranking_idealized": 0.4791666567325592, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.48750001192092896, |
|
"regularize": 500.74493408203125, |
|
"step": 570 |
|
}, |
|
{ |
|
"dpo_loss": 231.4838104248047, |
|
"epoch": 3.259329239489844, |
|
"grad_norm": 55759.490082320495, |
|
"learning_rate": 1.617055052228768e-06, |
|
"logits": -0.6065218448638916, |
|
"logps": -77.56108856201172, |
|
"loss": 497.8869, |
|
"objective": 468.4706115722656, |
|
"ranking_idealized": 0.5, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.4958333373069763, |
|
"regularize": 468.4706115722656, |
|
"step": 575 |
|
}, |
|
{ |
|
"dpo_loss": 225.2176055908203, |
|
"epoch": 3.287671232876712, |
|
"grad_norm": 61004.08082189617, |
|
"learning_rate": 1.5708438608491816e-06, |
|
"logits": -0.6214988827705383, |
|
"logps": -78.54485321044922, |
|
"loss": 473.5735, |
|
"objective": 457.38262939453125, |
|
"ranking_idealized": 0.5541666746139526, |
|
"ranking_idealized_expo": 0.5541666746139526, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 457.38262939453125, |
|
"step": 580 |
|
}, |
|
{ |
|
"dpo_loss": 232.1999969482422, |
|
"epoch": 3.3160132262635806, |
|
"grad_norm": 60725.76092889549, |
|
"learning_rate": 1.524998149670871e-06, |
|
"logits": -0.6042133569717407, |
|
"logps": -79.63090515136719, |
|
"loss": 487.6306, |
|
"objective": 457.5376281738281, |
|
"ranking_idealized": 0.5291666388511658, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 457.5376281738281, |
|
"step": 585 |
|
}, |
|
{ |
|
"dpo_loss": 272.1759033203125, |
|
"epoch": 3.3443552196504487, |
|
"grad_norm": 56118.05633186447, |
|
"learning_rate": 1.479535951938243e-06, |
|
"logits": -0.6058977246284485, |
|
"logps": -79.4846420288086, |
|
"loss": 480.4184, |
|
"objective": 515.1324462890625, |
|
"ranking_idealized": 0.48750001192092896, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.48750001192092896, |
|
"regularize": 515.1324462890625, |
|
"step": 590 |
|
}, |
|
{ |
|
"dpo_loss": 245.82069396972656, |
|
"epoch": 3.372697213037317, |
|
"grad_norm": 55867.37155318488, |
|
"learning_rate": 1.43447515004208e-06, |
|
"logits": -0.5171046853065491, |
|
"logps": -79.08625030517578, |
|
"loss": 482.6767, |
|
"objective": 483.4396667480469, |
|
"ranking_idealized": 0.5249999761581421, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 483.4396667480469, |
|
"step": 595 |
|
}, |
|
{ |
|
"dpo_loss": 196.64842224121094, |
|
"epoch": 3.4010392064241852, |
|
"grad_norm": 56516.07590190884, |
|
"learning_rate": 1.3898334684855647e-06, |
|
"logits": -0.5405918955802917, |
|
"logps": -78.04920959472656, |
|
"loss": 451.9043, |
|
"objective": 474.75640869140625, |
|
"ranking_idealized": 0.5041666626930237, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 474.75640869140625, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.4010392064241852, |
|
"eval_dpo_loss": 1143.4464111328125, |
|
"eval_logits": -0.6081129908561707, |
|
"eval_logps": -81.8093032836914, |
|
"eval_loss": 2235.049072265625, |
|
"eval_objective": 2263.895751953125, |
|
"eval_ranking_idealized": 0.5092975497245789, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5113636255264282, |
|
"eval_regularize": 2263.895751953125, |
|
"eval_runtime": 263.0772, |
|
"eval_samples_per_second": 22.009, |
|
"eval_steps_per_second": 0.92, |
|
"step": 600 |
|
}, |
|
{ |
|
"dpo_loss": 200.47567749023438, |
|
"epoch": 3.4293811998110533, |
|
"grad_norm": 61245.11887519878, |
|
"learning_rate": 1.3456284669124159e-06, |
|
"logits": -0.5445960164070129, |
|
"logps": -80.08841705322266, |
|
"loss": 443.7833, |
|
"objective": 392.1485900878906, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 392.1485900878906, |
|
"step": 605 |
|
}, |
|
{ |
|
"dpo_loss": 224.0120086669922, |
|
"epoch": 3.4577231931979218, |
|
"grad_norm": 55703.47222096951, |
|
"learning_rate": 1.301877533199859e-06, |
|
"logits": -0.5698374509811401, |
|
"logps": -78.10750579833984, |
|
"loss": 453.1188, |
|
"objective": 450.0478210449219, |
|
"ranking_idealized": 0.512499988079071, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 450.0478210449219, |
|
"step": 610 |
|
}, |
|
{ |
|
"dpo_loss": 201.32762145996094, |
|
"epoch": 3.48606518658479, |
|
"grad_norm": 56298.35092268398, |
|
"learning_rate": 1.2585978766191726e-06, |
|
"logits": -0.6067613959312439, |
|
"logps": -78.46564483642578, |
|
"loss": 443.3495, |
|
"objective": 431.0823059082031, |
|
"ranking_idealized": 0.46666666865348816, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.47083333134651184, |
|
"regularize": 431.0823059082031, |
|
"step": 615 |
|
}, |
|
{ |
|
"dpo_loss": 206.5738983154297, |
|
"epoch": 3.514407179971658, |
|
"grad_norm": 63836.369713544904, |
|
"learning_rate": 1.2158065210664848e-06, |
|
"logits": -0.5364049077033997, |
|
"logps": -77.92042541503906, |
|
"loss": 413.5914, |
|
"objective": 407.43011474609375, |
|
"ranking_idealized": 0.4958333373069763, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 407.43011474609375, |
|
"step": 620 |
|
}, |
|
{ |
|
"dpo_loss": 262.5015869140625, |
|
"epoch": 3.5427491733585263, |
|
"grad_norm": 57117.17093128659, |
|
"learning_rate": 1.1735202983664803e-06, |
|
"logits": -0.5198719501495361, |
|
"logps": -77.65792083740234, |
|
"loss": 434.5029, |
|
"objective": 441.78729248046875, |
|
"ranking_idealized": 0.5208333134651184, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 441.78729248046875, |
|
"step": 625 |
|
}, |
|
{ |
|
"dpo_loss": 175.2751007080078, |
|
"epoch": 3.5710911667453944, |
|
"grad_norm": 57300.8422558694, |
|
"learning_rate": 1.1317558416516696e-06, |
|
"logits": -0.5618345737457275, |
|
"logps": -77.11524200439453, |
|
"loss": 418.5029, |
|
"objective": 366.7109375, |
|
"ranking_idealized": 0.512499988079071, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 366.7109375, |
|
"step": 630 |
|
}, |
|
{ |
|
"dpo_loss": 247.67637634277344, |
|
"epoch": 3.5994331601322624, |
|
"grad_norm": 57342.11408252111, |
|
"learning_rate": 1.0905295788197993e-06, |
|
"logits": -0.5630912184715271, |
|
"logps": -77.43748474121094, |
|
"loss": 390.8204, |
|
"objective": 448.0376281738281, |
|
"ranking_idealized": 0.48750001192092896, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.4749999940395355, |
|
"regularize": 448.0376281738281, |
|
"step": 635 |
|
}, |
|
{ |
|
"dpo_loss": 221.48199462890625, |
|
"epoch": 3.627775153519131, |
|
"grad_norm": 57029.256971048184, |
|
"learning_rate": 1.049857726072005e-06, |
|
"logits": -0.5471541285514832, |
|
"logps": -77.80196380615234, |
|
"loss": 394.0431, |
|
"objective": 420.2522888183594, |
|
"ranking_idealized": 0.4625000059604645, |
|
"ranking_idealized_expo": 0.4625000059604645, |
|
"ranking_simple": 0.48750001192092896, |
|
"regularize": 420.2522888183594, |
|
"step": 640 |
|
}, |
|
{ |
|
"dpo_loss": 210.2812957763672, |
|
"epoch": 3.656117146905999, |
|
"grad_norm": 56760.30754643641, |
|
"learning_rate": 1.0097562815342215e-06, |
|
"logits": -0.5718862414360046, |
|
"logps": -76.699951171875, |
|
"loss": 382.7592, |
|
"objective": 373.0257263183594, |
|
"ranking_idealized": 0.5249999761581421, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 373.0257263183594, |
|
"step": 645 |
|
}, |
|
{ |
|
"dpo_loss": 190.9052276611328, |
|
"epoch": 3.6844591402928675, |
|
"grad_norm": 61051.71098872104, |
|
"learning_rate": 9.702410189643838e-07, |
|
"logits": -0.5754767060279846, |
|
"logps": -77.90436553955078, |
|
"loss": 383.0005, |
|
"objective": 371.94171142578125, |
|
"ranking_idealized": 0.5041666626930237, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.48750001192092896, |
|
"regularize": 371.94171142578125, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 3.6844591402928675, |
|
"eval_dpo_loss": 1148.96923828125, |
|
"eval_logits": -0.6417236328125, |
|
"eval_logps": -81.60212707519531, |
|
"eval_loss": 2233.306640625, |
|
"eval_objective": 2277.599365234375, |
|
"eval_ranking_idealized": 0.5092975497245789, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5123966932296753, |
|
"eval_regularize": 2277.599365234375, |
|
"eval_runtime": 259.78, |
|
"eval_samples_per_second": 22.288, |
|
"eval_steps_per_second": 0.932, |
|
"step": 650 |
|
}, |
|
{ |
|
"dpo_loss": 196.76412963867188, |
|
"epoch": 3.7128011336797355, |
|
"grad_norm": 58071.414511462746, |
|
"learning_rate": 9.313274815478698e-07, |
|
"logits": -0.56723552942276, |
|
"logps": -76.70596313476562, |
|
"loss": 379.5748, |
|
"objective": 366.3069763183594, |
|
"ranking_idealized": 0.5208333134651184, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 366.3069763183594, |
|
"step": 655 |
|
}, |
|
{ |
|
"dpo_loss": 194.60183715820312, |
|
"epoch": 3.7411431270666036, |
|
"grad_norm": 60809.553574292404, |
|
"learning_rate": 8.930309757836517e-07, |
|
"logits": -0.6155992150306702, |
|
"logps": -77.6422348022461, |
|
"loss": 361.61, |
|
"objective": 359.7954406738281, |
|
"ranking_idealized": 0.5541666746139526, |
|
"ranking_idealized_expo": 0.5541666746139526, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 359.7954406738281, |
|
"step": 660 |
|
}, |
|
{ |
|
"dpo_loss": 192.55584716796875, |
|
"epoch": 3.769485120453472, |
|
"grad_norm": 59289.71674690953, |
|
"learning_rate": 8.553665654635343e-07, |
|
"logits": -0.6106492280960083, |
|
"logps": -78.0013198852539, |
|
"loss": 360.7243, |
|
"objective": 351.4775695800781, |
|
"ranking_idealized": 0.5166666507720947, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5041666626930237, |
|
"regularize": 351.4775695800781, |
|
"step": 665 |
|
}, |
|
{ |
|
"dpo_loss": 144.13723754882812, |
|
"epoch": 3.79782711384034, |
|
"grad_norm": 57262.66850621469, |
|
"learning_rate": 8.183490657468687e-07, |
|
"logits": -0.6628882884979248, |
|
"logps": -77.76026916503906, |
|
"loss": 349.6578, |
|
"objective": 297.4377746582031, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5916666388511658, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 297.4377746582031, |
|
"step": 670 |
|
}, |
|
{ |
|
"dpo_loss": 152.4486846923828, |
|
"epoch": 3.826169107227208, |
|
"grad_norm": 56825.12670236396, |
|
"learning_rate": 7.819930373330669e-07, |
|
"logits": -0.6416188478469849, |
|
"logps": -76.90306091308594, |
|
"loss": 353.6488, |
|
"objective": 341.40216064453125, |
|
"ranking_idealized": 0.5375000238418579, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 341.40216064453125, |
|
"step": 675 |
|
}, |
|
{ |
|
"dpo_loss": 184.26919555664062, |
|
"epoch": 3.8545111006140766, |
|
"grad_norm": 53705.32969410845, |
|
"learning_rate": 7.463127807341966e-07, |
|
"logits": -0.5924732089042664, |
|
"logps": -77.391357421875, |
|
"loss": 333.8282, |
|
"objective": 344.5659484863281, |
|
"ranking_idealized": 0.5083333253860474, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5, |
|
"regularize": 344.5659484863281, |
|
"step": 680 |
|
}, |
|
{ |
|
"dpo_loss": 148.51840209960938, |
|
"epoch": 3.8828530940009447, |
|
"grad_norm": 59401.195526544805, |
|
"learning_rate": 7.113223306499336e-07, |
|
"logits": -0.59928297996521, |
|
"logps": -77.14978790283203, |
|
"loss": 326.404, |
|
"objective": 304.36529541015625, |
|
"ranking_idealized": 0.5166666507720947, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 304.36529541015625, |
|
"step": 685 |
|
}, |
|
{ |
|
"dpo_loss": 139.26687622070312, |
|
"epoch": 3.9111950873878127, |
|
"grad_norm": 55908.406363835, |
|
"learning_rate": 6.770354504470575e-07, |
|
"logits": -0.6397600769996643, |
|
"logps": -77.13099670410156, |
|
"loss": 327.1117, |
|
"objective": 284.6099853515625, |
|
"ranking_idealized": 0.5458333492279053, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 284.6099853515625, |
|
"step": 690 |
|
}, |
|
{ |
|
"dpo_loss": 149.27659606933594, |
|
"epoch": 3.9395370807746812, |
|
"grad_norm": 56202.2834304956, |
|
"learning_rate": 6.434656267456843e-07, |
|
"logits": -0.5855825543403625, |
|
"logps": -78.09581756591797, |
|
"loss": 313.6309, |
|
"objective": 333.0680847167969, |
|
"ranking_idealized": 0.46666666865348816, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.46666666865348816, |
|
"regularize": 333.0680847167969, |
|
"step": 695 |
|
}, |
|
{ |
|
"dpo_loss": 192.27964782714844, |
|
"epoch": 3.9678790741615493, |
|
"grad_norm": 54921.578630640375, |
|
"learning_rate": 6.106260641143547e-07, |
|
"logits": -0.5845039486885071, |
|
"logps": -77.96537780761719, |
|
"loss": 316.0834, |
|
"objective": 346.9200134277344, |
|
"ranking_idealized": 0.4833333194255829, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 346.9200134277344, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 3.9678790741615493, |
|
"eval_dpo_loss": 1143.5379638671875, |
|
"eval_logits": -0.6440603733062744, |
|
"eval_logps": -82.07394409179688, |
|
"eval_loss": 2236.5556640625, |
|
"eval_objective": 2269.068115234375, |
|
"eval_ranking_idealized": 0.5092975497245789, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5134297609329224, |
|
"eval_regularize": 2269.068115234375, |
|
"eval_runtime": 260.028, |
|
"eval_samples_per_second": 22.267, |
|
"eval_steps_per_second": 0.931, |
|
"step": 700 |
|
}, |
|
{ |
|
"dpo_loss": 133.92913818359375, |
|
"epoch": 3.9962210675484178, |
|
"grad_norm": 53821.890841527515, |
|
"learning_rate": 5.785296798760601e-07, |
|
"logits": -0.5399293303489685, |
|
"logps": -76.87466430664062, |
|
"loss": 297.6826, |
|
"objective": 305.3105163574219, |
|
"ranking_idealized": 0.5249999761581421, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 305.3105163574219, |
|
"step": 705 |
|
}, |
|
{ |
|
"dpo_loss": 136.2448272705078, |
|
"epoch": 4.024563060935286, |
|
"grad_norm": 56556.65117729612, |
|
"learning_rate": 5.471890990272666e-07, |
|
"logits": -0.5713457465171814, |
|
"logps": -78.32731628417969, |
|
"loss": 257.5077, |
|
"objective": 262.51849365234375, |
|
"ranking_idealized": 0.4791666567325592, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.4791666567325592, |
|
"regularize": 262.51849365234375, |
|
"step": 710 |
|
}, |
|
{ |
|
"dpo_loss": 146.4274444580078, |
|
"epoch": 4.052905054322154, |
|
"grad_norm": 56529.950131775324, |
|
"learning_rate": 5.166166492719124e-07, |
|
"logits": -0.5606953501701355, |
|
"logps": -77.65556335449219, |
|
"loss": 260.5169, |
|
"objective": 290.5565490722656, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 290.5565490722656, |
|
"step": 715 |
|
}, |
|
{ |
|
"dpo_loss": 120.44002532958984, |
|
"epoch": 4.081247047709022, |
|
"grad_norm": 57683.14038076662, |
|
"learning_rate": 4.868243561723535e-07, |
|
"logits": -0.5295476913452148, |
|
"logps": -79.97325134277344, |
|
"loss": 242.1283, |
|
"objective": 255.8209991455078, |
|
"ranking_idealized": 0.5208333134651184, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 255.8209991455078, |
|
"step": 720 |
|
}, |
|
{ |
|
"dpo_loss": 118.35858917236328, |
|
"epoch": 4.109589041095891, |
|
"grad_norm": 55672.19278003207, |
|
"learning_rate": 4.57823938419153e-07, |
|
"logits": -0.5605884790420532, |
|
"logps": -77.58100128173828, |
|
"loss": 248.1455, |
|
"objective": 227.64146423339844, |
|
"ranking_idealized": 0.5291666388511658, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 227.64146423339844, |
|
"step": 725 |
|
}, |
|
{ |
|
"dpo_loss": 126.79460906982422, |
|
"epoch": 4.137931034482759, |
|
"grad_norm": 54558.816031980285, |
|
"learning_rate": 4.2962680322157335e-07, |
|
"logits": -0.6369396448135376, |
|
"logps": -77.99376678466797, |
|
"loss": 251.7915, |
|
"objective": 234.14012145996094, |
|
"ranking_idealized": 0.5375000238418579, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 234.14012145996094, |
|
"step": 730 |
|
}, |
|
{ |
|
"dpo_loss": 105.81147766113281, |
|
"epoch": 4.166273027869627, |
|
"grad_norm": 57552.01210460219, |
|
"learning_rate": 4.0224404182059443e-07, |
|
"logits": -0.556169867515564, |
|
"logps": -79.20640563964844, |
|
"loss": 244.4964, |
|
"objective": 241.80613708496094, |
|
"ranking_idealized": 0.5249999761581421, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 241.80613708496094, |
|
"step": 735 |
|
}, |
|
{ |
|
"dpo_loss": 122.31700897216797, |
|
"epoch": 4.194615021256495, |
|
"grad_norm": 55106.91910179694, |
|
"learning_rate": 3.756864251262143e-07, |
|
"logits": -0.5948054790496826, |
|
"logps": -78.53282928466797, |
|
"loss": 247.5535, |
|
"objective": 229.94223022460938, |
|
"ranking_idealized": 0.5208333134651184, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 229.94223022460938, |
|
"step": 740 |
|
}, |
|
{ |
|
"dpo_loss": 112.44102478027344, |
|
"epoch": 4.222957014643363, |
|
"grad_norm": 56632.58905444662, |
|
"learning_rate": 3.499643994807486e-07, |
|
"logits": -0.6257311105728149, |
|
"logps": -76.18108367919922, |
|
"loss": 229.3078, |
|
"objective": 219.73800659179688, |
|
"ranking_idealized": 0.48750001192092896, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.4958333373069763, |
|
"regularize": 219.73800659179688, |
|
"step": 745 |
|
}, |
|
{ |
|
"dpo_loss": 108.38746643066406, |
|
"epoch": 4.251299008030231, |
|
"grad_norm": 57857.865386387704, |
|
"learning_rate": 3.250880825498026e-07, |
|
"logits": -0.651103675365448, |
|
"logps": -78.09248352050781, |
|
"loss": 230.1662, |
|
"objective": 248.08090209960938, |
|
"ranking_idealized": 0.5249999761581421, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 248.08090209960938, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 4.251299008030231, |
|
"eval_dpo_loss": 1146.1785888671875, |
|
"eval_logits": -0.6514174342155457, |
|
"eval_logps": -82.18943786621094, |
|
"eval_loss": 2241.186279296875, |
|
"eval_objective": 2272.541748046875, |
|
"eval_ranking_idealized": 0.5092975497245789, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5123966932296753, |
|
"eval_regularize": 2272.541748046875, |
|
"eval_runtime": 260.6171, |
|
"eval_samples_per_second": 22.217, |
|
"eval_steps_per_second": 0.929, |
|
"step": 750 |
|
}, |
|
{ |
|
"dpo_loss": 94.8583755493164, |
|
"epoch": 4.2796410014171, |
|
"grad_norm": 57171.85697996976, |
|
"learning_rate": 3.0106725934252095e-07, |
|
"logits": -0.617469847202301, |
|
"logps": -77.90315246582031, |
|
"loss": 235.9205, |
|
"objective": 197.84715270996094, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 197.84715270996094, |
|
"step": 755 |
|
}, |
|
{ |
|
"dpo_loss": 123.18533325195312, |
|
"epoch": 4.307982994803968, |
|
"grad_norm": 54580.774915438706, |
|
"learning_rate": 2.779113783626916e-07, |
|
"logits": -0.6015483736991882, |
|
"logps": -77.99730682373047, |
|
"loss": 219.0399, |
|
"objective": 203.16455078125, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.5625, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 203.16455078125, |
|
"step": 760 |
|
}, |
|
{ |
|
"dpo_loss": 110.12825012207031, |
|
"epoch": 4.336324988190836, |
|
"grad_norm": 59828.89205734499, |
|
"learning_rate": 2.5562954789221164e-07, |
|
"logits": -0.6103938817977905, |
|
"logps": -78.54331970214844, |
|
"loss": 228.1548, |
|
"objective": 236.6626739501953, |
|
"ranking_idealized": 0.5249999761581421, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 236.6626739501953, |
|
"step": 765 |
|
}, |
|
{ |
|
"dpo_loss": 123.61617279052734, |
|
"epoch": 4.364666981577704, |
|
"grad_norm": 55023.44732573502, |
|
"learning_rate": 2.3423053240837518e-07, |
|
"logits": -0.5958681702613831, |
|
"logps": -77.1136703491211, |
|
"loss": 230.3628, |
|
"objective": 218.6300811767578, |
|
"ranking_idealized": 0.5041666626930237, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 218.6300811767578, |
|
"step": 770 |
|
}, |
|
{ |
|
"dpo_loss": 98.90251159667969, |
|
"epoch": 4.393008974964572, |
|
"grad_norm": 54732.23444299214, |
|
"learning_rate": 2.137227491364016e-07, |
|
"logits": -0.5972557663917542, |
|
"logps": -78.07060241699219, |
|
"loss": 216.1991, |
|
"objective": 207.3569793701172, |
|
"ranking_idealized": 0.5083333253860474, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 207.3569793701172, |
|
"step": 775 |
|
}, |
|
{ |
|
"dpo_loss": 109.3499526977539, |
|
"epoch": 4.42135096835144, |
|
"grad_norm": 54081.47665517887, |
|
"learning_rate": 1.941142647385469e-07, |
|
"logits": -0.6113469004631042, |
|
"logps": -77.00961303710938, |
|
"loss": 210.2967, |
|
"objective": 211.32273864746094, |
|
"ranking_idealized": 0.5, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 211.32273864746094, |
|
"step": 780 |
|
}, |
|
{ |
|
"dpo_loss": 129.6833953857422, |
|
"epoch": 4.449692961738309, |
|
"grad_norm": 56904.662928690464, |
|
"learning_rate": 1.7541279214111277e-07, |
|
"logits": -0.6509999632835388, |
|
"logps": -76.9924087524414, |
|
"loss": 202.5833, |
|
"objective": 214.20053100585938, |
|
"ranking_idealized": 0.5291666388511658, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 214.20053100585938, |
|
"step": 785 |
|
}, |
|
{ |
|
"dpo_loss": 105.77576446533203, |
|
"epoch": 4.478034955125177, |
|
"grad_norm": 59495.74269597329, |
|
"learning_rate": 1.5762568750059604e-07, |
|
"logits": -0.6297593712806702, |
|
"logps": -79.3594970703125, |
|
"loss": 213.9758, |
|
"objective": 195.52975463867188, |
|
"ranking_idealized": 0.5541666746139526, |
|
"ranking_idealized_expo": 0.5541666746139526, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 195.52975463867188, |
|
"step": 790 |
|
}, |
|
{ |
|
"dpo_loss": 126.51469421386719, |
|
"epoch": 4.506376948512045, |
|
"grad_norm": 57426.690295786095, |
|
"learning_rate": 1.4075994731016895e-07, |
|
"logits": -0.5589895248413086, |
|
"logps": -78.96713256835938, |
|
"loss": 208.3141, |
|
"objective": 228.62591552734375, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 228.62591552734375, |
|
"step": 795 |
|
}, |
|
{ |
|
"dpo_loss": 99.8931655883789, |
|
"epoch": 4.534718941898913, |
|
"grad_norm": 54974.75424590611, |
|
"learning_rate": 1.2482220564763669e-07, |
|
"logits": -0.5342037677764893, |
|
"logps": -78.09777069091797, |
|
"loss": 198.8015, |
|
"objective": 194.4702911376953, |
|
"ranking_idealized": 0.5083333253860474, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 194.4702911376953, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 4.534718941898913, |
|
"eval_dpo_loss": 1141.6485595703125, |
|
"eval_logits": -0.6624540090560913, |
|
"eval_logps": -82.07605743408203, |
|
"eval_loss": 2236.1728515625, |
|
"eval_objective": 2266.98193359375, |
|
"eval_ranking_idealized": 0.5092975497245789, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5134297609329224, |
|
"eval_regularize": 2266.98193359375, |
|
"eval_runtime": 260.1873, |
|
"eval_samples_per_second": 22.253, |
|
"eval_steps_per_second": 0.93, |
|
"step": 800 |
|
}, |
|
{ |
|
"dpo_loss": 98.73998260498047, |
|
"epoch": 4.563060935285781, |
|
"grad_norm": 57388.01438250827, |
|
"learning_rate": 1.0981873156594381e-07, |
|
"logits": -0.6012968420982361, |
|
"logps": -77.73635864257812, |
|
"loss": 187.937, |
|
"objective": 201.25303649902344, |
|
"ranking_idealized": 0.4583333432674408, |
|
"ranking_idealized_expo": 0.4583333432674408, |
|
"ranking_simple": 0.4625000059604645, |
|
"regularize": 201.25303649902344, |
|
"step": 805 |
|
}, |
|
{ |
|
"dpo_loss": 111.82048797607422, |
|
"epoch": 4.59140292867265, |
|
"grad_norm": 55981.29882559336, |
|
"learning_rate": 9.575542662726756e-08, |
|
"logits": -0.6273016333580017, |
|
"logps": -76.8697738647461, |
|
"loss": 192.8168, |
|
"objective": 208.81210327148438, |
|
"ranking_idealized": 0.4583333432674408, |
|
"ranking_idealized_expo": 0.4583333432674408, |
|
"ranking_simple": 0.4583333432674408, |
|
"regularize": 208.81210327148438, |
|
"step": 810 |
|
}, |
|
{ |
|
"dpo_loss": 91.50080871582031, |
|
"epoch": 4.619744922059518, |
|
"grad_norm": 56005.94901608643, |
|
"learning_rate": 8.26378225816582e-08, |
|
"logits": -0.5446544885635376, |
|
"logps": -78.59085083007812, |
|
"loss": 189.4629, |
|
"objective": 190.6603240966797, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5874999761581421, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 190.6603240966797, |
|
"step": 815 |
|
}, |
|
{ |
|
"dpo_loss": 85.91332244873047, |
|
"epoch": 4.648086915446386, |
|
"grad_norm": 57321.07122409185, |
|
"learning_rate": 7.047107919114588e-08, |
|
"logits": -0.6059623956680298, |
|
"logps": -77.87149047851562, |
|
"loss": 196.4597, |
|
"objective": 188.6878204345703, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.5625, |
|
"ranking_simple": 0.5625, |
|
"regularize": 188.6878204345703, |
|
"step": 820 |
|
}, |
|
{ |
|
"dpo_loss": 100.03099060058594, |
|
"epoch": 4.6764289088332545, |
|
"grad_norm": 56144.87510056358, |
|
"learning_rate": 5.92599822001666e-08, |
|
"logits": -0.5770820379257202, |
|
"logps": -76.37511444091797, |
|
"loss": 190.9932, |
|
"objective": 200.11782836914062, |
|
"ranking_idealized": 0.5, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5041666626930237, |
|
"regularize": 200.11782836914062, |
|
"step": 825 |
|
}, |
|
{ |
|
"dpo_loss": 130.17276000976562, |
|
"epoch": 4.7047709022201225, |
|
"grad_norm": 56205.70525916846, |
|
"learning_rate": 4.9008941453107527e-08, |
|
"logits": -0.6312499642372131, |
|
"logps": -78.51829528808594, |
|
"loss": 199.7126, |
|
"objective": 203.79873657226562, |
|
"ranking_idealized": 0.512499988079071, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 203.79873657226562, |
|
"step": 830 |
|
}, |
|
{ |
|
"dpo_loss": 97.63799285888672, |
|
"epoch": 4.733112895606991, |
|
"grad_norm": 52585.06236070743, |
|
"learning_rate": 3.972198915970976e-08, |
|
"logits": -0.5862113833427429, |
|
"logps": -77.74220275878906, |
|
"loss": 193.6844, |
|
"objective": 188.87692260742188, |
|
"ranking_idealized": 0.48750001192092896, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.4958333373069763, |
|
"regularize": 188.87692260742188, |
|
"step": 835 |
|
}, |
|
{ |
|
"dpo_loss": 104.34432220458984, |
|
"epoch": 4.7614548889938595, |
|
"grad_norm": 54158.02776349709, |
|
"learning_rate": 3.1402778309014284e-08, |
|
"logits": -0.5883139967918396, |
|
"logps": -78.25198364257812, |
|
"loss": 182.0377, |
|
"objective": 192.16619873046875, |
|
"ranking_idealized": 0.5541666746139526, |
|
"ranking_idealized_expo": 0.5541666746139526, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 192.16619873046875, |
|
"step": 840 |
|
}, |
|
{ |
|
"dpo_loss": 85.43553161621094, |
|
"epoch": 4.7897968823807275, |
|
"grad_norm": 55251.77271941104, |
|
"learning_rate": 2.4054581232470785e-08, |
|
"logits": -0.5956212878227234, |
|
"logps": -78.41481018066406, |
|
"loss": 182.2349, |
|
"objective": 189.16375732421875, |
|
"ranking_idealized": 0.4541666805744171, |
|
"ranking_idealized_expo": 0.4541666805744171, |
|
"ranking_simple": 0.4541666805744171, |
|
"regularize": 189.16375732421875, |
|
"step": 845 |
|
}, |
|
{ |
|
"dpo_loss": 105.7170639038086, |
|
"epoch": 4.818138875767596, |
|
"grad_norm": 54494.83204693859, |
|
"learning_rate": 1.768028831677926e-08, |
|
"logits": -0.6094614863395691, |
|
"logps": -77.0250244140625, |
|
"loss": 189.8097, |
|
"objective": 175.89796447753906, |
|
"ranking_idealized": 0.5291666388511658, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 175.89796447753906, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 4.818138875767596, |
|
"eval_dpo_loss": 1141.93798828125, |
|
"eval_logits": -0.6598728895187378, |
|
"eval_logps": -82.0995101928711, |
|
"eval_loss": 2234.33984375, |
|
"eval_objective": 2266.176025390625, |
|
"eval_ranking_idealized": 0.5092975497245789, |
|
"eval_ranking_idealized_expo": 0.5092975497245789, |
|
"eval_ranking_simple": 0.5123966932296753, |
|
"eval_regularize": 2266.176025390625, |
|
"eval_runtime": 259.4687, |
|
"eval_samples_per_second": 22.315, |
|
"eval_steps_per_second": 0.933, |
|
"step": 850 |
|
}, |
|
{ |
|
"dpo_loss": 105.15518188476562, |
|
"epoch": 4.846480869154464, |
|
"grad_norm": 53663.63564822801, |
|
"learning_rate": 1.2282406866966078e-08, |
|
"logits": -0.5728623270988464, |
|
"logps": -77.2236099243164, |
|
"loss": 189.0291, |
|
"objective": 193.78550720214844, |
|
"ranking_idealized": 0.5375000238418579, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 193.78550720214844, |
|
"step": 855 |
|
}, |
|
{ |
|
"dpo_loss": 80.22781372070312, |
|
"epoch": 4.874822862541333, |
|
"grad_norm": 57241.34026283923, |
|
"learning_rate": 7.863060120144316e-09, |
|
"logits": -0.5664411783218384, |
|
"logps": -77.44783782958984, |
|
"loss": 191.157, |
|
"objective": 193.0424346923828, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 193.0424346923828, |
|
"step": 860 |
|
}, |
|
{ |
|
"dpo_loss": 81.85896301269531, |
|
"epoch": 4.903164855928201, |
|
"grad_norm": 56266.32691569455, |
|
"learning_rate": 4.423986410346526e-09, |
|
"logits": -0.6029998064041138, |
|
"logps": -76.39881896972656, |
|
"loss": 175.239, |
|
"objective": 165.09597778320312, |
|
"ranking_idealized": 0.4958333373069763, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.48750001192092896, |
|
"regularize": 165.09597778320312, |
|
"step": 865 |
|
}, |
|
{ |
|
"dpo_loss": 81.69947814941406, |
|
"epoch": 4.931506849315069, |
|
"grad_norm": 56846.595860039386, |
|
"learning_rate": 1.9665384847583622e-09, |
|
"logits": -0.6089357733726501, |
|
"logps": -78.06834411621094, |
|
"loss": 181.1643, |
|
"objective": 200.40501403808594, |
|
"ranking_idealized": 0.5083333253860474, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 200.40501403808594, |
|
"step": 870 |
|
}, |
|
{ |
|
"dpo_loss": 107.40243530273438, |
|
"epoch": 4.959848842701937, |
|
"grad_norm": 59409.659983453515, |
|
"learning_rate": 4.916829716183901e-10, |
|
"logits": -0.5824072957038879, |
|
"logps": -77.70771026611328, |
|
"loss": 184.499, |
|
"objective": 193.38832092285156, |
|
"ranking_idealized": 0.5458333492279053, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 193.38832092285156, |
|
"step": 875 |
|
}, |
|
{ |
|
"dpo_loss": 100.64095306396484, |
|
"epoch": 4.988190836088805, |
|
"grad_norm": 57665.05790656053, |
|
"learning_rate": 0.0, |
|
"logits": -0.6364601850509644, |
|
"logps": -78.62317657470703, |
|
"loss": 185.8119, |
|
"objective": 193.60719299316406, |
|
"ranking_idealized": 0.5291666388511658, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 193.60719299316406, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 4.988190836088805, |
|
"step": 880, |
|
"total_flos": 0.0, |
|
"train_loss": 743.9546574996276, |
|
"train_runtime": 35228.1764, |
|
"train_samples_per_second": 7.21, |
|
"train_steps_per_second": 0.025 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 880, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|