|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9988571428571429, |
|
"eval_steps": 50, |
|
"global_step": 437, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.022857142857142857, |
|
"grad_norm": 5.965044878833196, |
|
"learning_rate": 1.1363636363636363e-07, |
|
"logits/chosen": -2.7006218433380127, |
|
"logits/rejected": -2.6247599124908447, |
|
"logps/chosen": -301.24932861328125, |
|
"logps/rejected": -281.7940979003906, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.0003684944240376353, |
|
"rewards/margins": 0.0008126062457449734, |
|
"rewards/rejected": -0.000444111879914999, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.045714285714285714, |
|
"grad_norm": 4.694626134382372, |
|
"learning_rate": 2.2727272727272726e-07, |
|
"logits/chosen": -2.6410038471221924, |
|
"logits/rejected": -2.60575008392334, |
|
"logps/chosen": -278.92498779296875, |
|
"logps/rejected": -254.63601684570312, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.002462259028106928, |
|
"rewards/margins": 0.0011314961593598127, |
|
"rewards/rejected": 0.0013307628687471151, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06857142857142857, |
|
"grad_norm": 5.220071225612144, |
|
"learning_rate": 3.4090909090909085e-07, |
|
"logits/chosen": -2.638200044631958, |
|
"logits/rejected": -2.617208242416382, |
|
"logps/chosen": -263.2459411621094, |
|
"logps/rejected": -263.34710693359375, |
|
"loss": 0.689, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.014371426776051521, |
|
"rewards/margins": 0.007912042550742626, |
|
"rewards/rejected": 0.006459384225308895, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.09142857142857143, |
|
"grad_norm": 5.914085075708232, |
|
"learning_rate": 4.545454545454545e-07, |
|
"logits/chosen": -2.64882493019104, |
|
"logits/rejected": -2.585529327392578, |
|
"logps/chosen": -290.2810974121094, |
|
"logps/rejected": -268.34210205078125, |
|
"loss": 0.6806, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.03716137260198593, |
|
"rewards/margins": 0.0442696288228035, |
|
"rewards/rejected": -0.007108256220817566, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.11428571428571428, |
|
"grad_norm": 8.967256960057256, |
|
"learning_rate": 4.997124959943201e-07, |
|
"logits/chosen": -2.6775121688842773, |
|
"logits/rejected": -2.5971298217773438, |
|
"logps/chosen": -293.7924499511719, |
|
"logps/rejected": -254.38064575195312, |
|
"loss": 0.6696, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.02517825737595558, |
|
"rewards/margins": 0.1003413200378418, |
|
"rewards/rejected": -0.07516306638717651, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.11428571428571428, |
|
"eval_logits/chosen": -2.5406415462493896, |
|
"eval_logits/rejected": -2.4382479190826416, |
|
"eval_logps/chosen": -276.4425964355469, |
|
"eval_logps/rejected": -235.50723266601562, |
|
"eval_loss": 0.658383309841156, |
|
"eval_rewards/accuracies": 0.6853448152542114, |
|
"eval_rewards/chosen": -0.008386622183024883, |
|
"eval_rewards/margins": 0.1559244692325592, |
|
"eval_rewards/rejected": -0.16431109607219696, |
|
"eval_runtime": 91.7124, |
|
"eval_samples_per_second": 19.965, |
|
"eval_steps_per_second": 0.316, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.13714285714285715, |
|
"grad_norm": 7.378725048645318, |
|
"learning_rate": 4.979579212164186e-07, |
|
"logits/chosen": -2.578993320465088, |
|
"logits/rejected": -2.4725637435913086, |
|
"logps/chosen": -293.21600341796875, |
|
"logps/rejected": -274.92535400390625, |
|
"loss": 0.6509, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.1271006315946579, |
|
"rewards/margins": 0.13663128018379211, |
|
"rewards/rejected": -0.2637318968772888, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 7.529436455012959, |
|
"learning_rate": 4.946196886175515e-07, |
|
"logits/chosen": -2.5928056240081787, |
|
"logits/rejected": -2.543529748916626, |
|
"logps/chosen": -294.546630859375, |
|
"logps/rejected": -301.3702697753906, |
|
"loss": 0.6315, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.1876838505268097, |
|
"rewards/margins": 0.2297508269548416, |
|
"rewards/rejected": -0.4174346923828125, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.18285714285714286, |
|
"grad_norm": 12.054303957362464, |
|
"learning_rate": 4.897191188239667e-07, |
|
"logits/chosen": -2.6392509937286377, |
|
"logits/rejected": -2.590977668762207, |
|
"logps/chosen": -285.3960266113281, |
|
"logps/rejected": -307.17535400390625, |
|
"loss": 0.62, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.18367011845111847, |
|
"rewards/margins": 0.33499467372894287, |
|
"rewards/rejected": -0.5186647176742554, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.2057142857142857, |
|
"grad_norm": 13.273475435863975, |
|
"learning_rate": 4.832875107981763e-07, |
|
"logits/chosen": -2.7371668815612793, |
|
"logits/rejected": -2.6849629878997803, |
|
"logps/chosen": -296.71575927734375, |
|
"logps/rejected": -316.90338134765625, |
|
"loss": 0.6249, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.20430462062358856, |
|
"rewards/margins": 0.40924978256225586, |
|
"rewards/rejected": -0.6135543584823608, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.22857142857142856, |
|
"grad_norm": 15.686669316278751, |
|
"learning_rate": 4.753659419387223e-07, |
|
"logits/chosen": -2.769486665725708, |
|
"logits/rejected": -2.6865835189819336, |
|
"logps/chosen": -318.80413818359375, |
|
"logps/rejected": -312.09326171875, |
|
"loss": 0.6122, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.324177622795105, |
|
"rewards/margins": 0.4622408449649811, |
|
"rewards/rejected": -0.7864184975624084, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.22857142857142856, |
|
"eval_logits/chosen": -2.651167869567871, |
|
"eval_logits/rejected": -2.5533361434936523, |
|
"eval_logps/chosen": -316.30194091796875, |
|
"eval_logps/rejected": -308.60577392578125, |
|
"eval_loss": 0.6111233234405518, |
|
"eval_rewards/accuracies": 0.6767241358757019, |
|
"eval_rewards/chosen": -0.40698006749153137, |
|
"eval_rewards/margins": 0.4883164167404175, |
|
"eval_rewards/rejected": -0.8952965140342712, |
|
"eval_runtime": 90.9103, |
|
"eval_samples_per_second": 20.141, |
|
"eval_steps_per_second": 0.319, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.25142857142857145, |
|
"grad_norm": 12.723184548250023, |
|
"learning_rate": 4.660050057270191e-07, |
|
"logits/chosen": -2.619276523590088, |
|
"logits/rejected": -2.556680202484131, |
|
"logps/chosen": -375.2064208984375, |
|
"logps/rejected": -391.784423828125, |
|
"loss": 0.6021, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.589028000831604, |
|
"rewards/margins": 0.3497200608253479, |
|
"rewards/rejected": -0.9387480020523071, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.2742857142857143, |
|
"grad_norm": 16.182958724416615, |
|
"learning_rate": 4.5526448859687144e-07, |
|
"logits/chosen": -1.8494535684585571, |
|
"logits/rejected": -1.6301162242889404, |
|
"logps/chosen": -390.48797607421875, |
|
"logps/rejected": -364.620361328125, |
|
"loss": 0.5814, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.7958351969718933, |
|
"rewards/margins": 0.5332263708114624, |
|
"rewards/rejected": -1.329061508178711, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.29714285714285715, |
|
"grad_norm": 17.332692843610236, |
|
"learning_rate": 4.432129880904388e-07, |
|
"logits/chosen": -0.4575839638710022, |
|
"logits/rejected": -0.06781496107578278, |
|
"logps/chosen": -410.9315490722656, |
|
"logps/rejected": -413.829833984375, |
|
"loss": 0.5548, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.0577561855316162, |
|
"rewards/margins": 0.5758394598960876, |
|
"rewards/rejected": -1.6335957050323486, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 20.594750248375647, |
|
"learning_rate": 4.299274747394055e-07, |
|
"logits/chosen": 0.2059406340122223, |
|
"logits/rejected": 0.5167960524559021, |
|
"logps/chosen": -435.4883728027344, |
|
"logps/rejected": -472.76092529296875, |
|
"loss": 0.5654, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.200407862663269, |
|
"rewards/margins": 0.8080868721008301, |
|
"rewards/rejected": -2.0084948539733887, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.34285714285714286, |
|
"grad_norm": 16.29523919912318, |
|
"learning_rate": 4.1549280046953653e-07, |
|
"logits/chosen": -0.2454165518283844, |
|
"logits/rejected": 0.22050300240516663, |
|
"logps/chosen": -396.6532287597656, |
|
"logps/rejected": -463.4326171875, |
|
"loss": 0.5476, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.0966728925704956, |
|
"rewards/margins": 0.7746630311012268, |
|
"rewards/rejected": -1.871335744857788, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.34285714285714286, |
|
"eval_logits/chosen": 0.14409177005290985, |
|
"eval_logits/rejected": 0.9770079255104065, |
|
"eval_logps/chosen": -409.03546142578125, |
|
"eval_logps/rejected": -453.3369140625, |
|
"eval_loss": 0.5582876801490784, |
|
"eval_rewards/accuracies": 0.7370689511299133, |
|
"eval_rewards/chosen": -1.3343148231506348, |
|
"eval_rewards/margins": 1.0082927942276, |
|
"eval_rewards/rejected": -2.3426077365875244, |
|
"eval_runtime": 91.388, |
|
"eval_samples_per_second": 20.035, |
|
"eval_steps_per_second": 0.317, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3657142857142857, |
|
"grad_norm": 31.42845724506196, |
|
"learning_rate": 4.000011566683401e-07, |
|
"logits/chosen": -0.0020641356240957975, |
|
"logits/rejected": 0.659235954284668, |
|
"logps/chosen": -442.47259521484375, |
|
"logps/rejected": -490.87762451171875, |
|
"loss": 0.5549, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4313229322433472, |
|
"rewards/margins": 0.9210258722305298, |
|
"rewards/rejected": -2.352349042892456, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.38857142857142857, |
|
"grad_norm": 21.881739335743443, |
|
"learning_rate": 3.8355148537705047e-07, |
|
"logits/chosen": -0.8011367917060852, |
|
"logits/rejected": -0.18294472992420197, |
|
"logps/chosen": -420.85791015625, |
|
"logps/rejected": -446.387451171875, |
|
"loss": 0.5563, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.1675300598144531, |
|
"rewards/margins": 0.6390342712402344, |
|
"rewards/rejected": -1.8065645694732666, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.4114285714285714, |
|
"grad_norm": 24.301433957337014, |
|
"learning_rate": 3.662488473675315e-07, |
|
"logits/chosen": -0.6645376086235046, |
|
"logits/rejected": 0.36614301800727844, |
|
"logps/chosen": -447.889892578125, |
|
"logps/rejected": -494.78070068359375, |
|
"loss": 0.5498, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.1771008968353271, |
|
"rewards/margins": 1.1712000370025635, |
|
"rewards/rejected": -2.3483011722564697, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4342857142857143, |
|
"grad_norm": 18.603399872507342, |
|
"learning_rate": 3.48203751140067e-07, |
|
"logits/chosen": -0.08548859506845474, |
|
"logits/rejected": 0.7475250959396362, |
|
"logps/chosen": -421.85540771484375, |
|
"logps/rejected": -453.6908264160156, |
|
"loss": 0.5499, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.4559787511825562, |
|
"rewards/margins": 0.7359476089477539, |
|
"rewards/rejected": -2.1919264793395996, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.45714285714285713, |
|
"grad_norm": 21.90453363461546, |
|
"learning_rate": 3.2953144712759537e-07, |
|
"logits/chosen": -0.9407933354377747, |
|
"logits/rejected": -0.02539023384451866, |
|
"logps/chosen": -380.4794616699219, |
|
"logps/rejected": -437.4371643066406, |
|
"loss": 0.5582, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.1213675737380981, |
|
"rewards/margins": 0.9641984701156616, |
|
"rewards/rejected": -2.0855660438537598, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.45714285714285713, |
|
"eval_logits/chosen": -0.4975701570510864, |
|
"eval_logits/rejected": 0.5624167919158936, |
|
"eval_logps/chosen": -379.0511169433594, |
|
"eval_logps/rejected": -433.3172912597656, |
|
"eval_loss": 0.5498641729354858, |
|
"eval_rewards/accuracies": 0.732758641242981, |
|
"eval_rewards/chosen": -1.034471869468689, |
|
"eval_rewards/margins": 1.107939600944519, |
|
"eval_rewards/rejected": -2.142411708831787, |
|
"eval_runtime": 90.2066, |
|
"eval_samples_per_second": 20.298, |
|
"eval_steps_per_second": 0.321, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 16.10426120639833, |
|
"learning_rate": 3.103511916141658e-07, |
|
"logits/chosen": 0.09185227006673813, |
|
"logits/rejected": 0.8966398239135742, |
|
"logps/chosen": -387.89202880859375, |
|
"logps/rejected": -462.49932861328125, |
|
"loss": 0.5404, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.2443852424621582, |
|
"rewards/margins": 0.9278079271316528, |
|
"rewards/rejected": -2.1721930503845215, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.5028571428571429, |
|
"grad_norm": 18.780630904417688, |
|
"learning_rate": 2.9078548506882117e-07, |
|
"logits/chosen": 0.5002994537353516, |
|
"logits/rejected": 1.4443576335906982, |
|
"logps/chosen": -440.80279541015625, |
|
"logps/rejected": -487.53485107421875, |
|
"loss": 0.5609, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.5573679208755493, |
|
"rewards/margins": 0.8561462163925171, |
|
"rewards/rejected": -2.4135143756866455, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.5257142857142857, |
|
"grad_norm": 20.610433717594198, |
|
"learning_rate": 2.709592897595191e-07, |
|
"logits/chosen": 0.22773201763629913, |
|
"logits/rejected": 1.2361242771148682, |
|
"logps/chosen": -401.34228515625, |
|
"logps/rejected": -446.8021545410156, |
|
"loss": 0.5442, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.202831506729126, |
|
"rewards/margins": 0.8723229169845581, |
|
"rewards/rejected": -2.0751543045043945, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5485714285714286, |
|
"grad_norm": 27.325375522779876, |
|
"learning_rate": 2.509992316440332e-07, |
|
"logits/chosen": 0.26873356103897095, |
|
"logits/rejected": 1.303821325302124, |
|
"logps/chosen": -431.5526428222656, |
|
"logps/rejected": -526.184814453125, |
|
"loss": 0.536, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.328427791595459, |
|
"rewards/margins": 1.219201922416687, |
|
"rewards/rejected": -2.5476298332214355, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5714285714285714, |
|
"grad_norm": 18.92218062691862, |
|
"learning_rate": 2.3103279163519918e-07, |
|
"logits/chosen": -0.07236287742853165, |
|
"logits/rejected": 0.5380650758743286, |
|
"logps/chosen": -407.7901306152344, |
|
"logps/rejected": -495.40777587890625, |
|
"loss": 0.5503, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.2716388702392578, |
|
"rewards/margins": 0.980434238910675, |
|
"rewards/rejected": -2.252073287963867, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5714285714285714, |
|
"eval_logits/chosen": -0.37247952818870544, |
|
"eval_logits/rejected": 0.7719168066978455, |
|
"eval_logps/chosen": -392.6152038574219, |
|
"eval_logps/rejected": -450.1522216796875, |
|
"eval_loss": 0.5393335819244385, |
|
"eval_rewards/accuracies": 0.7370689511299133, |
|
"eval_rewards/chosen": -1.1701123714447021, |
|
"eval_rewards/margins": 1.1406482458114624, |
|
"eval_rewards/rejected": -2.310760498046875, |
|
"eval_runtime": 90.9292, |
|
"eval_samples_per_second": 20.137, |
|
"eval_steps_per_second": 0.319, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5942857142857143, |
|
"grad_norm": 25.541848941752068, |
|
"learning_rate": 2.1118749140573358e-07, |
|
"logits/chosen": 0.0009159505134448409, |
|
"logits/rejected": 0.6376093626022339, |
|
"logps/chosen": -426.14141845703125, |
|
"logps/rejected": -502.9112243652344, |
|
"loss": 0.5485, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.463547706604004, |
|
"rewards/margins": 0.8321346044540405, |
|
"rewards/rejected": -2.295682430267334, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.6171428571428571, |
|
"grad_norm": 23.51335121897504, |
|
"learning_rate": 1.9159007893272703e-07, |
|
"logits/chosen": 0.321635901927948, |
|
"logits/rejected": 1.6592861413955688, |
|
"logps/chosen": -413.24859619140625, |
|
"logps/rejected": -473.6759338378906, |
|
"loss": 0.5267, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.4072272777557373, |
|
"rewards/margins": 1.0244569778442383, |
|
"rewards/rejected": -2.4316840171813965, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 27.142819787480168, |
|
"learning_rate": 1.7236571898357766e-07, |
|
"logits/chosen": 1.0628600120544434, |
|
"logits/rejected": 2.0229506492614746, |
|
"logps/chosen": -440.122314453125, |
|
"logps/rejected": -543.1414794921875, |
|
"loss": 0.5316, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.6708418130874634, |
|
"rewards/margins": 1.129504919052124, |
|
"rewards/rejected": -2.8003463745117188, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6628571428571428, |
|
"grad_norm": 24.13150363681131, |
|
"learning_rate": 1.5363719371356882e-07, |
|
"logits/chosen": 0.698092520236969, |
|
"logits/rejected": 1.5312575101852417, |
|
"logps/chosen": -450.4425354003906, |
|
"logps/rejected": -515.0484008789062, |
|
"loss": 0.5339, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4953665733337402, |
|
"rewards/margins": 0.999632716178894, |
|
"rewards/rejected": -2.4949991703033447, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6857142857142857, |
|
"grad_norm": 17.486388226084866, |
|
"learning_rate": 1.3552411848071565e-07, |
|
"logits/chosen": 0.3839910626411438, |
|
"logits/rejected": 1.8341293334960938, |
|
"logps/chosen": -441.32183837890625, |
|
"logps/rejected": -507.97894287109375, |
|
"loss": 0.5224, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.3941259384155273, |
|
"rewards/margins": 1.1592432260513306, |
|
"rewards/rejected": -2.5533692836761475, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6857142857142857, |
|
"eval_logits/chosen": 0.18918734788894653, |
|
"eval_logits/rejected": 1.70877206325531, |
|
"eval_logps/chosen": -397.884033203125, |
|
"eval_logps/rejected": -470.09490966796875, |
|
"eval_loss": 0.5312153100967407, |
|
"eval_rewards/accuracies": 0.7543103694915771, |
|
"eval_rewards/chosen": -1.2228009700775146, |
|
"eval_rewards/margins": 1.2873866558074951, |
|
"eval_rewards/rejected": -2.510187864303589, |
|
"eval_runtime": 92.3596, |
|
"eval_samples_per_second": 19.825, |
|
"eval_steps_per_second": 0.314, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.7085714285714285, |
|
"grad_norm": 20.49651474517604, |
|
"learning_rate": 1.1814217788631473e-07, |
|
"logits/chosen": 0.41669049859046936, |
|
"logits/rejected": 1.394052505493164, |
|
"logps/chosen": -400.6260986328125, |
|
"logps/rejected": -474.28094482421875, |
|
"loss": 0.5361, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.4432194232940674, |
|
"rewards/margins": 0.9276365041732788, |
|
"rewards/rejected": -2.3708558082580566, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.7314285714285714, |
|
"grad_norm": 18.75776450561332, |
|
"learning_rate": 1.0160238692045331e-07, |
|
"logits/chosen": 0.7597023844718933, |
|
"logits/rejected": 1.6351118087768555, |
|
"logps/chosen": -413.95318603515625, |
|
"logps/rejected": -488.90460205078125, |
|
"loss": 0.542, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.6967086791992188, |
|
"rewards/margins": 0.782455563545227, |
|
"rewards/rejected": -2.479163885116577, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.7542857142857143, |
|
"grad_norm": 18.561363930407463, |
|
"learning_rate": 8.601038193139438e-08, |
|
"logits/chosen": 0.14268045127391815, |
|
"logits/rejected": 1.3421038389205933, |
|
"logps/chosen": -447.97137451171875, |
|
"logps/rejected": -503.50433349609375, |
|
"loss": 0.5363, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.4875319004058838, |
|
"rewards/margins": 1.0696327686309814, |
|
"rewards/rejected": -2.5571646690368652, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7771428571428571, |
|
"grad_norm": 17.499558797451687, |
|
"learning_rate": 7.146574594727572e-08, |
|
"logits/chosen": 0.3810690939426422, |
|
"logits/rejected": 1.2245051860809326, |
|
"logps/chosen": -414.9021911621094, |
|
"logps/rejected": -506.65045166015625, |
|
"loss": 0.5285, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.489512324333191, |
|
"rewards/margins": 1.1567548513412476, |
|
"rewards/rejected": -2.6462674140930176, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 17.943689215599328, |
|
"learning_rate": 5.8061372659157306e-08, |
|
"logits/chosen": 0.24244177341461182, |
|
"logits/rejected": 1.3491809368133545, |
|
"logps/chosen": -441.5047912597656, |
|
"logps/rejected": -494.35626220703125, |
|
"loss": 0.5396, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.517975091934204, |
|
"rewards/margins": 0.8826116323471069, |
|
"rewards/rejected": -2.4005866050720215, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_logits/chosen": 0.4364562928676605, |
|
"eval_logits/rejected": 1.9215292930603027, |
|
"eval_logps/chosen": -420.2202453613281, |
|
"eval_logps/rejected": -493.9275207519531, |
|
"eval_loss": 0.5290318131446838, |
|
"eval_rewards/accuracies": 0.75, |
|
"eval_rewards/chosen": -1.4461628198623657, |
|
"eval_rewards/margins": 1.3023512363433838, |
|
"eval_rewards/rejected": -2.748514175415039, |
|
"eval_runtime": 91.6979, |
|
"eval_samples_per_second": 19.968, |
|
"eval_steps_per_second": 0.316, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.8228571428571428, |
|
"grad_norm": 16.780749890709036, |
|
"learning_rate": 4.5882873127531614e-08, |
|
"logits/chosen": 0.174576535820961, |
|
"logits/rejected": 1.4981176853179932, |
|
"logps/chosen": -435.602783203125, |
|
"logps/rejected": -510.8885192871094, |
|
"loss": 0.5205, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.501977801322937, |
|
"rewards/margins": 1.0960423946380615, |
|
"rewards/rejected": -2.598020076751709, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.8457142857142858, |
|
"grad_norm": 19.04569651937684, |
|
"learning_rate": 3.500802900154412e-08, |
|
"logits/chosen": 0.34421294927597046, |
|
"logits/rejected": 1.787302017211914, |
|
"logps/chosen": -412.97747802734375, |
|
"logps/rejected": -499.79034423828125, |
|
"loss": 0.528, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.4191606044769287, |
|
"rewards/margins": 1.1945868730545044, |
|
"rewards/rejected": -2.6137473583221436, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.8685714285714285, |
|
"grad_norm": 21.067585045477745, |
|
"learning_rate": 2.550629574310309e-08, |
|
"logits/chosen": 0.211051344871521, |
|
"logits/rejected": 1.5275977849960327, |
|
"logps/chosen": -486.8960876464844, |
|
"logps/rejected": -515.337646484375, |
|
"loss": 0.5294, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.6883971691131592, |
|
"rewards/margins": 0.8909848928451538, |
|
"rewards/rejected": -2.5793819427490234, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8914285714285715, |
|
"grad_norm": 21.227279903684668, |
|
"learning_rate": 1.7438359028687983e-08, |
|
"logits/chosen": 0.37176352739334106, |
|
"logits/rejected": 1.208251714706421, |
|
"logps/chosen": -453.6361389160156, |
|
"logps/rejected": -538.0291748046875, |
|
"loss": 0.5333, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.4226316213607788, |
|
"rewards/margins": 1.0099334716796875, |
|
"rewards/rejected": -2.432565212249756, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.9142857142857143, |
|
"grad_norm": 35.72120712786558, |
|
"learning_rate": 1.0855747162029361e-08, |
|
"logits/chosen": 0.5662034749984741, |
|
"logits/rejected": 1.0855852365493774, |
|
"logps/chosen": -437.5174865722656, |
|
"logps/rejected": -510.6676330566406, |
|
"loss": 0.55, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.587842345237732, |
|
"rewards/margins": 0.8530977368354797, |
|
"rewards/rejected": -2.4409401416778564, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.9142857142857143, |
|
"eval_logits/chosen": 0.49114343523979187, |
|
"eval_logits/rejected": 1.9856219291687012, |
|
"eval_logps/chosen": -417.03155517578125, |
|
"eval_logps/rejected": -493.2509765625, |
|
"eval_loss": 0.5286471843719482, |
|
"eval_rewards/accuracies": 0.7629310488700867, |
|
"eval_rewards/chosen": -1.414276123046875, |
|
"eval_rewards/margins": 1.3274718523025513, |
|
"eval_rewards/rejected": -2.741748094558716, |
|
"eval_runtime": 91.527, |
|
"eval_samples_per_second": 20.005, |
|
"eval_steps_per_second": 0.317, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.9371428571428572, |
|
"grad_norm": 21.313276080994388, |
|
"learning_rate": 5.8005019731033615e-09, |
|
"logits/chosen": 0.33736371994018555, |
|
"logits/rejected": 1.3800859451293945, |
|
"logps/chosen": -453.69744873046875, |
|
"logps/rejected": -516.6829833984375, |
|
"loss": 0.5264, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.6551754474639893, |
|
"rewards/margins": 0.9143539667129517, |
|
"rewards/rejected": -2.5695290565490723, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 19.39515946553055, |
|
"learning_rate": 2.3049103053431886e-09, |
|
"logits/chosen": 0.2167482078075409, |
|
"logits/rejected": 1.6823341846466064, |
|
"logps/chosen": -409.4588928222656, |
|
"logps/rejected": -498.9947814941406, |
|
"loss": 0.5293, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.267107605934143, |
|
"rewards/margins": 1.3829718828201294, |
|
"rewards/rejected": -2.6500792503356934, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9828571428571429, |
|
"grad_norm": 20.626302042234812, |
|
"learning_rate": 3.9129780600541397e-10, |
|
"logits/chosen": 0.5624532699584961, |
|
"logits/rejected": 1.5469181537628174, |
|
"logps/chosen": -430.54388427734375, |
|
"logps/rejected": -515.5368041992188, |
|
"loss": 0.5296, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.483120083808899, |
|
"rewards/margins": 1.0355522632598877, |
|
"rewards/rejected": -2.518672466278076, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9988571428571429, |
|
"step": 437, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5693180419214803, |
|
"train_runtime": 11386.9149, |
|
"train_samples_per_second": 4.918, |
|
"train_steps_per_second": 0.038 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 437, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|