|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.999345977763244, |
|
"eval_steps": 100, |
|
"global_step": 764, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 6.493506493506494e-09, |
|
"logits/chosen": -2.0615594387054443, |
|
"logits/rejected": -1.9222214221954346, |
|
"logps/chosen": -3380.6083984375, |
|
"logps/rejected": -2521.2978515625, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6.493506493506492e-08, |
|
"logits/chosen": -1.674426555633545, |
|
"logits/rejected": -1.637134313583374, |
|
"logps/chosen": -2549.3515625, |
|
"logps/rejected": -2319.4013671875, |
|
"loss": 10.0505, |
|
"rewards/accuracies": 0.4333333373069763, |
|
"rewards/chosen": 0.0008169158827513456, |
|
"rewards/margins": 0.0011402772506698966, |
|
"rewards/rejected": -0.00032336192089132965, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.2987012987012984e-07, |
|
"logits/chosen": -1.6043205261230469, |
|
"logits/rejected": -1.5535523891448975, |
|
"logps/chosen": -2340.101318359375, |
|
"logps/rejected": -2224.145263671875, |
|
"loss": 7.4843, |
|
"rewards/accuracies": 0.5200000405311584, |
|
"rewards/chosen": 0.00018432810611557215, |
|
"rewards/margins": 0.0009077669237740338, |
|
"rewards/rejected": -0.0007234388613142073, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.948051948051948e-07, |
|
"logits/chosen": -1.6847426891326904, |
|
"logits/rejected": -1.6577625274658203, |
|
"logps/chosen": -2983.23681640625, |
|
"logps/rejected": -2513.237060546875, |
|
"loss": 9.1379, |
|
"rewards/accuracies": 0.48000001907348633, |
|
"rewards/chosen": 0.010261936113238335, |
|
"rewards/margins": 0.004135974682867527, |
|
"rewards/rejected": 0.006125961430370808, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.597402597402597e-07, |
|
"logits/chosen": -1.859400987625122, |
|
"logits/rejected": -1.8100417852401733, |
|
"logps/chosen": -2879.57470703125, |
|
"logps/rejected": -2273.878173828125, |
|
"loss": 12.271, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.012033696286380291, |
|
"rewards/margins": 0.005555520299822092, |
|
"rewards/rejected": 0.006478174589574337, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.2467532467532465e-07, |
|
"logits/chosen": -1.828608751296997, |
|
"logits/rejected": -1.805625319480896, |
|
"logps/chosen": -2893.784423828125, |
|
"logps/rejected": -2551.77294921875, |
|
"loss": 8.7411, |
|
"rewards/accuracies": 0.5600000023841858, |
|
"rewards/chosen": 0.02166888490319252, |
|
"rewards/margins": 0.007775471545755863, |
|
"rewards/rejected": 0.013893413357436657, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.896103896103896e-07, |
|
"logits/chosen": -1.7459495067596436, |
|
"logits/rejected": -1.6628999710083008, |
|
"logps/chosen": -3231.689697265625, |
|
"logps/rejected": -2554.42919921875, |
|
"loss": 9.758, |
|
"rewards/accuracies": 0.559999942779541, |
|
"rewards/chosen": 0.027519574388861656, |
|
"rewards/margins": 0.008895651437342167, |
|
"rewards/rejected": 0.018623923882842064, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.545454545454545e-07, |
|
"logits/chosen": -1.8072433471679688, |
|
"logits/rejected": -1.7838470935821533, |
|
"logps/chosen": -2829.386474609375, |
|
"logps/rejected": -2542.68701171875, |
|
"loss": 11.0017, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.024034958332777023, |
|
"rewards/margins": 0.006175906863063574, |
|
"rewards/rejected": 0.017859051004052162, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.99976474872689e-07, |
|
"logits/chosen": -1.7730411291122437, |
|
"logits/rejected": -1.7399647235870361, |
|
"logps/chosen": -2769.705322265625, |
|
"logps/rejected": -2476.75634765625, |
|
"loss": 15.623, |
|
"rewards/accuracies": 0.5400000214576721, |
|
"rewards/chosen": 0.008623984642326832, |
|
"rewards/margins": 0.008157819509506226, |
|
"rewards/rejected": 0.0004661638231482357, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.995583735427465e-07, |
|
"logits/chosen": -1.790204644203186, |
|
"logits/rejected": -1.7226215600967407, |
|
"logps/chosen": -2688.0732421875, |
|
"logps/rejected": -2436.649658203125, |
|
"loss": 11.9811, |
|
"rewards/accuracies": 0.6100000143051147, |
|
"rewards/chosen": 0.017978714779019356, |
|
"rewards/margins": 0.017238261178135872, |
|
"rewards/rejected": 0.0007404519128613174, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.986184978516146e-07, |
|
"logits/chosen": -1.7211675643920898, |
|
"logits/rejected": -1.6991230249404907, |
|
"logps/chosen": -2611.177001953125, |
|
"logps/rejected": -2212.4033203125, |
|
"loss": 16.8403, |
|
"rewards/accuracies": 0.5200001001358032, |
|
"rewards/chosen": 0.024822045117616653, |
|
"rewards/margins": 0.00336282467469573, |
|
"rewards/rejected": 0.021459218114614487, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_logits/chosen": -1.6729556322097778, |
|
"eval_logits/rejected": -1.6068017482757568, |
|
"eval_logps/chosen": -2806.55517578125, |
|
"eval_logps/rejected": -2491.901123046875, |
|
"eval_loss": 19.51178741455078, |
|
"eval_rewards/accuracies": 0.52734375, |
|
"eval_rewards/chosen": 0.025559017434716225, |
|
"eval_rewards/margins": 0.008243386633694172, |
|
"eval_rewards/rejected": 0.017315629869699478, |
|
"eval_runtime": 115.2508, |
|
"eval_samples_per_second": 17.353, |
|
"eval_steps_per_second": 0.278, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.971588128827782e-07, |
|
"logits/chosen": -1.7473026514053345, |
|
"logits/rejected": -1.6806236505508423, |
|
"logps/chosen": -3125.757080078125, |
|
"logps/rejected": -2645.337158203125, |
|
"loss": 26.9149, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.016931097954511642, |
|
"rewards/margins": 0.002864243695512414, |
|
"rewards/rejected": 0.014066850766539574, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.951823705321981e-07, |
|
"logits/chosen": -1.7069530487060547, |
|
"logits/rejected": -1.6579583883285522, |
|
"logps/chosen": -2828.78662109375, |
|
"logps/rejected": -2442.76416015625, |
|
"loss": 33.872, |
|
"rewards/accuracies": 0.5600000023841858, |
|
"rewards/chosen": 0.013961514458060265, |
|
"rewards/margins": 0.00896529946476221, |
|
"rewards/rejected": 0.004996216390281916, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.926933031274343e-07, |
|
"logits/chosen": -1.7224699258804321, |
|
"logits/rejected": -1.6934731006622314, |
|
"logps/chosen": -2923.9306640625, |
|
"logps/rejected": -2566.210693359375, |
|
"loss": 39.0757, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.038237668573856354, |
|
"rewards/margins": 0.006029448006302118, |
|
"rewards/rejected": 0.03220822289586067, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.896968147878145e-07, |
|
"logits/chosen": -1.7280409336090088, |
|
"logits/rejected": -1.7070726156234741, |
|
"logps/chosen": -2737.75927734375, |
|
"logps/rejected": -2486.45751953125, |
|
"loss": 18.5231, |
|
"rewards/accuracies": 0.6299999952316284, |
|
"rewards/chosen": 0.031596291810274124, |
|
"rewards/margins": 0.04216960817575455, |
|
"rewards/rejected": -0.010573318228125572, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.861991705437081e-07, |
|
"logits/chosen": -1.7859830856323242, |
|
"logits/rejected": -1.7191492319107056, |
|
"logps/chosen": -2743.43310546875, |
|
"logps/rejected": -2297.162109375, |
|
"loss": 20.7835, |
|
"rewards/accuracies": 0.5800000429153442, |
|
"rewards/chosen": 0.0336376316845417, |
|
"rewards/margins": 0.011832155287265778, |
|
"rewards/rejected": 0.021805476397275925, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.822076832376586e-07, |
|
"logits/chosen": -1.8132251501083374, |
|
"logits/rejected": -1.7665789127349854, |
|
"logps/chosen": -2841.165771484375, |
|
"logps/rejected": -2748.486572265625, |
|
"loss": 57.9401, |
|
"rewards/accuracies": 0.5099999904632568, |
|
"rewards/chosen": 0.006118610501289368, |
|
"rewards/margins": 0.0022155127953737974, |
|
"rewards/rejected": 0.0039030970074236393, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.777306982347594e-07, |
|
"logits/chosen": -1.6557657718658447, |
|
"logits/rejected": -1.5996118783950806, |
|
"logps/chosen": -3055.95361328125, |
|
"logps/rejected": -2603.83642578125, |
|
"loss": 23.1296, |
|
"rewards/accuracies": 0.6200000047683716, |
|
"rewards/chosen": 0.028251701965928078, |
|
"rewards/margins": 0.020935241132974625, |
|
"rewards/rejected": 0.007316464092582464, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.7277757597424075e-07, |
|
"logits/chosen": -1.8335905075073242, |
|
"logits/rejected": -1.7595329284667969, |
|
"logps/chosen": -2963.73779296875, |
|
"logps/rejected": -2540.163818359375, |
|
"loss": 40.5046, |
|
"rewards/accuracies": 0.5400000810623169, |
|
"rewards/chosen": 0.018591446802020073, |
|
"rewards/margins": -0.0024230503477156162, |
|
"rewards/rejected": 0.02101449854671955, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.6735867239874904e-07, |
|
"logits/chosen": -1.8637840747833252, |
|
"logits/rejected": -1.7640159130096436, |
|
"logps/chosen": -3237.434814453125, |
|
"logps/rejected": -2429.197998046875, |
|
"loss": 36.3042, |
|
"rewards/accuracies": 0.6200000047683716, |
|
"rewards/chosen": 0.04794805496931076, |
|
"rewards/margins": 0.019117821007966995, |
|
"rewards/rejected": 0.028830235823988914, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.6148531730223733e-07, |
|
"logits/chosen": -1.6909841299057007, |
|
"logits/rejected": -1.6915366649627686, |
|
"logps/chosen": -2649.89404296875, |
|
"logps/rejected": -2436.87353515625, |
|
"loss": 28.1241, |
|
"rewards/accuracies": 0.5300000309944153, |
|
"rewards/chosen": 0.007661645300686359, |
|
"rewards/margins": 0.0055509163066744804, |
|
"rewards/rejected": 0.0021107294596731663, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_logits/chosen": -1.681164264678955, |
|
"eval_logits/rejected": -1.618328332901001, |
|
"eval_logps/chosen": -2808.258056640625, |
|
"eval_logps/rejected": -2494.01953125, |
|
"eval_loss": 32.517486572265625, |
|
"eval_rewards/accuracies": 0.5234375, |
|
"eval_rewards/chosen": 0.008527392521500587, |
|
"eval_rewards/margins": 0.012391308322548866, |
|
"eval_rewards/rejected": -0.0038639232516288757, |
|
"eval_runtime": 113.682, |
|
"eval_samples_per_second": 17.593, |
|
"eval_steps_per_second": 0.281, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5516979064173524e-07, |
|
"logits/chosen": -1.749903917312622, |
|
"logits/rejected": -1.7615283727645874, |
|
"logps/chosen": -2285.7451171875, |
|
"logps/rejected": -2269.229736328125, |
|
"loss": 25.9535, |
|
"rewards/accuracies": 0.6100000143051147, |
|
"rewards/chosen": 0.011981850489974022, |
|
"rewards/margins": 0.014764687046408653, |
|
"rewards/rejected": -0.0027828349266201258, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.484252968625277e-07, |
|
"logits/chosen": -1.716509222984314, |
|
"logits/rejected": -1.6396989822387695, |
|
"logps/chosen": -2435.95556640625, |
|
"logps/rejected": -1922.770751953125, |
|
"loss": 28.3739, |
|
"rewards/accuracies": 0.6200000047683716, |
|
"rewards/chosen": 0.004359879065304995, |
|
"rewards/margins": 0.007711753249168396, |
|
"rewards/rejected": -0.0033518739510327578, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.4126593729042193e-07, |
|
"logits/chosen": -1.799469232559204, |
|
"logits/rejected": -1.757004737854004, |
|
"logps/chosen": -3254.6396484375, |
|
"logps/rejected": -2515.59619140625, |
|
"loss": 39.4707, |
|
"rewards/accuracies": 0.5900000333786011, |
|
"rewards/chosen": 0.03561704605817795, |
|
"rewards/margins": 0.019830647855997086, |
|
"rewards/rejected": 0.015786398202180862, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.3370668064882397e-07, |
|
"logits/chosen": -1.7325947284698486, |
|
"logits/rejected": -1.7474550008773804, |
|
"logps/chosen": -2579.47412109375, |
|
"logps/rejected": -2328.500732421875, |
|
"loss": 44.2727, |
|
"rewards/accuracies": 0.5100000500679016, |
|
"rewards/chosen": 0.04269097000360489, |
|
"rewards/margins": 0.02060030959546566, |
|
"rewards/rejected": 0.02209065482020378, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.2576333176226944e-07, |
|
"logits/chosen": -1.7366650104522705, |
|
"logits/rejected": -1.706789255142212, |
|
"logps/chosen": -2479.5576171875, |
|
"logps/rejected": -2277.726318359375, |
|
"loss": 29.5758, |
|
"rewards/accuracies": 0.5300000309944153, |
|
"rewards/chosen": 0.1058274507522583, |
|
"rewards/margins": 0.013660475611686707, |
|
"rewards/rejected": 0.0921669602394104, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.17452498511841e-07, |
|
"logits/chosen": -1.7807962894439697, |
|
"logits/rejected": -1.7134149074554443, |
|
"logps/chosen": -2989.12841796875, |
|
"logps/rejected": -2354.25830078125, |
|
"loss": 38.7316, |
|
"rewards/accuracies": 0.5200000405311584, |
|
"rewards/chosen": 0.023859605193138123, |
|
"rewards/margins": 0.005521018523722887, |
|
"rewards/rejected": 0.018338587135076523, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.087915571115629e-07, |
|
"logits/chosen": -1.8165556192398071, |
|
"logits/rejected": -1.7687098979949951, |
|
"logps/chosen": -2833.55859375, |
|
"logps/rejected": -2183.32470703125, |
|
"loss": 330.4642, |
|
"rewards/accuracies": 0.5699999928474426, |
|
"rewards/chosen": 0.031318746507167816, |
|
"rewards/margins": 0.029996121302247047, |
|
"rewards/rejected": 0.0013226259034126997, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.997986157783715e-07, |
|
"logits/chosen": -1.6980018615722656, |
|
"logits/rejected": -1.589050531387329, |
|
"logps/chosen": -3510.792236328125, |
|
"logps/rejected": -2689.208984375, |
|
"loss": 58.1646, |
|
"rewards/accuracies": 0.5200000405311584, |
|
"rewards/chosen": 0.014776378870010376, |
|
"rewards/margins": 0.011294273659586906, |
|
"rewards/rejected": 0.0034821047447621822, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.9049247687162155e-07, |
|
"logits/chosen": -1.7791646718978882, |
|
"logits/rejected": -1.7399044036865234, |
|
"logps/chosen": -2478.590576171875, |
|
"logps/rejected": -2269.01416015625, |
|
"loss": 31.6725, |
|
"rewards/accuracies": 0.5699999928474426, |
|
"rewards/chosen": 0.04884537309408188, |
|
"rewards/margins": 0.0339895561337471, |
|
"rewards/rejected": 0.014855814166367054, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.8089259758128543e-07, |
|
"logits/chosen": -1.670789361000061, |
|
"logits/rejected": -1.6030629873275757, |
|
"logps/chosen": -2726.465576171875, |
|
"logps/rejected": -2119.26123046875, |
|
"loss": 84.7591, |
|
"rewards/accuracies": 0.5699999928474426, |
|
"rewards/chosen": 0.021672677248716354, |
|
"rewards/margins": -0.010785548016428947, |
|
"rewards/rejected": 0.03245822712779045, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_logits/chosen": -1.660080909729004, |
|
"eval_logits/rejected": -1.596778154373169, |
|
"eval_logps/chosen": -2806.140625, |
|
"eval_logps/rejected": -2492.270263671875, |
|
"eval_loss": 47.80431365966797, |
|
"eval_rewards/accuracies": 0.5390625, |
|
"eval_rewards/chosen": 0.029702020809054375, |
|
"eval_rewards/margins": 0.01607733778655529, |
|
"eval_rewards/rejected": 0.013624681159853935, |
|
"eval_runtime": 116.3019, |
|
"eval_samples_per_second": 17.197, |
|
"eval_steps_per_second": 0.275, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.710190492470386e-07, |
|
"logits/chosen": -1.6620228290557861, |
|
"logits/rejected": -1.7311099767684937, |
|
"logps/chosen": -2315.977294921875, |
|
"logps/rejected": -2199.08251953125, |
|
"loss": 43.6013, |
|
"rewards/accuracies": 0.5400000214576721, |
|
"rewards/chosen": 0.032384876161813736, |
|
"rewards/margins": 0.008921505883336067, |
|
"rewards/rejected": 0.02346337027847767, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.6089247539328616e-07, |
|
"logits/chosen": -1.7675050497055054, |
|
"logits/rejected": -1.7156997919082642, |
|
"logps/chosen": -2859.810791015625, |
|
"logps/rejected": -2569.75537109375, |
|
"loss": 38.8904, |
|
"rewards/accuracies": 0.559999942779541, |
|
"rewards/chosen": 0.020630866289138794, |
|
"rewards/margins": 0.0018306337296962738, |
|
"rewards/rejected": 0.01880022883415222, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.5053404856787166e-07, |
|
"logits/chosen": -1.6446609497070312, |
|
"logits/rejected": -1.5918724536895752, |
|
"logps/chosen": -3104.72802734375, |
|
"logps/rejected": -2430.239013671875, |
|
"loss": 84.9753, |
|
"rewards/accuracies": 0.47999995946884155, |
|
"rewards/chosen": 0.053660690784454346, |
|
"rewards/margins": -0.005831834394484758, |
|
"rewards/rejected": 0.05949252098798752, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.399654260747078e-07, |
|
"logits/chosen": -1.699196219444275, |
|
"logits/rejected": -1.7045748233795166, |
|
"logps/chosen": -2584.699462890625, |
|
"logps/rejected": -2263.678466796875, |
|
"loss": 38.1532, |
|
"rewards/accuracies": 0.5300000309944153, |
|
"rewards/chosen": 0.02709970250725746, |
|
"rewards/margins": 0.01412280835211277, |
|
"rewards/rejected": 0.012976895086467266, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2920870469288373e-07, |
|
"logits/chosen": -1.7267532348632812, |
|
"logits/rejected": -1.6659395694732666, |
|
"logps/chosen": -2935.341796875, |
|
"logps/rejected": -2503.583984375, |
|
"loss": 47.1836, |
|
"rewards/accuracies": 0.5199999809265137, |
|
"rewards/chosen": 0.031215447932481766, |
|
"rewards/margins": 0.022346725687384605, |
|
"rewards/rejected": 0.008868719451129436, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.182863744769218e-07, |
|
"logits/chosen": -1.7288787364959717, |
|
"logits/rejected": -1.6928844451904297, |
|
"logps/chosen": -2811.489501953125, |
|
"logps/rejected": -2596.68310546875, |
|
"loss": 36.8176, |
|
"rewards/accuracies": 0.5099999904632568, |
|
"rewards/chosen": 0.1375296413898468, |
|
"rewards/margins": 0.0820910781621933, |
|
"rewards/rejected": 0.05543852597475052, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.072212717347776e-07, |
|
"logits/chosen": -1.7680120468139648, |
|
"logits/rejected": -1.6781940460205078, |
|
"logps/chosen": -3101.98583984375, |
|
"logps/rejected": -2426.4716796875, |
|
"loss": 36.7837, |
|
"rewards/accuracies": 0.5199999809265137, |
|
"rewards/chosen": 0.022122934460639954, |
|
"rewards/margins": 0.011781491339206696, |
|
"rewards/rejected": 0.010341441258788109, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9603653128189665e-07, |
|
"logits/chosen": -1.6812299489974976, |
|
"logits/rejected": -1.7215496301651, |
|
"logps/chosen": -2823.8291015625, |
|
"logps/rejected": -2762.53076171875, |
|
"loss": 42.732, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.028385426849126816, |
|
"rewards/margins": -0.006515379063785076, |
|
"rewards/rejected": 0.03490080684423447, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8475553807115387e-07, |
|
"logits/chosen": -1.8070951700210571, |
|
"logits/rejected": -1.7426990270614624, |
|
"logps/chosen": -2697.833251953125, |
|
"logps/rejected": -2263.9990234375, |
|
"loss": 55.8683, |
|
"rewards/accuracies": 0.5099999904632568, |
|
"rewards/chosen": 0.012206131592392921, |
|
"rewards/margins": 0.011461116373538971, |
|
"rewards/rejected": 0.0007450145785696805, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7340187829980883e-07, |
|
"logits/chosen": -1.8249183893203735, |
|
"logits/rejected": -1.7130759954452515, |
|
"logps/chosen": -2940.11181640625, |
|
"logps/rejected": -2463.068359375, |
|
"loss": 40.7835, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.0059137181378901005, |
|
"rewards/margins": 0.01795141212642193, |
|
"rewards/rejected": -0.012037692591547966, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": -1.6917269229888916, |
|
"eval_logits/rejected": -1.628839373588562, |
|
"eval_logps/chosen": -2807.42626953125, |
|
"eval_logps/rejected": -2493.92041015625, |
|
"eval_loss": 30.672218322753906, |
|
"eval_rewards/accuracies": 0.5546875, |
|
"eval_rewards/chosen": 0.016848012804985046, |
|
"eval_rewards/margins": 0.019721925258636475, |
|
"eval_rewards/rejected": -0.0028739143162965775, |
|
"eval_runtime": 110.0303, |
|
"eval_samples_per_second": 18.177, |
|
"eval_steps_per_second": 0.291, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.6199929009569996e-07, |
|
"logits/chosen": -1.7034717798233032, |
|
"logits/rejected": -1.707564353942871, |
|
"logps/chosen": -2599.38330078125, |
|
"logps/rejected": -2273.864990234375, |
|
"loss": 43.9981, |
|
"rewards/accuracies": 0.5600000023841858, |
|
"rewards/chosen": 0.02160579524934292, |
|
"rewards/margins": 0.0038177832029759884, |
|
"rewards/rejected": 0.017788011580705643, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.5057161388578505e-07, |
|
"logits/chosen": -1.7964134216308594, |
|
"logits/rejected": -1.730661392211914, |
|
"logps/chosen": -3038.08740234375, |
|
"logps/rejected": -2405.333740234375, |
|
"loss": 31.4477, |
|
"rewards/accuracies": 0.5600000619888306, |
|
"rewards/chosen": 0.05013390630483627, |
|
"rewards/margins": 0.029845798388123512, |
|
"rewards/rejected": 0.02028810977935791, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.391427425507943e-07, |
|
"logits/chosen": -1.6959331035614014, |
|
"logits/rejected": -1.6784296035766602, |
|
"logps/chosen": -2696.2236328125, |
|
"logps/rejected": -2173.48583984375, |
|
"loss": 32.2174, |
|
"rewards/accuracies": 0.5600000619888306, |
|
"rewards/chosen": 0.01727980561554432, |
|
"rewards/margins": 0.013602805323898792, |
|
"rewards/rejected": 0.0036770000588148832, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2773657147021465e-07, |
|
"logits/chosen": -1.8469693660736084, |
|
"logits/rejected": -1.7459551095962524, |
|
"logps/chosen": -3117.762451171875, |
|
"logps/rejected": -2390.564208984375, |
|
"loss": 37.6526, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.01079155970364809, |
|
"rewards/margins": 0.011436818167567253, |
|
"rewards/rejected": -0.00064525764901191, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1637694856204885e-07, |
|
"logits/chosen": -1.751587152481079, |
|
"logits/rejected": -1.6395552158355713, |
|
"logps/chosen": -2887.770751953125, |
|
"logps/rejected": -2129.771728515625, |
|
"loss": 53.6906, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.004514098167419434, |
|
"rewards/margins": 0.00042482782737351954, |
|
"rewards/rejected": 0.004089272115379572, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0508762442180743e-07, |
|
"logits/chosen": -1.8443762063980103, |
|
"logits/rejected": -1.792295217514038, |
|
"logps/chosen": -2964.06494140625, |
|
"logps/rejected": -2577.50244140625, |
|
"loss": 62.4137, |
|
"rewards/accuracies": 0.5600000023841858, |
|
"rewards/chosen": 0.04290894791483879, |
|
"rewards/margins": 0.01085699163377285, |
|
"rewards/rejected": 0.03205195814371109, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.93892202664981e-07, |
|
"logits/chosen": -1.6403262615203857, |
|
"logits/rejected": -1.712969183921814, |
|
"logps/chosen": -2689.706787109375, |
|
"logps/rejected": -2513.2998046875, |
|
"loss": 31.7885, |
|
"rewards/accuracies": 0.5100000500679016, |
|
"rewards/chosen": 0.01223880797624588, |
|
"rewards/margins": 0.011182873509824276, |
|
"rewards/rejected": 0.0010559323709458113, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8281409057681686e-07, |
|
"logits/chosen": -1.651449203491211, |
|
"logits/rejected": -1.5920675992965698, |
|
"logps/chosen": -3211.50341796875, |
|
"logps/rejected": -2753.0322265625, |
|
"loss": 103.2519, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.0340498685836792, |
|
"rewards/margins": 0.005449384916573763, |
|
"rewards/rejected": 0.028600484132766724, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7187645017258195e-07, |
|
"logits/chosen": -1.823428750038147, |
|
"logits/rejected": -1.7740917205810547, |
|
"logps/chosen": -2745.991455078125, |
|
"logps/rejected": -2407.27978515625, |
|
"loss": 48.2582, |
|
"rewards/accuracies": 0.5300000309944153, |
|
"rewards/chosen": 0.03057839907705784, |
|
"rewards/margins": 0.003420495195314288, |
|
"rewards/rejected": 0.027157902717590332, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6110214977063343e-07, |
|
"logits/chosen": -1.7967636585235596, |
|
"logits/rejected": -1.7410199642181396, |
|
"logps/chosen": -2905.251708984375, |
|
"logps/rejected": -2435.401611328125, |
|
"loss": 36.2204, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.013903191313147545, |
|
"rewards/margins": 0.00013892585411667824, |
|
"rewards/rejected": 0.013764267787337303, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_logits/chosen": -1.6842743158340454, |
|
"eval_logits/rejected": -1.6236169338226318, |
|
"eval_logps/chosen": -2806.076171875, |
|
"eval_logps/rejected": -2491.544677734375, |
|
"eval_loss": 31.220157623291016, |
|
"eval_rewards/accuracies": 0.53515625, |
|
"eval_rewards/chosen": 0.030346479266881943, |
|
"eval_rewards/margins": 0.009465347044169903, |
|
"eval_rewards/rejected": 0.020881133154034615, |
|
"eval_runtime": 112.3374, |
|
"eval_samples_per_second": 17.804, |
|
"eval_steps_per_second": 0.285, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.5051371617954777e-07, |
|
"logits/chosen": -1.6810442209243774, |
|
"logits/rejected": -1.6596931219100952, |
|
"logps/chosen": -2559.396728515625, |
|
"logps/rejected": -2228.120361328125, |
|
"loss": 44.2046, |
|
"rewards/accuracies": 0.5399999618530273, |
|
"rewards/chosen": 0.016220757737755775, |
|
"rewards/margins": 0.007862111553549767, |
|
"rewards/rejected": 0.00835864432156086, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.4013328759927622e-07, |
|
"logits/chosen": -1.6315361261367798, |
|
"logits/rejected": -1.6191142797470093, |
|
"logps/chosen": -2893.280029296875, |
|
"logps/rejected": -2805.344970703125, |
|
"loss": 31.2508, |
|
"rewards/accuracies": 0.6299999952316284, |
|
"rewards/chosen": 0.02805119752883911, |
|
"rewards/margins": 0.01223050244152546, |
|
"rewards/rejected": 0.0158206969499588, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.2998256733479896e-07, |
|
"logits/chosen": -1.810739278793335, |
|
"logits/rejected": -1.8173195123672485, |
|
"logps/chosen": -2332.383056640625, |
|
"logps/rejected": -1922.5443115234375, |
|
"loss": 226.0483, |
|
"rewards/accuracies": 0.5600000023841858, |
|
"rewards/chosen": 0.020228227600455284, |
|
"rewards/margins": 0.009395391680300236, |
|
"rewards/rejected": 0.010832836851477623, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.200827784190537e-07, |
|
"logits/chosen": -1.6795597076416016, |
|
"logits/rejected": -1.6883628368377686, |
|
"logps/chosen": -3027.91796875, |
|
"logps/rejected": -2619.313232421875, |
|
"loss": 29.3654, |
|
"rewards/accuracies": 0.5600000619888306, |
|
"rewards/chosen": 0.01968817412853241, |
|
"rewards/margins": 0.008831174112856388, |
|
"rewards/rejected": 0.010857000946998596, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.1045461924001323e-07, |
|
"logits/chosen": -1.791738748550415, |
|
"logits/rejected": -1.8031442165374756, |
|
"logps/chosen": -2852.6904296875, |
|
"logps/rejected": -2462.853271484375, |
|
"loss": 45.3966, |
|
"rewards/accuracies": 0.46000003814697266, |
|
"rewards/chosen": 0.010838394984602928, |
|
"rewards/margins": 0.003685446921736002, |
|
"rewards/rejected": 0.007152946200221777, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0111822026468514e-07, |
|
"logits/chosen": -1.7872514724731445, |
|
"logits/rejected": -1.658860445022583, |
|
"logps/chosen": -2903.530029296875, |
|
"logps/rejected": -2319.64599609375, |
|
"loss": 67.4473, |
|
"rewards/accuracies": 0.5700000524520874, |
|
"rewards/chosen": 0.009608490392565727, |
|
"rewards/margins": 0.004570655524730682, |
|
"rewards/rejected": 0.0050378344021737576, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.209310195051581e-08, |
|
"logits/chosen": -1.8252109289169312, |
|
"logits/rejected": -1.6855742931365967, |
|
"logps/chosen": -2538.860107421875, |
|
"logps/rejected": -1955.6048583984375, |
|
"loss": 63.0174, |
|
"rewards/accuracies": 0.6299999952316284, |
|
"rewards/chosen": 0.04829864576458931, |
|
"rewards/margins": 0.022980675101280212, |
|
"rewards/rejected": 0.025317972525954247, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.339813393219713e-08, |
|
"logits/chosen": -1.739793062210083, |
|
"logits/rejected": -1.641005516052246, |
|
"logps/chosen": -2791.561767578125, |
|
"logps/rejected": -2475.72998046875, |
|
"loss": 59.8369, |
|
"rewards/accuracies": 0.5699999928474426, |
|
"rewards/chosen": 0.05231914669275284, |
|
"rewards/margins": 0.021858692169189453, |
|
"rewards/rejected": 0.030460450798273087, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.505149556920698e-08, |
|
"logits/chosen": -1.8431494235992432, |
|
"logits/rejected": -1.7774893045425415, |
|
"logps/chosen": -2542.13427734375, |
|
"logps/rejected": -2193.825927734375, |
|
"loss": 29.3999, |
|
"rewards/accuracies": 0.5800000429153442, |
|
"rewards/chosen": 0.04752471297979355, |
|
"rewards/margins": 0.017444033175706863, |
|
"rewards/rejected": 0.030080681666731834, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.707063793657064e-08, |
|
"logits/chosen": -1.7773969173431396, |
|
"logits/rejected": -1.6891686916351318, |
|
"logps/chosen": -2942.21240234375, |
|
"logps/rejected": -2429.352294921875, |
|
"loss": 99.7738, |
|
"rewards/accuracies": 0.6200000047683716, |
|
"rewards/chosen": 0.03306427597999573, |
|
"rewards/margins": 0.01405587512999773, |
|
"rewards/rejected": 0.019008399918675423, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_logits/chosen": -1.6827195882797241, |
|
"eval_logits/rejected": -1.6222153902053833, |
|
"eval_logps/chosen": -2804.348388671875, |
|
"eval_logps/rejected": -2489.908935546875, |
|
"eval_loss": 33.74028778076172, |
|
"eval_rewards/accuracies": 0.5390625, |
|
"eval_rewards/chosen": 0.04762275516986847, |
|
"eval_rewards/margins": 0.010385587811470032, |
|
"eval_rewards/rejected": 0.037237171083688736, |
|
"eval_runtime": 106.3716, |
|
"eval_samples_per_second": 18.802, |
|
"eval_steps_per_second": 0.301, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.947224733831363e-08, |
|
"logits/chosen": -1.759399175643921, |
|
"logits/rejected": -1.7431520223617554, |
|
"logps/chosen": -2756.701416015625, |
|
"logps/rejected": -2470.905029296875, |
|
"loss": 51.5387, |
|
"rewards/accuracies": 0.5199999809265137, |
|
"rewards/chosen": 0.011415710672736168, |
|
"rewards/margins": 0.009652274660766125, |
|
"rewards/rejected": 0.0017634350806474686, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.227221041988955e-08, |
|
"logits/chosen": -1.7857062816619873, |
|
"logits/rejected": -1.725630760192871, |
|
"logps/chosen": -2520.410400390625, |
|
"logps/rejected": -2319.78564453125, |
|
"loss": 28.3912, |
|
"rewards/accuracies": 0.5900000333786011, |
|
"rewards/chosen": 0.01811736635863781, |
|
"rewards/margins": 0.01297797542065382, |
|
"rewards/rejected": 0.005139390472322702, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.548558095252758e-08, |
|
"logits/chosen": -1.6374757289886475, |
|
"logits/rejected": -1.673044204711914, |
|
"logps/chosen": -2845.2119140625, |
|
"logps/rejected": -2698.294677734375, |
|
"loss": 42.0619, |
|
"rewards/accuracies": 0.48000001907348633, |
|
"rewards/chosen": 0.030720695853233337, |
|
"rewards/margins": 0.020541973412036896, |
|
"rewards/rejected": 0.010178723372519016, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.9126548358945635e-08, |
|
"logits/chosen": -1.7063062191009521, |
|
"logits/rejected": -1.6988853216171265, |
|
"logps/chosen": -3136.520263671875, |
|
"logps/rejected": -2731.25927734375, |
|
"loss": 46.0608, |
|
"rewards/accuracies": 0.5600000619888306, |
|
"rewards/chosen": 0.029350021854043007, |
|
"rewards/margins": 0.010425332933664322, |
|
"rewards/rejected": 0.018924688920378685, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.3208408046234896e-08, |
|
"logits/chosen": -1.8164535760879517, |
|
"logits/rejected": -1.7656440734863281, |
|
"logps/chosen": -2538.8046875, |
|
"logps/rejected": -2061.81591796875, |
|
"loss": 40.0838, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.01678399369120598, |
|
"rewards/margins": 0.009666666388511658, |
|
"rewards/rejected": 0.007117328234016895, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.774353360794493e-08, |
|
"logits/chosen": -1.7155154943466187, |
|
"logits/rejected": -1.7442939281463623, |
|
"logps/chosen": -2761.740966796875, |
|
"logps/rejected": -2534.80419921875, |
|
"loss": 36.8374, |
|
"rewards/accuracies": 0.64000004529953, |
|
"rewards/chosen": 0.03588343411684036, |
|
"rewards/margins": 0.037469957023859024, |
|
"rewards/rejected": -0.0015865217428654432, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.2743350953487422e-08, |
|
"logits/chosen": -1.6992709636688232, |
|
"logits/rejected": -1.7416222095489502, |
|
"logps/chosen": -2850.97705078125, |
|
"logps/rejected": -2569.76611328125, |
|
"loss": 86.4259, |
|
"rewards/accuracies": 0.5300000905990601, |
|
"rewards/chosen": 0.018378589302301407, |
|
"rewards/margins": 0.004103804472833872, |
|
"rewards/rejected": 0.014274786226451397, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.8218314418949387e-08, |
|
"logits/chosen": -1.718764305114746, |
|
"logits/rejected": -1.6741740703582764, |
|
"logps/chosen": -2353.093017578125, |
|
"logps/rejected": -2190.77001953125, |
|
"loss": 46.1473, |
|
"rewards/accuracies": 0.5699999928474426, |
|
"rewards/chosen": 0.0031641994137316942, |
|
"rewards/margins": 0.0004996396601200104, |
|
"rewards/rejected": 0.002664559753611684, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.4177884909263277e-08, |
|
"logits/chosen": -1.6867101192474365, |
|
"logits/rejected": -1.652515172958374, |
|
"logps/chosen": -2937.97412109375, |
|
"logps/rejected": -2552.07177734375, |
|
"loss": 37.0029, |
|
"rewards/accuracies": 0.5199999809265137, |
|
"rewards/chosen": 0.007288885302841663, |
|
"rewards/margins": -0.0001541988895041868, |
|
"rewards/rejected": 0.007443083915859461, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.063051011743335e-08, |
|
"logits/chosen": -1.7554800510406494, |
|
"logits/rejected": -1.7419729232788086, |
|
"logps/chosen": -2755.109619140625, |
|
"logps/rejected": -2368.246337890625, |
|
"loss": 41.8506, |
|
"rewards/accuracies": 0.46000003814697266, |
|
"rewards/chosen": 0.006798497401177883, |
|
"rewards/margins": 0.010020612739026546, |
|
"rewards/rejected": -0.0032221146393567324, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_logits/chosen": -1.682308554649353, |
|
"eval_logits/rejected": -1.6210675239562988, |
|
"eval_logps/chosen": -2806.1005859375, |
|
"eval_logps/rejected": -2491.68505859375, |
|
"eval_loss": 32.91334915161133, |
|
"eval_rewards/accuracies": 0.5546875, |
|
"eval_rewards/chosen": 0.030103469267487526, |
|
"eval_rewards/margins": 0.01062812004238367, |
|
"eval_rewards/rejected": 0.01947534643113613, |
|
"eval_runtime": 110.9725, |
|
"eval_samples_per_second": 18.022, |
|
"eval_steps_per_second": 0.288, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.58360686217671e-09, |
|
"logits/chosen": -1.7843902111053467, |
|
"logits/rejected": -1.6857761144638062, |
|
"logps/chosen": -2821.310546875, |
|
"logps/rejected": -2445.586669921875, |
|
"loss": 44.0616, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.025277357548475266, |
|
"rewards/margins": 0.014521745964884758, |
|
"rewards/rejected": 0.010755611583590508, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.043545580906694e-09, |
|
"logits/chosen": -1.7102206945419312, |
|
"logits/rejected": -1.6009165048599243, |
|
"logps/chosen": -2682.336669921875, |
|
"logps/rejected": -2234.36279296875, |
|
"loss": 44.9558, |
|
"rewards/accuracies": 0.6299999952316284, |
|
"rewards/chosen": 0.018401915207505226, |
|
"rewards/margins": 0.030992329120635986, |
|
"rewards/rejected": -0.012590417638421059, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.015637010480576e-09, |
|
"logits/chosen": -1.7701480388641357, |
|
"logits/rejected": -1.7436597347259521, |
|
"logps/chosen": -3042.88330078125, |
|
"logps/rejected": -2501.55615234375, |
|
"loss": 35.3819, |
|
"rewards/accuracies": 0.5199999809265137, |
|
"rewards/chosen": 0.012798592448234558, |
|
"rewards/margins": -0.0020437492057681084, |
|
"rewards/rejected": 0.014842341654002666, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.5041210835596285e-09, |
|
"logits/chosen": -1.703537940979004, |
|
"logits/rejected": -1.6897165775299072, |
|
"logps/chosen": -2817.1484375, |
|
"logps/rejected": -2390.23095703125, |
|
"loss": 55.7217, |
|
"rewards/accuracies": 0.5800000429153442, |
|
"rewards/chosen": 0.022753870114684105, |
|
"rewards/margins": 0.017634030431509018, |
|
"rewards/rejected": 0.005119838751852512, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5.121580637968137e-10, |
|
"logits/chosen": -1.7322509288787842, |
|
"logits/rejected": -1.6345192193984985, |
|
"logps/chosen": -2836.63623046875, |
|
"logps/rejected": -2363.93505859375, |
|
"loss": 67.2692, |
|
"rewards/accuracies": 0.6300000548362732, |
|
"rewards/chosen": 0.02889620140194893, |
|
"rewards/margins": 0.014323192648589611, |
|
"rewards/rejected": 0.014573007822036743, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.1821938386477075e-11, |
|
"logits/chosen": -1.7962977886199951, |
|
"logits/rejected": -1.7127879858016968, |
|
"logps/chosen": -2801.806640625, |
|
"logps/rejected": -2341.844970703125, |
|
"loss": 46.0674, |
|
"rewards/accuracies": 0.5700000524520874, |
|
"rewards/chosen": 0.009056088514626026, |
|
"rewards/margins": 0.013271180912852287, |
|
"rewards/rejected": -0.004215092398226261, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 764, |
|
"total_flos": 0.0, |
|
"train_loss": 46.613421885577296, |
|
"train_runtime": 4597.6924, |
|
"train_samples_per_second": 13.297, |
|
"train_steps_per_second": 0.166 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 764, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 5, |
|
"trial_name": null, |
|
"trial_params": null |
|
}