{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.9843342036553526,
  "eval_steps": 100,
  "global_step": 190,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 2.6315789473684213e-07,
      "logits/chosen": 0.8826487064361572,
      "logits/rejected": 0.921362042427063,
      "logps/chosen": -36.58121871948242,
      "logps/rejected": -54.902320861816406,
      "loss": 0.01,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.1,
      "learning_rate": 2.631578947368421e-06,
      "logits/chosen": 0.8915393352508545,
      "logits/rejected": 0.8742258548736572,
      "logps/chosen": -87.77196502685547,
      "logps/rejected": -96.38507843017578,
      "loss": 0.0101,
      "rewards/accuracies": 0.2569444477558136,
      "rewards/chosen": 0.0003006549668498337,
      "rewards/margins": 0.0004423653008416295,
      "rewards/rejected": -0.00014171031943988055,
      "step": 10
    },
    {
      "epoch": 0.21,
      "learning_rate": 4.999578104083307e-06,
      "logits/chosen": 0.7802013158798218,
      "logits/rejected": 0.8469624519348145,
      "logps/chosen": -91.75413513183594,
      "logps/rejected": -85.1173095703125,
      "loss": 0.01,
      "rewards/accuracies": 0.24375000596046448,
      "rewards/chosen": 3.31846640619915e-05,
      "rewards/margins": -0.00015664812235627323,
      "rewards/rejected": 0.00018983279005624354,
      "step": 20
    },
    {
      "epoch": 0.31,
      "learning_rate": 4.949122667718935e-06,
      "logits/chosen": 0.8652933835983276,
      "logits/rejected": 0.848902702331543,
      "logps/chosen": -85.29698944091797,
      "logps/rejected": -78.0544204711914,
      "loss": 0.0099,
      "rewards/accuracies": 0.28125,
      "rewards/chosen": 0.0002226830692961812,
      "rewards/margins": 0.00038711068918928504,
      "rewards/rejected": -0.00016442763444501907,
      "step": 30
    },
    {
      "epoch": 0.42,
      "learning_rate": 4.8162351680370046e-06,
      "logits/chosen": 0.7649837136268616,
      "logits/rejected": 0.841802716255188,
      "logps/chosen": -122.76881408691406,
      "logps/rejected": -108.8086166381836,
      "loss": 0.01,
      "rewards/accuracies": 0.26875001192092896,
      "rewards/chosen": -0.000362707010935992,
      "rewards/margins": -7.97106167738093e-06,
      "rewards/rejected": -0.0003547359665390104,
      "step": 40
    },
    {
      "epoch": 0.52,
      "learning_rate": 4.605388304968915e-06,
      "logits/chosen": 0.8386613130569458,
      "logits/rejected": 0.8677732348442078,
      "logps/chosen": -65.77490997314453,
      "logps/rejected": -71.66779327392578,
      "loss": 0.01,
      "rewards/accuracies": 0.21250000596046448,
      "rewards/chosen": -0.00019812444224953651,
      "rewards/margins": -0.00015129637904465199,
      "rewards/rejected": -4.682801591116004e-05,
      "step": 50
    },
    {
      "epoch": 0.63,
      "learning_rate": 4.323678718546552e-06,
      "logits/chosen": 0.8152744174003601,
      "logits/rejected": 0.8866288065910339,
      "logps/chosen": -117.6309814453125,
      "logps/rejected": -110.9274673461914,
      "loss": 0.0101,
      "rewards/accuracies": 0.26875001192092896,
      "rewards/chosen": 2.0224386389600113e-05,
      "rewards/margins": -1.7978531104745343e-05,
      "rewards/rejected": 3.820297933998518e-05,
      "step": 60
    },
    {
      "epoch": 0.73,
      "learning_rate": 3.980588131662451e-06,
      "logits/chosen": 0.8049997091293335,
      "logits/rejected": 0.8617580533027649,
      "logps/chosen": -82.61628723144531,
      "logps/rejected": -83.92156982421875,
      "loss": 0.01,
      "rewards/accuracies": 0.3187499940395355,
      "rewards/chosen": 0.00046920054592192173,
      "rewards/margins": 0.000604915083386004,
      "rewards/rejected": -0.00013571445015259087,
      "step": 70
    },
    {
      "epoch": 0.84,
      "learning_rate": 3.5876642162051833e-06,
      "logits/chosen": 0.7660447955131531,
      "logits/rejected": 0.8296969532966614,
      "logps/chosen": -105.82564544677734,
      "logps/rejected": -104.93913269042969,
      "loss": 0.01,
      "rewards/accuracies": 0.23749999701976776,
      "rewards/chosen": -0.0001158899613074027,
      "rewards/margins": -0.00039075990207493305,
      "rewards/rejected": 0.0002748699625954032,
      "step": 80
    },
    {
      "epoch": 0.94,
      "learning_rate": 3.1581319239114983e-06,
      "logits/chosen": 0.8339036107063293,
      "logits/rejected": 0.8548374176025391,
      "logps/chosen": -77.63902282714844,
      "logps/rejected": -90.03132629394531,
      "loss": 0.0099,
      "rewards/accuracies": 0.36250001192092896,
      "rewards/chosen": 0.0005246674409136176,
      "rewards/margins": 0.0012112573022022843,
      "rewards/rejected": -0.0006865898030810058,
      "step": 90
    },
    {
      "epoch": 1.04,
      "learning_rate": 2.7064483636808314e-06,
      "logits/chosen": 0.7685378789901733,
      "logits/rejected": 0.8903138041496277,
      "logps/chosen": -91.08631896972656,
      "logps/rejected": -101.58308410644531,
      "loss": 0.01,
      "rewards/accuracies": 0.29374998807907104,
      "rewards/chosen": -0.00034642056562006474,
      "rewards/margins": 0.00024903417215682566,
      "rewards/rejected": -0.0005954547086730599,
      "step": 100
    },
    {
      "epoch": 1.04,
      "eval_logits/chosen": 0.8011398911476135,
      "eval_logits/rejected": 0.8187842965126038,
      "eval_logps/chosen": -91.76709747314453,
      "eval_logps/rejected": -94.26233673095703,
      "eval_loss": 0.009981811977922916,
      "eval_rewards/accuracies": 0.25,
      "eval_rewards/chosen": -0.00039719167398288846,
      "eval_rewards/margins": 0.00025612558238208294,
      "eval_rewards/rejected": -0.0006533172563649714,
      "eval_runtime": 274.2663,
      "eval_samples_per_second": 7.292,
      "eval_steps_per_second": 0.456,
      "step": 100
    },
    {
      "epoch": 1.15,
      "learning_rate": 2.2478162071993296e-06,
      "logits/chosen": 0.8606807589530945,
      "logits/rejected": 0.9067083597183228,
      "logps/chosen": -103.346435546875,
      "logps/rejected": -102.74019622802734,
      "loss": 0.0099,
      "rewards/accuracies": 0.3125,
      "rewards/chosen": 0.0002891735057346523,
      "rewards/margins": 0.0007119966903701425,
      "rewards/rejected": -0.00042282306822016835,
      "step": 110
    },
    {
      "epoch": 1.25,
      "learning_rate": 1.797672000566077e-06,
      "logits/chosen": 0.8887661099433899,
      "logits/rejected": 0.8524330258369446,
      "logps/chosen": -92.60103607177734,
      "logps/rejected": -70.87162780761719,
      "loss": 0.0099,
      "rewards/accuracies": 0.2874999940395355,
      "rewards/chosen": 0.00012959113519173115,
      "rewards/margins": 0.0010213626082986593,
      "rewards/rejected": -0.0008917713421396911,
      "step": 120
    },
    {
      "epoch": 1.36,
      "learning_rate": 1.3711666042227772e-06,
      "logits/chosen": 0.8812958598136902,
      "logits/rejected": 0.9191433191299438,
      "logps/chosen": -99.47935485839844,
      "logps/rejected": -102.90433502197266,
      "loss": 0.0099,
      "rewards/accuracies": 0.33125001192092896,
      "rewards/chosen": -0.00027921958826482296,
      "rewards/margins": 0.00039964643656276166,
      "rewards/rejected": -0.0006788660539314151,
      "step": 130
    },
    {
      "epoch": 1.46,
      "learning_rate": 9.826552484321086e-07,
      "logits/chosen": 0.8100296258926392,
      "logits/rejected": 0.8721168637275696,
      "logps/chosen": -98.98599243164062,
      "logps/rejected": -95.76078033447266,
      "loss": 0.0099,
      "rewards/accuracies": 0.29374998807907104,
      "rewards/chosen": -0.0003388571203686297,
      "rewards/margins": 0.0009376562084071338,
      "rewards/rejected": -0.0012765133287757635,
      "step": 140
    },
    {
      "epoch": 1.57,
      "learning_rate": 6.452143679117965e-07,
      "logits/chosen": 0.8696478009223938,
      "logits/rejected": 0.884810745716095,
      "logps/chosen": -74.74392700195312,
      "logps/rejected": -80.85291290283203,
      "loss": 0.0099,
      "rewards/accuracies": 0.2874999940395355,
      "rewards/chosen": -3.058182119275443e-05,
      "rewards/margins": 0.0010159575613215566,
      "rewards/rejected": -0.0010465392842888832,
      "step": 150
    },
    {
      "epoch": 1.67,
      "learning_rate": 3.7020147790418266e-07,
      "logits/chosen": 0.8866029977798462,
      "logits/rejected": 0.8834725618362427,
      "logps/chosen": -105.7184066772461,
      "logps/rejected": -99.87519836425781,
      "loss": 0.0099,
      "rewards/accuracies": 0.28125,
      "rewards/chosen": -0.0007261586142703891,
      "rewards/margins": 2.431169559713453e-05,
      "rewards/rejected": -0.000750470208004117,
      "step": 160
    },
    {
      "epoch": 1.78,
      "learning_rate": 1.6687290528135725e-07,
      "logits/chosen": 0.7714171409606934,
      "logits/rejected": 0.8409671783447266,
      "logps/chosen": -102.75898742675781,
      "logps/rejected": -96.38053894042969,
      "loss": 0.0099,
      "rewards/accuracies": 0.3375000059604645,
      "rewards/chosen": -0.0006845382740721107,
      "rewards/margins": 0.00033002972486428916,
      "rewards/rejected": -0.0010145680280402303,
      "step": 170
    },
    {
      "epoch": 1.88,
      "learning_rate": 4.207224101311247e-08,
      "logits/chosen": 0.8822728395462036,
      "logits/rejected": 0.935335636138916,
      "logps/chosen": -119.57994079589844,
      "logps/rejected": -112.05470275878906,
      "loss": 0.0099,
      "rewards/accuracies": 0.3125,
      "rewards/chosen": -0.0007582681137137115,
      "rewards/margins": 0.0003905483172275126,
      "rewards/rejected": -0.001148816430941224,
      "step": 180
    },
    {
      "epoch": 1.98,
      "learning_rate": 0.0,
      "logits/chosen": 0.8246952295303345,
      "logits/rejected": 0.8706514239311218,
      "logps/chosen": -73.35179138183594,
      "logps/rejected": -76.91303253173828,
      "loss": 0.0099,
      "rewards/accuracies": 0.25,
      "rewards/chosen": -0.0005546126631088555,
      "rewards/margins": 0.0004530520236585289,
      "rewards/rejected": -0.0010076647158712149,
      "step": 190
    },
    {
      "epoch": 1.98,
      "step": 190,
      "total_flos": 0.0,
      "train_loss": 0.00995884225458691,
      "train_runtime": 2568.9627,
      "train_samples_per_second": 4.759,
      "train_steps_per_second": 0.074
    }
  ],
  "logging_steps": 10,
  "max_steps": 190,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}