phi-2-gpo-test-longest-iter-v1-1 / trainer_state.json
BraylonDash's picture
Model save
92a8554 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.968,
"eval_steps": 100,
"global_step": 248,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02,
"learning_rate": 2.0000000000000002e-07,
"logits/chosen": 0.4729592502117157,
"logits/rejected": 0.38554269075393677,
"logps/chosen": -213.08737182617188,
"logps/rejected": -203.01974487304688,
"loss": 0.0016,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.16,
"learning_rate": 2.0000000000000003e-06,
"logits/chosen": 0.15001463890075684,
"logits/rejected": 0.20593884587287903,
"logps/chosen": -161.55821228027344,
"logps/rejected": -135.95449829101562,
"loss": 0.0015,
"rewards/accuracies": 0.3819444477558136,
"rewards/chosen": 3.554481372702867e-05,
"rewards/margins": 0.0017159796552732587,
"rewards/rejected": -0.001680435030721128,
"step": 10
},
{
"epoch": 0.32,
"learning_rate": 4.000000000000001e-06,
"logits/chosen": 0.09242797642946243,
"logits/rejected": 0.04898233711719513,
"logps/chosen": -181.7728271484375,
"logps/rejected": -151.2571563720703,
"loss": 0.0017,
"rewards/accuracies": 0.375,
"rewards/chosen": -0.0012837719405069947,
"rewards/margins": -0.0005185201880522072,
"rewards/rejected": -0.0007652518688701093,
"step": 20
},
{
"epoch": 0.48,
"learning_rate": 4.993800445762451e-06,
"logits/chosen": 0.1974470317363739,
"logits/rejected": 0.24875693023204803,
"logps/chosen": -175.55596923828125,
"logps/rejected": -143.77584838867188,
"loss": 0.0019,
"rewards/accuracies": 0.39375001192092896,
"rewards/chosen": -0.003100383561104536,
"rewards/margins": -0.0019143905956298113,
"rewards/rejected": -0.0011859927326440811,
"step": 30
},
{
"epoch": 0.64,
"learning_rate": 4.944388344834205e-06,
"logits/chosen": 0.1500740945339203,
"logits/rejected": 0.2011214792728424,
"logps/chosen": -181.77581787109375,
"logps/rejected": -164.48788452148438,
"loss": 0.0016,
"rewards/accuracies": 0.3499999940395355,
"rewards/chosen": -0.0005139185814186931,
"rewards/margins": 0.0008601135341450572,
"rewards/rejected": -0.0013740319991484284,
"step": 40
},
{
"epoch": 0.8,
"learning_rate": 4.8465431931347904e-06,
"logits/chosen": 0.08030920475721359,
"logits/rejected": 0.15738067030906677,
"logps/chosen": -169.9337158203125,
"logps/rejected": -144.90228271484375,
"loss": 0.0017,
"rewards/accuracies": 0.375,
"rewards/chosen": -0.0008664874476380646,
"rewards/margins": -0.0005486059235408902,
"rewards/rejected": -0.00031788164051249623,
"step": 50
},
{
"epoch": 0.96,
"learning_rate": 4.702203692102539e-06,
"logits/chosen": 0.1897524893283844,
"logits/rejected": 0.17828692495822906,
"logps/chosen": -173.4358367919922,
"logps/rejected": -145.06625366210938,
"loss": 0.0016,
"rewards/accuracies": 0.42500001192092896,
"rewards/chosen": -0.0021182517521083355,
"rewards/margins": 0.0011023099068552256,
"rewards/rejected": -0.0032205611933022738,
"step": 60
},
{
"epoch": 1.12,
"learning_rate": 4.514229781074239e-06,
"logits/chosen": 0.2607804834842682,
"logits/rejected": 0.23584774136543274,
"logps/chosen": -188.595947265625,
"logps/rejected": -160.49171447753906,
"loss": 0.0018,
"rewards/accuracies": 0.34375,
"rewards/chosen": -0.0020389086566865444,
"rewards/margins": -0.001390365301631391,
"rewards/rejected": -0.0006485433550551534,
"step": 70
},
{
"epoch": 1.28,
"learning_rate": 4.286345970517195e-06,
"logits/chosen": 0.17552152276039124,
"logits/rejected": 0.24562516808509827,
"logps/chosen": -165.12591552734375,
"logps/rejected": -137.75808715820312,
"loss": 0.0016,
"rewards/accuracies": 0.44999998807907104,
"rewards/chosen": -0.00046771267079748213,
"rewards/margins": 0.0003761005064006895,
"rewards/rejected": -0.0008438131771981716,
"step": 80
},
{
"epoch": 1.44,
"learning_rate": 4.023067544670082e-06,
"logits/chosen": 0.16295495629310608,
"logits/rejected": 0.1336701214313507,
"logps/chosen": -176.12887573242188,
"logps/rejected": -145.37673950195312,
"loss": 0.0016,
"rewards/accuracies": 0.42500001192092896,
"rewards/chosen": -0.0007470982382073998,
"rewards/margins": 0.0005909097963012755,
"rewards/rejected": -0.001338008209131658,
"step": 90
},
{
"epoch": 1.6,
"learning_rate": 3.7296110958116845e-06,
"logits/chosen": 0.18624618649482727,
"logits/rejected": 0.15213565528392792,
"logps/chosen": -177.76405334472656,
"logps/rejected": -152.70240783691406,
"loss": 0.0016,
"rewards/accuracies": 0.4000000059604645,
"rewards/chosen": 0.000859270163346082,
"rewards/margins": 0.0012418597470968962,
"rewards/rejected": -0.0003825899329967797,
"step": 100
},
{
"epoch": 1.6,
"eval_logits/chosen": -0.004967109765857458,
"eval_logits/rejected": 0.09325645118951797,
"eval_logps/chosen": -306.3399353027344,
"eval_logps/rejected": -278.6739501953125,
"eval_loss": 0.0021029466297477484,
"eval_rewards/accuracies": 0.4909999966621399,
"eval_rewards/chosen": 0.0005270715337246656,
"eval_rewards/margins": 0.00038576460792683065,
"eval_rewards/rejected": 0.00014130691124591976,
"eval_runtime": 412.0589,
"eval_samples_per_second": 4.854,
"eval_steps_per_second": 1.213,
"step": 100
},
{
"epoch": 1.76,
"learning_rate": 3.4117911628292944e-06,
"logits/chosen": 0.18669767677783966,
"logits/rejected": 0.193131685256958,
"logps/chosen": -186.2015380859375,
"logps/rejected": -160.41812133789062,
"loss": 0.0017,
"rewards/accuracies": 0.35624998807907104,
"rewards/chosen": -0.0022148210555315018,
"rewards/margins": -0.0006464887410402298,
"rewards/rejected": -0.0015683325473219156,
"step": 110
},
{
"epoch": 1.92,
"learning_rate": 3.075905022087675e-06,
"logits/chosen": 0.07856817543506622,
"logits/rejected": 0.13352298736572266,
"logps/chosen": -174.9341278076172,
"logps/rejected": -154.233642578125,
"loss": 0.0017,
"rewards/accuracies": 0.40625,
"rewards/chosen": 0.0001242739672306925,
"rewards/margins": 0.0005846145795658231,
"rewards/rejected": -0.00046034049591980875,
"step": 120
},
{
"epoch": 2.08,
"learning_rate": 2.728607913349464e-06,
"logits/chosen": 0.19492605328559875,
"logits/rejected": 0.16430191695690155,
"logps/chosen": -174.35968017578125,
"logps/rejected": -150.935302734375,
"loss": 0.0016,
"rewards/accuracies": 0.45625001192092896,
"rewards/chosen": -0.0011604964965954423,
"rewards/margins": 0.0007132277823984623,
"rewards/rejected": -0.0018737241625785828,
"step": 130
},
{
"epoch": 2.24,
"learning_rate": 2.376781173017589e-06,
"logits/chosen": 0.2628365159034729,
"logits/rejected": 0.19500017166137695,
"logps/chosen": -187.58023071289062,
"logps/rejected": -166.26370239257812,
"loss": 0.0017,
"rewards/accuracies": 0.35624998807907104,
"rewards/chosen": -0.0010837314184755087,
"rewards/margins": -0.00033805653220042586,
"rewards/rejected": -0.0007456748280674219,
"step": 140
},
{
"epoch": 2.4,
"learning_rate": 2.0273958875043877e-06,
"logits/chosen": 0.14645084738731384,
"logits/rejected": 0.1681375354528427,
"logps/chosen": -157.08926391601562,
"logps/rejected": -131.46810913085938,
"loss": 0.0016,
"rewards/accuracies": 0.45625001192092896,
"rewards/chosen": -0.002456915332004428,
"rewards/margins": 0.0014492868212983012,
"rewards/rejected": -0.0039062027353793383,
"step": 150
},
{
"epoch": 2.56,
"learning_rate": 1.6873747682962393e-06,
"logits/chosen": 0.16395077109336853,
"logits/rejected": 0.14529384672641754,
"logps/chosen": -188.40296936035156,
"logps/rejected": -158.95834350585938,
"loss": 0.0016,
"rewards/accuracies": 0.40625,
"rewards/chosen": 0.00022082138457335532,
"rewards/margins": 0.000981360673904419,
"rewards/rejected": -0.0007605393184348941,
"step": 160
},
{
"epoch": 2.72,
"learning_rate": 1.363454985517803e-06,
"logits/chosen": 0.21595752239227295,
"logits/rejected": 0.17811095714569092,
"logps/chosen": -178.89500427246094,
"logps/rejected": -150.37576293945312,
"loss": 0.0016,
"rewards/accuracies": 0.4437499940395355,
"rewards/chosen": 0.0017019693041220307,
"rewards/margins": 0.0014590112259611487,
"rewards/rejected": 0.00024295765615534037,
"step": 170
},
{
"epoch": 2.88,
"learning_rate": 1.062054677808238e-06,
"logits/chosen": 0.11008661985397339,
"logits/rejected": 0.1656235158443451,
"logps/chosen": -166.90554809570312,
"logps/rejected": -149.75042724609375,
"loss": 0.0017,
"rewards/accuracies": 0.4000000059604645,
"rewards/chosen": -0.0004684976884163916,
"rewards/margins": 0.0005998688284307718,
"rewards/rejected": -0.0010683666914701462,
"step": 180
},
{
"epoch": 3.04,
"learning_rate": 7.891457834794711e-07,
"logits/chosen": 0.2820424437522888,
"logits/rejected": 0.16322749853134155,
"logps/chosen": -180.0753173828125,
"logps/rejected": -143.658203125,
"loss": 0.0016,
"rewards/accuracies": 0.4312500059604645,
"rewards/chosen": 1.5354249853771762e-06,
"rewards/margins": 0.0018281619995832443,
"rewards/rejected": -0.0018266268307343125,
"step": 190
},
{
"epoch": 3.2,
"learning_rate": 5.501357126768117e-07,
"logits/chosen": 0.15994948148727417,
"logits/rejected": 0.14330127835273743,
"logps/chosen": -183.79759216308594,
"logps/rejected": -155.71310424804688,
"loss": 0.0017,
"rewards/accuracies": 0.4375,
"rewards/chosen": 0.001569538377225399,
"rewards/margins": 0.0009444955503568053,
"rewards/rejected": 0.000625042652245611,
"step": 200
},
{
"epoch": 3.2,
"eval_logits/chosen": -0.009462343528866768,
"eval_logits/rejected": 0.08856771141290665,
"eval_logps/chosen": -306.4609375,
"eval_logps/rejected": -278.74566650390625,
"eval_loss": 0.0021845391020178795,
"eval_rewards/accuracies": 0.4950000047683716,
"eval_rewards/chosen": -0.0006826075841672719,
"eval_rewards/margins": -0.00010681045387173072,
"eval_rewards/rejected": -0.0005757971666753292,
"eval_runtime": 412.1683,
"eval_samples_per_second": 4.852,
"eval_steps_per_second": 1.213,
"step": 200
},
{
"epoch": 3.36,
"learning_rate": 3.4976020508682345e-07,
"logits/chosen": 0.1456301361322403,
"logits/rejected": 0.2356918305158615,
"logps/chosen": -183.83010864257812,
"logps/rejected": -155.9720458984375,
"loss": 0.0015,
"rewards/accuracies": 0.44999998807907104,
"rewards/chosen": 0.0012806833256036043,
"rewards/margins": 0.0026433460880070925,
"rewards/rejected": -0.0013626629952341318,
"step": 210
},
{
"epoch": 3.52,
"learning_rate": 1.9198949610721273e-07,
"logits/chosen": 0.11089984327554703,
"logits/rejected": 0.18088462948799133,
"logps/chosen": -183.67636108398438,
"logps/rejected": -146.19009399414062,
"loss": 0.0015,
"rewards/accuracies": 0.46875,
"rewards/chosen": 0.0020343190990388393,
"rewards/margins": 0.0024411864578723907,
"rewards/rejected": -0.00040686698048375547,
"step": 220
},
{
"epoch": 3.68,
"learning_rate": 7.994965069994143e-08,
"logits/chosen": 0.15200337767601013,
"logits/rejected": 0.0883503332734108,
"logps/chosen": -152.91159057617188,
"logps/rejected": -137.67068481445312,
"loss": 0.0017,
"rewards/accuracies": 0.4437499940395355,
"rewards/chosen": -0.0002739583433140069,
"rewards/margins": 0.00024719498469494283,
"rewards/rejected": -0.0005211535608395934,
"step": 230
},
{
"epoch": 3.84,
"learning_rate": 1.5860623616664183e-08,
"logits/chosen": 0.21818551421165466,
"logits/rejected": 0.26517254114151,
"logps/chosen": -185.5134735107422,
"logps/rejected": -156.71102905273438,
"loss": 0.0018,
"rewards/accuracies": 0.3125,
"rewards/chosen": -0.0007516987388953567,
"rewards/margins": -0.001160649349913001,
"rewards/rejected": 0.0004089508147444576,
"step": 240
},
{
"epoch": 3.97,
"step": 248,
"total_flos": 0.0,
"train_loss": 0.0016499216942447088,
"train_runtime": 2658.6406,
"train_samples_per_second": 1.505,
"train_steps_per_second": 0.093
}
],
"logging_steps": 10,
"max_steps": 248,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}